# Index every file under a directory tree into a SQLite database (path, name, type, size, mtime, MD5).
import concurrent.futures
import hashlib
import os
import sqlite3
# Schema of the `files` table; CREATE ... IF NOT EXISTS makes re-running the script harmless.
_FILES_TABLE_DDL = '''
    CREATE TABLE IF NOT EXISTS files (
        id TEXT PRIMARY KEY,
        path TEXT,
        name TEXT,
        type TEXT,
        size INTEGER,
        modification_time TIMESTAMP,
        md5 TEXT
    )
'''

# Connect to the SQLite database file (created on first use) and keep a
# module-level cursor that the rest of the script uses for its statements.
conn = sqlite3.connect('file_data.db')
cursor = conn.cursor()

# Create the table if it is not there yet.
cursor.execute(_FILES_TABLE_DDL)
def calculate_md5(file_path, chunk_size=65536):
    """Return the hexadecimal MD5 digest of the file at *file_path*.

    The file is opened in binary mode and fed to the hash piece by piece, so
    at most *chunk_size* bytes of file content are held in memory at a time.

    Args:
        file_path: Path of the file to hash.
        chunk_size: Number of bytes requested per read. The digest does not
            depend on this value; it only controls how many read calls are
            made. Must be a positive integer.

    Returns:
        The digest as a 32-character hexadecimal string.

    Raises:
        ValueError: If *chunk_size* is not positive.
        OSError: If the file cannot be opened or read.
    """
    # read(0) returns b"" immediately, which would end the loop below at once
    # and silently yield the digest of empty input; reject it up front.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size!r}")

    md5_hash = hashlib.md5()
    with open(file_path, "rb") as f:
        # iter(callable, sentinel) keeps reading until read() returns b"" (EOF).
        for chunk in iter(lambda: f.read(chunk_size), b""):
            md5_hash.update(chunk)
    return md5_hash.hexdigest()
def _is_ignored(path, ignore_list):
    """Return True when any entry of *ignore_list* occurs as a substring of *path*."""
    return any(ignore_item in path for ignore_item in ignore_list)


def insert_file_data(directory, ignore_list):
    """Walk *directory* and insert one row per not-yet-indexed file into `files`.

    Uses the module-level ``cursor`` and ``conn`` for all database access and
    ``calculate_md5`` for hashing. The full file path serves as the row id, so
    a path that already has a row is skipped without being re-hashed.

    Args:
        directory: Root of the directory tree to scan.
        ignore_list: Iterable of strings. A file is skipped when any entry is
            a substring of its full path (this also covers extensions and
            directory names, since both are part of the path).

    Files that cannot be stat'ed or read are reported on stdout and skipped,
    so a single bad entry does not abort the whole scan.
    """
    for root, dirs, files in os.walk(directory):
        # Prune ignored directories in place so os.walk does not descend into
        # them. Every file below such a directory has the directory path as a
        # prefix and would be skipped by the per-file check anyway.
        dirs[:] = [
            d for d in dirs
            if not _is_ignored(os.path.join(root, d), ignore_list)
        ]

        for file in files:
            file_path = os.path.join(root, file)
            if _is_ignored(file_path, ignore_list):
                continue

            # Stat directly instead of exists()-then-stat(): one system call
            # and no window in which the file can vanish between the two.
            try:
                stat_info = os.stat(file_path)
            except OSError:
                # Same message the exists() check used to print ("file does not exist").
                print(f"文件不存在: {file_path}")
                continue

            # Skip paths that already have a row (the path is the primary key).
            cursor.execute('SELECT id FROM files WHERE id=?', (file_path,))
            if cursor.fetchone():
                continue

            try:
                md5_digest = calculate_md5(file_path)
            except OSError as exc:
                # "Cannot read file": e.g. permission denied or an I/O error.
                print(f"无法读取文件: {file_path} ({exc})")
                continue

            file_name, file_extension = os.path.splitext(file)
            file_type = file_extension[1:]  # drop the leading dot
            cursor.execute(
                'INSERT INTO files (id, path, name, type, size, modification_time, md5) VALUES (?, ?, ?, ?, ?, ?, ?)',
                (file_path, file_path, file_name, file_type,
                 stat_info.st_size, stat_info.st_mtime, md5_digest))
            # Commit after every insert: an interrupted scan keeps all rows
            # written so far, and a later run skips them via the id check.
            conn.commit()
# Entries to skip during the scan. insert_file_data matches each entry as a
# plain substring of the full file path, so an entry may be a directory name,
# a file name, an extension, or a whole path.
ignore_list = [
    'node_modules',
    '.idea',
    'jar',
    '.git',
    '.DS_Store',
    'CleanMyMac X.app',
    '.pnpm-store',
    'IINA.app',
    'venv',
    'python',
    '/Volumes/16T/newFiles/开放/可执行文件/bin/bit',
]

# Alternative values for target_directory, currently commented out:
# target_directory = '/path/to/your/directory'
# target_directory = '/Users/honghaitao/PycharmProjects/pythonProject/diff_file/file'
# target_directory = '/Volumes/16T/柚木'
# target_directory = '/Volumes/20T/待归类'
# target_directory = '/Volumes/16T/电视剧'
# target_directory = '/Volumes/16T/电影'
# target_directory = '/Volumes/16T/电子书'
# target_directory = '/Volumes/16T/工作'
target_directory = '/Volumes/16T'

try:
    insert_file_data(target_directory, ignore_list)
finally:
    # Close the database connection even when the scan raises or is interrupted.
    conn.close()