"""Index the files under a directory tree into a local SQLite database.

Every file reached by the walk is recorded once in the ``files`` table of
``file_data.db`` with its path, base name, extension, size, modification
time and MD5 digest. Files already present in the table (keyed by full
path) are left untouched and are not re-hashed.
"""
import concurrent.futures
import hashlib
import os
import sqlite3

# Schema of the single table used by this script; ``id`` holds the full path.
_CREATE_TABLE_SQL = '''
    CREATE TABLE IF NOT EXISTS files (
        id TEXT PRIMARY KEY,
        path TEXT,
        name TEXT,
        type TEXT,
        size INTEGER,
        modification_time TIMESTAMP,
        md5 TEXT
    )
'''

_INSERT_SQL = (
    'INSERT INTO files (id, path, name, type, size, modification_time, md5) '
    'VALUES (?, ?, ?, ?, ?, ?, ?)'
)

# Number of inserts grouped into one transaction. Committing after every
# single row makes SQLite sync to disk per file, which dominates run time
# on large trees.
_COMMIT_BATCH_SIZE = 500

# Connect to the SQLite database and create the table.
conn = sqlite3.connect('file_data.db')
cursor = conn.cursor()
cursor.execute(_CREATE_TABLE_SQL)


def calculate_md5(file_path, chunk_size=1024 * 1024):
    """Return the hexadecimal MD5 digest of the file at *file_path*.

    The file is read in blocks of *chunk_size* bytes so that arbitrarily
    large files can be hashed without loading them into memory.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    md5_hash = hashlib.md5()
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            md5_hash.update(chunk)
    return md5_hash.hexdigest()


def _is_ignored(path, ignore_list):
    """Return True if any entry of *ignore_list* occurs as a substring of *path*."""
    return any(ignore_item in path for ignore_item in ignore_list)


def insert_file_data(directory, ignore_list, connection=None):
    """Walk *directory* and insert a row for every file not yet in the table.

    Args:
        directory: root of the tree to scan.
        ignore_list: strings matched as plain substrings against the full
            path; a file whose path contains any of them is skipped. Because
            the extension is part of the path, an entry such as ``'jar'``
            also skips files with that extension.
        connection: SQLite connection to write to. Defaults to the
            module-level ``conn``.

    Files that cannot be stat'ed are reported as missing; files that cannot
    be read for hashing are reported and skipped. Neither aborts the scan.
    """
    db = conn if connection is None else connection
    cur = db.cursor()
    cur.execute(_CREATE_TABLE_SQL)  # idempotent; lets callers pass a fresh database

    pending = 0
    try:
        for root, dirs, files in os.walk(directory):
            # Prune ignored directories in place so os.walk never descends
            # into them. Every path below such a directory contains the same
            # substring, so all of its files would be skipped anyway.
            dirs[:] = [
                d for d in dirs
                if not _is_ignored(os.path.join(root, d), ignore_list)
            ]

            for filename in files:
                file_path = os.path.join(root, filename)
                if _is_ignored(file_path, ignore_list):
                    continue

                # A single stat both checks existence and fetches metadata,
                # avoiding an exists()/stat() race.
                try:
                    stat_info = os.stat(file_path)
                except OSError:
                    print(f"文件不存在: {file_path}")
                    continue

                # Skip files already recorded under the same id, before
                # paying for the hash.
                cur.execute('SELECT id FROM files WHERE id=?', (file_path,))
                if cur.fetchone():
                    continue

                try:
                    md5_digest = calculate_md5(file_path)
                except OSError as exc:
                    print(f"无法读取文件,已跳过: {file_path} ({exc})")
                    continue

                file_name, file_extension = os.path.splitext(filename)
                file_type = file_extension[1:]  # drop the leading dot
                cur.execute(
                    _INSERT_SQL,
                    (file_path, file_path, file_name, file_type,
                     stat_info.st_size, stat_info.st_mtime, md5_digest),
                )
                pending += 1
                if pending >= _COMMIT_BATCH_SIZE:
                    db.commit()
                    pending = 0
    finally:
        # Persist whatever was inserted, even if the scan was interrupted.
        db.commit()


ignore_list = [
    'node_modules',
    '.idea',
    'jar',
    '.git',
    '.DS_Store',
    'CleanMyMac X.app',
    '.pnpm-store',
    'IINA.app',
    'venv',
    'python',
    '/Volumes/16T/newFiles/开放/可执行文件/bin/bit',
]

# Alternative scan roots (commented out):
# target_directory = '/path/to/your/directory'
# target_directory = '/Users/honghaitao/PycharmProjects/pythonProject/diff_file/file'
# target_directory = '/Volumes/16T/柚木'
# target_directory = '/Volumes/20T/待归类'
# target_directory = '/Volumes/16T/电视剧'
# target_directory = '/Volumes/16T/电影'
# target_directory = '/Volumes/16T/电子书'
# target_directory = '/Volumes/16T/工作'
target_directory = '/Volumes/16T'

if __name__ == "__main__":
    try:
        insert_file_data(target_directory, ignore_list)
    finally:
        # Close the database connection.
        conn.close()