import sqlite3 import shutil # 连接到原始数据库 conn = sqlite3.connect('file_data.db') cursor = conn.cursor() # 连接到新的数据库 conn_new = sqlite3.connect('file_data_2.db') cursor_new = conn_new.cursor() # 创建新表格 cursor_new.execute(''' CREATE TABLE IF NOT EXISTS files ( id TEXT PRIMARY KEY, path TEXT, name TEXT, type TEXT, size INTEGER, modification_time TIMESTAMP, md5 TEXT, source_database TEXT ) ''') # 查找相同 MD5 哈希值的数据 cursor.execute('SELECT md5 FROM files GROUP BY md5 HAVING COUNT(md5) > 1') duplicate_md5_rows = cursor.fetchall() for md5_row in duplicate_md5_rows: md5_value = md5_row[0] cursor.execute('SELECT * FROM files WHERE md5=?', (md5_value,)) duplicate_files = cursor.fetchall() # 检查相同的 ID,避免重复插入 existing_ids = set() for file_data in duplicate_files: id_value = file_data[0] if id_value not in existing_ids: # 检查新数据库中是否已存在相同ID的记录 cursor_new.execute('SELECT id FROM files WHERE id=?', (id_value,)) existing_id = cursor_new.fetchone() if not existing_id: file_data_with_source = list(file_data) file_data_with_source.append('file_data.db') # 添加数据来源 cursor_new.execute( 'INSERT INTO files (id, path, name, type, size, modification_time, md5, source_database) VALUES (?, ?, ?, ?, ?, ?, ?, ?)', file_data_with_source) existing_ids.add(id_value) # 将已存在的 ID 加入集合 conn_new.commit() # 关闭数据库连接 conn.close() conn_new.close()