1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859 |
- import sqlite3
- import shutil
- # 连接到原始数据库
- conn = sqlite3.connect('file_data.db')
- cursor = conn.cursor()
- # 连接到新的数据库
- conn_new = sqlite3.connect('file_data_2.db')
- cursor_new = conn_new.cursor()
- # 创建新表格
- cursor_new.execute('''
- CREATE TABLE IF NOT EXISTS files (
- id TEXT PRIMARY KEY,
- path TEXT,
- name TEXT,
- type TEXT,
- size INTEGER,
- modification_time TIMESTAMP,
- md5 TEXT,
- source_database TEXT
- )
- ''')
- # 查找相同 MD5 哈希值的数据
- cursor.execute('SELECT md5 FROM files GROUP BY md5 HAVING COUNT(md5) > 1')
- duplicate_md5_rows = cursor.fetchall()
- for md5_row in duplicate_md5_rows:
- md5_value = md5_row[0]
- cursor.execute('SELECT * FROM files WHERE md5=?', (md5_value,))
- duplicate_files = cursor.fetchall()
- # 检查相同的 ID,避免重复插入
- existing_ids = set()
- for file_data in duplicate_files:
- id_value = file_data[0]
- if id_value not in existing_ids:
- # 检查新数据库中是否已存在相同ID的记录
- cursor_new.execute('SELECT id FROM files WHERE id=?', (id_value,))
- existing_id = cursor_new.fetchone()
- if not existing_id:
- file_data_with_source = list(file_data)
- file_data_with_source.append('file_data.db') # 添加数据来源
- cursor_new.execute(
- 'INSERT INTO files (id, path, name, type, size, modification_time, md5, source_database) VALUES (?, ?, ?, ?, ?, ?, ?, ?)',
- file_data_with_source)
- existing_ids.add(id_value) # 将已存在的 ID 加入集合
- conn_new.commit()
- # 关闭数据库连接
- conn.close()
- conn_new.close()
|