getDeff.py 1.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
  1. import sqlite3
  2. import shutil
  3. # 连接到原始数据库
  4. conn = sqlite3.connect('file_data.db')
  5. cursor = conn.cursor()
  6. # 连接到新的数据库
  7. conn_new = sqlite3.connect('file_data_2.db')
  8. cursor_new = conn_new.cursor()
  9. # 创建新表格
  10. cursor_new.execute('''
  11. CREATE TABLE IF NOT EXISTS files (
  12. id TEXT PRIMARY KEY,
  13. path TEXT,
  14. name TEXT,
  15. type TEXT,
  16. size INTEGER,
  17. modification_time TIMESTAMP,
  18. md5 TEXT,
  19. source_database TEXT
  20. )
  21. ''')
  22. # 查找相同 MD5 哈希值的数据
  23. cursor.execute('SELECT md5 FROM files GROUP BY md5 HAVING COUNT(md5) > 1')
  24. duplicate_md5_rows = cursor.fetchall()
  25. for md5_row in duplicate_md5_rows:
  26. md5_value = md5_row[0]
  27. cursor.execute('SELECT * FROM files WHERE md5=?', (md5_value,))
  28. duplicate_files = cursor.fetchall()
  29. # 检查相同的 ID,避免重复插入
  30. existing_ids = set()
  31. for file_data in duplicate_files:
  32. id_value = file_data[0]
  33. if id_value not in existing_ids:
  34. # 检查新数据库中是否已存在相同ID的记录
  35. cursor_new.execute('SELECT id FROM files WHERE id=?', (id_value,))
  36. existing_id = cursor_new.fetchone()
  37. if not existing_id:
  38. file_data_with_source = list(file_data)
  39. file_data_with_source.append('file_data.db') # 添加数据来源
  40. cursor_new.execute(
  41. 'INSERT INTO files (id, path, name, type, size, modification_time, md5, source_database) VALUES (?, ?, ?, ?, ?, ?, ?, ?)',
  42. file_data_with_source)
  43. existing_ids.add(id_value) # 将已存在的 ID 加入集合
  44. conn_new.commit()
  45. # 关闭数据库连接
  46. conn.close()
  47. conn_new.close()