# diff.py (2.2 KB)
  1. import os
  2. import sqlite3
  3. import hashlib
  4. import concurrent.futures
  5. # 连接到 SQLite 数据库并创建表
  6. conn = sqlite3.connect('file_data.db')
  7. cursor = conn.cursor()
  8. # 创建表格
  9. cursor.execute('''
  10. CREATE TABLE IF NOT EXISTS files (
  11. id TEXT PRIMARY KEY,
  12. path TEXT,
  13. name TEXT,
  14. type TEXT,
  15. size INTEGER,
  16. modification_time TIMESTAMP,
  17. md5 TEXT
  18. )
  19. ''')
  20. def calculate_md5(file_path):
  21. md5_hash = hashlib.md5()
  22. with open(file_path, "rb") as f:
  23. for chunk in iter(lambda: f.read(4096), b""):
  24. md5_hash.update(chunk)
  25. return md5_hash.hexdigest()
  26. def insert_file_data(directory):
  27. for root, _, files in os.walk(directory):
  28. for file in files:
  29. file_path = os.path.join(root, file)
  30. file_name, file_extension = os.path.splitext(file)
  31. file_type = file_extension[1:] # Remove the leading dot from extension
  32. # Get file stats
  33. stat_info = os.stat(file_path)
  34. size = stat_info.st_size
  35. modification_time = stat_info.st_mtime
  36. # Calculate MD5 hash
  37. # md5_hash = hashlib.md5()
  38. # with open(file_path, "rb") as f:
  39. # for chunk in iter(lambda: f.read(4096), b""):
  40. # md5_hash.update(chunk)
  41. # md5_digest = md5_hash.hexdigest()
  42. # 检查是否存在相同ID
  43. cursor.execute('SELECT id FROM files WHERE id=?', (file_path,))
  44. existing_id = cursor.fetchone()
  45. if not existing_id:
  46. md5_digest = calculate_md5(file_path)
  47. cursor.execute(
  48. 'INSERT INTO files (id, path, name, type, size, modification_time, md5) VALUES (?, ?, ?, ?, ?, ?, ?)',
  49. (file_path, file_path, file_name, file_type, size, modification_time, md5_digest))
  50. conn.commit()
  51. # target_directory = '/path/to/your/directory'
  52. # target_directory = '/Users/honghaitao/PycharmProjects/pythonProject/diff_file/file'
  53. # target_directory = '/Volumes/16T/柚木'
  54. # target_directory = '/Volumes/20T/待归类'
  55. target_directory = '/Volumes/16T/电视剧'
  56. insert_file_data(target_directory)
  57. # 关闭数据库连接
  58. conn.close()