# diff.py - index the files under a directory tree into a SQLite database
# (path, name, extension, size, modification time and MD5 digest per file).
import concurrent.futures
import hashlib
import os
import sqlite3
  5. # 连接到 SQLite 数据库并创建表
  6. conn = sqlite3.connect('file_data.db')
  7. cursor = conn.cursor()
  8. # 创建表格
  9. cursor.execute('''
  10. CREATE TABLE IF NOT EXISTS files (
  11. id TEXT PRIMARY KEY,
  12. path TEXT,
  13. name TEXT,
  14. type TEXT,
  15. size INTEGER,
  16. modification_time TIMESTAMP,
  17. md5 TEXT
  18. )
  19. ''')
  20. def calculate_md5(file_path):
  21. md5_hash = hashlib.md5()
  22. with open(file_path, "rb") as f:
  23. for chunk in iter(lambda: f.read(4096), b""):
  24. md5_hash.update(chunk)
  25. return md5_hash.hexdigest()
  26. def insert_file_data(directory, ignore_list):
  27. for root, _, files in os.walk(directory):
  28. for file in files:
  29. file_path = os.path.join(root, file)
  30. file_name, file_extension = os.path.splitext(file)
  31. file_type = file_extension[1:] # Remove the leading dot from extension
  32. # 检查是否在忽略列表中(文件名或目录名)
  33. should_ignore = False
  34. for ignore_item in ignore_list:
  35. if ignore_item in file_path:
  36. should_ignore = True
  37. break
  38. if ignore_item in file_type:
  39. should_ignore = True
  40. break
  41. if should_ignore:
  42. continue
  43. # Get file stats
  44. stat_info = os.stat(file_path)
  45. size = stat_info.st_size
  46. modification_time = stat_info.st_mtime
  47. # 检查是否存在相同ID
  48. cursor.execute('SELECT id FROM files WHERE id=?', (file_path,))
  49. existing_id = cursor.fetchone()
  50. if not existing_id:
  51. md5_digest = calculate_md5(file_path)
  52. cursor.execute(
  53. 'INSERT INTO files (id, path, name, type, size, modification_time, md5) VALUES (?, ?, ?, ?, ?, ?, ?)',
  54. (file_path, file_path, file_name, file_type, size, modification_time, md5_digest))
  55. conn.commit()
  56. ignore_list = [
  57. 'node_modules',
  58. '.idea',
  59. 'jar',
  60. '.git',
  61. '.DS_Store'
  62. ]
  63. # target_directory = '/path/to/your/directory'
  64. # target_directory = '/Users/honghaitao/PycharmProjects/pythonProject/diff_file/file'
  65. # target_directory = '/Volumes/16T/柚木'
  66. # target_directory = '/Volumes/20T/待归类'
  67. # target_directory = '/Volumes/16T/电视剧'
  68. target_directory = '/Volumes/16T/电影'
  69. insert_file_data(target_directory, ignore_list)
  70. # 关闭数据库连接
  71. conn.close()