# diff.py (3.0 KB)

  1. import os
  2. import sqlite3
  3. import hashlib
  4. import concurrent.futures
  5. # 连接到 SQLite 数据库并创建表
  6. conn = sqlite3.connect('file_data.db')
  7. cursor = conn.cursor()
  8. # 创建表格
  9. cursor.execute('''
  10. CREATE TABLE IF NOT EXISTS files (
  11. id TEXT PRIMARY KEY,
  12. path TEXT,
  13. name TEXT,
  14. type TEXT,
  15. size INTEGER,
  16. modification_time TIMESTAMP,
  17. md5 TEXT
  18. )
  19. ''')
  20. def calculate_md5(file_path):
  21. md5_hash = hashlib.md5()
  22. with open(file_path, "rb") as f:
  23. for chunk in iter(lambda: f.read(4096), b""):
  24. md5_hash.update(chunk)
  25. return md5_hash.hexdigest()
  26. def insert_file_data(directory, ignore_list):
  27. for root, _, files in os.walk(directory):
  28. for file in files:
  29. file_path = os.path.join(root, file)
  30. if os.path.exists(file_path): # 检查文件是否存在
  31. file_name, file_extension = os.path.splitext(file)
  32. file_type = file_extension[1:] # Remove the leading dot from extension
  33. # 检查是否在忽略列表中(文件名或目录名)
  34. should_ignore = False
  35. for ignore_item in ignore_list:
  36. if ignore_item in file_path:
  37. should_ignore = True
  38. break
  39. if ignore_item in file_type:
  40. should_ignore = True
  41. break
  42. if should_ignore:
  43. continue
  44. # Get file stats
  45. stat_info = os.stat(file_path)
  46. size = stat_info.st_size
  47. modification_time = stat_info.st_mtime
  48. # 检查是否存在相同ID
  49. cursor.execute('SELECT id FROM files WHERE id=?', (file_path,))
  50. existing_id = cursor.fetchone()
  51. if not existing_id:
  52. md5_digest = calculate_md5(file_path)
  53. cursor.execute(
  54. 'INSERT INTO files (id, path, name, type, size, modification_time, md5) VALUES (?, ?, ?, ?, ?, ?, ?)',
  55. (file_path, file_path, file_name, file_type, size, modification_time, md5_digest))
  56. conn.commit()
  57. else:
  58. print(f"文件不存在: {file_path}")
  59. ignore_list = [
  60. 'node_modules',
  61. '.idea',
  62. 'jar',
  63. '.git',
  64. '.DS_Store',
  65. 'CleanMyMac X.app',
  66. '.pnpm-store',
  67. 'IINA.app',
  68. 'venv',
  69. 'python',
  70. '/Volumes/16T/newFiles/开放/可执行文件/bin/bit'
  71. ]
  72. # target_directory = '/path/to/your/directory'
  73. # target_directory = '/Users/honghaitao/PycharmProjects/pythonProject/diff_file/file'
  74. # target_directory = '/Volumes/16T/柚木'
  75. # target_directory = '/Volumes/20T/待归类'
  76. # target_directory = '/Volumes/16T/电视剧'
  77. # target_directory = '/Volumes/16T/电影'
  78. # target_directory = '/Volumes/16T/电子书'
  79. # target_directory = '/Volumes/16T/工作'
  80. target_directory = '/Volumes/16T'
  81. insert_file_data(target_directory, ignore_list)
  82. # 关闭数据库连接
  83. conn.close()