|
@@ -0,0 +1,71 @@
|
|
|
+import os
|
|
|
+import sqlite3
|
|
|
+import hashlib
|
|
|
+import concurrent.futures
|
|
|
+
|
|
|
# Open (or create) the SQLite database file that holds the file index.
conn = sqlite3.connect("file_data.db")
cursor = conn.cursor()

# Ensure the `files` table exists before any rows are inserted.
# The `id` column is the primary key; the remaining columns hold the
# per-file metadata written by the indexing code below.
cursor.execute('''
    CREATE TABLE IF NOT EXISTS files (
        id TEXT PRIMARY KEY,
        path TEXT,
        name TEXT,
        type TEXT,
        size INTEGER,
        modification_time TIMESTAMP,
        md5 TEXT
    )
''')
|
|
|
+
|
|
|
def calculate_md5(file_path, chunk_size=1024 * 1024):
    """Return the hexadecimal MD5 digest of the file at *file_path*.

    The file is opened in binary mode and consumed in pieces of
    *chunk_size* bytes, so memory use stays bounded no matter how large
    the file is.

    Args:
        file_path: Path of the file to hash.
        chunk_size: Number of bytes requested per read. Larger values
            mean fewer read calls on big files; the digest is the same
            for every positive value.

    Returns:
        The MD5 digest as a hexadecimal string.

    Raises:
        ValueError: If *chunk_size* is not positive (a zero-byte read
            would end the loop immediately and hash nothing).
        OSError: If the file cannot be opened or read.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive number of bytes")

    digest = hashlib.md5()
    with open(file_path, "rb") as stream:
        # iter() with a sentinel keeps calling read() until it returns
        # b"", which signals end of file.
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()
|
|
|
+
|
|
|
def insert_file_data(directory):
    """Index every file found under *directory* into the ``files`` table.

    The tree is traversed with ``os.walk``. For each file whose full path
    is not already stored as an ``id``, one row is inserted containing the
    path, the file name without its extension, the extension without the
    leading dot, the size, the modification time and the MD5 digest. The
    row is committed right after it is inserted.

    Files that cannot be stat'ed or read (for example a broken symlink,
    a permission error, or a file removed during the scan) are reported
    on stderr and skipped instead of aborting the whole walk.

    Relies on the module-level ``cursor`` and ``conn`` objects and on
    ``calculate_md5``.

    Args:
        directory: Root directory of the tree to index.
    """
    import sys  # local import: only needed for the skip warning below

    for root, _, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)

            # The full path doubles as the primary key. Check it first so
            # already-indexed files cost neither a stat nor a hash.
            cursor.execute('SELECT id FROM files WHERE id=?', (file_path,))
            if cursor.fetchone() is not None:
                continue

            file_name, file_extension = os.path.splitext(file)
            file_type = file_extension[1:]  # drop the leading dot

            try:
                stat_info = os.stat(file_path)
                md5_digest = calculate_md5(file_path)
            except OSError as exc:
                print(f"Skipping {file_path}: {exc}", file=sys.stderr)
                continue

            cursor.execute(
                'INSERT INTO files (id, path, name, type, size, modification_time, md5) VALUES (?, ?, ?, ?, ?, ?, ?)',
                (file_path, file_path, file_name, file_type,
                 stat_info.st_size, stat_info.st_mtime, md5_digest))
            # Commit per file so progress survives an interrupted scan.
            conn.commit()
|
|
|
+
|
|
|
+
|
|
|
# Scan roots used previously, kept for reference:
# target_directory = '/path/to/your/directory'
# target_directory = '/Users/honghaitao/PycharmProjects/pythonProject/diff_file/file'
# target_directory = '/Volumes/16T/柚木'
# target_directory = '/Volumes/20T/待归类'
target_directory = '/Volumes/16T/电视剧'
try:
    insert_file_data(target_directory)
finally:
    # Close the database connection even if the scan fails part-way.
    conn.close()
|