# Index the files under a directory tree into the SQLite database file_data.db,
# recording path, name, type, size, modification time and MD5 digest per file.
import concurrent.futures
import hashlib
import logging
import os
import sqlite3
# Schema of the single table used by this script. CREATE TABLE IF NOT EXISTS
# makes the statement safe to run on every start-up.
_CREATE_FILES_TABLE = '''
    CREATE TABLE IF NOT EXISTS files (
        id TEXT PRIMARY KEY,
        path TEXT,
        name TEXT,
        type TEXT,
        size INTEGER,
        modification_time TIMESTAMP,
        md5 TEXT
    )
'''

# Connect to the SQLite database and obtain the cursor shared by the rest of
# the script.
conn = sqlite3.connect('file_data.db')
cursor = conn.cursor()

# Create the table.
cursor.execute(_CREATE_FILES_TABLE)
def calculate_md5(file_path, chunk_size=1024 * 1024):
    """Return the hexadecimal MD5 digest of the file at *file_path*.

    The file is opened in binary mode and fed to the hash in fixed-size
    chunks, so large files are never loaded into memory in one piece.

    Args:
        file_path: Path of the file to hash (anything ``open`` accepts).
        chunk_size: Number of bytes to read per iteration. It only affects
            speed and memory use, never the resulting digest. Must be
            positive.

    Returns:
        The MD5 digest as a hexadecimal string.

    Raises:
        ValueError: If *chunk_size* is not positive.
        OSError: If the file cannot be opened or read.
    """
    # read(0) returns b"" immediately, which would silently yield the digest
    # of empty input; reject such sizes instead of returning a wrong hash.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive number of bytes")

    md5_hash = hashlib.md5()
    with open(file_path, "rb") as f:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: f.read(chunk_size), b""):
            md5_hash.update(chunk)
    return md5_hash.hexdigest()
def insert_file_data(directory):
    """Index every file below *directory* into the ``files`` table.

    The tree is walked with ``os.walk``. For each file whose full path is not
    yet stored as an ``id``, one row is inserted holding the full path (used
    both as ``id`` and ``path``), the file name without extension, the
    extension without its leading dot, the size in bytes, ``st_mtime`` and
    the MD5 digest. Paths that are already present are skipped, so the scan
    can be re-run over the same tree without re-hashing known files.

    Files that cannot be stat'ed or read are logged and skipped rather than
    aborting the whole scan.

    The function works through the module-level ``cursor`` and ``conn``.

    Args:
        directory: Root directory to scan.
    """
    log = logging.getLogger(__name__)

    for root, _, files in os.walk(directory):
        for filename in files:
            file_path = os.path.join(root, filename)

            # Check whether this ID already exists. Doing it first avoids
            # any stat/hash work for files that are already indexed.
            cursor.execute('SELECT id FROM files WHERE id=?', (file_path,))
            if cursor.fetchone() is not None:
                continue

            try:
                stat_info = os.stat(file_path)
                md5_digest = calculate_md5(file_path)
            except OSError as exc:
                # Broken symlinks, permission problems or files removed
                # mid-scan must not stop the remaining files being indexed.
                log.warning("Skipping unreadable file %s: %s", file_path, exc)
                continue

            file_name, file_extension = os.path.splitext(filename)
            file_type = file_extension[1:]  # Remove the leading dot from extension

            cursor.execute(
                'INSERT INTO files (id, path, name, type, size, modification_time, md5) VALUES (?, ?, ?, ?, ?, ?, ?)',
                (file_path, file_path, file_name, file_type,
                 stat_info.st_size, stat_info.st_mtime, md5_digest))

        # Commit once per directory: an interrupted scan loses at most the
        # rows of the directory being processed.
        conn.commit()
# Root directory to index. Roots used on earlier runs, kept for reference:
#   '/path/to/your/directory'
#   '/Users/honghaitao/PycharmProjects/pythonProject/diff_file/file'
#   '/Volumes/16T/柚木'
#   '/Volumes/20T/待归类'
target_directory = '/Volumes/16T/电视剧'

try:
    insert_file_data(target_directory)
finally:
    # Close the database connection even if the scan raised part-way through.
    conn.close()
|