"""Convert every matching text file under a directory tree to a target encoding."""
import os

try:
    from chardet import detect
except ImportError:
    # chardet is only needed by detect_file_encoding(); the default conversion
    # path uses a fixed source encoding and must keep working without it.
    detect = None

try:
    from tqdm import tqdm
except ImportError:
    def tqdm(iterable, **_kwargs):
        """Stand-in used when tqdm is missing: iterate without a progress bar."""
        return iterable

# Path of the log file that records the outcome of every conversion.
log_file_path = "conversion_log.txt"


def detect_file_encoding(file_path):
    """Guess the text encoding of a file from its first 1024 bytes.

    :param file_path: path of the file to inspect
    :return: the encoding name reported by chardet, or ``'utf-8'`` when
        chardet cannot make a guess
    :raises ImportError: if chardet is not installed
    """
    if detect is None:
        raise ImportError("未安装 chardet,无法自动检测文件编码")
    with open(file_path, 'rb') as f:
        raw_data = f.read(1024)  # a prefix of the file is used for detection
    # chardet reports {'encoding': None} for empty or undetectable input, so
    # a dict.get() default would never apply; fall back explicitly instead.
    return detect(raw_data).get('encoding') or 'utf-8'


def log_message_to_file(log_file_path, message):
    """Append one line to the log file, never raising on failure.

    :param log_file_path: path of the log file (created if missing)
    :param message: text to record; a trailing newline is added
    """
    try:
        # backslashreplace keeps the entry writable even when the message
        # contains characters UTF-8 cannot encode (e.g. surrogate-escaped
        # bytes coming from an undecodable file name).
        with open(log_file_path, 'a', encoding='utf-8',
                  errors='backslashreplace') as log_file:
            log_file.write(message + '\n')
    except Exception as e:
        # Logging is best-effort: a broken log must not abort the batch.
        print(f"无法写入日志文件:{e}")


def convert_encoding_in_dir(directory, dest_encoding, extensions,
                            src_encoding="GB2312"):
    """Re-encode, in place, every file under ``directory`` with a given extension.

    Each file is handled independently: a failure is written to the log file
    and the loop moves on to the next file. Progress is shown with tqdm when
    it is installed.

    :param directory: root directory, walked recursively
    :param dest_encoding: encoding to write the files in
    :param extensions: file-name suffixes to process (e.g. ``['.txt', '.html']``),
        matched case-insensitively
    :param src_encoding: encoding the files are currently in; pass ``None``
        to detect it per file with :func:`detect_file_encoding`
    """
    # Lower-case both sides so that '.TXT' in ``extensions`` matches too.
    suffixes = tuple(ext.lower() for ext in extensions)

    # Collect the work list first so the progress bar knows the total.
    files_to_process = []
    for root, _, files in os.walk(directory):
        files_to_process.extend(
            os.path.join(root, name)
            for name in files
            if name.lower().endswith(suffixes)
        )

    for file_path in tqdm(files_to_process, desc="转换文件编码进度", unit="文件"):
        try:
            file_encoding = src_encoding or detect_file_encoding(file_path)

            # Work on raw bytes so line endings are carried over untouched
            # (text mode would translate them).
            with open(file_path, 'rb') as f:
                raw = f.read()

            # NOTE: errors='ignore' silently drops bytes that are not valid in
            # the source encoding; kept as the original best-effort behavior.
            content = raw.decode(file_encoding, errors='ignore')

            # Encode BEFORE reopening the file for writing: if the text cannot
            # be represented in dest_encoding the original stays intact
            # instead of being truncated.
            converted = content.encode(dest_encoding)

            with open(file_path, 'wb') as f:
                f.write(converted)

            log_message = f"已成功转换文件编码: {file_path} (从 {file_encoding} 转到 {dest_encoding})"
            log_message_to_file(log_file_path, log_message)
        except Exception as e:
            # Per-file boundary: record the failure and continue with the rest.
            error_message = f"无法处理文件 {file_path}: {e}"
            log_message_to_file(log_file_path, error_message)


# Example configuration
directory = "./全书"  # replace with the path of your target folder
dest_encoding = "utf-8"
extensions = ['.txt', '.html', '.htm', '.js']  # file extensions to process

if __name__ == "__main__":
    # Guarded so that importing this module does not rewrite files on disk.
    convert_encoding_in_dir(directory, dest_encoding, extensions)