# eccode.py
  1. from tqdm import tqdm
  2. import os
  3. from chardet import detect
  4. # 定义日志文件路径
  5. log_file_path = "conversion_log.txt"
  6. def detect_file_encoding(file_path):
  7. """
  8. 自动检测文件编码。
  9. :param file_path: 文件路径
  10. :return: 检测到的编码
  11. """
  12. with open(file_path, 'rb') as f:
  13. raw_data = f.read(1024) # 读取部分数据进行检测
  14. result = detect(raw_data)
  15. return result.get('encoding', 'utf-8') # 默认返回 utf-8
  16. def log_message_to_file(log_file_path, message):
  17. """
  18. 将日志信息写入日志文件
  19. :param log_file_path: 日志文件路径
  20. :param message: 要记录的日志信息
  21. """
  22. try:
  23. with open(log_file_path, 'a', encoding='utf-8') as log_file:
  24. log_file.write(message + '\n')
  25. except Exception as e:
  26. print(f"无法写入日志文件:{e}")
  27. def convert_encoding_in_dir(directory, dest_encoding, extensions):
  28. """
  29. 转换指定目录下所有指定格式的文件的编码,并显示进度条。
  30. :param directory: 目录路径
  31. :param dest_encoding: 目标编码
  32. :param extensions: 需要处理的文件扩展名列表(如 ['.txt', '.html'])
  33. """
  34. # 获取所有指定格式的文件路径
  35. files_to_process = []
  36. for root, _, files in os.walk(directory):
  37. for file in files:
  38. if any(file.lower().endswith(ext) for ext in extensions): # 检查文件扩展名
  39. files_to_process.append(os.path.join(root, file))
  40. # 遍历文件并转换编码
  41. for file_path in tqdm(files_to_process, desc="转换文件编码进度", unit="文件"):
  42. try:
  43. # 自动检测文件编码
  44. # src_encoding = detect_file_encoding(file_path)
  45. src_encoding = "GB2312"
  46. # if src_encoding == 'MacRoman' or src_encoding == 'ascii':
  47. # src_encoding = "GB2312"
  48. # 读取并转换文件内容
  49. with open(file_path, 'r', encoding=src_encoding, errors='ignore') as f:
  50. content = f.read()
  51. # 写入目标编码
  52. with open(file_path, 'w', encoding=dest_encoding) as f:
  53. f.write(content)
  54. # 构建日志信息
  55. log_message = f"已成功转换文件编码: {file_path} (从 {src_encoding} 转到 {dest_encoding})"
  56. log_message_to_file(log_file_path, log_message)
  57. except Exception as e:
  58. error_message = f"无法处理文件 {file_path}: {e}"
  59. log_message_to_file(log_file_path, error_message)
  60. # 示例调用
  61. directory = "./全书" # 替换为你的目标文件夹路径
  62. dest_encoding = "utf-8"
  63. extensions = ['.txt', '.html', '.htm', '.js', '.css'] # 需要处理的文件扩展名
  64. convert_encoding_in_dir(directory, dest_encoding, extensions)