eccode.py 1.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
  1. import os
  2. from chardet import detect
  def detect_file_encoding(file_path, sample_size=1024):
      """Guess the text encoding of a file with chardet.

      Only the first ``sample_size`` bytes are handed to the detector, so a
      file whose distinguishing bytes appear later may be misdetected.

      :param file_path: path of the file to inspect
      :param sample_size: number of leading bytes to read for detection
          (default 1024); a negative value reads the whole file
      :return: the detected encoding name, or ``'utf-8'`` when the detector
          reports no encoding
      """
      with open(file_path, 'rb') as f:
          raw_data = f.read(sample_size)  # a sample is enough for detection
      result = detect(raw_data)
      # chardet reports a failed detection (e.g. empty input) as
      # {'encoding': None, ...}. The key is present, so dict.get's default
      # would never be used; ``or`` makes the utf-8 fallback effective.
      return result.get('encoding') or 'utf-8'
  def convert_encoding_in_dir(directory, dest_encoding, errors='ignore'):
      """Re-encode every file under a directory tree, in place.

      Every file yielded by ``os.walk`` is processed regardless of its type.
      For each one the source encoding is detected, the file is decoded, and
      the text is written back to the same path in ``dest_encoding``. A
      failure on one file is printed and does not stop the remaining files.

      :param directory: root directory to walk
      :param dest_encoding: name of the encoding to write files in
      :param errors: decode error policy used when reading the source file.
          The default ``'ignore'`` silently drops undecodable bytes (lossy);
          pass ``'strict'`` to leave such files untouched and reported.
      """
      for root, _dirs, files in os.walk(directory):
          for file in files:
              file_path = os.path.join(root, file)
              try:
                  # Detect the encoding the file is currently stored in.
                  src_encoding = detect_file_encoding(file_path)
                  # Decode the whole file into text.
                  with open(file_path, 'r', encoding=src_encoding, errors=errors) as f:
                      content = f.read()
                  # Opening with 'w' truncates the file. Encode once up front
                  # so a dest_encoding that is unknown or cannot represent the
                  # text fails here, while the original is still intact.
                  content.encode(dest_encoding)
                  # Write the text back in the target encoding.
                  with open(file_path, 'w', encoding=dest_encoding) as f:
                      f.write(content)
                  print(f"已成功转换文件编码: {file_path} (从 {src_encoding} 转到 {dest_encoding})")
              except Exception as e:
                  # Deliberate per-file boundary: report and move on.
                  print(f"无法处理文件 {file_path}: {e}")
  # Example invocation. These are top-level statements, so the conversion
  # runs whenever this module is executed or imported.
  directory, dest_encoding = "./全书", "utf-8"
  convert_encoding_in_dir(directory=directory, dest_encoding=dest_encoding)