12345678910111213141516171819202122232425262728293031323334353637383940414243 |
- import os
- from chardet import detect
def detect_file_encoding(file_path, sample_size=1024):
    """Guess the text encoding of a file using chardet.

    Only the leading ``sample_size`` bytes are handed to the detector, so the
    guess reflects the beginning of the file rather than its whole content.

    :param file_path: Path of the file to inspect.
    :param sample_size: Number of leading bytes to sample (default 1024).
        ``None`` or a negative value reads the entire file.
    :return: Name of the detected encoding, or ``'utf-8'`` when the detector
        could not determine one.
    """
    with open(file_path, 'rb') as f:
        raw_data = f.read(sample_size)
    result = detect(raw_data)
    # chardet reports an undetermined encoding (e.g. for an empty file) as
    # None stored under the 'encoding' key, so a dict.get() default would
    # never be used; fall back explicitly instead.
    return result.get('encoding') or 'utf-8'
def convert_encoding_in_dir(directory, dest_encoding, *, errors='strict'):
    """Re-encode every file under a directory tree, in place.

    Each file's source encoding is guessed with ``detect_file_encoding``; the
    file is then decoded, re-encoded to ``dest_encoding`` and written back to
    the same path. Work is done on bytes, so line endings are preserved
    exactly. A file is only opened for writing after its new content has been
    fully produced in memory, so a decoding or encoding failure leaves the
    original file untouched.

    Failures are reported per file with ``print`` and do not stop the walk.

    :param directory: Root directory to walk recursively.
    :param dest_encoding: Name of the target encoding.
    :param errors: Error handler used when decoding the source bytes.
        The default ``'strict'`` skips a file that cannot be decoded cleanly
        instead of silently dropping characters; pass ``'ignore'`` to accept
        a lossy conversion.
    """
    for root, _dirs, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)
            try:
                # Detect the source encoding; guard against a None result so
                # decoding never falls back to an arbitrary locale default.
                src_encoding = detect_file_encoding(file_path) or 'utf-8'
                # Detection only samples the start of the file, so a file
                # with an ASCII-only prefix is reported as ASCII. UTF-8 is a
                # superset of ASCII, so decoding as UTF-8 is always safe here
                # and keeps later non-ASCII text intact.
                if src_encoding.lower() == 'ascii':
                    decode_as = 'utf-8'
                else:
                    decode_as = src_encoding

                with open(file_path, 'rb') as f:
                    raw_bytes = f.read()

                # Decode and re-encode entirely in memory BEFORE touching the
                # file: opening for writing truncates it, so any failure must
                # happen first.
                content = raw_bytes.decode(decode_as, errors=errors)
                encoded = content.encode(dest_encoding)

                with open(file_path, 'wb') as f:
                    f.write(encoded)
                print(f"已成功转换文件编码: {file_path} (从 {src_encoding} 转到 {dest_encoding})")
            except Exception as e:
                # Best-effort batch job: report the problem and move on to
                # the next file; the failing file has not been modified.
                print(f"无法处理文件 {file_path}: {e}")
# Example invocation: convert every file under ./全书 to UTF-8.
directory = "./全书"
dest_encoding = "utf-8"

if __name__ == "__main__":
    # Guarded so that merely importing this module does not rewrite files
    # on disk; the conversion runs only when executed as a script.
    convert_encoding_in_dir(directory, dest_encoding)
|