max 1 сар өмнө
parent
commit
98582ae353

+ 1 - 104
code/translate_epub_v4(单线程版本).py

@@ -17,110 +17,7 @@ import hashlib
 import yaml
 from pathlib import Path
 
-# 配置管理
-class Config:
-    def __init__(self, config_path='config.yaml'):
-        self.config_path = config_path
-        self.config = self.load_config()
-        
-        # 设置日志
-        self.setup_logging()
-        
-        # 初始化OpenAI客户端
-        self.setup_openai()
-    
-    def load_config(self):
-        """加载配置文件"""
-        if not os.path.exists(self.config_path):
-            # 创建默认配置
-            default_config = {
-                'logging': {
-                    'level': 'INFO',
-                    'format': '%(asctime)s - %(levelname)s - %(message)s',
-                    'file': 'translation.log'
-                },
-                'openai': {
-                    'base_url': 'https://api.siliconflow.cn/v1',
-                    'api_key': 'sk-',
-                    'model_name': 'deepseek-ai/DeepSeek-R1',
-                    'max_retries': 3,
-                    'retry_delay': 2,
-                    'timeout': 30,
-                    'max_concurrent_requests': 5
-                },
-                'translation': {
-                    'min_line_count': 1,
-                    'max_line_count': 5,
-                    'initial_line_count': 2,
-                    'error_threshold': 3,
-                    'success_threshold': 5,
-                    'error_cooldown': 60,
-                    'cache_size': 1000
-                },
-                'database': {
-                    'path': 'translation_progress.db',
-                    'pool_size': 5
-                },
-                'paths': {
-                    'input_dir': '002/Ops',
-                    'output_dir': '002/Ops_translated'
-                }
-            }
-            
-            # 保存默认配置
-            with open(self.config_path, 'w', encoding='utf-8') as f:
-                yaml.dump(default_config, f, allow_unicode=True)
-            
-            return default_config
-        
-        # 加载现有配置
-        with open(self.config_path, 'r', encoding='utf-8') as f:
-            return yaml.safe_load(f)
-    
-    def setup_logging(self):
-        """设置日志"""
-        logging.basicConfig(
-            level=getattr(logging, self.config['logging']['level']),
-            format=self.config['logging']['format'],
-            handlers=[
-                logging.FileHandler(self.config['logging']['file']),
-                logging.StreamHandler()
-            ]
-        )
-    
-    def setup_openai(self):
-        """设置OpenAI客户端"""
-        self.client = openai.OpenAI(
-            base_url=self.config['openai']['base_url'],
-            api_key=self.config['openai']['api_key']
-        )
-    
-    def get(self, *keys):
-        """获取配置值"""
-        value = self.config
-        for key in keys:
-            value = value[key]
-        return value
-    
-    def update(self, updates):
-        """更新配置"""
-        def deep_update(d, u):
-            for k, v in u.items():
-                if isinstance(v, dict):
-                    d[k] = deep_update(d.get(k, {}), v)
-                else:
-                    d[k] = v
-            return d
-        
-        self.config = deep_update(self.config, updates)
-        
-        # 保存更新后的配置
-        with open(self.config_path, 'w', encoding='utf-8') as f:
-            yaml.dump(self.config, f, allow_unicode=True)
-        
-        # 重新设置日志和OpenAI客户端
-        self.setup_logging()
-        self.setup_openai()
+
 
 # 创建全局的配置实例
 config = Config()

+ 713 - 0
code/translate_epub_v4(单线程版本)V2.py

@@ -0,0 +1,713 @@
+import os
+import re
+import openai
+import time
+from tqdm import tqdm
+import sqlite3
+from datetime import datetime
+import logging
+from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
+import asyncio
+import yaml
+
+# Configuration management
+class Config:
+    """Load ``config.yaml``, configure logging and build the OpenAI client.
+
+    The parsed settings are kept in ``self.config`` (a nested dict) and the
+    API client in ``self.client``.  Constructing an instance has side
+    effects: it may create the config file, it reconfigures the root logger
+    and it opens the log file.
+    """
+
+    def __init__(self, config_path='config.yaml'):
+        """Read (or create) the config file, then set up logging and the client.
+
+        Args:
+            config_path: Path of the YAML configuration file.
+        """
+        self.config_path = config_path
+        self.config = self.load_config()
+
+        # Configure logging
+        self.setup_logging()
+
+        # Initialise the OpenAI client
+        self.setup_openai()
+
+    @staticmethod
+    def _default_config():
+        """Return a fresh copy of the built-in default settings."""
+        return {
+            'logging': {
+                'level': 'INFO',
+                'format': '%(asctime)s - %(levelname)s - %(message)s',
+                'file': 'translation.log'
+            },
+            'openai': {
+                'base_url': 'https://api.siliconflow.cn/v1',
+                'api_key': 'sk-',
+                'model_name': 'deepseek-ai/DeepSeek-R1',
+                'max_retries': 3,
+                'retry_delay': 2,
+                'timeout': 30,
+                'max_concurrent_requests': 5
+            },
+            'translation': {
+                'min_line_count': 1,
+                'max_line_count': 5,
+                'initial_line_count': 2,
+                'error_threshold': 3,
+                'success_threshold': 5,
+                'error_cooldown': 60,
+                'cache_size': 1000
+            },
+            'database': {
+                'path': 'translation_progress.db',
+                'pool_size': 5
+            },
+            'paths': {
+                'input_dir': '002/Ops',
+                'output_dir': '002/Ops_translated'
+            }
+        }
+
+    @staticmethod
+    def _deep_update(base, updates):
+        """Recursively merge ``updates`` into ``base`` and return ``base``.
+
+        A mapping in ``updates`` replaces a non-mapping value in ``base``
+        instead of crashing on it.
+        """
+        for key, value in updates.items():
+            if isinstance(value, dict):
+                current = base.get(key)
+                if not isinstance(current, dict):
+                    current = {}
+                base[key] = Config._deep_update(current, value)
+            else:
+                base[key] = value
+        return base
+
+    def load_config(self):
+        """Load the configuration file, creating it with defaults if absent.
+
+        Values found in the file override the built-in defaults; keys the
+        file does not mention keep their default value, so a config file
+        written by an older version still works.
+
+        Returns:
+            The effective configuration as a nested dict.
+
+        Raises:
+            ValueError: If the file parses to something other than a mapping.
+        """
+        defaults = self._default_config()
+
+        if not os.path.exists(self.config_path):
+            # Write the defaults so the user has a file to edit
+            with open(self.config_path, 'w', encoding='utf-8') as f:
+                yaml.dump(defaults, f, allow_unicode=True)
+            return defaults
+
+        # Load the existing configuration
+        with open(self.config_path, 'r', encoding='utf-8') as f:
+            loaded = yaml.safe_load(f)
+
+        if loaded is None:
+            # An empty file parses to None; fall back to the defaults
+            return defaults
+        if not isinstance(loaded, dict):
+            raise ValueError(f"配置文件格式错误: {self.config_path}")
+        return self._deep_update(defaults, loaded)
+
+    def setup_logging(self):
+        """Configure the root logger from the ``logging`` section.
+
+        Existing root handlers are removed first: ``logging.basicConfig``
+        does nothing when the root logger already has handlers, which would
+        make the reconfiguration done by ``update`` silently ineffective.
+        """
+        settings = self.config['logging']
+
+        # Accept lower-case names and fall back to INFO for unknown ones
+        level = getattr(logging, str(settings['level']).upper(), None)
+        if not isinstance(level, int):
+            level = logging.INFO
+
+        root = logging.getLogger()
+        for handler in list(root.handlers):
+            root.removeHandler(handler)
+            handler.close()
+
+        logging.basicConfig(
+            level=level,
+            format=settings['format'],
+            handlers=[
+                # Log messages are Chinese; do not depend on the locale encoding
+                logging.FileHandler(settings['file'], encoding='utf-8'),
+                logging.StreamHandler()
+            ]
+        )
+
+    def setup_openai(self):
+        """Create the OpenAI client from the ``openai`` section."""
+        self.client = openai.OpenAI(
+            base_url=self.config['openai']['base_url'],
+            api_key=self.config['openai']['api_key']
+        )
+
+    def get(self, *keys):
+        """Return the value found by following ``keys`` into the config.
+
+        Raises:
+            KeyError: If any key along the path is missing.
+        """
+        value = self.config
+        for key in keys:
+            value = value[key]
+        return value
+
+    def update(self, updates):
+        """Merge ``updates`` into the config, save it and re-apply it.
+
+        Args:
+            updates: Nested dict of values to override.
+        """
+        self.config = self._deep_update(self.config, updates)
+
+        # Persist the updated configuration
+        with open(self.config_path, 'w', encoding='utf-8') as f:
+            yaml.dump(self.config, f, allow_unicode=True)
+
+        # Re-apply logging and rebuild the OpenAI client
+        self.setup_logging()
+        self.setup_openai()
+
+
+# Global configuration instance (reads or creates config.yaml at import time)
+config = Config()
+
+# Model settings: every entry comes from the 'openai' section except
+# 'cache_size', which is read from the 'translation' section.
+MODEL_CONFIG = {
+    **{
+        key: config.get('openai', key)
+        for key in (
+            "model_name",
+            "max_retries",
+            "retry_delay",
+            "timeout",
+            "max_concurrent_requests",
+        )
+    },
+    "cache_size": config.get('translation', 'cache_size'),
+}
+
+# Line-count and threshold settings from the 'translation' section
+(
+    MIN_LINE_COUNT,
+    MAX_LINE_COUNT,
+    INITIAL_LINE_COUNT,
+    ERROR_THRESHOLD,
+    SUCCESS_THRESHOLD,
+) = (
+    config.get('translation', name)
+    for name in (
+        'min_line_count',
+        'max_line_count',
+        'initial_line_count',
+        'error_threshold',
+        'success_threshold',
+    )
+)
+
+# Running counters for translation volume, speed and success rate
+class TranslationStats:
+    """Accumulate character and request counters since construction."""
+
+    def __init__(self):
+        """Start the clock and zero every counter."""
+        self.start_time = time.time()
+        self.total_chars = self.translated_chars = 0
+        self.total_requests = 0
+        self.successful_requests = self.failed_requests = 0
+
+    def update_stats(self, original_text, translated_text, success=True):
+        """Record one request.
+
+        Args:
+            original_text: Text that was sent; its length is added to the total.
+            translated_text: Text that came back ("" for a failed request).
+            success: Whether the request counts as successful.
+        """
+        self.total_chars += len(original_text)
+        self.translated_chars += len(translated_text)
+        self.total_requests += 1
+        if not success:
+            self.failed_requests += 1
+            return
+        self.successful_requests += 1
+
+    def get_stats(self):
+        """Return a dict of display-ready statistics keyed by Chinese labels."""
+        elapsed = time.time() - self.start_time
+
+        # Guard both ratios against a zero denominator
+        speed = 0
+        if elapsed > 0:
+            speed = self.translated_chars / elapsed
+        rate = 0
+        if self.total_requests > 0:
+            rate = self.successful_requests / self.total_requests * 100
+
+        report = {}
+        report["总字符数"] = self.total_chars
+        report["已翻译字符数"] = self.translated_chars
+        report["翻译速度"] = f"{speed:.2f} 字符/秒"
+        report["成功率"] = f"{rate:.1f}%"
+        report["总请求数"] = self.total_requests
+        report["成功请求"] = self.successful_requests
+        report["失败请求"] = self.failed_requests
+        report["运行时间"] = f"{elapsed:.1f} 秒"
+        return report
+
+# Global statistics object shared by the translation functions
+translation_stats = TranslationStats()
+class DatabaseManager:
+    """SQLite-backed store for translation progress and error records.
+
+    A single connection is opened lazily and reused by every method.  The
+    constructor creates the four tables (file_progress, line_progress,
+    error_log, group_progress) if they do not exist yet.
+    """
+
+    def __init__(self):
+        # Database file location comes from the 'database.path' setting
+        self.db_path = config.get('database', 'path')
+        self.conn = None
+        self.init_db()
+    
+    def get_connection(self):
+        """Return the shared connection, opening it on first use."""
+        if self.conn is None:
+            self.conn = sqlite3.connect(self.db_path)
+            # Rows can then be indexed by column name as well as position
+            self.conn.row_factory = sqlite3.Row
+        return self.conn
+    
+    def close(self):
+        """Close the shared connection if it is open."""
+        if self.conn:
+            self.conn.close()
+            self.conn = None
+    
+    def init_db(self):
+        """Create the tables if they are missing and commit."""
+        conn = self.get_connection()
+        c = conn.cursor()
+        
+        # Per-file progress table
+        c.execute('''
+            CREATE TABLE IF NOT EXISTS file_progress (
+                file_path TEXT PRIMARY KEY,
+                total_lines INTEGER,
+                processed_lines INTEGER,
+                status TEXT,
+                last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                error_count INTEGER DEFAULT 0,
+                retry_count INTEGER DEFAULT 0
+            )
+        ''')
+        
+        # Per-line progress table
+        c.execute('''
+            CREATE TABLE IF NOT EXISTS line_progress (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                file_path TEXT,
+                line_index INTEGER,
+                original_text TEXT,
+                translated_text TEXT,
+                status TEXT,
+                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                error_count INTEGER DEFAULT 0,
+                retry_count INTEGER DEFAULT 0,
+                UNIQUE(file_path, line_index)
+            )
+        ''')
+        
+        # Error log table
+        c.execute('''
+            CREATE TABLE IF NOT EXISTS error_log (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                file_path TEXT,
+                line_index INTEGER,
+                error_type TEXT,
+                error_message TEXT,
+                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                resolved_at TIMESTAMP,
+                resolution TEXT
+            )
+        ''')
+        
+        # Per-group progress table; a group is unique per file and version
+        c.execute('''
+            CREATE TABLE IF NOT EXISTS group_progress (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                file_path TEXT,
+                group_index INTEGER,
+                original_text TEXT,
+                translated_text TEXT,
+                status TEXT,
+                version TEXT,
+                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                UNIQUE(file_path, group_index, version)
+            )
+        ''')
+        
+        conn.commit()
+    
+    def begin_transaction(self):
+        """Start an explicit transaction on the shared connection."""
+        self.get_connection().execute('BEGIN TRANSACTION')
+    
+    def commit_transaction(self):
+        """Commit the current transaction."""
+        self.get_connection().commit()
+    
+    def rollback_transaction(self):
+        """Roll back the current transaction."""
+        self.get_connection().rollback()
+    
+    def get_file_progress(self, file_path):
+        """Return the file_progress row for ``file_path``, or None if absent."""
+        c = self.get_connection().cursor()
+        c.execute('SELECT * FROM file_progress WHERE file_path = ?', (file_path,))
+        return c.fetchone()
+    
+    def update_file_progress(self, file_path, total_lines, processed_lines, status):
+        """Insert or replace the progress row of a file and commit.
+
+        NOTE(review): INSERT OR REPLACE writes a whole new row, so the
+        columns not listed here (error_count, retry_count) go back to their
+        defaults on every call.
+        """
+        c = self.get_connection().cursor()
+        c.execute('''
+            INSERT OR REPLACE INTO file_progress 
+            (file_path, total_lines, processed_lines, status, last_updated)
+            VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP)
+        ''', (file_path, total_lines, processed_lines, status))
+        self.get_connection().commit()
+    
+    def get_line_progress(self, file_path, line_index):
+        """Return the line_progress row for a file/line pair, or None."""
+        c = self.get_connection().cursor()
+        c.execute('''
+            SELECT * FROM line_progress 
+            WHERE file_path = ? AND line_index = ?
+        ''', (file_path, line_index))
+        return c.fetchone()
+    
+    def update_line_progress(self, file_path, line_index, original_text, translated_text, status):
+        """Insert or replace the progress row of a single line and commit."""
+        c = self.get_connection().cursor()
+        c.execute('''
+            INSERT OR REPLACE INTO line_progress 
+            (file_path, line_index, original_text, translated_text, status, updated_at)
+            VALUES (?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
+        ''', (file_path, line_index, original_text, translated_text, status))
+        self.get_connection().commit()
+    
+    def log_error(self, file_path, line_index, error_type, error_message):
+        """Append one row to error_log and commit."""
+        c = self.get_connection().cursor()
+        c.execute('''
+            INSERT INTO error_log 
+            (file_path, line_index, error_type, error_message)
+            VALUES (?, ?, ?, ?)
+        ''', (file_path, line_index, error_type, error_message))
+        self.get_connection().commit()
+    
+    def update_group_progress(self, file_path, group_index, original_text, translated_text, status):
+        """Insert or replace the progress row of a translation group and commit.
+
+        The row is tagged with the module-level ``VERSION`` constant, which
+        is looked up when this method is called.
+        """
+        c = self.get_connection().cursor()
+        c.execute('''
+            INSERT OR REPLACE INTO group_progress 
+            (file_path, group_index, original_text, translated_text, status, version, updated_at)
+            VALUES (?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
+        ''', (file_path, group_index, original_text, translated_text, status, VERSION))
+        self.get_connection().commit()
+    
+    def get_error_stats(self):
+        """Return one row with total, unresolved and last-hour error counts."""
+        c = self.get_connection().cursor()
+        c.execute('''
+            SELECT 
+                COUNT(*) as total_errors,
+                COUNT(CASE WHEN resolved_at IS NULL THEN 1 END) as unresolved_errors,
+                COUNT(CASE WHEN created_at > datetime('now', '-1 hour') THEN 1 END) as recent_errors
+            FROM error_log
+        ''')
+        return c.fetchone()
+
+
+class AsyncTranslationManager:
+    """Hold the concurrency limiter and session slot for async translation."""
+
+    def __init__(self):
+        """Size the semaphore from 'openai.max_concurrent_requests'."""
+        max_requests = config.get('openai', 'max_concurrent_requests')
+        self.semaphore = asyncio.Semaphore(max_requests)
+        # No session is created here; the slot starts empty
+        self.session = None
+
+class TranslationCache:
+    """Dictionary cache with a configured size limit and hit/miss counters."""
+
+    def __init__(self):
+        """Start empty; the size limit is read from 'translation.cache_size'."""
+        self.cache = dict()
+        self.max_size = config.get('translation', 'cache_size')
+        self.hits, self.misses = 0, 0
+
+# Global instances.  Creating DatabaseManager opens the database file and
+# creates its tables, so this runs at import time.
+# NOTE(review): line_count_manager is a second TranslationStats object, not a
+# line-count manager; nothing in this file reads it -- confirm it is needed.
+line_count_manager = TranslationStats()
+db_manager = DatabaseManager()
+async_translation_manager = AsyncTranslationManager()
+translation_cache = TranslationCache()
+
+# Version control
+VERSION = "1.0.1" # Version tag stored with each group so translations from different versions stay separate
+line_count = 4 # Lines per translation group, fixed at 4
+
+def get_completed_groups(conn, file_path):
+    """Fetch the finished translation groups of a file for the current VERSION.
+
+    Args:
+        conn: Open sqlite3 connection to the progress database.
+        file_path: Path used as the key in group_progress.
+
+    Returns:
+        List of (group_index, translated_text) rows ordered by group_index.
+    """
+    query = '''
+        SELECT group_index, translated_text 
+        FROM group_progress 
+        WHERE file_path = ? AND status = 'completed' AND version = ?
+        ORDER BY group_index
+    '''
+    params = (file_path, VERSION)
+    cursor = conn.cursor()
+    cursor.execute(query, params)
+    rows = cursor.fetchall()
+    return rows
+
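+# Disabled system-prompt fragment, kept for reference:
+#   - The output must be wrapped in a code block,
+#     with language annotations added only where necessary.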
+@retry(
+    stop=stop_after_attempt(MODEL_CONFIG['max_retries']),
+    wait=wait_exponential(multiplier=1, min=4, max=10),
+    retry=retry_if_exception_type((openai.APIError, openai.APITimeoutError)),
+    before_sleep=lambda retry_state: logging.warning(f"重试第 {retry_state.attempt_number} 次...")
+)
+def translate_text(text):
+    """Translate ``text`` into Chinese with a streamed chat completion.
+
+    The streamed pieces are echoed to stdout as they arrive and every
+    attempt, successful or not, is recorded in ``translation_stats``.
+    API errors are retried by the ``retry`` decorator.
+
+    Args:
+        text: Source text; HTML fragments are sent as-is.
+
+    Returns:
+        The translated text, or ``text`` unchanged when it is empty or
+        whitespace only.
+
+    Raises:
+        ValueError: If the model returns an empty translation.
+        openai.APIError: On API failures, including timeouts; these are the
+            errors the decorator retries.
+    """
+    if not text or not text.strip():
+        logging.warning("收到空文本,跳过翻译")
+        return text
+
+    try:
+        messages = [
+            {
+                "role": "system",
+                "content": "- 你名为epub翻译大师,专注于将任意语言的文本翻译成中文。- 你在翻译过程中,力求保留原文语意,确保翻译的准确性和完整性。- 你特别注重翻译结果要贴合现代人的阅读习惯,使译文更加流畅易懂。- 在处理包含代码结构的文本时,你会特别注意保持代码的原样。- 你的服务旨在为用户提供高效、便捷的翻译体验,帮助用户跨越语言障碍。- 在回答问题的时候,尽可能保留原来的代码结构。- 在回答问题的时候,尽可能只返回翻译后的内容和代码结构,不要返回任何其他内容。"
+            },
+            {
+                "role": "user",
+                "content": text
+            }
+        ]
+
+        # Request a streamed response
+        stream = config.client.chat.completions.create(
+            model=MODEL_CONFIG['model_name'],
+            messages=messages,
+            timeout=MODEL_CONFIG['timeout'],
+            stream=True,  # enable streaming
+            temperature=0.3  # lower randomness for more stable translations
+        )
+
+        # Collect the streamed pieces
+        pieces = []
+        for chunk in stream:
+            # Some chunks carry no choices at all; indexing them would raise
+            # IndexError
+            if not chunk.choices:
+                continue
+            content = chunk.choices[0].delta.content
+            if content is not None:
+                pieces.append(content)
+                # Echo the translation as it arrives
+                print(content, end='', flush=True)
+
+        print()  # finish the echoed line
+
+        translated_text = "".join(pieces)
+
+        # Validate the result
+        if not translated_text.strip():
+            raise ValueError("翻译结果为空")
+
+        # Record the successful request
+        translation_stats.update_stats(text, translated_text, True)
+
+        return translated_text
+
+    # APITimeoutError is a subclass of APIError, so it must be caught first
+    # or the timeout branch can never run.
+    except openai.APITimeoutError as e:
+        logging.error(f"OpenAI API超时: {str(e)}")
+        translation_stats.update_stats(text, "", False)
+        raise
+    except openai.APIError as e:
+        logging.error(f"OpenAI API错误: {str(e)}")
+        translation_stats.update_stats(text, "", False)
+        raise
+    except Exception as e:
+        logging.error(f"翻译出错: {str(e)}")
+        translation_stats.update_stats(text, "", False)
+        raise
+
+# Patterns used to locate the <body> content and the <title> text
+_BODY_PATTERN = re.compile(r'<body[^>]*>(.*?)</body>', re.DOTALL)
+_TITLE_PATTERN = re.compile(r'<title>(.*?)</title>', re.DOTALL)
+
+
+def _is_translatable(line):
+    """Return True when a stripped tag line is one that gets translated."""
+    return '<p class' in line or line.startswith('<h')
+
+
+def _read_html(file_path):
+    """Read ``file_path`` as text, trying several encodings in turn."""
+    for encoding in ('utf-8', 'gbk', 'gb2312', 'latin1'):
+        try:
+            with open(file_path, 'r', encoding=encoding) as f:
+                content = f.read()
+        except UnicodeDecodeError:
+            continue
+        logging.info(f"成功使用 {encoding} 编码读取文件: {file_path}")
+        return content
+    raise ValueError(f"无法使用支持的编码读取文件: {file_path}")
+
+
+def _translate_title(content):
+    """Return ``content`` with the text of its first <title> translated."""
+    match = _TITLE_PATTERN.search(content)
+    if not match:
+        return content
+
+    title_content = match.group(1).strip()
+    if not title_content:
+        logging.info("跳过空标题")
+        return content
+
+    logging.info(f"开始翻译标题: {title_content}")
+    translated_title = translate_text(title_content)
+    logging.info(f"标题翻译完成: {translated_title}")
+
+    # Splice by position: a textual replace of the stripped title misses
+    # titles that have surrounding whitespace inside the tag.
+    start, end = match.span(1)
+    return content[:start] + translated_title + content[end:]
+
+
+def _translate_group(group, first_line, last_line):
+    """Translate the translatable lines of ``group``; return the new group.
+
+    The result always has exactly ``len(group)`` entries; lines that are not
+    translated are returned unchanged.
+    """
+    indices = [idx for idx, line in enumerate(group) if _is_translatable(line)]
+    if not indices:
+        return list(group)
+
+    # Send all translatable lines of the group as one request
+    combined_text = "\n".join(group[idx] for idx in indices)
+    logging.info(f"开始翻译第 {first_line}-{last_line} 行")
+    translated_text = translate_text(combined_text)
+
+    # Drop blank lines from the output so they do not occupy a paragraph slot
+    pieces = [piece.strip() for piece in translated_text.split('\n') if piece.strip()]
+    if len(pieces) != len(indices):
+        logging.warning(f"翻译结果行数不一致: 期望 {len(indices)} 行,实际 {len(pieces)} 行")
+
+    translated_group = list(group)
+    for idx, piece in zip(indices, pieces):
+        translated_group[idx] = piece
+    return translated_group
+
+
+def _rebuild_body(body_content, translated_lines):
+    """Rebuild the body text from ``translated_lines``.
+
+    ``translated_lines`` holds one entry for every tag line of the body, in
+    order, so the cursor advances on every tag line, translated or not.
+    """
+    rebuilt = []
+    cursor = 0
+    for raw_line in body_content.split('\n'):
+        line = raw_line.strip()
+        if line.startswith('<'):
+            if cursor < len(translated_lines):
+                line = translated_lines[cursor]
+            cursor += 1
+        # Blank lines and non-tag text are kept as they are (stripped)
+        rebuilt.append(line)
+    return '\n'.join(rebuilt)
+
+
+def process_html_file(file_path, conn):
+    """Translate one HTML file and write the result to the output directory.
+
+    The title is translated first.  The tag lines of the body are then
+    processed in groups of ``line_count`` lines; groups already stored as
+    completed for the current VERSION are reused.  A group that fails is
+    logged, kept in its original form and causes the file to be recorded
+    with status 'error' instead of 'completed'.
+
+    Args:
+        file_path: Path of the HTML file to translate.
+        conn: Open sqlite3 connection used to read completed groups.
+
+    Returns:
+        None.  Exceptions are logged and swallowed; KeyboardInterrupt is
+        re-raised after the progress has been saved.
+    """
+    # Stored progress of this file, if any
+    progress = db_manager.get_file_progress(file_path)
+
+    try:
+        content = _read_html(file_path)
+
+        body_match = _BODY_PATTERN.search(content)
+        if not body_match:
+            logging.warning(f"警告: {file_path} 中没有找到body标签")
+            return
+        body_content = body_match.group(1)
+
+        content = _translate_title(content)
+
+        # Keep every tag line of the body; only some of them are translated
+        lines = [
+            line
+            for line in (raw.strip() for raw in body_content.split('\n'))
+            if line.startswith('<')
+        ]
+        total_lines = len(lines)
+        logging.info(f"文件 {file_path} 共有 {total_lines} 行需要处理")
+
+        # Completed groups keyed by index for direct lookup
+        completed = {row[0]: row[1] for row in get_completed_groups(conn, file_path)}
+
+        # Report stored progress; skip the percentage when the total is 0
+        if progress and progress['total_lines']:
+            progress_percentage = round(progress['processed_lines']*100/progress['total_lines'], 2)
+            logging.info(f"文件 {file_path} 已处理进度: {progress['processed_lines']}/{progress['total_lines']} 行 ({progress_percentage}%)")
+
+        translated_lines = []
+        failed_lines = 0
+        processed_lines = None
+        try:
+            group_starts = range(0, total_lines, line_count)
+            with tqdm(total=len(group_starts), desc=f"处理文件 {os.path.basename(file_path)}", unit="组") as pbar:
+                for group_index, start in enumerate(group_starts):
+                    group = lines[start:start + line_count]
+
+                    # Reuse a stored translation only when it still matches
+                    # the size of the group
+                    cached_text = completed.get(group_index)
+                    if cached_text is not None:
+                        cached_lines = cached_text.split('\n')
+                        if len(cached_lines) == len(group):
+                            translated_lines.extend(cached_lines)
+                            pbar.update(1)
+                            continue
+
+                    try:
+                        translated_group = _translate_group(
+                            group, start + 1, min(start + line_count, total_lines)
+                        )
+
+                        # Store the group and the file progress
+                        db_manager.update_group_progress(
+                            file_path, group_index,
+                            "\n".join(group), "\n".join(translated_group), 'completed'
+                        )
+                        processed_lines = min((group_index + 1) * line_count, total_lines)
+                        db_manager.update_file_progress(file_path, total_lines, processed_lines, 'in_progress')
+
+                        # Show the current statistics
+                        pbar.set_postfix(translation_stats.get_stats())
+
+                        # Short pause to stay below API rate limits
+                        time.sleep(0.1)
+                    except Exception as e:
+                        logging.error(f"处理组 {group_index} 时出错: {str(e)}")
+                        db_manager.log_error(file_path, group_index, "group_processing_error", str(e))
+                        # Keep the original lines so later groups stay aligned
+                        translated_group = group
+                        failed_lines += len(group)
+
+                    translated_lines.extend(translated_group)
+                    pbar.update(1)
+
+            if translated_lines:
+                new_body_content = _rebuild_body(body_content, translated_lines)
+                new_content = content.replace(body_content, new_body_content)
+
+                # Save the translated file
+                output_dir = config.get('paths', 'output_dir')
+                os.makedirs(output_dir, exist_ok=True)
+                output_path = os.path.join(output_dir, os.path.basename(file_path))
+
+                with open(output_path, 'w', encoding='utf-8') as f:
+                    f.write(new_content)
+
+                if failed_lines:
+                    # Not complete: a later run retries the failed groups
+                    db_manager.update_file_progress(file_path, total_lines, total_lines - failed_lines, 'error')
+                    logging.warning(f"文件 {file_path} 有 {failed_lines} 行翻译失败,已保留原文并保存到 {output_path}")
+                else:
+                    db_manager.update_file_progress(file_path, total_lines, total_lines, 'completed')
+                    logging.info(f"文件 {file_path} 翻译完成,已保存到 {output_path}")
+
+                # Final statistics
+                logging.info("\n翻译统计信息:")
+                for key, value in translation_stats.get_stats().items():
+                    logging.info(f"{key}: {value}")
+
+        except KeyboardInterrupt:
+            logging.warning("\n检测到中断,保存当前进度...")
+            if processed_lines is not None:
+                db_manager.update_file_progress(file_path, total_lines, processed_lines, 'interrupted')
+            # Statistics at the time of the interrupt
+            logging.info("\n中断时的统计信息:")
+            for key, value in translation_stats.get_stats().items():
+                logging.info(f"{key}: {value}")
+            raise
+        except Exception as e:
+            logging.error(f"处理文件时出错: {str(e)}")
+            if processed_lines is not None:
+                db_manager.update_file_progress(file_path, total_lines, processed_lines, 'error')
+            raise
+
+    except Exception as e:
+        logging.error(f"读取文件时出错: {str(e)}")
+        return
+
+def main():
+    """Translate every ``.html`` file of the input directory in name order.
+
+    Files whose stored status is 'completed' are skipped.  Progress and
+    statistics are printed after each file.  The database connection is
+    closed and the final statistics are printed on every exit path.
+    """
+    ops_dir = config.get('paths', 'input_dir')
+
+    def is_completed(path):
+        """Return True when the stored status of ``path`` is 'completed'."""
+        row = db_manager.get_file_progress(path)
+        return row is not None and row['status'] == 'completed'
+
+    try:
+        # Report a missing input directory instead of failing with a traceback
+        if not os.path.isdir(ops_dir):
+            print(f"输入目录不存在: {ops_dir}")
+            return
+
+        # Sorted by file name
+        html_files = sorted(f for f in os.listdir(ops_dir) if f.endswith('.html'))
+
+        total_files = len(html_files)
+        print(f"找到 {total_files} 个HTML文件需要处理")
+        print(f"开始时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+
+        # Shared database connection
+        conn = db_manager.get_connection()
+
+        for file_index, filename in enumerate(html_files, 1):
+            file_path = os.path.join(ops_dir, filename)
+            print(f"\n开始处理第 {file_index}/{total_files} 个文件: {filename}")
+            print("-" * 50)
+
+            # Skip files that are already done
+            if is_completed(file_path):
+                print(f"文件 {filename} 已经完成翻译,跳过")
+                continue
+
+            try:
+                process_html_file(file_path, conn)
+                print(f"\n完成第 {file_index}/{total_files} 个文件: {filename}")
+                print("-" * 50)
+            except Exception as e:
+                print(f"\n处理文件 {filename} 时出错: {str(e)}")
+                print("继续处理下一个文件...")
+                continue
+
+            # Overall progress: one query per file seen so far
+            completed_files = sum(
+                1 for f in html_files[:file_index]
+                if is_completed(os.path.join(ops_dir, f))
+            )
+            print(f"\n总体进度: {completed_files}/{total_files} 个文件完成 "
+                  f"({round(completed_files*100/total_files, 2)}%)")
+
+            # Current statistics
+            print("\n当前统计信息:")
+            for key, value in translation_stats.get_stats().items():
+                print(f"{key}: {value}")
+
+            # Short pause between files
+            if file_index < total_files:
+                print("\n等待 5 秒后处理下一个文件...")
+                time.sleep(5)
+
+    except KeyboardInterrupt:
+        print("\n程序被用户中断")
+    finally:
+        db_manager.close()
+        print(f"\n结束时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+        print("\n最终统计信息:")
+        for key, value in translation_stats.get_stats().items():
+            print(f"{key}: {value}")
+
+if __name__ == "__main__":
+    main()