Commit cb07323b83 by max, 2025-06-07 23:07:49 (1 month ago)

+ 0 - 1
.gitignore

@@ -8,4 +8,3 @@ translation_progress.db
 002/.DS_Store
 001/.DS_Store
 .DS_Store
-code/

+ 10 - 0
code/.gitignore

@@ -0,0 +1,10 @@
+*.epub
+*.zip
+*.html
+translation_progress.db
+*.db
+*.log
+translation_progress.db
+002/.DS_Store
+001/.DS_Store
+.DS_Store

+ 39 - 0
code/config.yaml

@@ -0,0 +1,39 @@
+
+# base_url="https://api.chatnio.net/v1",
+# api_key="sk-"
+# base_url="https://api.siliconflow.cn/v1",
+# api_key="sk-"
+database:
+  path: translation_progress.db
+  pool_size: 5
+logging:
+  file: translation.log
+  format: '%(asctime)s - %(levelname)s - %(message)s'
+  level: INFO
+openai:
+  # siliconflow
+  # api_key: sk-
+  # base_url: https://api.siliconflow.cn/v1
+  # deepseek
+  api_key: sk-
+  base_url: https://api.deepseek.com/v1
+  max_concurrent_requests: 5
+  max_retries: 3
+  # siliconflow
+  # model_name: deepseek-ai/DeepSeek-R1
+  # deepseek
+  # model_name: deepseek-chat
+  model_name: deepseek-reasoner
+  retry_delay: 2
+  timeout: 30
+paths:
+  input_dir: 002/Ops
+  output_dir: 002/Ops_translated
+translation:
+  cache_size: 1000
+  error_cooldown: 60
+  error_threshold: 3
+  initial_line_count: 3
+  max_line_count: 10
+  min_line_count: 3
+  success_threshold: 10

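For reference, this file is consumed by the Config class in code/translate_epub.py, which reads it with yaml.safe_load and exposes nested keys through Config.get. A minimal standalone sketch of the same lookup, assuming it is run from the code/ directory with PyYAML installed:

# Minimal sketch: read config.yaml the way Config.load_config does and pull out
# the keys the translator actually uses.
import yaml

with open('config.yaml', 'r', encoding='utf-8') as f:
    cfg = yaml.safe_load(f)

print(cfg['openai']['model_name'])           # deepseek-reasoner
print(cfg['openai']['base_url'])             # https://api.deepseek.com/v1
print(cfg['translation']['max_line_count'])  # 10
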
+ 33 - 0
code/config_v3.yaml

@@ -0,0 +1,33 @@
+database:
+  path: translation_progress.db
+  pool_size: 5
+logging:
+  file: translation.log
+  format: '%(asctime)s - %(levelname)s - %(message)s'
+  level: INFO
+openai:
+  # siliconflow
+  # api_key: sk-
+  # base_url: https://api.siliconflow.cn/v1
+  # deepseek
+  api_key: sk-
+  base_url: https://api.deepseek.com/v1
+  max_concurrent_requests: 5
+  max_retries: 3
+  # siliconflow
+  # model_name: deepseek-ai/DeepSeek-R1
+  # deepseek
+  model_name: deepseek-chat
+  retry_delay: 2
+  timeout: 30
+paths:
+  input_dir: 002/Ops
+  output_dir: 002/Ops_translated
+translation:
+  cache_size: 1000
+  error_cooldown: 60
+  error_threshold: 3
+  initial_line_count: 2
+  max_line_count: 5
+  min_line_count: 1
+  success_threshold: 5

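config_v3.yaml differs from config.yaml mainly in the model (deepseek-chat instead of deepseek-reasoner) and in a smaller, more conservative line-count window. Note that translate_epub.py instantiates Config() with the default config.yaml path, so this file is not wired in automatically. A small sketch for comparing the two files, assuming PyYAML and the code/ directory as working directory:

# Print every leaf key whose value differs between the two config files.
import yaml

def load(path):
    with open(path, 'r', encoding='utf-8') as f:
        return yaml.safe_load(f)

a, b = load('config.yaml'), load('config_v3.yaml')
for section in a:
    for key in a[section]:
        if a[section][key] != b.get(section, {}).get(key):
            print(f"{section}.{key}: {a[section][key]!r} -> {b.get(section, {}).get(key)!r}")
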
+ 26 - 0
code/main.py

@@ -0,0 +1,26 @@
+import openai
+
+client = openai.OpenAI(
+    
+)
+
+# Preset conversation messages
+messages = [
+    {
+        "role": "system",
+        "content": "- 你名为epub翻译大师,专注于将任意语言的文本翻译成中文。- 你在翻译过程中,力求保留原文语意,确保翻译的准确性和完整性。- 你特别注重翻译结果要贴合现代人的阅读习惯,使译文更加流畅易懂。- 在处理包含代码结构的文本时,你会特别注意保持代码的原样,只在必要时提供相应的语言注释。- 你的服务旨在为用户提供高效、便捷的翻译体验,帮助用户跨越语言障碍。- 在回答问题的时候,尽可能保留原来的代码结构。- 输出内容要求用代码块包裹起来"
+    },
+    {
+        "role": "user",
+        "content": """```<p class="p34">It was on the evening of this Saturday that Arthur gathered up his courage and asked Angela to come and walk through the ruins with him. Angela hesitated a little; the shadow of something about to happen had fallen on her mind; but the extraordinary beauty of the evening, to say nothing of the prospect of his company, turned the scale in Arthur&rsquo;s favour.</p>
+<p class="p34">It was one of those nights of which, if we are lucky, we get some five or six in the course of an English summer. The moon was at her full, and, the twilight ended, she filled the heavens with her light. Every twig and blade of grass showed out as clearly as in the day, but looked like frosted silver. The silence was intense, and so still was the air that the sharp shadows of the trees were motionless upon the grass, only growing with the growing hours. It was one of those nights that fill us with an indescribable emotion, bringing us into closer companionship with the unseen than ever does the garish, busy day. In such an hour, we can sometimes feel, or think that we can feel, other presences around us, and involuntarily we listen for the whisper of the wings and the half-forgotten voices of our beloved.</p>
+<p class="p34">On this particular evening some such feeling was stirring in Angela&rsquo;s heart as with slow steps she led the way into the little village churchyard, a similar spot to that which is to be found in many a country parish, except that, the population being very small, there were but few recent graves. Most of the mounds had no head-stones to recall the names of the neglected dead, but here and there were dotted discoloured slabs, some sunk a foot or two into the soil, a few lying prone upon it, and the remainder thrown by the gradual subsidence of their supports into every variety of angle, as though they had been suddenly halted in the maddest whirl of a grotesque dance of death.</p>```"""
+    }
+]
+
+response = client.chat.completions.create(
+    model="Qwen/Qwen3-32B",
+    messages=messages
+)
+
+print(response.choices[0].message.content)

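main.py constructs openai.OpenAI() with no arguments, so the client falls back to the OPENAI_API_KEY and OPENAI_BASE_URL environment variables; with the Qwen/Qwen3-32B model, the other scripts in this commit point at the SiliconFlow endpoint instead. A hedged sketch of wiring that up explicitly (SILICONFLOW_API_KEY is an assumed variable name, not part of the commit):

# Sketch: explicit client construction matching the SiliconFlow endpoint used in
# translate_epub_v1.py / translate_epub_v2.py. Keep real keys out of source control.
import os
import openai

client = openai.OpenAI(
    base_url="https://api.siliconflow.cn/v1",
    api_key=os.environ["SILICONFLOW_API_KEY"],  # assumed environment variable
)
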
+ 6 - 0
code/requirements.txt

@@ -0,0 +1,6 @@
+beautifulsoup4==4.13.4
+openai==1.84.0
+tqdm==4.66.2
+PyYAML
+tenacity
+aiohttp

+ 989 - 0
code/translate_epub.py

@@ -0,0 +1,989 @@
+import os
+import re
+from bs4 import BeautifulSoup
+import openai
+import time
+from tqdm import tqdm
+import sqlite3
+import json
+from datetime import datetime
+import logging
+from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
+import asyncio
+import aiohttp
+from concurrent.futures import ThreadPoolExecutor
+from functools import lru_cache
+import hashlib
+import yaml
+from pathlib import Path
+import multiprocessing
+from multiprocessing import Pool, Manager, Lock
+import queue
+
+# Configuration management
+class Config:
+    def __init__(self, config_path='config.yaml'):
+        self.config_path = config_path
+        self.config = self.load_config()
+        
+        # Set up logging
+        self.setup_logging()
+        
+        # Initialize the OpenAI client
+        self.setup_openai()
+    
+    def load_config(self):
+        """加载配置文件"""
+        if not os.path.exists(self.config_path):
+            # Create a default configuration
+            default_config = {
+                'logging': {
+                    'level': 'INFO',
+                    'format': '%(asctime)s - %(levelname)s - %(message)s',
+                    'file': 'translation.log'
+                },
+                'openai': {
+                    'base_url': 'https://api.siliconflow.cn/v1',
+                    'api_key': 'sk-',
+                    'model_name': 'deepseek-ai/DeepSeek-R1',
+                    'max_retries': 3,
+                    'retry_delay': 2,
+                    'timeout': 30,
+                    'max_concurrent_requests': 5
+                },
+                'translation': {
+                    'min_line_count': 1,
+                    'max_line_count': 5,
+                    'initial_line_count': 2,
+                    'error_threshold': 3,
+                    'success_threshold': 5,
+                    'error_cooldown': 60,
+                    'cache_size': 1000
+                },
+                'database': {
+                    'path': 'translation_progress.db',
+                    'pool_size': 5
+                },
+                'paths': {
+                    'input_dir': '002/Ops',
+                    'output_dir': '002/Ops_translated'
+                }
+            }
+            
+            # Save the default configuration
+            with open(self.config_path, 'w', encoding='utf-8') as f:
+                yaml.dump(default_config, f, allow_unicode=True)
+            
+            return default_config
+        
+        # Load the existing configuration
+        with open(self.config_path, 'r', encoding='utf-8') as f:
+            return yaml.safe_load(f)
+    
+    def setup_logging(self):
+        """设置日志"""
+        logging.basicConfig(
+            level=getattr(logging, self.config['logging']['level']),
+            format=self.config['logging']['format'],
+            handlers=[
+                logging.FileHandler(self.config['logging']['file']),
+                logging.StreamHandler()
+            ]
+        )
+    
+    def setup_openai(self):
+        """设置OpenAI客户端"""
+        self.client = openai.OpenAI(
+            base_url=self.config['openai']['base_url'],
+            api_key=self.config['openai']['api_key']
+        )
+    
+    def get(self, *keys):
+        """获取配置值"""
+        value = self.config
+        for key in keys:
+            value = value[key]
+        return value
+    
+    def update(self, updates):
+        """更新配置"""
+        def deep_update(d, u):
+            for k, v in u.items():
+                if isinstance(v, dict):
+                    d[k] = deep_update(d.get(k, {}), v)
+                else:
+                    d[k] = v
+            return d
+        
+        self.config = deep_update(self.config, updates)
+        
+        # Save the updated configuration
+        with open(self.config_path, 'w', encoding='utf-8') as f:
+            yaml.dump(self.config, f, allow_unicode=True)
+        
+        # Re-initialize logging and the OpenAI client
+        self.setup_logging()
+        self.setup_openai()
+
+# Create the global configuration instance
+config = Config()
+
+# Update global variables
+MODEL_CONFIG = {
+    "model_name": config.get('openai', 'model_name'),
+    "max_retries": config.get('openai', 'max_retries'),
+    "retry_delay": config.get('openai', 'retry_delay'),
+    "timeout": config.get('openai', 'timeout'),
+    "max_concurrent_requests": config.get('openai', 'max_concurrent_requests'),
+    "cache_size": config.get('translation', 'cache_size')
+}
+
+MIN_LINE_COUNT = config.get('translation', 'min_line_count')
+MAX_LINE_COUNT = config.get('translation', 'max_line_count')
+INITIAL_LINE_COUNT = config.get('translation', 'initial_line_count')
+ERROR_THRESHOLD = config.get('translation', 'error_threshold')
+SUCCESS_THRESHOLD = config.get('translation', 'success_threshold')
+
+# Initialization parameters for the other classes
+class LineCountManager:
+    def __init__(self):
+        self.current_line_count = INITIAL_LINE_COUNT
+        self.consecutive_errors = 0
+        self.consecutive_successes = 0
+        self.last_error_time = None
+        self.error_cooldown = config.get('translation', 'error_cooldown')
+        self.version = f"1.0.{INITIAL_LINE_COUNT}"
+        self.error_history = []
+    
+    def adjust_line_count(self, success):
+        """根据翻译结果调整行数"""
+        current_time = time.time()
+        
+        # Check whether we are still within the cooldown period
+        if self.last_error_time and (current_time - self.last_error_time) < self.error_cooldown:
+            return self.current_line_count
+        
+        if success:
+            self.consecutive_errors = 0
+            self.consecutive_successes = 0  # Reset the success counter, but do not increase the line count
+        else:
+            self.consecutive_successes = 0
+            self.consecutive_errors += 1
+            self.last_error_time = current_time
+            
+            # Record the error
+            self.error_history.append({
+                'time': current_time,
+                'line_count': self.current_line_count
+            })
+            
+            # If consecutive errors reach the threshold, reduce the line count
+            if self.consecutive_errors >= ERROR_THRESHOLD:
+                if self.current_line_count > MIN_LINE_COUNT:
+                    self.current_line_count -= 1
+                    self.consecutive_errors = 0
+                    self.version = f"1.0.{self.current_line_count}"
+                    logging.warning(f"翻译连续失败,减少行数到 {self.current_line_count},版本更新为 {self.version}")
+        
+        return self.current_line_count
+    
+    def get_error_stats(self):
+        """获取错误统计信息"""
+        if not self.error_history:
+            return "无错误记录"
+        
+        recent_errors = [e for e in self.error_history if time.time() - e['time'] < 3600]  # errors within the last hour
+        return {
+            "总错误数": len(self.error_history),
+            "最近一小时错误数": len(recent_errors),
+            "当前行数": self.current_line_count,
+            "连续错误": self.consecutive_errors,
+            "连续成功": self.consecutive_successes
+        }
+
+class DatabaseManager:
+    def __init__(self):
+        self.db_path = config.get('database', 'path')
+        self.conn = None
+        self.init_db()
+    
+    def get_connection(self):
+        """获取数据库连接"""
+        if self.conn is None:
+            self.conn = sqlite3.connect(self.db_path)
+            self.conn.row_factory = sqlite3.Row
+        return self.conn
+    
+    def close(self):
+        """关闭数据库连接"""
+        if self.conn:
+            self.conn.close()
+            self.conn = None
+    
+    def init_db(self):
+        """初始化数据库"""
+        conn = self.get_connection()
+        c = conn.cursor()
+        
+        # Create the file progress table
+        c.execute('''
+            CREATE TABLE IF NOT EXISTS file_progress (
+                file_path TEXT PRIMARY KEY,
+                total_lines INTEGER,
+                processed_lines INTEGER,
+                status TEXT,
+                version TEXT,
+                last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                error_count INTEGER DEFAULT 0,
+                retry_count INTEGER DEFAULT 0
+            )
+        ''')
+        
+        # Create the translation group progress table
+        c.execute('''
+            CREATE TABLE IF NOT EXISTS group_progress (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                file_path TEXT,
+                group_index INTEGER,
+                original_text TEXT,
+                translated_text TEXT,
+                status TEXT,
+                version TEXT,
+                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                error_count INTEGER DEFAULT 0,
+                retry_count INTEGER DEFAULT 0,
+                UNIQUE(file_path, group_index, version)
+            )
+        ''')
+        
+        # Create the error log table
+        c.execute('''
+            CREATE TABLE IF NOT EXISTS error_log (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                file_path TEXT,
+                group_index INTEGER,
+                error_type TEXT,
+                error_message TEXT,
+                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                resolved_at TIMESTAMP,
+                resolution TEXT
+            )
+        ''')
+        
+        conn.commit()
+    
+    def begin_transaction(self):
+        """开始事务"""
+        self.get_connection().execute('BEGIN TRANSACTION')
+    
+    def commit_transaction(self):
+        """提交事务"""
+        self.get_connection().commit()
+    
+    def rollback_transaction(self):
+        """回滚事务"""
+        self.get_connection().rollback()
+    
+    def get_file_progress(self, file_path):
+        """获取文件翻译进度"""
+        c = self.get_connection().cursor()
+        c.execute('SELECT * FROM file_progress WHERE file_path = ?', (file_path,))
+        return c.fetchone()
+    
+    def update_file_progress(self, file_path, total_lines, processed_lines, status):
+        """更新文件翻译进度"""
+        c = self.get_connection().cursor()
+        c.execute('''
+            INSERT OR REPLACE INTO file_progress 
+            (file_path, total_lines, processed_lines, status, version, last_updated)
+            VALUES (?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
+        ''', (file_path, total_lines, processed_lines, status, line_count_manager.version))
+        self.get_connection().commit()
+    
+    def get_group_progress(self, file_path, group_index):
+        """获取翻译组进度"""
+        c = self.get_connection().cursor()
+        c.execute('''
+            SELECT * FROM group_progress 
+            WHERE file_path = ? AND group_index = ? AND version = ?
+        ''', (file_path, group_index, line_count_manager.version))
+        return c.fetchone()
+    
+    def update_group_progress(self, file_path, group_index, original_text, translated_text, status):
+        """更新翻译组进度"""
+        c = self.get_connection().cursor()
+        c.execute('''
+            INSERT OR REPLACE INTO group_progress 
+            (file_path, group_index, original_text, translated_text, status, version, updated_at)
+            VALUES (?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
+        ''', (file_path, group_index, original_text, translated_text, status, line_count_manager.version))
+        self.get_connection().commit()
+    
+    def log_error(self, file_path, group_index, error_type, error_message):
+        """记录错误"""
+        c = self.get_connection().cursor()
+        c.execute('''
+            INSERT INTO error_log 
+            (file_path, group_index, error_type, error_message)
+            VALUES (?, ?, ?, ?)
+        ''', (file_path, group_index, error_type, error_message))
+        self.get_connection().commit()
+    
+    def get_error_stats(self):
+        """获取错误统计信息"""
+        c = self.get_connection().cursor()
+        c.execute('''
+            SELECT 
+                COUNT(*) as total_errors,
+                COUNT(CASE WHEN resolved_at IS NULL THEN 1 END) as unresolved_errors,
+                COUNT(CASE WHEN created_at > datetime('now', '-1 hour') THEN 1 END) as recent_errors
+            FROM error_log
+        ''')
+        return c.fetchone()
+
+class AsyncTranslationManager:
+    def __init__(self):
+        self.semaphore = asyncio.Semaphore(config.get('openai', 'max_concurrent_requests'))
+        self.session = None
+
+class TranslationCache:
+    def __init__(self):
+        self.cache = {}
+        self.max_size = config.get('translation', 'cache_size')
+        self.hits = 0
+        self.misses = 0
+
+# Create global instances
+line_count_manager = LineCountManager()
+db_manager = DatabaseManager()
+async_translation_manager = AsyncTranslationManager()
+translation_cache = TranslationCache()
+
+# Version control
+VERSION = "1.0.1" # Version string used to distinguish different translation runs
+line_count = 2 # Lines per group; larger is faster but more error-prone
+
+class TranslationStats:
+    def __init__(self):
+        self.start_time = time.time()
+        self.total_chars = 0
+        self.translated_chars = 0
+        self.total_requests = 0
+        self.successful_requests = 0
+        self.failed_requests = 0
+    
+    def update_stats(self, original_text, translated_text, success=True):
+        self.total_chars += len(original_text)
+        self.translated_chars += len(translated_text)
+        self.total_requests += 1
+        if success:
+            self.successful_requests += 1
+        else:
+            self.failed_requests += 1
+    
+    def get_stats(self):
+        elapsed_time = time.time() - self.start_time
+        chars_per_second = self.translated_chars / elapsed_time if elapsed_time > 0 else 0
+        success_rate = (self.successful_requests / self.total_requests * 100) if self.total_requests > 0 else 0
+        
+        return {
+            "总字符数": self.total_chars,
+            "已翻译字符数": self.translated_chars,
+            "翻译速度": f"{chars_per_second:.2f} 字符/秒",
+            "成功率": f"{success_rate:.1f}%",
+            "总请求数": self.total_requests,
+            "成功请求": self.successful_requests,
+            "失败请求": self.failed_requests,
+            "运行时间": f"{elapsed_time:.1f} 秒"
+        }
+    
+    def to_dict(self):
+        """返回可序列化的字典"""
+        return {
+            "total_chars": self.total_chars,
+            "translated_chars": self.translated_chars,
+            "total_requests": self.total_requests,
+            "successful_requests": self.successful_requests,
+            "failed_requests": self.failed_requests,
+            "elapsed_time": time.time() - self.start_time
+        }
+    
+    @classmethod
+    def from_dict(cls, data):
+        """从字典创建实例"""
+        stats = cls()
+        stats.total_chars = data.get("total_chars", 0)
+        stats.translated_chars = data.get("translated_chars", 0)
+        stats.total_requests = data.get("total_requests", 0)
+        stats.successful_requests = data.get("successful_requests", 0)
+        stats.failed_requests = data.get("failed_requests", 0)
+        stats.start_time = time.time() - data.get("elapsed_time", 0)
+        return stats
+
+# Create the global stats object
+translation_stats = TranslationStats()
+
+def get_completed_groups(conn, file_path):
+    """获取已完成的翻译组"""
+    c = conn.cursor()
+    c.execute('''
+        SELECT group_index, translated_text 
+        FROM group_progress 
+        WHERE file_path = ? AND status = 'completed' AND version = ?
+        ORDER BY group_index
+    ''', (file_path, line_count_manager.version))
+    return c.fetchall()
+
+# """ - 输出内容要求用代码块包裹起来
+# ,只在必要时提供相应的语言注释
+#  """
+@retry(
+    stop=stop_after_attempt(MODEL_CONFIG['max_retries']),
+    wait=wait_exponential(multiplier=1, min=4, max=10),
+    retry=retry_if_exception_type((openai.APIError, openai.APITimeoutError)),
+    before_sleep=lambda retry_state: logging.warning(f"重试第 {retry_state.attempt_number} 次...")
+)
+def translate_text(text):
+    """翻译文本,使用流式输出"""
+    try:
+        messages = [
+            {
+                "role": "system",
+                "content": "- 你名为epub翻译大师,专注于将任意语言的文本翻译成中文。- 你在翻译过程中,力求保留原文语意,确保翻译的准确性和完整性。- 你特别注重翻译结果要贴合现代人的阅读习惯,使译文更加流畅易懂。- 在处理包含代码结构的文本时,你会特别注意保持代码的原样。- 你的服务旨在为用户提供高效、便捷的翻译体验,帮助用户跨越语言障碍。- 在回答问题的时候,尽可能保留原来的代码结构。- 在回答问题的时候,尽可能只返回翻译后的内容和代码结构,不要返回任何其他内容。- 在任何情况下都不要返回 翻译说明,不要返回任何其他内容。"
+            },
+            {
+                "role": "user",
+                "content": text
+            }
+        ]
+        
+        # Use streaming output
+        stream = config.client.chat.completions.create(
+            model=MODEL_CONFIG['model_name'],
+            messages=messages,
+            timeout=MODEL_CONFIG['timeout'],
+            stream=True  # enable streaming output
+        )
+        
+        # Collect the streamed content
+        translated_text = ""
+        for chunk in stream:
+            if chunk.choices[0].delta.content is not None:
+                content = chunk.choices[0].delta.content
+                translated_text += content
+                # Print the translated content in real time
+                print(content, end='', flush=True)
+        
+        print()  # newline
+        line_count_manager.adjust_line_count(True)
+        
+        # Update statistics
+        if hasattr(process_files_batch, 'process_stats'):
+            process_files_batch.process_stats.update_stats(text, translated_text, True)
+        
+        return translated_text
+        
+    except Exception as e:
+        logging.error(f"翻译出错: {str(e)}")
+        line_count_manager.adjust_line_count(False)
+        
+        # Update statistics
+        if hasattr(process_files_batch, 'process_stats'):
+            process_files_batch.process_stats.update_stats(text, "", False)
+        
+        raise
+
+def process_html_file(file_path, conn):
+    """处理HTML文件"""
+    # 检查文件进度
+    progress = db_manager.get_file_progress(file_path)
+    
+    try:
+        # Try reading the file with several encodings
+        encodings = ['utf-8', 'gbk', 'gb2312', 'latin1']
+        content = None
+        
+        for encoding in encodings:
+            try:
+                with open(file_path, 'r', encoding=encoding) as f:
+                    content = f.read()
+                break
+            except UnicodeDecodeError:
+                continue
+        
+        if content is None:
+            raise Exception(f"无法使用支持的编码读取文件: {file_path}")
+    
+        # Extract the contents of the body tag with a regex
+        body_pattern = re.compile(r'<body[^>]*>(.*?)</body>', re.DOTALL)
+        body_match = body_pattern.search(content)
+        
+        if not body_match:
+            print(f"警告: {file_path} 中没有找到body标签")
+            return
+        
+        body_content = body_match.group(1)
+        
+        # Split into lines, keep all HTML tag lines, but only translate lines containing <p class
+        lines = []
+        for line in body_content.split('\n'):
+            line = line.strip()
+            if line and line.startswith('<'):
+                lines.append(line)
+        
+        total_lines = len(lines)
+        
+        # Fetch the translation groups that are already completed
+        completed_groups = get_completed_groups(conn, file_path)
+        completed_indices = {group[0] for group in completed_groups}
+        
+        # Report the already-processed progress
+        if progress:
+            print(f"文件 {file_path} 已处理进度: {progress[2]}/{progress[1]} 行 ({round(progress[2]*100/progress[1], 2)}%)")
+        
+        # Process the content in groups
+        translated_lines = []
+        try:
+            with tqdm(range(0, len(lines), line_count_manager.current_line_count), 
+                     desc=f"处理文件 {os.path.basename(file_path)}", 
+                     unit="组") as pbar:
+                for i in pbar:
+                    group_index = i // line_count_manager.current_line_count
+                    
+                    # 检查是否已完成
+                    if group_index in completed_indices:
+                        # 使用已完成的翻译
+                        for group in completed_groups:
+                            if group[0] == group_index:
+                                translated_lines.extend(group[1].split('\n'))
+                                break
+                        continue
+                    
+                    group = lines[i:i+line_count_manager.current_line_count]
+                    if group:
+                        # 保存原始文本
+                        original_text = "\n".join(group)
+                        
+                        # 收集需要翻译的段落
+                        paragraphs_to_translate = []
+                        paragraph_indices = []
+                        for idx, line in enumerate(group):
+                            if '<p class' in line or line.startswith('<h'):
+                                paragraphs_to_translate.append(line)
+                                paragraph_indices.append(idx)
+                        
+                        # 如果有需要翻译的段落,进行翻译
+                        if paragraphs_to_translate:
+                            translated_paragraphs = []
+                            for paragraph in paragraphs_to_translate:
+                                print(f"\n翻译段落 {len(translated_paragraphs) + 1}/{len(paragraphs_to_translate)}:")
+                                translated_paragraph = translate_text(paragraph)
+                                translated_paragraphs.append(translated_paragraph)
+                            
+                            # 将翻译后的段落放回原位置
+                            translated_group = group.copy()
+                            for idx, translated in zip(paragraph_indices, translated_paragraphs):
+                                translated_group[idx] = translated
+                        else:
+                            translated_group = group
+                        
+                        translated_text = "\n".join(translated_group)
+                        
+                        # 更新翻译组进度
+                        db_manager.update_group_progress(file_path, group_index, original_text, translated_text, 'completed')
+                        
+                        # 分割翻译后的文本
+                        translated_lines.extend(translated_group)
+                        
+                        # 更新文件进度
+                        processed_lines = min((group_index + 1) * line_count_manager.current_line_count, total_lines)
+                        db_manager.update_file_progress(file_path, total_lines, processed_lines, 'in_progress')
+                        
+                        # 显示当前统计信息
+                        stats = translation_stats.get_stats()
+                        pbar.set_postfix(stats)
+                        
+                        # 添加较小的延迟以避免API限制
+                        time.sleep(0.1)  # 减少延迟时间
+            
+            # 替换原始内容
+            if translated_lines:
+                # 构建新的body内容
+                new_body_content = []
+                current_index = 0
+                
+                # 遍历原始内容,替换需要翻译的部分
+                for line in body_content.split('\n'):
+                    line = line.strip()
+                    if not line:
+                        new_body_content.append('')
+                        continue
+                        
+                    if line.startswith('<'):
+                        # translated_lines parallels the filtered tag-line list, so consume
+                        # one entry per tag line (translated entries replace the originals)
+                        if current_index < len(translated_lines):
+                            new_body_content.append(translated_lines[current_index])
+                        else:
+                            new_body_content.append(line)
+                        current_index += 1
+                    else:
+                        # Keep non-HTML content unchanged
+                        new_body_content.append(line)
+                
+                # 将新内容重新组合
+                new_body_content = '\n'.join(new_body_content)
+                
+                # 替换原始内容中的body部分
+                new_content = content.replace(body_content, new_body_content)
+                
+                # 保存修改后的文件
+                output_dir = config.get('paths', 'output_dir')
+                os.makedirs(output_dir, exist_ok=True)
+                output_path = os.path.join(output_dir, os.path.basename(file_path))
+                
+                with open(output_path, 'w', encoding='utf-8') as f:
+                    f.write(new_content)
+                
+                # 更新完成状态
+                db_manager.update_file_progress(file_path, total_lines, total_lines, 'completed')
+                print(f"文件 {file_path} 翻译完成,已保存到 {output_path}")
+                
+                # 显示最终统计信息
+                print("\n翻译统计信息:")
+                for key, value in translation_stats.get_stats().items():
+                    print(f"{key}: {value}")
+                
+        except KeyboardInterrupt:
+            print("\n检测到中断,保存当前进度...")
+            if 'processed_lines' in locals():
+                db_manager.update_file_progress(file_path, total_lines, processed_lines, 'interrupted')
+            # 显示中断时的统计信息
+            print("\n中断时的统计信息:")
+            for key, value in translation_stats.get_stats().items():
+                print(f"{key}: {value}")
+            raise
+        except Exception as e:
+            print(f"处理文件时出错: {str(e)}")
+            if 'processed_lines' in locals():
+                db_manager.update_file_progress(file_path, total_lines, processed_lines, 'error')
+            raise
+            
+    except Exception as e:
+        print(f"读取文件时出错: {str(e)}")
+        return
+
+def process_files_batch(file_batch, process_id):
+    """处理一批文件的函数,用于多进程执行"""
+    try:
+        # Create an independent database connection for this process
+        process_db = DatabaseManager()
+        conn = process_db.get_connection()
+        
+        # Create process-level statistics
+        process_stats = TranslationStats()
+        
+        def translate_with_stats(text):
+            """包装翻译函数以收集统计信息"""
+            try:
+                messages = [
+                    {
+                        "role": "system",
+                        "content": "- 你名为epub翻译大师,专注于将任意语言的文本翻译成中文。- 你在翻译过程中,力求保留原文语意,确保翻译的准确性和完整性。- 你特别注重翻译结果要贴合现代人的阅读习惯,使译文更加流畅易懂。- 在处理包含代码结构的文本时,你会特别注意保持代码的原样。- 你的服务旨在为用户提供高效、便捷的翻译体验,帮助用户跨越语言障碍。- 在回答问题的时候,尽可能保留原来的代码结构。- 在回答问题的时候,尽可能只返回翻译后的内容和代码结构,不要返回任何其他内容。- 在任何情况下都不要返回 翻译说明,不要返回任何其他内容。"
+                    },
+                    {
+                        "role": "user",
+                        "content": text
+                    }
+                ]
+                
+                # 使用流式输出
+                stream = config.client.chat.completions.create(
+                    model=MODEL_CONFIG['model_name'],
+                    messages=messages,
+                    timeout=MODEL_CONFIG['timeout'],
+                    stream=True  # 启用流式输出
+                )
+                
+                # 收集流式输出的内容
+                translated_text = ""
+                for chunk in stream:
+                    if chunk.choices[0].delta.content is not None:
+                        content = chunk.choices[0].delta.content
+                        translated_text += content
+                        # 实时打印翻译内容
+                        print(content, end='', flush=True)
+                
+                print()  # 换行
+                line_count_manager.adjust_line_count(True)
+                
+                # 更新统计信息
+                process_stats.update_stats(text, translated_text, True)
+                return translated_text
+                
+            except Exception as e:
+                logging.error(f"翻译出错: {str(e)}")
+                line_count_manager.adjust_line_count(False)
+                
+                # 更新统计信息
+                process_stats.update_stats(text, "", False)
+                raise
+        
+        for filename in tqdm(file_batch, desc=f"进程 {process_id} 处理文件", unit="文件"):
+            file_path = os.path.join(config.get('paths', 'input_dir'), filename)
+            
+            # Same flow as process_html_file, but using the wrapped translation function
+            try:
+                # 尝试不同的编码方式读取文件
+                encodings = ['utf-8', 'gbk', 'gb2312', 'latin1']
+                content = None
+                
+                for encoding in encodings:
+                    try:
+                        with open(file_path, 'r', encoding=encoding) as f:
+                            content = f.read()
+                        break
+                    except UnicodeDecodeError:
+                        continue
+                
+                if content is None:
+                    raise Exception(f"无法使用支持的编码读取文件: {file_path}")
+            
+                # 使用正则表达式提取body标签内的内容
+                body_pattern = re.compile(r'<body[^>]*>(.*?)</body>', re.DOTALL)
+                body_match = body_pattern.search(content)
+                
+                if not body_match:
+                    print(f"警告: {file_path} 中没有找到body标签")
+                    continue
+                
+                body_content = body_match.group(1)
+                
+                # 按行分割内容,保留所有HTML标签行,但只翻译包含 <p class 的行
+                lines = []
+                for line in body_content.split('\n'):
+                    line = line.strip()
+                    if line and line.startswith('<'):
+                        lines.append(line)
+                
+                total_lines = len(lines)
+                
+                # 获取已完成的翻译组
+                completed_groups = get_completed_groups(conn, file_path)
+                completed_indices = {group[0] for group in completed_groups}
+                
+                # 计算已处理的进度
+                progress = db_manager.get_file_progress(file_path)
+                if progress:
+                    print(f"文件 {file_path} 已处理进度: {progress[2]}/{progress[1]} 行 ({round(progress[2]*100/progress[1], 2)}%)")
+                
+                # 按组处理内容
+                translated_lines = []
+                with tqdm(range(0, len(lines), line_count_manager.current_line_count), 
+                         desc=f"处理文件 {os.path.basename(file_path)}", 
+                         unit="组") as pbar:
+                    for i in pbar:
+                        group_index = i // line_count_manager.current_line_count
+                        
+                        # 检查是否已完成
+                        if group_index in completed_indices:
+                            # 使用已完成的翻译
+                            for group in completed_groups:
+                                if group[0] == group_index:
+                                    translated_lines.extend(group[1].split('\n'))
+                                    break
+                            continue
+                        
+                        group = lines[i:i+line_count_manager.current_line_count]
+                        if group:
+                            # 保存原始文本
+                            original_text = "\n".join(group)
+                            
+                            # 收集需要翻译的段落
+                            paragraphs_to_translate = []
+                            paragraph_indices = []
+                            for idx, line in enumerate(group):
+                                if '<p class' in line or line.startswith('<h'):
+                                    paragraphs_to_translate.append(line)
+                                    paragraph_indices.append(idx)
+                            
+                            # 如果有需要翻译的段落,进行翻译
+                            if paragraphs_to_translate:
+                                translated_paragraphs = []
+                                for paragraph in paragraphs_to_translate:
+                                    print(f"\n翻译段落 {len(translated_paragraphs) + 1}/{len(paragraphs_to_translate)}:")
+                                    translated_paragraph = translate_with_stats(paragraph)
+                                    translated_paragraphs.append(translated_paragraph)
+                                
+                                # 将翻译后的段落放回原位置
+                                translated_group = group.copy()
+                                for idx, translated in zip(paragraph_indices, translated_paragraphs):
+                                    translated_group[idx] = translated
+                            else:
+                                translated_group = group
+                            
+                            translated_text = "\n".join(translated_group)
+                            
+                            # 更新翻译组进度
+                            db_manager.update_group_progress(file_path, group_index, original_text, translated_text, 'completed')
+                            
+                            # 分割翻译后的文本
+                            translated_lines.extend(translated_group)
+                            
+                            # 更新文件进度
+                            processed_lines = min((group_index + 1) * line_count_manager.current_line_count, total_lines)
+                            db_manager.update_file_progress(file_path, total_lines, processed_lines, 'in_progress')
+                            
+                            # 显示当前统计信息
+                            stats = process_stats.get_stats()
+                            pbar.set_postfix(stats)
+                            
+                            # 添加较小的延迟以避免API限制
+                            time.sleep(0.1)  # 减少延迟时间
+                
+                # 替换原始内容
+                if translated_lines:
+                    # 构建新的body内容
+                    new_body_content = []
+                    current_index = 0
+                    
+                    # 遍历原始内容,替换需要翻译的部分
+                    for line in body_content.split('\n'):
+                        line = line.strip()
+                        if not line:
+                            new_body_content.append('')
+                            continue
+                            
+                        if line.startswith('<'):
+                            # translated_lines parallels the filtered tag-line list, so consume
+                            # one entry per tag line (translated entries replace the originals)
+                            if current_index < len(translated_lines):
+                                new_body_content.append(translated_lines[current_index])
+                            else:
+                                new_body_content.append(line)
+                            current_index += 1
+                        else:
+                            # Keep non-HTML content unchanged
+                            new_body_content.append(line)
+                    
+                    # 将新内容重新组合
+                    new_body_content = '\n'.join(new_body_content)
+                    
+                    # 替换原始内容中的body部分
+                    new_content = content.replace(body_content, new_body_content)
+                    
+                    # 保存修改后的文件
+                    output_dir = config.get('paths', 'output_dir')
+                    os.makedirs(output_dir, exist_ok=True)
+                    output_path = os.path.join(output_dir, os.path.basename(file_path))
+                    
+                    with open(output_path, 'w', encoding='utf-8') as f:
+                        f.write(new_content)
+                    
+                    # 更新完成状态
+                    db_manager.update_file_progress(file_path, total_lines, total_lines, 'completed')
+                    print(f"文件 {file_path} 翻译完成,已保存到 {output_path}")
+                    
+                    # 显示最终统计信息
+                    print("\n翻译统计信息:")
+                    for key, value in process_stats.get_stats().items():
+                        print(f"{key}: {value}")
+                    
+            except Exception as e:
+                print(f"处理文件时出错: {str(e)}")
+                if 'processed_lines' in locals():
+                    db_manager.update_file_progress(file_path, total_lines, processed_lines, 'error')
+                raise
+        
+        # Return this process's statistics
+        return process_stats.to_dict()
+            
+    except Exception as e:
+        logging.error(f"进程 {process_id} 发生错误: {str(e)}")
+        return None
+    finally:
+        if 'process_db' in locals():
+            process_db.close()
+
+def aggregate_stats(stats_list):
+    """聚合所有进程的统计信息"""
+    aggregated_stats = {
+        "total_chars": 0,
+        "translated_chars": 0,
+        "total_requests": 0,
+        "successful_requests": 0,
+        "failed_requests": 0,
+        "elapsed_time": 0
+    }
+    
+    for stats in stats_list:
+        if not stats:
+            continue
+            
+        for key in aggregated_stats:
+            if key in stats:
+                if key == "elapsed_time":
+                    aggregated_stats[key] = max(aggregated_stats[key], stats[key])
+                else:
+                    aggregated_stats[key] += stats[key]
+    
+    # Build a stats object and format the output
+    final_stats = TranslationStats.from_dict(aggregated_stats)
+    return final_stats.get_stats()
+
+def main():
+    # Number of worker processes
+    num_processes = 2
+    
+    # Collect all HTML files from the input directory
+    input_dir = config.get('paths', 'input_dir')
+    html_files = [f for f in os.listdir(input_dir) if f.endswith('.html')]
+    
+    print(f"找到 {len(html_files)} 个HTML文件需要处理")
+    print(f"开始时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    
+    # Cap the process count at the number of files
+    num_processes = min(num_processes, len(html_files))
+    
+    # Split the file list into batches
+    if num_processes > 0:
+        batch_size = max(1, len(html_files) // num_processes)
+        file_batches = [html_files[i:i + batch_size] for i in range(0, len(html_files), batch_size)]
+    else:
+        print("没有找到需要处理的HTML文件")
+        return
+    
+    try:
+        # Create the process pool
+        with Pool(processes=num_processes) as pool:
+            # Launch worker processes and collect the results
+            results = []
+            for i, batch in enumerate(file_batches):
+                result = pool.apply_async(process_files_batch, args=(batch, i+1))
+                results.append(result)
+            
+            # Wait for all workers to finish and gather their results
+            stats_list = [r.get() for r in results]
+            
+    except Exception as e:
+        logging.error(f"进程池执行出错: {str(e)}")
+        stats_list = []
+    
+    # Aggregate the statistics
+    final_stats = aggregate_stats(stats_list)
+    
+    print(f"\n结束时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    print("\n最终统计信息:")
+    for key, value in final_stats.items():
+        print(f"{key}: {value}")
+
+if __name__ == "__main__":
+    # Set the multiprocessing start method
+    multiprocessing.set_start_method('spawn')
+    # Configure logging
+    logging.basicConfig(
+        level=logging.INFO,
+        format='%(asctime)s - %(levelname)s - %(message)s'
+    )
+    main() 

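translate_epub.py checkpoints its work in the SQLite database configured under database.path, using the file_progress and group_progress tables created by DatabaseManager.init_db. A minimal sketch for inspecting that progress outside the translator, assuming translation_progress.db sits in the current directory:

# Query the per-file progress recorded by DatabaseManager.update_file_progress.
import sqlite3

conn = sqlite3.connect('translation_progress.db')
conn.row_factory = sqlite3.Row
for row in conn.execute(
    'SELECT file_path, processed_lines, total_lines, status, version FROM file_progress'
):
    pct = 100 * row['processed_lines'] / row['total_lines'] if row['total_lines'] else 0
    print(f"{row['file_path']}: {row['status']}, {pct:.1f}% (version {row['version']})")
conn.close()
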
+ 431 - 0
code/translate_epub_v1.py

@@ -0,0 +1,431 @@
+import os
+import re
+from bs4 import BeautifulSoup
+import openai
+import time
+from tqdm import tqdm
+import sqlite3
+import json
+
+# Initialize the OpenAI client
+client = openai.OpenAI(
+    # chatnio
+    # base_url="https://api.chatnio.net/v1",
+    # api_key="sk-"
+    # deepseek
+    # base_url="https://api.deepseek.com/v1",
+    # api_key="sk-"
+    # Qwen/Qwen3-32B
+    base_url="https://api.siliconflow.cn/v1",
+    api_key="sk-"
+)
+
+# model_name = "Qwen/Qwen3-32B"   # Qwen/Qwen3-32B
+model_name = "deepseek-ai/DeepSeek-R1"   # deepseek-ai/DeepSeek-R1
+# Version control
+VERSION = "1.0.1" # Version string used to distinguish different translation runs
+line_count = 2 # Lines per group; larger is faster but more error-prone
+
+# Auto-adjustment parameters
+MIN_LINE_COUNT = 1
+MAX_LINE_COUNT = 5
+INITIAL_LINE_COUNT = 2
+ERROR_THRESHOLD = 3  # consecutive-error threshold
+SUCCESS_THRESHOLD = 5  # consecutive-success threshold
+
+class LineCountManager:
+    def __init__(self):
+        self.current_line_count = INITIAL_LINE_COUNT
+        self.consecutive_errors = 0
+        self.consecutive_successes = 0
+        self.last_error_time = None
+        self.error_cooldown = 60  # error cooldown in seconds
+        self.version = f"1.0.{INITIAL_LINE_COUNT}"  # initial version string
+    
+    def adjust_line_count(self, success):
+        current_time = time.time()
+        
+        # 检查是否在冷却期内
+        if self.last_error_time and (current_time - self.last_error_time) < self.error_cooldown:
+            return self.current_line_count
+        
+        if success:
+            self.consecutive_errors = 0
+            self.consecutive_successes += 1
+            
+            # 如果连续成功次数达到阈值,尝试增加行数
+            if self.consecutive_successes >= SUCCESS_THRESHOLD:
+                if self.current_line_count < MAX_LINE_COUNT:
+                    self.current_line_count += 1
+                    self.consecutive_successes = 0
+                    self.version = f"1.0.{self.current_line_count}"  # 更新版本号
+                    print(f"翻译连续成功,增加行数到 {self.current_line_count},版本更新为 {self.version}")
+        else:
+            self.consecutive_successes = 0
+            self.consecutive_errors += 1
+            self.last_error_time = current_time
+            
+            # 如果连续错误次数达到阈值,减少行数
+            if self.consecutive_errors >= ERROR_THRESHOLD:
+                if self.current_line_count > MIN_LINE_COUNT:
+                    self.current_line_count -= 1
+                    self.consecutive_errors = 0
+                    self.version = f"1.0.{self.current_line_count}"  # 更新版本号
+                    print(f"翻译连续失败,减少行数到 {self.current_line_count},版本更新为 {self.version}")
+        
+        return self.current_line_count
+
+# Create the global LineCountManager instance
+line_count_manager = LineCountManager()
+
+def init_db():
+    """初始化数据库"""
+    conn = sqlite3.connect('translation_progress.db')
+    c = conn.cursor()
+    
+    # Check whether the database needs migration
+    try:
+        c.execute("SELECT version FROM file_progress LIMIT 1")
+    except sqlite3.OperationalError:
+        # If the table does not exist or lacks the version column, migrate
+        print("正在更新数据库结构...")
+        
+        # Back up the old tables
+        c.execute("ALTER TABLE file_progress RENAME TO file_progress_old")
+        c.execute("ALTER TABLE group_progress RENAME TO group_progress_old")
+        
+        # Create the new tables
+        c.execute('''
+            CREATE TABLE IF NOT EXISTS file_progress (
+                file_path TEXT PRIMARY KEY,
+                total_lines INTEGER,
+                processed_lines INTEGER,
+                status TEXT,
+                version TEXT,
+                last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+            )
+        ''')
+        
+        c.execute('''
+            CREATE TABLE IF NOT EXISTS group_progress (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                file_path TEXT,
+                group_index INTEGER,
+                original_text TEXT,
+                translated_text TEXT,
+                status TEXT,
+                version TEXT,
+                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                UNIQUE(file_path, group_index, version)
+            )
+        ''')
+        
+        # Migrate the data
+        try:
+            c.execute('''
+                INSERT INTO file_progress 
+                (file_path, total_lines, processed_lines, status, version, last_updated)
+                SELECT file_path, total_lines, processed_lines, status, ?, last_updated
+                FROM file_progress_old
+            ''', (line_count_manager.version,))
+            
+            c.execute('''
+                INSERT INTO group_progress 
+                (file_path, group_index, original_text, translated_text, status, version, created_at, updated_at)
+                SELECT file_path, group_index, original_text, translated_text, status, ?, created_at, updated_at
+                FROM group_progress_old
+            ''', (line_count_manager.version,))
+            
+            # Drop the old tables
+            c.execute("DROP TABLE file_progress_old")
+            c.execute("DROP TABLE group_progress_old")
+            
+            print("数据库迁移完成")
+        except sqlite3.OperationalError as e:
+            print(f"迁移数据时出错: {str(e)}")
+            # If migration fails, restore the original tables
+            c.execute("DROP TABLE IF EXISTS file_progress")
+            c.execute("DROP TABLE IF EXISTS group_progress")
+            c.execute("ALTER TABLE file_progress_old RENAME TO file_progress")
+            c.execute("ALTER TABLE group_progress_old RENAME TO group_progress")
+            raise
+    else:
+        # Tables already include the version column; create them only if they do not exist
+        c.execute('''
+            CREATE TABLE IF NOT EXISTS file_progress (
+                file_path TEXT PRIMARY KEY,
+                total_lines INTEGER,
+                processed_lines INTEGER,
+                status TEXT,
+                version TEXT,
+                last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+            )
+        ''')
+        
+        c.execute('''
+            CREATE TABLE IF NOT EXISTS group_progress (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                file_path TEXT,
+                group_index INTEGER,
+                original_text TEXT,
+                translated_text TEXT,
+                status TEXT,
+                version TEXT,
+                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                UNIQUE(file_path, group_index, version)
+            )
+        ''')
+    
+    conn.commit()
+    return conn
+
+def get_file_progress(conn, file_path):
+    """获取文件翻译进度"""
+    c = conn.cursor()
+    c.execute('SELECT * FROM file_progress WHERE file_path = ?', (file_path,))
+    return c.fetchone()
+
+def update_file_progress(conn, file_path, total_lines, processed_lines, status):
+    """更新文件翻译进度"""
+    c = conn.cursor()
+    c.execute('''
+        INSERT OR REPLACE INTO file_progress 
+        (file_path, total_lines, processed_lines, status, version, last_updated)
+        VALUES (?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
+    ''', (file_path, total_lines, processed_lines, status, line_count_manager.version))
+    conn.commit()
+
+def get_group_progress(conn, file_path, group_index):
+    """获取翻译组进度"""
+    c = conn.cursor()
+    c.execute('''
+        SELECT * FROM group_progress 
+        WHERE file_path = ? AND group_index = ?
+    ''', (file_path, group_index))
+    return c.fetchone()
+
+def update_group_progress(conn, file_path, group_index, original_text, translated_text, status):
+    """更新翻译组进度"""
+    c = conn.cursor()
+    c.execute('''
+        INSERT OR REPLACE INTO group_progress 
+        (file_path, group_index, original_text, translated_text, status, version, updated_at)
+        VALUES (?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
+    ''', (file_path, group_index, original_text, translated_text, status, line_count_manager.version))
+    conn.commit()
+
+def get_completed_groups(conn, file_path):
+    """获取已完成的翻译组"""
+    c = conn.cursor()
+    c.execute('''
+        SELECT group_index, translated_text 
+        FROM group_progress 
+        WHERE file_path = ? AND status = 'completed' AND version = ?
+        ORDER BY group_index
+    ''', (file_path, line_count_manager.version))
+    return c.fetchall()
+
+# """ - 输出内容要求用代码块包裹起来
+# ,只在必要时提供相应的语言注释
+#  """
+def translate_text(text, max_retries=3):
+    """翻译文本,添加重试机制"""
+    for attempt in range(max_retries):
+        try:
+            messages = [
+                {
+                    "role": "system",
+                    "content": "- 你名为epub翻译大师,专注于将任意语言的文本翻译成中文。- 你在翻译过程中,力求保留原文语意,确保翻译的准确性和完整性。- 你特别注重翻译结果要贴合现代人的阅读习惯,使译文更加流畅易懂。- 在处理包含代码结构的文本时,你会特别注意保持代码的原样。- 你的服务旨在为用户提供高效、便捷的翻译体验,帮助用户跨越语言障碍。- 在回答问题的时候,尽可能保留原来的代码结构。"
+                },
+                {
+                    "role": "user",
+                    "content": text
+                }
+            ]
+            
+            response = client.chat.completions.create(
+                model=model_name,
+                messages=messages
+            )
+            # 翻译成功,调整行数
+            line_count_manager.adjust_line_count(True)
+            return response.choices[0].message.content
+        except Exception as e:
+            if attempt == max_retries - 1:
+                print(f"翻译失败,已达到最大重试次数: {str(e)}")
+                # 翻译失败,调整行数
+                line_count_manager.adjust_line_count(False)
+                return text
+            print(f"翻译出错,正在重试 ({attempt + 1}/{max_retries}): {str(e)}")
+            time.sleep(2 ** attempt)  # 指数退避
+
+def process_html_file(file_path, conn):
+    """处理HTML文件"""
+    # 检查文件进度
+    progress = get_file_progress(conn, file_path)
+    
+    try:
+        # 尝试不同的编码方式读取文件
+        encodings = ['utf-8', 'gbk', 'gb2312', 'latin1']
+        content = None
+        
+        for encoding in encodings:
+            try:
+                with open(file_path, 'r', encoding=encoding) as f:
+                    content = f.read()
+                break
+            except UnicodeDecodeError:
+                continue
+        
+        if content is None:
+            raise Exception(f"无法使用支持的编码读取文件: {file_path}")
+    
+        # 使用正则表达式提取body标签内的内容
+        body_pattern = re.compile(r'<body[^>]*>(.*?)</body>', re.DOTALL)
+        body_match = body_pattern.search(content)
+        
+        if not body_match:
+            print(f"警告: {file_path} 中没有找到body标签")
+            return
+        
+        body_content = body_match.group(1)
+        
+        # 按行分割内容,保留所有HTML标签行,但只翻译包含 <p class 的行
+        lines = []
+        for line in body_content.split('\n'):
+            line = line.strip()
+            if line and line.startswith('<'):
+                lines.append(line)
+        
+        total_lines = len(lines)
+        
+        # 获取已完成的翻译组
+        completed_groups = get_completed_groups(conn, file_path)
+        completed_indices = {group[0] for group in completed_groups}
+        
+        # 计算已处理的进度
+        if progress:
+            print(f"文件 {file_path} 已处理进度: {progress[2]}/{progress[1]} 行 ({round(progress[2]*100/progress[1], 2)}%)")
+        
+        # 按组处理内容
+        translated_lines = []
+        try:
+            for i in tqdm(range(0, len(lines), line_count_manager.current_line_count), desc=f"处理文件 {os.path.basename(file_path)}", unit="组"):
+                group_index = i // line_count_manager.current_line_count
+                
+                # 检查是否已完成
+                if group_index in completed_indices:
+                    # 使用已完成的翻译
+                    for group in completed_groups:
+                        if group[0] == group_index:
+                            translated_lines.extend(group[1].split('\n'))
+                            break
+                    continue
+                
+                group = lines[i:i+line_count_manager.current_line_count]
+                if group:
+                    # 保存原始文本
+                    original_text = "\n".join(group)
+                    
+                    # 收集需要翻译的段落
+                    paragraphs_to_translate = []
+                    paragraph_indices = []
+                    for idx, line in enumerate(group):
+                        if '<p class' in line:
+                            paragraphs_to_translate.append(line)
+                            paragraph_indices.append(idx)
+                    
+                    # 如果有需要翻译的段落,进行翻译
+                    if paragraphs_to_translate:
+                        translated_paragraphs = []
+                        for paragraph in paragraphs_to_translate:
+                            translated_paragraph = translate_text(paragraph)
+                            translated_paragraphs.append(translated_paragraph)
+                        
+                        # 将翻译后的段落放回原位置
+                        translated_group = group.copy()
+                        for idx, translated in zip(paragraph_indices, translated_paragraphs):
+                            translated_group[idx] = translated
+                    else:
+                        translated_group = group
+                    
+                    translated_text = "\n".join(translated_group)
+                    
+                    # 更新翻译组进度
+                    update_group_progress(conn, file_path, group_index, original_text, translated_text, 'completed')
+                    
+                    # 分割翻译后的文本
+                    translated_lines.extend(translated_group)
+                    
+                    # 更新文件进度
+                    processed_lines = min((group_index + 1) * line_count_manager.current_line_count, total_lines)
+                    update_file_progress(conn, file_path, total_lines, processed_lines, 'in_progress')
+                    
+                    # 添加延迟以避免API限制
+                    time.sleep(0.5)  # 添加适当的延迟
+            
+            # 替换原始内容
+            if translated_lines:
+                # 保持原始内容的顺序和结构
+                new_body_content = body_content
+                current_index = 0
+                
+                # 遍历原始内容,替换需要翻译的部分
+                # translated_lines 与每个以 '<' 开头的标签行一一对应,因此每遇到一个标签行都要推进索引
+                for line in body_content.split('\n'):
+                    line = line.strip()
+                    if line and line.startswith('<'):
+                        if current_index < len(translated_lines):
+                            if '<p class' in line:
+                                # 替换翻译后的内容(只替换第一次出现,避免重复行被覆盖)
+                                new_body_content = new_body_content.replace(line, translated_lines[current_index], 1)
+                            current_index += 1
+                
+                new_content = content.replace(body_content, new_body_content)
+                
+                # 保存修改后的文件
+                with open(file_path, 'w', encoding='utf-8') as f:
+                    f.write(new_content)
+                
+                # 更新完成状态
+                update_file_progress(conn, file_path, total_lines, total_lines, 'completed')
+                print(f"文件 {file_path} 翻译完成")
+                
+        except KeyboardInterrupt:
+            print("\n检测到中断,保存当前进度...")
+            if 'processed_lines' in locals():
+                update_file_progress(conn, file_path, total_lines, processed_lines, 'interrupted')
+            raise
+        except Exception as e:
+            print(f"处理文件时出错: {str(e)}")
+            if 'processed_lines' in locals():
+                update_file_progress(conn, file_path, total_lines, processed_lines, 'error')
+            raise
+            
+    except Exception as e:
+        print(f"读取文件时出错: {str(e)}")
+        return
+
+def main():
+    ops_dir = "002/Ops"
+    html_files = [f for f in os.listdir(ops_dir) if f.endswith('.html')]
+    
+    print(f"找到 {len(html_files)} 个HTML文件需要处理")
+    
+    # 初始化数据库连接
+    conn = init_db()
+    
+    try:
+        for filename in tqdm(html_files, desc="处理文件", unit="文件"):
+            file_path = os.path.join(ops_dir, filename)
+            process_html_file(file_path, conn)
+    except KeyboardInterrupt:
+        print("\n程序被用户中断")
+    finally:
+        conn.close()
+
+if __name__ == "__main__":
+    main() 

+ 503 - 0
code/translate_epub_v2.py

@@ -0,0 +1,503 @@
+import os
+import re
+from bs4 import BeautifulSoup
+import openai
+import time
+from tqdm import tqdm
+import sqlite3
+import json
+from datetime import datetime
+
+# 初始化OpenAI客户端
+client = openai.OpenAI(
+    # chatnio
+    # base_url="https://api.chatnio.net/v1",
+    # api_key="sk-"
+    # deepseek
+    # base_url="https://api.deepseek.com/v1",
+    # api_key="sk-"
+    # siliconflow(可用于 DeepSeek-R1、Qwen/Qwen3-32B 等模型)
+    base_url="https://api.siliconflow.cn/v1",
+    api_key="sk-"
+)
+
+# model_name = "Qwen/Qwen3-32B"   # Qwen/Qwen3-32B
+model_name = "deepseek-ai/DeepSeek-R1"   # deepseek-ai/DeepSeek-R1
+# 添加版本控制
+VERSION = "1.0.1" # 版本号,用于区分不同版本的翻译
+line_count = 2 # 每组行数,越大越快,但越容易出错
+
+# 自动调整参数
+MIN_LINE_COUNT = 1
+MAX_LINE_COUNT = 5
+INITIAL_LINE_COUNT = 2
+ERROR_THRESHOLD = 3  # 连续错误次数阈值
+SUCCESS_THRESHOLD = 5  # 连续成功次数阈值
+
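+# 自适应分组策略说明:连续成功 SUCCESS_THRESHOLD 次则每组行数加 1(上限 MAX_LINE_COUNT),
+# 连续失败 ERROR_THRESHOLD 次则减 1(下限 MIN_LINE_COUNT);出错后 error_cooldown 秒内暂停调整。
+# 行数变化时同步更新 version,使数据库中不同分组方式的进度记录互不混用。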
+class LineCountManager:
+    def __init__(self):
+        self.current_line_count = INITIAL_LINE_COUNT
+        self.consecutive_errors = 0
+        self.consecutive_successes = 0
+        self.last_error_time = None
+        self.error_cooldown = 60  # 错误冷却时间(秒)
+        self.version = f"1.0.{INITIAL_LINE_COUNT}"  # 初始版本号
+    
+    def adjust_line_count(self, success):
+        current_time = time.time()
+        
+        # 检查是否在冷却期内
+        if self.last_error_time and (current_time - self.last_error_time) < self.error_cooldown:
+            return self.current_line_count
+        
+        if success:
+            self.consecutive_errors = 0
+            self.consecutive_successes += 1
+            
+            # 如果连续成功次数达到阈值,尝试增加行数
+            if self.consecutive_successes >= SUCCESS_THRESHOLD:
+                if self.current_line_count < MAX_LINE_COUNT:
+                    self.current_line_count += 1
+                    self.consecutive_successes = 0
+                    self.version = f"1.0.{self.current_line_count}"  # 更新版本号
+                    print(f"翻译连续成功,增加行数到 {self.current_line_count},版本更新为 {self.version}")
+        else:
+            self.consecutive_successes = 0
+            self.consecutive_errors += 1
+            self.last_error_time = current_time
+            
+            # 如果连续错误次数达到阈值,减少行数
+            if self.consecutive_errors >= ERROR_THRESHOLD:
+                if self.current_line_count > MIN_LINE_COUNT:
+                    self.current_line_count -= 1
+                    self.consecutive_errors = 0
+                    self.version = f"1.0.{self.current_line_count}"  # 更新版本号
+                    print(f"翻译连续失败,减少行数到 {self.current_line_count},版本更新为 {self.version}")
+        
+        return self.current_line_count
+
+# 创建全局的LineCountManager实例
+line_count_manager = LineCountManager()
+
+class TranslationStats:
+    def __init__(self):
+        self.start_time = time.time()
+        self.total_chars = 0
+        self.translated_chars = 0
+        self.total_requests = 0
+        self.successful_requests = 0
+        self.failed_requests = 0
+    
+    def update_stats(self, original_text, translated_text, success=True):
+        self.total_chars += len(original_text)
+        self.translated_chars += len(translated_text)
+        self.total_requests += 1
+        if success:
+            self.successful_requests += 1
+        else:
+            self.failed_requests += 1
+    
+    def get_stats(self):
+        elapsed_time = time.time() - self.start_time
+        chars_per_second = self.translated_chars / elapsed_time if elapsed_time > 0 else 0
+        success_rate = (self.successful_requests / self.total_requests * 100) if self.total_requests > 0 else 0
+        
+        return {
+            "总字符数": self.total_chars,
+            "已翻译字符数": self.translated_chars,
+            "翻译速度": f"{chars_per_second:.2f} 字符/秒",
+            "成功率": f"{success_rate:.1f}%",
+            "总请求数": self.total_requests,
+            "成功请求": self.successful_requests,
+            "失败请求": self.failed_requests,
+            "运行时间": f"{elapsed_time:.1f} 秒"
+        }
+
+# 创建全局的统计对象
+translation_stats = TranslationStats()
+
+def init_db():
+    """初始化数据库"""
+    conn = sqlite3.connect('translation_progress.db')
+    c = conn.cursor()
+    
+    # 检查是否需要迁移数据库:仅当旧表已存在但缺少 version 字段时才迁移,
+    # 否则在全新数据库上执行 ALTER TABLE 会因表不存在而报错
+    c.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='file_progress'")
+    table_exists = c.fetchone() is not None
+    
+    need_migration = False
+    if table_exists:
+        try:
+            c.execute("SELECT version FROM file_progress LIMIT 1")
+        except sqlite3.OperationalError:
+            need_migration = True
+    
+    if need_migration:
+        print("正在更新数据库结构...")
+        
+        # 备份旧表
+        c.execute("ALTER TABLE file_progress RENAME TO file_progress_old")
+        c.execute("ALTER TABLE group_progress RENAME TO group_progress_old")
+        
+        # 创建新表
+        c.execute('''
+            CREATE TABLE IF NOT EXISTS file_progress (
+                file_path TEXT PRIMARY KEY,
+                total_lines INTEGER,
+                processed_lines INTEGER,
+                status TEXT,
+                version TEXT,
+                last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+            )
+        ''')
+        
+        c.execute('''
+            CREATE TABLE IF NOT EXISTS group_progress (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                file_path TEXT,
+                group_index INTEGER,
+                original_text TEXT,
+                translated_text TEXT,
+                status TEXT,
+                version TEXT,
+                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                UNIQUE(file_path, group_index, version)
+            )
+        ''')
+        
+        # 迁移数据
+        try:
+            c.execute('''
+                INSERT INTO file_progress 
+                (file_path, total_lines, processed_lines, status, version, last_updated)
+                SELECT file_path, total_lines, processed_lines, status, ?, last_updated
+                FROM file_progress_old
+            ''', (line_count_manager.version,))
+            
+            c.execute('''
+                INSERT INTO group_progress 
+                (file_path, group_index, original_text, translated_text, status, version, created_at, updated_at)
+                SELECT file_path, group_index, original_text, translated_text, status, ?, created_at, updated_at
+                FROM group_progress_old
+            ''', (line_count_manager.version,))
+            
+            # 删除旧表
+            c.execute("DROP TABLE file_progress_old")
+            c.execute("DROP TABLE group_progress_old")
+            
+            print("数据库迁移完成")
+        except sqlite3.OperationalError as e:
+            print(f"迁移数据时出错: {str(e)}")
+            # 如果迁移失败,回滚到原始表
+            c.execute("DROP TABLE IF EXISTS file_progress")
+            c.execute("DROP TABLE IF EXISTS group_progress")
+            c.execute("ALTER TABLE file_progress_old RENAME TO file_progress")
+            c.execute("ALTER TABLE group_progress_old RENAME TO group_progress")
+            raise
+    else:
+        # 全新数据库,或表已包含version字段:直接按需创建表
+        c.execute('''
+            CREATE TABLE IF NOT EXISTS file_progress (
+                file_path TEXT PRIMARY KEY,
+                total_lines INTEGER,
+                processed_lines INTEGER,
+                status TEXT,
+                version TEXT,
+                last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+            )
+        ''')
+        
+        c.execute('''
+            CREATE TABLE IF NOT EXISTS group_progress (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                file_path TEXT,
+                group_index INTEGER,
+                original_text TEXT,
+                translated_text TEXT,
+                status TEXT,
+                version TEXT,
+                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                UNIQUE(file_path, group_index, version)
+            )
+        ''')
+    
+    conn.commit()
+    return conn
+
+def get_file_progress(conn, file_path):
+    """获取文件翻译进度"""
+    c = conn.cursor()
+    c.execute('SELECT * FROM file_progress WHERE file_path = ?', (file_path,))
+    return c.fetchone()
+
+def update_file_progress(conn, file_path, total_lines, processed_lines, status):
+    """更新文件翻译进度"""
+    c = conn.cursor()
+    c.execute('''
+        INSERT OR REPLACE INTO file_progress 
+        (file_path, total_lines, processed_lines, status, version, last_updated)
+        VALUES (?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
+    ''', (file_path, total_lines, processed_lines, status, line_count_manager.version))
+    conn.commit()
+
+def get_group_progress(conn, file_path, group_index):
+    """获取翻译组进度"""
+    c = conn.cursor()
+    c.execute('''
+        SELECT * FROM group_progress 
+        WHERE file_path = ? AND group_index = ?
+    ''', (file_path, group_index))
+    return c.fetchone()
+
+def update_group_progress(conn, file_path, group_index, original_text, translated_text, status):
+    """更新翻译组进度"""
+    c = conn.cursor()
+    c.execute('''
+        INSERT OR REPLACE INTO group_progress 
+        (file_path, group_index, original_text, translated_text, status, version, updated_at)
+        VALUES (?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
+    ''', (file_path, group_index, original_text, translated_text, status, line_count_manager.version))
+    conn.commit()
+
+def get_completed_groups(conn, file_path):
+    """获取已完成的翻译组"""
+    c = conn.cursor()
+    c.execute('''
+        SELECT group_index, translated_text 
+        FROM group_progress 
+        WHERE file_path = ? AND status = 'completed' AND version = ?
+        ORDER BY group_index
+    ''', (file_path, line_count_manager.version))
+    return c.fetchall()
+
+# """ - 输出内容要求用代码块包裹起来
+# ,只在必要时提供相应的语言注释
+#  """
+def translate_text(text, max_retries=3):
+    """翻译文本,添加重试机制"""
+    start_time = time.time()
+    for attempt in range(max_retries):
+        try:
+            messages = [
+                {
+                    "role": "system",
+                    "content": "- 你名为epub翻译大师,专注于将任意语言的文本翻译成中文。- 你在翻译过程中,力求保留原文语意,确保翻译的准确性和完整性。- 你特别注重翻译结果要贴合现代人的阅读习惯,使译文更加流畅易懂。- 在处理包含代码结构的文本时,你会特别注意保持代码的原样。- 你的服务旨在为用户提供高效、便捷的翻译体验,帮助用户跨越语言障碍。- 在回答问题的时候,尽可能保留原来的代码结构。"
+                },
+                {
+                    "role": "user",
+                    "content": text
+                }
+            ]
+            
+            response = client.chat.completions.create(
+                model=model_name,
+                messages=messages
+            )
+            translated_text = response.choices[0].message.content
+            
+            # 更新统计信息
+            translation_stats.update_stats(text, translated_text, True)
+            
+            # 计算并显示本次翻译的速度
+            elapsed = time.time() - start_time
+            chars_per_second = len(translated_text) / elapsed if elapsed > 0 else 0
+            print(f"\n翻译速度: {chars_per_second:.2f} 字符/秒")
+            
+            # 翻译成功,调整行数
+            line_count_manager.adjust_line_count(True)
+            return translated_text
+        except Exception as e:
+            if attempt == max_retries - 1:
+                print(f"翻译失败,已达到最大重试次数: {str(e)}")
+                # 更新统计信息
+                translation_stats.update_stats(text, text, False)
+                # 翻译失败,调整行数
+                line_count_manager.adjust_line_count(False)
+                return text
+            print(f"翻译出错,正在重试 ({attempt + 1}/{max_retries}): {str(e)}")
+            time.sleep(2 ** attempt)  # 指数退避
+
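+# 用法示例(假设性输入):translate_text('<p class="p34">Hello</p>')
+# 成功时返回翻译后的段落;所有重试均失败时原样返回输入文本,并计入失败统计。
+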
+def process_html_file(file_path, conn):
+    """处理HTML文件"""
+    # 检查文件进度
+    progress = get_file_progress(conn, file_path)
+    
+    try:
+        # 尝试不同的编码方式读取文件
+        encodings = ['utf-8', 'gbk', 'gb2312', 'latin1']
+        content = None
+        
+        for encoding in encodings:
+            try:
+                with open(file_path, 'r', encoding=encoding) as f:
+                    content = f.read()
+                break
+            except UnicodeDecodeError:
+                continue
+        
+        if content is None:
+            raise Exception(f"无法使用支持的编码读取文件: {file_path}")
+    
+        # 使用正则表达式提取body标签内的内容
+        body_pattern = re.compile(r'<body[^>]*>(.*?)</body>', re.DOTALL)
+        body_match = body_pattern.search(content)
+        
+        if not body_match:
+            print(f"警告: {file_path} 中没有找到body标签")
+            return
+        
+        body_content = body_match.group(1)
+        
+        # 按行分割内容,保留所有HTML标签行,但只翻译包含 <p class 的行
+        lines = []
+        for line in body_content.split('\n'):
+            line = line.strip()
+            if line and line.startswith('<'):
+                lines.append(line)
+        
+        total_lines = len(lines)
+        
+        # 获取已完成的翻译组
+        completed_groups = get_completed_groups(conn, file_path)
+        completed_indices = {group[0] for group in completed_groups}
+        
+        # 计算已处理的进度
+        if progress:
+            print(f"文件 {file_path} 已处理进度: {progress[2]}/{progress[1]} 行 ({round(progress[2]*100/progress[1], 2)}%)")
+        
+        # 按组处理内容
+        translated_lines = []
+        try:
+            # 固定本文件的分组步长,避免 line_count 在循环中被动态调整导致分组错位
+            group_step = line_count_manager.current_line_count
+            with tqdm(range(0, len(lines), group_step), 
+                     desc=f"处理文件 {os.path.basename(file_path)}", 
+                     unit="组") as pbar:
+                for i in pbar:
+                    group_index = i // group_step
+                    
+                    # 检查是否已完成
+                    if group_index in completed_indices:
+                        # 使用已完成的翻译
+                        for group in completed_groups:
+                            if group[0] == group_index:
+                                translated_lines.extend(group[1].split('\n'))
+                                break
+                        continue
+                    
+                    group = lines[i:i+group_step]
+                    if group:
+                        # 保存原始文本
+                        original_text = "\n".join(group)
+                        
+                        # 收集需要翻译的段落
+                        paragraphs_to_translate = []
+                        paragraph_indices = []
+                        for idx, line in enumerate(group):
+                            if '<p class' in line:
+                                paragraphs_to_translate.append(line)
+                                paragraph_indices.append(idx)
+                        
+                        # 如果有需要翻译的段落,进行翻译
+                        if paragraphs_to_translate:
+                            translated_paragraphs = []
+                            for paragraph in paragraphs_to_translate:
+                                translated_paragraph = translate_text(paragraph)
+                                translated_paragraphs.append(translated_paragraph)
+                            
+                            # 将翻译后的段落放回原位置
+                            translated_group = group.copy()
+                            for idx, translated in zip(paragraph_indices, translated_paragraphs):
+                                translated_group[idx] = translated
+                        else:
+                            translated_group = group
+                        
+                        translated_text = "\n".join(translated_group)
+                        
+                        # 更新翻译组进度
+                        update_group_progress(conn, file_path, group_index, original_text, translated_text, 'completed')
+                        
+                        # 分割翻译后的文本
+                        translated_lines.extend(translated_group)
+                        
+                        # 更新文件进度
+                        processed_lines = min((group_index + 1) * group_step, total_lines)
+                        update_file_progress(conn, file_path, total_lines, processed_lines, 'in_progress')
+                        
+                        # 显示当前统计信息
+                        stats = translation_stats.get_stats()
+                        pbar.set_postfix(stats)
+                        
+                        # 添加较小的延迟以避免API限制
+                        time.sleep(0.1)  # 减少延迟时间
+            
+            # 替换原始内容
+            if translated_lines:
+                # 保持原始内容的顺序和结构
+                new_body_content = body_content
+                current_index = 0
+                
+                # 遍历原始内容,替换需要翻译的部分
+                # translated_lines 与每个以 '<' 开头的标签行一一对应,因此每遇到一个标签行都要推进索引
+                for line in body_content.split('\n'):
+                    line = line.strip()
+                    if line and line.startswith('<'):
+                        if current_index < len(translated_lines):
+                            if '<p class' in line:
+                                # 替换翻译后的内容(只替换第一次出现,避免重复行被覆盖)
+                                new_body_content = new_body_content.replace(line, translated_lines[current_index], 1)
+                            current_index += 1
+                
+                new_content = content.replace(body_content, new_body_content)
+                
+                # 保存修改后的文件
+                with open(file_path, 'w', encoding='utf-8') as f:
+                    f.write(new_content)
+                
+                # 更新完成状态
+                update_file_progress(conn, file_path, total_lines, total_lines, 'completed')
+                print(f"文件 {file_path} 翻译完成")
+                
+                # 显示最终统计信息
+                print("\n翻译统计信息:")
+                for key, value in translation_stats.get_stats().items():
+                    print(f"{key}: {value}")
+                
+        except KeyboardInterrupt:
+            print("\n检测到中断,保存当前进度...")
+            if 'processed_lines' in locals():
+                update_file_progress(conn, file_path, total_lines, processed_lines, 'interrupted')
+            # 显示中断时的统计信息
+            print("\n中断时的统计信息:")
+            for key, value in translation_stats.get_stats().items():
+                print(f"{key}: {value}")
+            raise
+        except Exception as e:
+            print(f"处理文件时出错: {str(e)}")
+            if 'processed_lines' in locals():
+                update_file_progress(conn, file_path, total_lines, processed_lines, 'error')
+            raise
+            
+    except Exception as e:
+        print(f"读取文件时出错: {str(e)}")
+        return
+
+def main():
+    ops_dir = "002/Ops"
+    html_files = [f for f in os.listdir(ops_dir) if f.endswith('.html')]
+    
+    print(f"找到 {len(html_files)} 个HTML文件需要处理")
+    print(f"开始时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    
+    # 初始化数据库连接
+    conn = init_db()
+    
+    try:
+        for filename in tqdm(html_files, desc="处理文件", unit="文件"):
+            file_path = os.path.join(ops_dir, filename)
+            process_html_file(file_path, conn)
+    except KeyboardInterrupt:
+        print("\n程序被用户中断")
+    finally:
+        conn.close()
+        print(f"\n结束时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+        print("\n最终统计信息:")
+        for key, value in translation_stats.get_stats().items():
+            print(f"{key}: {value}")
+
+if __name__ == "__main__":
+    main() 

+ 661 - 0
code/translate_epub_v3.py

@@ -0,0 +1,661 @@
+import os
+import re
+from bs4 import BeautifulSoup
+import openai
+import time
+from tqdm import tqdm
+import sqlite3
+import json
+from datetime import datetime
+import logging
+from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
+import asyncio
+import aiohttp
+from concurrent.futures import ThreadPoolExecutor
+from functools import lru_cache
+import hashlib
+import yaml
+from pathlib import Path
+
+# 配置管理
+class Config:
+    def __init__(self, config_path='config.yaml'):
+        self.config_path = config_path
+        self.config = self.load_config()
+        
+        # 设置日志
+        self.setup_logging()
+        
+        # 初始化OpenAI客户端
+        self.setup_openai()
+    
+    def load_config(self):
+        """加载配置文件"""
+        if not os.path.exists(self.config_path):
+            # 创建默认配置
+            default_config = {
+                'logging': {
+                    'level': 'INFO',
+                    'format': '%(asctime)s - %(levelname)s - %(message)s',
+                    'file': 'translation.log'
+                },
+                'openai': {
+                    'base_url': 'https://api.siliconflow.cn/v1',
+                    'api_key': 'sk-',
+                    'model_name': 'deepseek-ai/DeepSeek-R1',
+                    'max_retries': 3,
+                    'retry_delay': 2,
+                    'timeout': 30,
+                    'max_concurrent_requests': 5
+                },
+                'translation': {
+                    'min_line_count': 1,
+                    'max_line_count': 5,
+                    'initial_line_count': 2,
+                    'error_threshold': 3,
+                    'success_threshold': 5,
+                    'error_cooldown': 60,
+                    'cache_size': 1000
+                },
+                'database': {
+                    'path': 'translation_progress.db',
+                    'pool_size': 5
+                },
+                'paths': {
+                    'input_dir': '002/Ops',
+                    'output_dir': '002/Ops_translated'
+                }
+            }
+            
+            # 保存默认配置
+            with open(self.config_path, 'w', encoding='utf-8') as f:
+                yaml.dump(default_config, f, allow_unicode=True)
+            
+            return default_config
+        
+        # 加载现有配置
+        with open(self.config_path, 'r', encoding='utf-8') as f:
+            return yaml.safe_load(f)
+    
+    def setup_logging(self):
+        """设置日志"""
+        logging.basicConfig(
+            level=getattr(logging, self.config['logging']['level']),
+            format=self.config['logging']['format'],
+            handlers=[
+                logging.FileHandler(self.config['logging']['file']),
+                logging.StreamHandler()
+            ]
+        )
+    
+    def setup_openai(self):
+        """设置OpenAI客户端"""
+        self.client = openai.OpenAI(
+            base_url=self.config['openai']['base_url'],
+            api_key=self.config['openai']['api_key']
+        )
+    
+    def get(self, *keys):
+        """获取配置值"""
+        value = self.config
+        for key in keys:
+            value = value[key]
+        return value
+    
+    def update(self, updates):
+        """更新配置"""
+        def deep_update(d, u):
+            for k, v in u.items():
+                if isinstance(v, dict):
+                    d[k] = deep_update(d.get(k, {}), v)
+                else:
+                    d[k] = v
+            return d
+        
+        self.config = deep_update(self.config, updates)
+        
+        # 保存更新后的配置
+        with open(self.config_path, 'w', encoding='utf-8') as f:
+            yaml.dump(self.config, f, allow_unicode=True)
+        
+        # 重新设置日志和OpenAI客户端
+        self.setup_logging()
+        self.setup_openai()
+
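+# 用法示例(假设性,仅作说明):
+#   cfg = Config('config.yaml')
+#   cfg.get('translation', 'max_line_count')            # 按层级取值
+#   cfg.update({'translation': {'max_line_count': 8}})  # 深度合并并写回 config.yaml
+# 注意:首次运行若无 config.yaml 会生成默认配置,其中 api_key 仅为占位符 "sk-",需手动填入有效密钥。
+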
+# 创建全局的配置实例
+config = Config()
+
+# 更新全局变量
+MODEL_CONFIG = {
+    "model_name": config.get('openai', 'model_name'),
+    "max_retries": config.get('openai', 'max_retries'),
+    "retry_delay": config.get('openai', 'retry_delay'),
+    "timeout": config.get('openai', 'timeout'),
+    "max_concurrent_requests": config.get('openai', 'max_concurrent_requests'),
+    "cache_size": config.get('translation', 'cache_size')
+}
+
+MIN_LINE_COUNT = config.get('translation', 'min_line_count')
+MAX_LINE_COUNT = config.get('translation', 'max_line_count')
+INITIAL_LINE_COUNT = config.get('translation', 'initial_line_count')
+ERROR_THRESHOLD = config.get('translation', 'error_threshold')
+SUCCESS_THRESHOLD = config.get('translation', 'success_threshold')
+
+# 更新其他类的初始化参数
+class LineCountManager:
+    def __init__(self):
+        self.current_line_count = INITIAL_LINE_COUNT
+        self.consecutive_errors = 0
+        self.consecutive_successes = 0
+        self.last_error_time = None
+        self.error_cooldown = config.get('translation', 'error_cooldown')
+        self.version = f"1.0.{INITIAL_LINE_COUNT}"
+        self.error_history = []
+    
+    def adjust_line_count(self, success):
+        """根据翻译结果调整行数"""
+        current_time = time.time()
+        
+        # 检查是否在冷却期内
+        if self.last_error_time and (current_time - self.last_error_time) < self.error_cooldown:
+            return self.current_line_count
+        
+        if success:
+            self.consecutive_errors = 0
+            self.consecutive_successes += 1
+            
+            # 如果连续成功次数达到阈值,尝试增加行数
+            if self.consecutive_successes >= SUCCESS_THRESHOLD:
+                if self.current_line_count < MAX_LINE_COUNT:
+                    self.current_line_count += 1
+                    self.consecutive_successes = 0
+                    self.version = f"1.0.{self.current_line_count}"
+                    logging.info(f"翻译连续成功,增加行数到 {self.current_line_count},版本更新为 {self.version}")
+        else:
+            self.consecutive_successes = 0
+            self.consecutive_errors += 1
+            self.last_error_time = current_time
+            
+            # 记录错误
+            self.error_history.append({
+                'time': current_time,
+                'line_count': self.current_line_count
+            })
+            
+            # 如果连续错误次数达到阈值,减少行数
+            if self.consecutive_errors >= ERROR_THRESHOLD:
+                if self.current_line_count > MIN_LINE_COUNT:
+                    self.current_line_count -= 1
+                    self.consecutive_errors = 0
+                    self.version = f"1.0.{self.current_line_count}"
+                    logging.warning(f"翻译连续失败,减少行数到 {self.current_line_count},版本更新为 {self.version}")
+        
+        return self.current_line_count
+    
+    def get_error_stats(self):
+        """获取错误统计信息"""
+        if not self.error_history:
+            return "无错误记录"
+        
+        recent_errors = [e for e in self.error_history if time.time() - e['time'] < 3600]  # 最近一小时的错误
+        return {
+            "总错误数": len(self.error_history),
+            "最近一小时错误数": len(recent_errors),
+            "当前行数": self.current_line_count,
+            "连续错误": self.consecutive_errors,
+            "连续成功": self.consecutive_successes
+        }
+
+class DatabaseManager:
+    def __init__(self):
+        self.db_path = config.get('database', 'path')
+        self.conn = None
+        self.init_db()
+    
+    def get_connection(self):
+        """获取数据库连接"""
+        if self.conn is None:
+            self.conn = sqlite3.connect(self.db_path)
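+            # row_factory 设为 sqlite3.Row,查询结果可按列名访问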
+            self.conn.row_factory = sqlite3.Row
+        return self.conn
+    
+    def close(self):
+        """关闭数据库连接"""
+        if self.conn:
+            self.conn.close()
+            self.conn = None
+    
+    def init_db(self):
+        """初始化数据库"""
+        conn = self.get_connection()
+        c = conn.cursor()
+        
+        # 创建文件进度表
+        c.execute('''
+            CREATE TABLE IF NOT EXISTS file_progress (
+                file_path TEXT PRIMARY KEY,
+                total_lines INTEGER,
+                processed_lines INTEGER,
+                status TEXT,
+                version TEXT,
+                last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                error_count INTEGER DEFAULT 0,
+                retry_count INTEGER DEFAULT 0
+            )
+        ''')
+        
+        # 创建翻译组进度表
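+        # UNIQUE(file_path, group_index, version) 保证同一分组方式下每个组只保留一条记录,便于断点续传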
+        c.execute('''
+            CREATE TABLE IF NOT EXISTS group_progress (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                file_path TEXT,
+                group_index INTEGER,
+                original_text TEXT,
+                translated_text TEXT,
+                status TEXT,
+                version TEXT,
+                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                error_count INTEGER DEFAULT 0,
+                retry_count INTEGER DEFAULT 0,
+                UNIQUE(file_path, group_index, version)
+            )
+        ''')
+        
+        # 创建错误日志表
+        c.execute('''
+            CREATE TABLE IF NOT EXISTS error_log (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                file_path TEXT,
+                group_index INTEGER,
+                error_type TEXT,
+                error_message TEXT,
+                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                resolved_at TIMESTAMP,
+                resolution TEXT
+            )
+        ''')
+        
+        conn.commit()
+    
+    def begin_transaction(self):
+        """开始事务"""
+        self.get_connection().execute('BEGIN TRANSACTION')
+    
+    def commit_transaction(self):
+        """提交事务"""
+        self.get_connection().commit()
+    
+    def rollback_transaction(self):
+        """回滚事务"""
+        self.get_connection().rollback()
+    
+    def get_file_progress(self, file_path):
+        """获取文件翻译进度"""
+        c = self.get_connection().cursor()
+        c.execute('SELECT * FROM file_progress WHERE file_path = ?', (file_path,))
+        return c.fetchone()
+    
+    def update_file_progress(self, file_path, total_lines, processed_lines, status):
+        """更新文件翻译进度"""
+        c = self.get_connection().cursor()
+        c.execute('''
+            INSERT OR REPLACE INTO file_progress 
+            (file_path, total_lines, processed_lines, status, version, last_updated)
+            VALUES (?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
+        ''', (file_path, total_lines, processed_lines, status, line_count_manager.version))
+        self.get_connection().commit()
+    
+    def get_group_progress(self, file_path, group_index):
+        """获取翻译组进度"""
+        c = self.get_connection().cursor()
+        c.execute('''
+            SELECT * FROM group_progress 
+            WHERE file_path = ? AND group_index = ? AND version = ?
+        ''', (file_path, group_index, line_count_manager.version))
+        return c.fetchone()
+    
+    def update_group_progress(self, file_path, group_index, original_text, translated_text, status):
+        """更新翻译组进度"""
+        c = self.get_connection().cursor()
+        c.execute('''
+            INSERT OR REPLACE INTO group_progress 
+            (file_path, group_index, original_text, translated_text, status, version, updated_at)
+            VALUES (?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
+        ''', (file_path, group_index, original_text, translated_text, status, line_count_manager.version))
+        self.get_connection().commit()
+    
+    def log_error(self, file_path, group_index, error_type, error_message):
+        """记录错误"""
+        c = self.get_connection().cursor()
+        c.execute('''
+            INSERT INTO error_log 
+            (file_path, group_index, error_type, error_message)
+            VALUES (?, ?, ?, ?)
+        ''', (file_path, group_index, error_type, error_message))
+        self.get_connection().commit()
+    
+    def get_error_stats(self):
+        """获取错误统计信息"""
+        c = self.get_connection().cursor()
+        c.execute('''
+            SELECT 
+                COUNT(*) as total_errors,
+                COUNT(CASE WHEN resolved_at IS NULL THEN 1 END) as unresolved_errors,
+                COUNT(CASE WHEN created_at > datetime('now', '-1 hour') THEN 1 END) as recent_errors
+            FROM error_log
+        ''')
+        return c.fetchone()
+
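+# 说明:以下两个类目前只是预留的骨架,并发信号量与翻译缓存尚未接入实际翻译流程。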
+class AsyncTranslationManager:
+    def __init__(self):
+        self.semaphore = asyncio.Semaphore(config.get('openai', 'max_concurrent_requests'))
+        self.session = None
+
+class TranslationCache:
+    def __init__(self):
+        self.cache = {}
+        self.max_size = config.get('translation', 'cache_size')
+        self.hits = 0
+        self.misses = 0
+
+# 创建全局实例
+line_count_manager = LineCountManager()
+db_manager = DatabaseManager()
+async_translation_manager = AsyncTranslationManager()
+translation_cache = TranslationCache()
+
+# 添加版本控制
+VERSION = "1.0.1" # 版本号,用于区分不同版本的翻译
+line_count = 2 # 每组行数,越大越快,但越容易出错
+
+class TranslationStats:
+    def __init__(self):
+        self.start_time = time.time()
+        self.total_chars = 0
+        self.translated_chars = 0
+        self.total_requests = 0
+        self.successful_requests = 0
+        self.failed_requests = 0
+    
+    def update_stats(self, original_text, translated_text, success=True):
+        self.total_chars += len(original_text)
+        self.translated_chars += len(translated_text)
+        self.total_requests += 1
+        if success:
+            self.successful_requests += 1
+        else:
+            self.failed_requests += 1
+    
+    def get_stats(self):
+        elapsed_time = time.time() - self.start_time
+        chars_per_second = self.translated_chars / elapsed_time if elapsed_time > 0 else 0
+        success_rate = (self.successful_requests / self.total_requests * 100) if self.total_requests > 0 else 0
+        
+        return {
+            "总字符数": self.total_chars,
+            "已翻译字符数": self.translated_chars,
+            "翻译速度": f"{chars_per_second:.2f} 字符/秒",
+            "成功率": f"{success_rate:.1f}%",
+            "总请求数": self.total_requests,
+            "成功请求": self.successful_requests,
+            "失败请求": self.failed_requests,
+            "运行时间": f"{elapsed_time:.1f} 秒"
+        }
+
+# 创建全局的统计对象
+translation_stats = TranslationStats()
+
+def get_completed_groups(conn, file_path):
+    """获取已完成的翻译组"""
+    c = conn.cursor()
+    c.execute('''
+        SELECT group_index, translated_text 
+        FROM group_progress 
+        WHERE file_path = ? AND status = 'completed' AND version = ?
+        ORDER BY group_index
+    ''', (file_path, line_count_manager.version))
+    return c.fetchall()
+
+# """ - 输出内容要求用代码块包裹起来
+# ,只在必要时提供相应的语言注释
+#  """
+@retry(
+    stop=stop_after_attempt(MODEL_CONFIG['max_retries']),
+    wait=wait_exponential(multiplier=1, min=4, max=10),
+    retry=retry_if_exception_type((openai.APIError, openai.APITimeoutError)),
+    before_sleep=lambda retry_state: logging.warning(f"重试第 {retry_state.attempt_number} 次...")
+)
+def translate_text(text):
+    """翻译文本,使用tenacity进行重试"""
+    try:
+        messages = [
+            {
+                "role": "system",
+                "content": "- 你名为epub翻译大师,专注于将任意语言的文本翻译成中文。- 你在翻译过程中,力求保留原文语意,确保翻译的准确性和完整性。- 你特别注重翻译结果要贴合现代人的阅读习惯,使译文更加流畅易懂。- 在处理包含代码结构的文本时,你会特别注意保持代码的原样。- 你的服务旨在为用户提供高效、便捷的翻译体验,帮助用户跨越语言障碍。- 在回答问题的时候,尽可能保留原来的代码结构。"
+            },
+            {
+                "role": "user",
+                "content": text
+            }
+        ]
+        
+        response = config.client.chat.completions.create(
+            model=MODEL_CONFIG['model_name'],
+            messages=messages,
+            timeout=MODEL_CONFIG['timeout']
+        )
+        
+        translated_text = response.choices[0].message.content
+        
+        # 记录统计信息,供进度条与最终汇总使用
+        translation_stats.update_stats(text, translated_text, True)
+        line_count_manager.adjust_line_count(True)
+        return translated_text
+        
+    except Exception as e:
+        logging.error(f"翻译出错: {str(e)}")
+        translation_stats.update_stats(text, text, False)
+        line_count_manager.adjust_line_count(False)
+        raise
+
+def process_html_file(file_path, conn):
+    """处理HTML文件"""
+    # 检查文件进度
+    progress = db_manager.get_file_progress(file_path)
+    
+    try:
+        # 尝试不同的编码方式读取文件
+        encodings = ['utf-8', 'gbk', 'gb2312', 'latin1']
+        content = None
+        
+        for encoding in encodings:
+            try:
+                with open(file_path, 'r', encoding=encoding) as f:
+                    content = f.read()
+                break
+            except UnicodeDecodeError:
+                continue
+        
+        if content is None:
+            raise Exception(f"无法使用支持的编码读取文件: {file_path}")
+    
+        # 使用正则表达式提取body标签内的内容
+        body_pattern = re.compile(r'<body[^>]*>(.*?)</body>', re.DOTALL)
+        body_match = body_pattern.search(content)
+        
+        if not body_match:
+            print(f"警告: {file_path} 中没有找到body标签")
+            return
+        
+        body_content = body_match.group(1)
+        
+        # 按行分割内容,保留所有HTML标签行,但只翻译包含 <p class 的行
+        lines = []
+        for line in body_content.split('\n'):
+            line = line.strip()
+            if line and line.startswith('<'):
+                lines.append(line)
+        
+        total_lines = len(lines)
+        
+        # 获取已完成的翻译组
+        completed_groups = get_completed_groups(conn, file_path)
+        completed_indices = {group[0] for group in completed_groups}
+        
+        # 计算已处理的进度
+        if progress:
+            print(f"文件 {file_path} 已处理进度: {progress[2]}/{progress[1]} 行 ({round(progress[2]*100/progress[1], 2)}%)")
+        
+        # 按组处理内容
+        translated_lines = []
+        try:
+            # 固定本文件的分组步长,避免 line_count 在循环中被动态调整导致分组错位
+            group_step = line_count_manager.current_line_count
+            with tqdm(range(0, len(lines), group_step), 
+                     desc=f"处理文件 {os.path.basename(file_path)}", 
+                     unit="组") as pbar:
+                for i in pbar:
+                    group_index = i // group_step
+                    
+                    # 检查是否已完成
+                    if group_index in completed_indices:
+                        # 使用已完成的翻译
+                        for group in completed_groups:
+                            if group[0] == group_index:
+                                translated_lines.extend(group[1].split('\n'))
+                                break
+                        continue
+                    
+                    group = lines[i:i+group_step]
+                    if group:
+                        # 保存原始文本
+                        original_text = "\n".join(group)
+                        
+                        # 收集需要翻译的段落
+                        paragraphs_to_translate = []
+                        paragraph_indices = []
+                        for idx, line in enumerate(group):
+                            if '<p class' in line:
+                                paragraphs_to_translate.append(line)
+                                paragraph_indices.append(idx)
+                        
+                        # 如果有需要翻译的段落,进行翻译
+                        if paragraphs_to_translate:
+                            translated_paragraphs = []
+                            for paragraph in paragraphs_to_translate:
+                                translated_paragraph = translate_text(paragraph)
+                                translated_paragraphs.append(translated_paragraph)
+                            
+                            # 将翻译后的段落放回原位置
+                            translated_group = group.copy()
+                            for idx, translated in zip(paragraph_indices, translated_paragraphs):
+                                translated_group[idx] = translated
+                        else:
+                            translated_group = group
+                        
+                        translated_text = "\n".join(translated_group)
+                        
+                        # 更新翻译组进度
+                        db_manager.update_group_progress(file_path, group_index, original_text, translated_text, 'completed')
+                        
+                        # 分割翻译后的文本
+                        translated_lines.extend(translated_group)
+                        
+                        # 更新文件进度
+                        processed_lines = min((group_index + 1) * group_step, total_lines)
+                        db_manager.update_file_progress(file_path, total_lines, processed_lines, 'in_progress')
+                        
+                        # 显示当前统计信息
+                        stats = translation_stats.get_stats()
+                        pbar.set_postfix(stats)
+                        
+                        # 添加较小的延迟以避免API限制
+                        time.sleep(0.1)  # 减少延迟时间
+            
+            # 替换原始内容
+            if translated_lines:
+                # 构建新的body内容
+                new_body_content = []
+                current_index = 0
+                
+                # 遍历原始内容,替换需要翻译的部分
+                for line in body_content.split('\n'):
+                    line = line.strip()
+                    if not line:
+                        new_body_content.append('')
+                        continue
+                        
+                    if line.startswith('<'):
+                        if current_index < len(translated_lines):
+                            # translated_lines 与标签行一一对应,非段落行内容与原文相同
+                            new_body_content.append(translated_lines[current_index])
+                            current_index += 1
+                        else:
+                            # 保持原样
+                            new_body_content.append(line)
+                    else:
+                        # 保持非HTML内容原样
+                        new_body_content.append(line)
+                
+                # 将新内容重新组合
+                new_body_content = '\n'.join(new_body_content)
+                
+                # 替换原始内容中的body部分
+                new_content = content.replace(body_content, new_body_content)
+                
+                # 保存修改后的文件
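+                # 与 v2 不同:译文写入 config 中 paths.output_dir 指定的目录,不再覆盖原文件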
+                output_dir = config.get('paths', 'output_dir')
+                os.makedirs(output_dir, exist_ok=True)
+                output_path = os.path.join(output_dir, os.path.basename(file_path))
+                
+                with open(output_path, 'w', encoding='utf-8') as f:
+                    f.write(new_content)
+                
+                # 更新完成状态
+                db_manager.update_file_progress(file_path, total_lines, total_lines, 'completed')
+                print(f"文件 {file_path} 翻译完成,已保存到 {output_path}")
+                
+                # 显示最终统计信息
+                print("\n翻译统计信息:")
+                for key, value in translation_stats.get_stats().items():
+                    print(f"{key}: {value}")
+                
+        except KeyboardInterrupt:
+            print("\n检测到中断,保存当前进度...")
+            if 'processed_lines' in locals():
+                db_manager.update_file_progress(file_path, total_lines, processed_lines, 'interrupted')
+            # 显示中断时的统计信息
+            print("\n中断时的统计信息:")
+            for key, value in translation_stats.get_stats().items():
+                print(f"{key}: {value}")
+            raise
+        except Exception as e:
+            print(f"处理文件时出错: {str(e)}")
+            if 'processed_lines' in locals():
+                db_manager.update_file_progress(file_path, total_lines, processed_lines, 'error')
+            raise
+            
+    except Exception as e:
+        print(f"读取文件时出错: {str(e)}")
+        return
+
+def main():
+    ops_dir = "002/Ops"
+    html_files = [f for f in os.listdir(ops_dir) if f.endswith('.html')]
+    
+    print(f"找到 {len(html_files)} 个HTML文件需要处理")
+    print(f"开始时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    
+    # 初始化数据库连接
+    conn = db_manager.get_connection()
+    
+    try:
+        for filename in tqdm(html_files, desc="处理文件", unit="文件"):
+            file_path = os.path.join(ops_dir, filename)
+            process_html_file(file_path, conn)
+    except KeyboardInterrupt:
+        print("\n程序被用户中断")
+    finally:
+        db_manager.close()
+        print(f"\n结束时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+        print("\n最终统计信息:")
+        for key, value in translation_stats.get_stats().items():
+            print(f"{key}: {value}")
+
+if __name__ == "__main__":
+    main() 

+ 665 - 0
code/translate_epub_v4.py

@@ -0,0 +1,665 @@
+import os
+import re
+from bs4 import BeautifulSoup
+import openai
+import time
+from tqdm import tqdm
+import sqlite3
+import json
+from datetime import datetime
+import logging
+from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
+import asyncio
+import aiohttp
+from concurrent.futures import ThreadPoolExecutor
+from functools import lru_cache
+import hashlib
+import yaml
+from pathlib import Path
+
+# 配置管理
+class Config:
+    def __init__(self, config_path='config.yaml'):
+        self.config_path = config_path
+        self.config = self.load_config()
+        
+        # 设置日志
+        self.setup_logging()
+        
+        # 初始化OpenAI客户端
+        self.setup_openai()
+    
+    def load_config(self):
+        """加载配置文件"""
+        if not os.path.exists(self.config_path):
+            # 创建默认配置
+            default_config = {
+                'logging': {
+                    'level': 'INFO',
+                    'format': '%(asctime)s - %(levelname)s - %(message)s',
+                    'file': 'translation.log'
+                },
+                'openai': {
+                    'base_url': 'https://api.siliconflow.cn/v1',
+                    'api_key': 'sk-',
+                    'model_name': 'deepseek-ai/DeepSeek-R1',
+                    'max_retries': 3,
+                    'retry_delay': 2,
+                    'timeout': 30,
+                    'max_concurrent_requests': 5
+                },
+                'translation': {
+                    'min_line_count': 1,
+                    'max_line_count': 5,
+                    'initial_line_count': 2,
+                    'error_threshold': 3,
+                    'success_threshold': 5,
+                    'error_cooldown': 60,
+                    'cache_size': 1000
+                },
+                'database': {
+                    'path': 'translation_progress.db',
+                    'pool_size': 5
+                },
+                'paths': {
+                    'input_dir': '002/Ops',
+                    'output_dir': '002/Ops_translated'
+                }
+            }
+            
+            # 保存默认配置
+            with open(self.config_path, 'w', encoding='utf-8') as f:
+                yaml.dump(default_config, f, allow_unicode=True)
+            
+            return default_config
+        
+        # 加载现有配置
+        with open(self.config_path, 'r', encoding='utf-8') as f:
+            return yaml.safe_load(f)
+    
+    def setup_logging(self):
+        """设置日志"""
+        logging.basicConfig(
+            level=getattr(logging, self.config['logging']['level']),
+            format=self.config['logging']['format'],
+            handlers=[
+                logging.FileHandler(self.config['logging']['file']),
+                logging.StreamHandler()
+            ]
+        )
+    
+    def setup_openai(self):
+        """设置OpenAI客户端"""
+        self.client = openai.OpenAI(
+            base_url=self.config['openai']['base_url'],
+            api_key=self.config['openai']['api_key']
+        )
+    
+    def get(self, *keys):
+        """获取配置值"""
+        value = self.config
+        for key in keys:
+            value = value[key]
+        return value
+    
+    def update(self, updates):
+        """更新配置"""
+        def deep_update(d, u):
+            for k, v in u.items():
+                if isinstance(v, dict):
+                    d[k] = deep_update(d.get(k, {}), v)
+                else:
+                    d[k] = v
+            return d
+        
+        self.config = deep_update(self.config, updates)
+        
+        # 保存更新后的配置
+        with open(self.config_path, 'w', encoding='utf-8') as f:
+            yaml.dump(self.config, f, allow_unicode=True)
+        
+        # 重新设置日志和OpenAI客户端
+        self.setup_logging()
+        self.setup_openai()
+
+# 创建全局的配置实例
+config = Config()
+
+# 更新全局变量
+MODEL_CONFIG = {
+    "model_name": config.get('openai', 'model_name'),
+    "max_retries": config.get('openai', 'max_retries'),
+    "retry_delay": config.get('openai', 'retry_delay'),
+    "timeout": config.get('openai', 'timeout'),
+    "max_concurrent_requests": config.get('openai', 'max_concurrent_requests'),
+    "cache_size": config.get('translation', 'cache_size')
+}
+
+MIN_LINE_COUNT = config.get('translation', 'min_line_count')
+MAX_LINE_COUNT = config.get('translation', 'max_line_count')
+INITIAL_LINE_COUNT = config.get('translation', 'initial_line_count')
+ERROR_THRESHOLD = config.get('translation', 'error_threshold')
+SUCCESS_THRESHOLD = config.get('translation', 'success_threshold')
+
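+# 与 v3 的差异:v4 的 LineCountManager 只在连续失败时降低每组行数,连续成功不再自动升高,以稳定性优先。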
+# 更新其他类的初始化参数
+class LineCountManager:
+    def __init__(self):
+        self.current_line_count = INITIAL_LINE_COUNT
+        self.consecutive_errors = 0
+        self.consecutive_successes = 0
+        self.last_error_time = None
+        self.error_cooldown = config.get('translation', 'error_cooldown')
+        self.version = f"1.0.{INITIAL_LINE_COUNT}"
+        self.error_history = []
+    
+    def adjust_line_count(self, success):
+        """根据翻译结果调整行数"""
+        current_time = time.time()
+        
+        # 检查是否在冷却期内
+        if self.last_error_time and (current_time - self.last_error_time) < self.error_cooldown:
+            return self.current_line_count
+        
+        if success:
+            self.consecutive_errors = 0
+            self.consecutive_successes = 0  # 重置成功计数,但不增加行数
+        else:
+            self.consecutive_successes = 0
+            self.consecutive_errors += 1
+            self.last_error_time = current_time
+            
+            # 记录错误
+            self.error_history.append({
+                'time': current_time,
+                'line_count': self.current_line_count
+            })
+            
+            # 如果连续错误次数达到阈值,减少行数
+            if self.consecutive_errors >= ERROR_THRESHOLD:
+                if self.current_line_count > MIN_LINE_COUNT:
+                    self.current_line_count -= 1
+                    self.consecutive_errors = 0
+                    self.version = f"1.0.{self.current_line_count}"
+                    logging.warning(f"翻译连续失败,减少行数到 {self.current_line_count},版本更新为 {self.version}")
+        
+        return self.current_line_count
+    
+    def get_error_stats(self):
+        """获取错误统计信息"""
+        if not self.error_history:
+            return "无错误记录"
+        
+        recent_errors = [e for e in self.error_history if time.time() - e['time'] < 3600]  # 最近一小时的错误
+        return {
+            "总错误数": len(self.error_history),
+            "最近一小时错误数": len(recent_errors),
+            "当前行数": self.current_line_count,
+            "连续错误": self.consecutive_errors,
+            "连续成功": self.consecutive_successes
+        }
+
+class DatabaseManager:
+    def __init__(self):
+        self.db_path = config.get('database', 'path')
+        self.conn = None
+        self.init_db()
+    
+    def get_connection(self):
+        """获取数据库连接"""
+        if self.conn is None:
+            self.conn = sqlite3.connect(self.db_path)
+            self.conn.row_factory = sqlite3.Row
+        return self.conn
+    
+    def close(self):
+        """关闭数据库连接"""
+        if self.conn:
+            self.conn.close()
+            self.conn = None
+    
+    def init_db(self):
+        """初始化数据库"""
+        conn = self.get_connection()
+        c = conn.cursor()
+        
+        # 创建文件进度表
+        c.execute('''
+            CREATE TABLE IF NOT EXISTS file_progress (
+                file_path TEXT PRIMARY KEY,
+                total_lines INTEGER,
+                processed_lines INTEGER,
+                status TEXT,
+                version TEXT,
+                last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                error_count INTEGER DEFAULT 0,
+                retry_count INTEGER DEFAULT 0
+            )
+        ''')
+        
+        # 创建翻译组进度表
+        c.execute('''
+            CREATE TABLE IF NOT EXISTS group_progress (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                file_path TEXT,
+                group_index INTEGER,
+                original_text TEXT,
+                translated_text TEXT,
+                status TEXT,
+                version TEXT,
+                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                error_count INTEGER DEFAULT 0,
+                retry_count INTEGER DEFAULT 0,
+                UNIQUE(file_path, group_index, version)
+            )
+        ''')
+        
+        # 创建错误日志表
+        c.execute('''
+            CREATE TABLE IF NOT EXISTS error_log (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                file_path TEXT,
+                group_index INTEGER,
+                error_type TEXT,
+                error_message TEXT,
+                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                resolved_at TIMESTAMP,
+                resolution TEXT
+            )
+        ''')
+        
+        conn.commit()
+    
+    def begin_transaction(self):
+        """开始事务"""
+        self.get_connection().execute('BEGIN TRANSACTION')
+    
+    def commit_transaction(self):
+        """提交事务"""
+        self.get_connection().commit()
+    
+    def rollback_transaction(self):
+        """回滚事务"""
+        self.get_connection().rollback()
+    
+    def get_file_progress(self, file_path):
+        """获取文件翻译进度"""
+        c = self.get_connection().cursor()
+        c.execute('SELECT * FROM file_progress WHERE file_path = ?', (file_path,))
+        return c.fetchone()
+    
+    def update_file_progress(self, file_path, total_lines, processed_lines, status):
+        """更新文件翻译进度"""
+        c = self.get_connection().cursor()
+        c.execute('''
+            INSERT OR REPLACE INTO file_progress 
+            (file_path, total_lines, processed_lines, status, version, last_updated)
+            VALUES (?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
+        ''', (file_path, total_lines, processed_lines, status, line_count_manager.version))
+        self.get_connection().commit()
+    
+    def get_group_progress(self, file_path, group_index):
+        """获取翻译组进度"""
+        c = self.get_connection().cursor()
+        c.execute('''
+            SELECT * FROM group_progress 
+            WHERE file_path = ? AND group_index = ? AND version = ?
+        ''', (file_path, group_index, line_count_manager.version))
+        return c.fetchone()
+    
+    def update_group_progress(self, file_path, group_index, original_text, translated_text, status):
+        """更新翻译组进度"""
+        c = self.get_connection().cursor()
+        c.execute('''
+            INSERT OR REPLACE INTO group_progress 
+            (file_path, group_index, original_text, translated_text, status, version, updated_at)
+            VALUES (?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
+        ''', (file_path, group_index, original_text, translated_text, status, line_count_manager.version))
+        self.get_connection().commit()
+    
+    def log_error(self, file_path, group_index, error_type, error_message):
+        """记录错误"""
+        c = self.get_connection().cursor()
+        c.execute('''
+            INSERT INTO error_log 
+            (file_path, group_index, error_type, error_message)
+            VALUES (?, ?, ?, ?)
+        ''', (file_path, group_index, error_type, error_message))
+        self.get_connection().commit()
+    
+    def get_error_stats(self):
+        """获取错误统计信息"""
+        c = self.get_connection().cursor()
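+        # Counts all errors, errors without a resolved_at timestamp, and errors from the last hour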
+        c.execute('''
+            SELECT 
+                COUNT(*) as total_errors,
+                COUNT(CASE WHEN resolved_at IS NULL THEN 1 END) as unresolved_errors,
+                COUNT(CASE WHEN created_at > datetime('now', '-1 hour') THEN 1 END) as recent_errors
+            FROM error_log
+        ''')
+        return c.fetchone()
+
+class AsyncTranslationManager:
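+    """Throttles concurrent API requests with an asyncio.Semaphore sized by openai.max_concurrent_requests."""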
+    def __init__(self):
+        self.semaphore = asyncio.Semaphore(config.get('openai', 'max_concurrent_requests'))
+        self.session = None
+
+class TranslationCache:
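+    """In-memory translation cache with hit/miss counters; max_size comes from translation.cache_size."""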
+    def __init__(self):
+        self.cache = {}
+        self.max_size = config.get('translation', 'cache_size')
+        self.hits = 0
+        self.misses = 0
+
+# Create global singleton instances
+line_count_manager = LineCountManager()
+db_manager = DatabaseManager()
+async_translation_manager = AsyncTranslationManager()
+translation_cache = TranslationCache()
+
+# Version control
+VERSION = "1.0.1"  # version tag used to distinguish different translation runs
+line_count = 2  # lines per group: larger is faster but more error-prone
+
+class TranslationStats:
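+    """Tracks character counts, request counts, and elapsed time for the current translation run."""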
+    def __init__(self):
+        self.start_time = time.time()
+        self.total_chars = 0
+        self.translated_chars = 0
+        self.total_requests = 0
+        self.successful_requests = 0
+        self.failed_requests = 0
+    
+    def update_stats(self, original_text, translated_text, success=True):
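+        """Record the character counts and outcome of one translation request."""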
+        self.total_chars += len(original_text)
+        self.translated_chars += len(translated_text)
+        self.total_requests += 1
+        if success:
+            self.successful_requests += 1
+        else:
+            self.failed_requests += 1
+    
+    def get_stats(self):
+        elapsed_time = time.time() - self.start_time
+        chars_per_second = self.translated_chars / elapsed_time if elapsed_time > 0 else 0
+        success_rate = (self.successful_requests / self.total_requests * 100) if self.total_requests > 0 else 0
+        
+        return {
+            "总字符数": self.total_chars,
+            "已翻译字符数": self.translated_chars,
+            "翻译速度": f"{chars_per_second:.2f} 字符/秒",
+            "成功率": f"{success_rate:.1f}%",
+            "总请求数": self.total_requests,
+            "成功请求": self.successful_requests,
+            "失败请求": self.failed_requests,
+            "运行时间": f"{elapsed_time:.1f} 秒"
+        }
+
+# Global statistics object
+translation_stats = TranslationStats()
+
+def get_completed_groups(conn, file_path):
+    """获取已完成的翻译组"""
+    c = conn.cursor()
+    c.execute('''
+        SELECT group_index, translated_text 
+        FROM group_progress 
+        WHERE file_path = ? AND status = 'completed' AND version = ?
+        ORDER BY group_index
+    ''', (file_path, line_count_manager.version))
+    return c.fetchall()
+
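+# Retry transient OpenAI errors with exponential backoff (waits bounded between 4 and 10 seconds)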
+# """ - 输出内容要求用代码块包裹起来
+# ,只在必要时提供相应的语言注释
+#  """
+@retry(
+    stop=stop_after_attempt(MODEL_CONFIG['max_retries']),
+    wait=wait_exponential(multiplier=1, min=4, max=10),
+    retry=retry_if_exception_type((openai.APIError, openai.APITimeoutError)),
+    before_sleep=lambda retry_state: logging.warning(f"重试第 {retry_state.attempt_number} 次...")
+)
+def translate_text(text):
+    """翻译文本,使用流式输出"""
+    try:
+        messages = [
+            {
+                "role": "system",
+                "content": "- 你名为epub翻译大师,专注于将任意语言的文本翻译成中文。- 你在翻译过程中,力求保留原文语意,确保翻译的准确性和完整性。- 你特别注重翻译结果要贴合现代人的阅读习惯,使译文更加流畅易懂。- 在处理包含代码结构的文本时,你会特别注意保持代码的原样。- 你的服务旨在为用户提供高效、便捷的翻译体验,帮助用户跨越语言障碍。- 在回答问题的时候,尽可能保留原来的代码结构。- 在回答问题的时候,尽可能只返回翻译后的内容和代码结构,不要返回任何其他内容。"
+            },
+            {
+                "role": "user",
+                "content": text
+            }
+        ]
+        
+        # Use streaming output
+        stream = config.client.chat.completions.create(
+            model=MODEL_CONFIG['model_name'],
+            messages=messages,
+            timeout=MODEL_CONFIG['timeout'],
+            stream=True  # enable streaming output
+        )
+        
+        # Accumulate the streamed chunks
+        translated_text = ""
+        for chunk in stream:
+            if chunk.choices[0].delta.content is not None:
+                content = chunk.choices[0].delta.content
+                translated_text += content
+                # print the translation as it streams in
+                print(content, end='', flush=True)
+        
+        print()  # newline
+        line_count_manager.adjust_line_count(True)
+        # Record the successful request so the displayed statistics are kept up to date
+        translation_stats.update_stats(text, translated_text, success=True)
+        return translated_text
+        
+    except Exception as e:
+        logging.error(f"翻译出错: {str(e)}")
+        line_count_manager.adjust_line_count(False)
+        # Count the failed request in the run statistics as well
+        translation_stats.update_stats(text, "", success=False)
+        raise
+
+def process_html_file(file_path, conn):
+    """处理HTML文件"""
+    # 检查文件进度
+    progress = db_manager.get_file_progress(file_path)
+    
+    try:
+        # Try several encodings to read the file
+        encodings = ['utf-8', 'gbk', 'gb2312', 'latin1']
+        content = None
+        
+        for encoding in encodings:
+            try:
+                with open(file_path, 'r', encoding=encoding) as f:
+                    content = f.read()
+                break
+            except UnicodeDecodeError:
+                continue
+        
+        if content is None:
+            raise Exception(f"无法使用支持的编码读取文件: {file_path}")
+    
+        # Extract the content inside the <body> tag with a regular expression
+        body_pattern = re.compile(r'<body[^>]*>(.*?)</body>', re.DOTALL)
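+        # re.DOTALL lets '.' match newlines so the whole multi-line body is captured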
+        body_match = body_pattern.search(content)
+        
+        if not body_match:
+            print(f"警告: {file_path} 中没有找到body标签")
+            return
+        
+        body_content = body_match.group(1)
+        
+        # Split the content into lines, keeping every HTML tag line; only <p class ...> paragraphs and <h*> headings are sent for translation
+        lines = []
+        for line in body_content.split('\n'):
+            line = line.strip()
+            if line and line.startswith('<'):
+                lines.append(line)
+        
+        total_lines = len(lines)
+        
+        # Fetch groups translated in a previous run
+        completed_groups = get_completed_groups(conn, file_path)
+        completed_indices = {group[0] for group in completed_groups}
+        
+        # Report previously processed progress
+        if progress:
+            print(f"文件 {file_path} 已处理进度: {progress[2]}/{progress[1]} 行 ({round(progress[2]*100/progress[1], 2)}%)")
+        
+        # Process the content group by group
+        translated_lines = []
+        try:
+            with tqdm(range(0, len(lines), line_count_manager.current_line_count), 
+                     desc=f"处理文件 {os.path.basename(file_path)}", 
+                     unit="组") as pbar:
+                for i in pbar:
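+                    # i is the start offset of this group in `lines`; group_index is the key stored in the DB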
+                    group_index = i // line_count_manager.current_line_count
+                    
+                    # Skip groups that are already completed
+                    if group_index in completed_indices:
+                        # Reuse the stored translation
+                        for group in completed_groups:
+                            if group[0] == group_index:
+                                translated_lines.extend(group[1].split('\n'))
+                                break
+                        continue
+                    
+                    group = lines[i:i+line_count_manager.current_line_count]
+                    if group:
+                        # Keep the original text of this group
+                        original_text = "\n".join(group)
+                        
+                        # Collect the paragraphs that need translation
+                        paragraphs_to_translate = []
+                        paragraph_indices = []
+                        for idx, line in enumerate(group):
+                            if '<p class' in line or line.startswith('<h'):
+                                paragraphs_to_translate.append(line)
+                                paragraph_indices.append(idx)
+                        
+                        # Translate the collected paragraphs, if any
+                        if paragraphs_to_translate:
+                            translated_paragraphs = []
+                            for paragraph in paragraphs_to_translate:
+                                print(f"\n翻译段落 {len(translated_paragraphs) + 1}/{len(paragraphs_to_translate)}:")
+                                translated_paragraph = translate_text(paragraph)
+                                translated_paragraphs.append(translated_paragraph)
+                            
+                            # Put the translated paragraphs back in their original positions
+                            translated_group = group.copy()
+                            for idx, translated in zip(paragraph_indices, translated_paragraphs):
+                                translated_group[idx] = translated
+                        else:
+                            translated_group = group
+                        
+                        translated_text = "\n".join(translated_group)
+                        
+                        # Record this group's progress in the database
+                        db_manager.update_group_progress(file_path, group_index, original_text, translated_text, 'completed')
+                        
+                        # Append this group's lines to the running output
+                        translated_lines.extend(translated_group)
+                        
+                        # Update the file-level progress
+                        processed_lines = min((group_index + 1) * line_count_manager.current_line_count, total_lines)
+                        db_manager.update_file_progress(file_path, total_lines, processed_lines, 'in_progress')
+                        
+                        # Show current statistics on the progress bar
+                        stats = translation_stats.get_stats()
+                        pbar.set_postfix(stats)
+                        
+                        # Small delay between groups to stay under API rate limits
+                        time.sleep(0.1)
+            
+            # Replace the original content
+            if translated_lines:
+                # Build the new body content
+                new_body_content = []
+                current_index = 0
+                
+                # Walk the original body and substitute the translated lines
+                for line in body_content.split('\n'):
+                    line = line.strip()
+                    if not line:
+                        new_body_content.append('')
+                        continue
+                        
+                    if line.startswith('<'):
+                        # translated_lines parallels every tag line collected above
+                        # (lines that were not translated were copied through unchanged),
+                        # so consume one entry per tag line to keep the alignment intact
+                        if current_index < len(translated_lines):
+                            new_body_content.append(translated_lines[current_index])
+                            current_index += 1
+                        else:
+                            new_body_content.append(line)
+                    else:
+                        # keep non-tag content unchanged
+                        new_body_content.append(line)
+                
+                # Reassemble the new body
+                new_body_content = '\n'.join(new_body_content)
+                
+                # Swap the new body into the original document
+                new_content = content.replace(body_content, new_body_content)
+                
+                # Save the modified file
+                output_dir = config.get('paths', 'output_dir')
+                os.makedirs(output_dir, exist_ok=True)
+                output_path = os.path.join(output_dir, os.path.basename(file_path))
+                
+                with open(output_path, 'w', encoding='utf-8') as f:
+                    f.write(new_content)
+                
+                # Mark the file as completed
+                db_manager.update_file_progress(file_path, total_lines, total_lines, 'completed')
+                print(f"文件 {file_path} 翻译完成,已保存到 {output_path}")
+                
+                # Show the final statistics
+                print("\n翻译统计信息:")
+                for key, value in translation_stats.get_stats().items():
+                    print(f"{key}: {value}")
+                
+        except KeyboardInterrupt:
+            print("\n检测到中断,保存当前进度...")
+            if 'processed_lines' in locals():
+                db_manager.update_file_progress(file_path, total_lines, processed_lines, 'interrupted')
+            # Show statistics at the moment of interruption
+            print("\n中断时的统计信息:")
+            for key, value in translation_stats.get_stats().items():
+                print(f"{key}: {value}")
+            raise
+        except Exception as e:
+            print(f"处理文件时出错: {str(e)}")
+            if 'processed_lines' in locals():
+                db_manager.update_file_progress(file_path, total_lines, processed_lines, 'error')
+            raise
+            
+    except Exception as e:
+        print(f"读取文件时出错: {str(e)}")
+        return
+
+def main():
+    ops_dir = config.get('paths', 'input_dir')  # configured input directory
+    html_files = [f for f in os.listdir(ops_dir) if f.endswith('.html')]
+    
+    print(f"找到 {len(html_files)} 个HTML文件需要处理")
+    print(f"开始时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    
+    # Initialize the database connection
+    conn = db_manager.get_connection()
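+    # Make sure the schema exists before processing (harmless if it already does:
+    # init_db only issues CREATE TABLE IF NOT EXISTS statements)
+    db_manager.init_db()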
+    
+    try:
+        for filename in tqdm(html_files, desc="处理文件", unit="文件"):
+            file_path = os.path.join(ops_dir, filename)
+            process_html_file(file_path, conn)
+    except KeyboardInterrupt:
+        print("\n程序被用户中断")
+    finally:
+        db_manager.close()
+        print(f"\n结束时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+        print("\n最终统计信息:")
+        for key, value in translation_stats.get_stats().items():
+            print(f"{key}: {value}")
+
+if __name__ == "__main__":
+    main()