import os
import re
from bs4 import BeautifulSoup
import openai
import time
from tqdm import tqdm
import sqlite3
import json
from datetime import datetime
import logging
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
import asyncio
import aiohttp
from concurrent.futures import ThreadPoolExecutor
from functools import lru_cache
import hashlib
import yaml
from pathlib import Path


# Configuration management
class Config:
    def __init__(self, config_path='config.yaml'):
        self.config_path = config_path
        self.config = self.load_config()

        # Set up logging
        self.setup_logging()

        # Initialize the OpenAI client
        self.setup_openai()

    def load_config(self):
        """Load the configuration file."""
        if not os.path.exists(self.config_path):
            # Create a default configuration
            default_config = {
                'logging': {
                    'level': 'INFO',
                    'format': '%(asctime)s - %(levelname)s - %(message)s',
                    'file': 'translation.log'
                },
                'openai': {
                    'base_url': 'https://api.siliconflow.cn/v1',
                    'api_key': 'sk-',
                    'model_name': 'deepseek-ai/DeepSeek-R1',
                    'max_retries': 3,
                    'retry_delay': 2,
                    'timeout': 30,
                    'max_concurrent_requests': 5
                },
                'translation': {
                    'min_line_count': 1,
                    'max_line_count': 5,
                    'initial_line_count': 2,
                    'error_threshold': 3,
                    'success_threshold': 5,
                    'error_cooldown': 60,
                    'cache_size': 1000
                },
                'database': {
                    'path': 'translation_progress.db',
                    'pool_size': 5
                },
                'paths': {
                    'input_dir': '002/Ops',
                    'output_dir': '002/Ops_translated'
                }
            }

            # Save the default configuration
            with open(self.config_path, 'w', encoding='utf-8') as f:
                yaml.dump(default_config, f, allow_unicode=True)

            return default_config

        # Load the existing configuration
        with open(self.config_path, 'r', encoding='utf-8') as f:
            return yaml.safe_load(f)

    def setup_logging(self):
        """Configure logging."""
        logging.basicConfig(
            level=getattr(logging, self.config['logging']['level']),
            format=self.config['logging']['format'],
            handlers=[
                logging.FileHandler(self.config['logging']['file']),
                logging.StreamHandler()
            ]
        )

    def setup_openai(self):
        """Set up the OpenAI client."""
        self.client = openai.OpenAI(
            base_url=self.config['openai']['base_url'],
            api_key=self.config['openai']['api_key']
        )

    def get(self, *keys):
        """Get a configuration value by nested keys."""
        value = self.config
        for key in keys:
            value = value[key]
        return value

    def update(self, updates):
        """Update the configuration."""
        def deep_update(d, u):
            for k, v in u.items():
                if isinstance(v, dict):
                    d[k] = deep_update(d.get(k, {}), v)
                else:
                    d[k] = v
            return d

        self.config = deep_update(self.config, updates)

        # Save the updated configuration
        with open(self.config_path, 'w', encoding='utf-8') as f:
            yaml.dump(self.config, f, allow_unicode=True)

        # Re-apply logging and OpenAI client settings
        self.setup_logging()
        self.setup_openai()


# Create the global configuration instance
config = Config()


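# Illustrative sketch (not called anywhere in this script): how the Config
# accessors above are meant to be used. The nested-key lookup mirrors the YAML
# layout, and Config.update() deep-merges the given keys, writes config.yaml
# back to disk, and re-initializes logging and the OpenAI client. The concrete
# values shown here are examples only.
def _example_config_usage():
    model = config.get('openai', 'model_name')
    output_dir = config.get('paths', 'output_dir')
    logging.info(f"Translating with {model}, writing output to {output_dir}")

    # Only the keys given here change; the rest of the YAML file is untouched.
    config.update({'translation': {'initial_line_count': 3}})

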
# Update module-level settings from the configuration
MODEL_CONFIG = {
    "model_name": config.get('openai', 'model_name'),
    "max_retries": config.get('openai', 'max_retries'),
    "retry_delay": config.get('openai', 'retry_delay'),
    "timeout": config.get('openai', 'timeout'),
    "max_concurrent_requests": config.get('openai', 'max_concurrent_requests'),
    "cache_size": config.get('translation', 'cache_size')
}

MIN_LINE_COUNT = config.get('translation', 'min_line_count')
MAX_LINE_COUNT = config.get('translation', 'max_line_count')
INITIAL_LINE_COUNT = config.get('translation', 'initial_line_count')
ERROR_THRESHOLD = config.get('translation', 'error_threshold')
SUCCESS_THRESHOLD = config.get('translation', 'success_threshold')


# Initialization parameters for the remaining classes come from the configuration
class LineCountManager:
    def __init__(self):
        self.current_line_count = INITIAL_LINE_COUNT
        self.consecutive_errors = 0
        self.consecutive_successes = 0
        self.last_error_time = None
        self.error_cooldown = config.get('translation', 'error_cooldown')
        self.version = f"1.0.{INITIAL_LINE_COUNT}"
        self.error_history = []

    def adjust_line_count(self, success):
        """Adjust the lines-per-group count based on the translation result."""
        current_time = time.time()

        # Skip adjustments while still inside the error cooldown window
        if self.last_error_time and (current_time - self.last_error_time) < self.error_cooldown:
            return self.current_line_count

        if success:
            self.consecutive_errors = 0
            self.consecutive_successes = 0  # Reset the success counter, but do not increase the line count
        else:
            self.consecutive_successes = 0
            self.consecutive_errors += 1
            self.last_error_time = current_time

            # Record the error
            self.error_history.append({
                'time': current_time,
                'line_count': self.current_line_count
            })

            # If consecutive errors reach the threshold, reduce the line count
            if self.consecutive_errors >= ERROR_THRESHOLD:
                if self.current_line_count > MIN_LINE_COUNT:
                    self.current_line_count -= 1
                    self.consecutive_errors = 0
                    self.version = f"1.0.{self.current_line_count}"
                    logging.warning(
                        f"Repeated translation failures, reducing line count to {self.current_line_count}, "
                        f"version updated to {self.version}"
                    )

        return self.current_line_count

    def get_error_stats(self):
        """Get error statistics."""
        if not self.error_history:
            return "No errors recorded"

        recent_errors = [e for e in self.error_history if time.time() - e['time'] < 3600]  # errors in the last hour
        return {
            "total_errors": len(self.error_history),
            "errors_last_hour": len(recent_errors),
            "current_line_count": self.current_line_count,
            "consecutive_errors": self.consecutive_errors,
            "consecutive_successes": self.consecutive_successes
        }


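# Illustrative sketch (not called anywhere in this script): the adaptive
# group-size behaviour of LineCountManager. Normally a failure also starts an
# error_cooldown window during which further adjustments are suppressed; the
# cooldown is zeroed here (on a throwaway instance, so the global manager is
# untouched) purely to make the threshold logic visible.
def _example_line_count_adjustment():
    manager = LineCountManager()
    manager.error_cooldown = 0  # disable the cooldown so back-to-back failures count
    for _ in range(ERROR_THRESHOLD):
        manager.adjust_line_count(success=False)
    # With the default config (initial_line_count=2, error_threshold=3) the
    # manager has now dropped to 1 line per group and version "1.0.1", which
    # also invalidates group progress stored under the previous version.
    print(manager.get_error_stats())

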
class DatabaseManager:
    def __init__(self):
        self.db_path = config.get('database', 'path')
        self.conn = None
        self.init_db()

    def get_connection(self):
        """Get the database connection."""
        if self.conn is None:
            self.conn = sqlite3.connect(self.db_path)
            self.conn.row_factory = sqlite3.Row
        return self.conn

    def close(self):
        """Close the database connection."""
        if self.conn:
            self.conn.close()
            self.conn = None

    def init_db(self):
        """Initialize the database."""
        conn = self.get_connection()
        c = conn.cursor()

        # File progress table
        c.execute('''
            CREATE TABLE IF NOT EXISTS file_progress (
                file_path TEXT PRIMARY KEY,
                total_lines INTEGER,
                processed_lines INTEGER,
                status TEXT,
                version TEXT,
                last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                error_count INTEGER DEFAULT 0,
                retry_count INTEGER DEFAULT 0
            )
        ''')

        # Translation group progress table
        c.execute('''
            CREATE TABLE IF NOT EXISTS group_progress (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                file_path TEXT,
                group_index INTEGER,
                original_text TEXT,
                translated_text TEXT,
                status TEXT,
                version TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                error_count INTEGER DEFAULT 0,
                retry_count INTEGER DEFAULT 0,
                UNIQUE(file_path, group_index, version)
            )
        ''')

        # Error log table
        c.execute('''
            CREATE TABLE IF NOT EXISTS error_log (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                file_path TEXT,
                group_index INTEGER,
                error_type TEXT,
                error_message TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                resolved_at TIMESTAMP,
                resolution TEXT
            )
        ''')

        conn.commit()

    def begin_transaction(self):
        """Begin a transaction."""
        self.get_connection().execute('BEGIN TRANSACTION')

    def commit_transaction(self):
        """Commit the current transaction."""
        self.get_connection().commit()

    def rollback_transaction(self):
        """Roll back the current transaction."""
        self.get_connection().rollback()

    def get_file_progress(self, file_path):
        """Get the translation progress for a file."""
        c = self.get_connection().cursor()
        c.execute('SELECT * FROM file_progress WHERE file_path = ?', (file_path,))
        return c.fetchone()

    def update_file_progress(self, file_path, total_lines, processed_lines, status):
        """Update the translation progress for a file."""
        c = self.get_connection().cursor()
        c.execute('''
            INSERT OR REPLACE INTO file_progress
            (file_path, total_lines, processed_lines, status, version, last_updated)
            VALUES (?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
        ''', (file_path, total_lines, processed_lines, status, line_count_manager.version))
        self.get_connection().commit()

    def get_group_progress(self, file_path, group_index):
        """Get the progress of a translation group."""
        c = self.get_connection().cursor()
        c.execute('''
            SELECT * FROM group_progress
            WHERE file_path = ? AND group_index = ? AND version = ?
        ''', (file_path, group_index, line_count_manager.version))
        return c.fetchone()

    def update_group_progress(self, file_path, group_index, original_text, translated_text, status):
        """Update the progress of a translation group."""
        c = self.get_connection().cursor()
        c.execute('''
            INSERT OR REPLACE INTO group_progress
            (file_path, group_index, original_text, translated_text, status, version, updated_at)
            VALUES (?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
        ''', (file_path, group_index, original_text, translated_text, status, line_count_manager.version))
        self.get_connection().commit()

    def log_error(self, file_path, group_index, error_type, error_message):
        """Record an error."""
        c = self.get_connection().cursor()
        c.execute('''
            INSERT INTO error_log
            (file_path, group_index, error_type, error_message)
            VALUES (?, ?, ?, ?)
        ''', (file_path, group_index, error_type, error_message))
        self.get_connection().commit()

    def get_error_stats(self):
        """Get error statistics."""
        c = self.get_connection().cursor()
        c.execute('''
            SELECT
                COUNT(*) as total_errors,
                COUNT(CASE WHEN resolved_at IS NULL THEN 1 END) as unresolved_errors,
                COUNT(CASE WHEN created_at > datetime('now', '-1 hour') THEN 1 END) as recent_errors
            FROM error_log
        ''')
        return c.fetchone()


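# Illustrative sketch (not called anywhere in this script): how the progress
# tables support resuming an interrupted run. Group results are stored under
# the current LineCountManager version, so a completed group is only reused
# while the group size, and therefore the grouping of lines, is unchanged.
# The file path and texts below are examples only.
def _example_progress_roundtrip():
    sample_file = '002/Ops/chapter01.html'  # hypothetical input file
    db_manager.update_group_progress(
        sample_file, 0, '<p class="x">Hello</p>', '<p class="x">你好</p>', 'completed'
    )

    row = db_manager.get_group_progress(sample_file, 0)
    if row is not None:
        print(row['status'], row['translated_text'])

    stats = db_manager.get_error_stats()
    print(stats['total_errors'], stats['unresolved_errors'], stats['recent_errors'])

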
class AsyncTranslationManager:
    def __init__(self):
        self.semaphore = asyncio.Semaphore(config.get('openai', 'max_concurrent_requests'))
        self.session = None


class TranslationCache:
    def __init__(self):
        self.cache = {}
        self.max_size = config.get('translation', 'cache_size')
        self.hits = 0
        self.misses = 0


# Create global instances
line_count_manager = LineCountManager()
db_manager = DatabaseManager()
async_translation_manager = AsyncTranslationManager()
translation_cache = TranslationCache()

# Version control
VERSION = "1.0.1"  # Version string used to distinguish translations produced by different runs
line_count = 2  # Lines per group: larger is faster, but more error-prone


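# Illustrative sketch (not called anywhere in this script): the async pieces
# (asyncio, aiohttp, AsyncTranslationManager) are imported and instantiated but
# never used. The semaphore is presumably meant to cap concurrent translation
# requests; a coroutine built on that idea could look like this. A fresh
# manager is created inside the coroutine so its semaphore belongs to the
# running event loop, and the synchronous translate_text() defined below is
# pushed to a worker thread via asyncio.to_thread (Python 3.9+).
async def _example_translate_group_async(paragraphs):
    manager = AsyncTranslationManager()

    async def translate_one(paragraph):
        async with manager.semaphore:
            return await asyncio.to_thread(translate_text, paragraph)

    return await asyncio.gather(*(translate_one(p) for p in paragraphs))

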
class TranslationStats:
    def __init__(self):
        self.start_time = time.time()
        self.total_chars = 0
        self.translated_chars = 0
        self.total_requests = 0
        self.successful_requests = 0
        self.failed_requests = 0

    def update_stats(self, original_text, translated_text, success=True):
        self.total_chars += len(original_text)
        self.translated_chars += len(translated_text)
        self.total_requests += 1
        if success:
            self.successful_requests += 1
        else:
            self.failed_requests += 1

    def get_stats(self):
        elapsed_time = time.time() - self.start_time
        chars_per_second = self.translated_chars / elapsed_time if elapsed_time > 0 else 0
        success_rate = (self.successful_requests / self.total_requests * 100) if self.total_requests > 0 else 0

        return {
            "total_chars": self.total_chars,
            "translated_chars": self.translated_chars,
            "speed": f"{chars_per_second:.2f} chars/s",
            "success_rate": f"{success_rate:.1f}%",
            "total_requests": self.total_requests,
            "successful_requests": self.successful_requests,
            "failed_requests": self.failed_requests,
            "elapsed": f"{elapsed_time:.1f} s"
        }


# Create the global statistics object
translation_stats = TranslationStats()


def get_completed_groups(conn, file_path):
    """Get the completed translation groups for a file."""
    c = conn.cursor()
    c.execute('''
        SELECT group_index, translated_text
        FROM group_progress
        WHERE file_path = ? AND status = 'completed' AND version = ?
        ORDER BY group_index
    ''', (file_path, line_count_manager.version))
    return c.fetchall()


# """ - The output must be wrapped in a code block,
#       providing language comments only where necessary
# """
@retry(
    stop=stop_after_attempt(MODEL_CONFIG['max_retries']),
    wait=wait_exponential(multiplier=1, min=4, max=10),
    retry=retry_if_exception_type((openai.APIError, openai.APITimeoutError)),
    before_sleep=lambda retry_state: logging.warning(f"Retrying, attempt {retry_state.attempt_number}...")
)
def translate_text(text):
    """Translate text, using streaming output."""
    try:
        messages = [
            {
                "role": "system",
                "content": (
                    "- You are the epub Translation Master, focused on translating text from any language into Chinese. "
                    "- While translating, preserve the meaning of the original and ensure the translation is accurate and complete. "
                    "- Pay particular attention to making the result fit modern reading habits, so the translation is fluent and easy to understand. "
                    "- When the text contains code structures, take special care to keep the code exactly as it is. "
                    "- Your goal is to give the user an efficient, convenient translation experience that bridges language barriers. "
                    "- When answering, keep the original code structure as much as possible. "
                    "- When answering, return only the translated content and the code structure, and nothing else."
                )
            },
            {
                "role": "user",
                "content": text
            }
        ]

        # Use streaming output
        stream = config.client.chat.completions.create(
            model=MODEL_CONFIG['model_name'],
            messages=messages,
            timeout=MODEL_CONFIG['timeout'],
            stream=True  # enable streaming
        )

        # Collect the streamed content
        translated_text = ""
        for chunk in stream:
            if chunk.choices[0].delta.content is not None:
                content = chunk.choices[0].delta.content
                translated_text += content
                # Print the translation as it streams in
                print(content, end='', flush=True)

        print()  # newline
        line_count_manager.adjust_line_count(True)
        # Record the result so the statistics shown in the progress bar reflect real activity
        translation_stats.update_stats(text, translated_text, success=True)
        return translated_text

    except Exception as e:
        logging.error(f"Translation error: {str(e)}")
        line_count_manager.adjust_line_count(False)
        translation_stats.update_stats(text, "", success=False)
        raise


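# Illustrative sketch (not called anywhere in this script): translation_cache
# and the hashlib import are defined above but never wired in. One way to use
# them is a thin wrapper around translate_text() that keys the cache on a hash
# of the source text and evicts the oldest entry once max_size is exceeded.
def _example_cached_translate(text):
    key = hashlib.sha256(text.encode('utf-8')).hexdigest()

    if key in translation_cache.cache:
        translation_cache.hits += 1
        return translation_cache.cache[key]

    translation_cache.misses += 1
    translated = translate_text(text)

    # Naive size cap: drop the oldest entry (dicts preserve insertion order)
    if len(translation_cache.cache) >= translation_cache.max_size:
        translation_cache.cache.pop(next(iter(translation_cache.cache)))
    translation_cache.cache[key] = translated
    return translated

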
def process_html_file(file_path, conn):
    """Process a single HTML file."""
    # Check the file's recorded progress
    progress = db_manager.get_file_progress(file_path)

    try:
        # Try several encodings when reading the file
        encodings = ['utf-8', 'gbk', 'gb2312', 'latin1']
        content = None

        for encoding in encodings:
            try:
                with open(file_path, 'r', encoding=encoding) as f:
                    content = f.read()
                break
            except UnicodeDecodeError:
                continue

        if content is None:
            raise Exception(f"Could not read file with any supported encoding: {file_path}")

        # Extract the contents of the body tag with a regular expression
        body_pattern = re.compile(r'<body[^>]*>(.*?)</body>', re.DOTALL)
        body_match = body_pattern.search(content)

        if not body_match:
            print(f"Warning: no body tag found in {file_path}")
            return

        body_content = body_match.group(1)

        # Split the content into lines, keeping every HTML tag line; only lines
        # containing <p class (or headings) are actually translated
        lines = []
        for line in body_content.split('\n'):
            line = line.strip()
            if line and line.startswith('<'):
                lines.append(line)

        total_lines = len(lines)

        # Fetch the groups that have already been translated
        completed_groups = get_completed_groups(conn, file_path)
        completed_indices = {group[0] for group in completed_groups}

        # Report previously recorded progress
        if progress:
            print(f"File {file_path} already processed: {progress[2]}/{progress[1]} lines "
                  f"({round(progress[2] * 100 / progress[1], 2)}%)")

        # Process the content group by group
        translated_lines = []
        try:
            with tqdm(range(0, len(lines), line_count_manager.current_line_count),
                      desc=f"Processing {os.path.basename(file_path)}",
                      unit="group") as pbar:
                for i in pbar:
                    group_index = i // line_count_manager.current_line_count

                    # Skip groups that are already completed
                    if group_index in completed_indices:
                        # Reuse the stored translation
                        for group in completed_groups:
                            if group[0] == group_index:
                                translated_lines.extend(group[1].split('\n'))
                                break
                        continue

                    group = lines[i:i + line_count_manager.current_line_count]
                    if group:
                        # Keep the original text
                        original_text = "\n".join(group)

                        # Collect the paragraphs that need translation
                        paragraphs_to_translate = []
                        paragraph_indices = []
                        for idx, line in enumerate(group):
                            if '<p class' in line or line.startswith('<h'):
                                paragraphs_to_translate.append(line)
                                paragraph_indices.append(idx)

                        # Translate the paragraphs, if there are any
                        if paragraphs_to_translate:
                            translated_paragraphs = []
                            for paragraph in paragraphs_to_translate:
                                print(f"\nTranslating paragraph {len(translated_paragraphs) + 1}/{len(paragraphs_to_translate)}:")
                                translated_paragraph = translate_text(paragraph)
                                translated_paragraphs.append(translated_paragraph)

                            # Put the translated paragraphs back in their original positions
                            translated_group = group.copy()
                            for idx, translated in zip(paragraph_indices, translated_paragraphs):
                                translated_group[idx] = translated
                        else:
                            translated_group = group

                        translated_text = "\n".join(translated_group)

                        # Update the group progress
                        db_manager.update_group_progress(file_path, group_index, original_text, translated_text, 'completed')

                        # Append the (partly) translated group
                        translated_lines.extend(translated_group)

                        # Update the file progress
                        processed_lines = min((group_index + 1) * line_count_manager.current_line_count, total_lines)
                        db_manager.update_file_progress(file_path, total_lines, processed_lines, 'in_progress')

                        # Show the current statistics
                        stats = translation_stats.get_stats()
                        pbar.set_postfix(stats)

                        # Small delay to stay clear of API rate limits
                        time.sleep(0.1)

            # Replace the original content
            if translated_lines:
                # Build the new body content
                new_body_content = []
                current_index = 0

                # Walk the original content and substitute the processed lines.
                # translated_lines holds one entry for every tag line collected
                # above, so the index advances for every line that starts with
                # '<' to keep the two sequences aligned.
                for line in body_content.split('\n'):
                    line = line.strip()
                    if not line:
                        new_body_content.append('')
                        continue

                    if line.startswith('<'):
                        if current_index < len(translated_lines):
                            # Use the processed (possibly translated) line
                            new_body_content.append(translated_lines[current_index])
                            current_index += 1
                        else:
                            # Keep the line as-is
                            new_body_content.append(line)
                    else:
                        # Keep non-HTML content unchanged
                        new_body_content.append(line)

                # Reassemble the new body
                new_body_content = '\n'.join(new_body_content)

                # Replace the body section in the original content
                new_content = content.replace(body_content, new_body_content)

                # Save the modified file
                output_dir = config.get('paths', 'output_dir')
                os.makedirs(output_dir, exist_ok=True)
                output_path = os.path.join(output_dir, os.path.basename(file_path))

                with open(output_path, 'w', encoding='utf-8') as f:
                    f.write(new_content)

                # Mark the file as completed
                db_manager.update_file_progress(file_path, total_lines, total_lines, 'completed')
                print(f"File {file_path} translated, saved to {output_path}")

                # Show the final statistics
                print("\nTranslation statistics:")
                for key, value in translation_stats.get_stats().items():
                    print(f"{key}: {value}")

        except KeyboardInterrupt:
            print("\nInterrupted, saving current progress...")
            if 'processed_lines' in locals():
                db_manager.update_file_progress(file_path, total_lines, processed_lines, 'interrupted')
            # Show the statistics at the point of interruption
            print("\nStatistics at interruption:")
            for key, value in translation_stats.get_stats().items():
                print(f"{key}: {value}")
            raise
        except Exception as e:
            print(f"Error while processing file: {str(e)}")
            if 'processed_lines' in locals():
                db_manager.update_file_progress(file_path, total_lines, processed_lines, 'error')
            raise

    except Exception as e:
        print(f"Error while reading file: {str(e)}")
        return


def main():
    ops_dir = config.get('paths', 'input_dir')  # "002/Ops" by default
    html_files = [f for f in os.listdir(ops_dir) if f.endswith('.html')]

    print(f"Found {len(html_files)} HTML files to process")
    print(f"Start time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    # Initialize the database connection
    conn = db_manager.get_connection()

    try:
        for filename in tqdm(html_files, desc="Processing files", unit="file"):
            file_path = os.path.join(ops_dir, filename)
            process_html_file(file_path, conn)
    except KeyboardInterrupt:
        print("\nInterrupted by user")
    finally:
        db_manager.close()
        print(f"\nEnd time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        print("\nFinal statistics:")
        for key, value in translation_stats.get_stats().items():
            print(f"{key}: {value}")


if __name__ == "__main__":
    main()