John 7 сар өмнө
parent
commit
d8eb0e4f0a

+ 10 - 2
epub_node/db/chapter.js

@@ -62,7 +62,7 @@ export async function searchChapterInfoForPath(path, book_id) {
             FROM files
                      INNER JOIN book_link_file ON files.file_id = book_link_file.file_id
             WHERE book_link_file.book_id = ?
-              AND files.source_id LIKE ?;`; // 确保 `source_id` 上有索引
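+              AND files.path LIKE ?;`; // NOTE: the filter is now on `path`; the parameter is wrapped as '%...%', and a pattern with a leading wildcard cannot use an ordinary index on the column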
         
         // 调整参数顺序以匹配 SQL 中的占位符顺序
         const queryParams = [book_id, `%${path}%`];
@@ -81,11 +81,19 @@ export async function searchChapterInfoForPath(path, book_id) {
 export async function searchChapterForBookId({book_id, level = 10}) {
     console.log(8282, level);
     return new Promise((resolve, reject) => {
+        /*
+         SELECT files.*, chapter.*
+         FROM files
+         INNER JOIN book_link_file ON files.file_id = book_link_file.file_id
+         INNER JOIN chapter ON files.path like CONCAT('%', chapter.order_id, '%')
+         WHERE book_link_file.book_id = '28b639bf9362ad3b2cd5a24cb6d811c0'
+         AND files.mimetype = 'text/html' AND chapter.level = '1';
+         */
         const query = `
             SELECT files.*, chapter.*
             FROM files
                      INNER JOIN book_link_file ON files.file_id = book_link_file.file_id
-                     INNER JOIN chapter ON chapter.content LIKE CONCAT('%', files.source_id, '%')
+                     INNER JOIN chapter ON files.path LIKE CONCAT('%', chapter.order_id, '%')
             WHERE book_link_file.book_id = '${book_id}'
               AND files.mimetype = 'text/html' ${level === 10 ? '' : `AND chapter.level = '${level}'`};
         `;

+ 10 - 0
epub_node/db/update.sql

@@ -109,3 +109,13 @@ ALTER TABLE epub_manage.chapter ADD order_index INT NULL;
 ALTER TABLE epub_manage.chapter ADD order_id VARCHAR(255) NULL;
 ALTER TABLE epub_manage.chapter ADD old_path VARCHAR(255) NULL;
 ALTER TABLE epub_manage.chapter ADD path VARCHAR(255) NULL;
+
+
+
+
+alter table chapter -- NOTE(review): not schema-qualified, unlike the epub_manage.chapter statements above; confirm the default database
+    modify content varchar(255) null; -- content becomes a nullable VARCHAR(255)
+
+
+
+CREATE INDEX idx_files_source_id ON files (source_id); -- secondary index on files.source_id

+ 20 - 0
epub_node/docker-compose.yml

@@ -0,0 +1,20 @@
+version: '2'
+services:
+  zookeeper: # ZooKeeper instance the kafka service connects to (see KAFKA_ZOOKEEPER_CONNECT)
+    image: wurstmeister/zookeeper:latest
+    ports:
+      - "2181:2181"
+  kafka:
+    image: wurstmeister/kafka:2.11-1.1.1
+    ports:
+      - "9092:9092"
+    links:
+      - zookeeper
+    environment:
+      KAFKA_ADVERTISED_HOST_NAME: ${HOST_IP} # HOST_IP is substituted by Compose from the environment; if unset an empty string is used
+      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
+      KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'true'
+      KAFKA_DELETE_TOPIC_ENABLE: 'true'
+      KAFKA_CREATE_TOPICS: "topic-test:1:1" # presumably name:partitions:replicas per the image's convention - TODO confirm
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock # NOTE(review): exposes the host Docker socket to the container - confirm this is required

+ 16 - 16
epub_node/environment/index.js

@@ -1,26 +1,26 @@
 function dbInfo() {
   // Database connection settings (host, port, user, password, database); update as needed.
-  return {
-    host: "124.221.51.4",
-    port: 3306,
-    user: "root",
-    password: "btm-2024-.",
-    database: "epub_manage",
-  };
   // return {
-  //   host: "localhost",
+  //   host: "124.221.51.4",
   //   port: 3306,
   //   user: "root",
-  //   password: "12345678",
-  //   database: "epub_manage",
-  // };
-  // return {
-  //   host: "192.168.2.101",
-  //   port: 6806,
-  //   user: "root",
-  //   password: "admin",
+  //   password: "btm-2024-.",
   //   database: "epub_manage",
   // };
+  /*return {
+    host: "localhost",
+    port: 3306,
+    user: "root",
+    password: "12345678",
+    database: "epub_manage",
+  };*/
+  return { // NOTE(review): connection credentials are hard-coded in source; consider loading them from environment variables
+    host: "192.168.2.101",
+    port: 6806,
+    user: "root",
+    password: "admin",
+    database: "epub_manage",
+  };
 }
 
 

+ 3 - 1
epub_node/router/epub/index.js

@@ -19,7 +19,7 @@ import {saveImgs, calculateMD5} from "./image.js";
 import {htmlParser, saveMateInfo} from "./txt.js";
 import {saveAllCSS} from "./style.js";
 import {saveAllFount} from "./font.js";
-import {saveToc} from "./toc.js";
+import {saveToc} from "./toc_old.js";
 
 const router = express.Router();
 
@@ -37,7 +37,9 @@ router.get("/chapter_all/:book_id", async function (req, res) {
     const book_id = req.params.book_id; // 获取 fileId 参数
     res.send("epub types" + book_id);
     console.log(393939, '开始查询')
+    console.time('searchChapterForBookId')
     const chapter_all = await searchChapterForBookId({book_id, level: '1'});
+    console.timeEnd('searchChapterForBookId')
     console.log(393939, chapter_all.length, chapter_all[0]);
 });
 router.get("/clear", async function (req, res) {

+ 53 - 36
epub_node/router/epub/toc.js

@@ -1,40 +1,57 @@
-import logger from "#logger";
-import {chapter_insert, searchChapterInfoForPath} from "#db";
-import {dirExists} from "#utils";
-import fs from "node:fs";
-import {calculateMD5} from "./image.js";
+import { Worker } from 'worker_threads';
+import cliProgress from 'cli-progress';
+
+const MAX_THREADS = 10;
 
-// ./base_files/5ae2d9158081faab184484ed1783e176
 export async function saveToc(epub, uploadPath, book_id, author_id) {
-    await Promise.all(epub.toc.map(async (elm) => {
-        try {
-            const match = `${elm.href}`.match(/\/(.*\.html).*/)
-            let chapterInfo = {
-                file_id: ''
-            }
-            let path = ''
-            if (match) {
-                path = match[1];
-                chapterInfo = await searchChapterInfoForPath(path, book_id)
-            }
-            logger.info(elm);
-            const  name = `${elm.title}`
-            elm.title = '';
-            const params = {
-                name: name,
-                book_id: book_id,
-                author_id: author_id,
-                content: JSON.stringify(elm),
-                level: elm.level,
-                order_index: elm.order,
-                order_id: chapterInfo.file_id,
-                old_path: elm.href,
-                path: `./base_files/${book_id}/Text/${chapterInfo.file_id}.html`,
-            }
-            logger.info(params)
-            return await chapter_insert(params)
-        } catch (e) {
-            logger.error(e)
+    const progressBar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
+    progressBar.start(epub.toc.length, 0);
+
+    let activeWorkers = 0;
+    let currentIndex = 0;
+    const promises = [];
+
+    function startNextWorker() {
+        if (currentIndex >= epub.toc.length) {
+            return;
         }
-    }))
+
+        const elm = epub.toc[currentIndex];
+        currentIndex++;
+        activeWorkers++;
+
+        const promise = new Promise((resolve, reject) => {
+            const worker = new Worker(new URL('./worker.mjs', import.meta.url), {
+                workerData: { elm, book_id, author_id }
+            });
+
+            worker.on('message', (message) => {
+                progressBar.increment();
+                activeWorkers--;
+                resolve(message);
+                startNextWorker();
+            });
+
+            worker.on('error', (err) => {
+                activeWorkers--;
+                reject(err);
+                startNextWorker();
+            });
+
+            worker.on('exit', (code) => {
+                if (code !== 0) {
+                    reject(new Error(`Worker stopped with exit code ${code}`));
+                }
+            });
+        });
+
+        promises.push(promise);
+    }
+
+    for (let i = 0; i < Math.min(MAX_THREADS, epub.toc.length); i++) {
+        startNextWorker();
+    }
+
+    await Promise.all(promises);
+    progressBar.stop();
 }

+ 47 - 0
epub_node/router/epub/toc_old.js

@@ -0,0 +1,47 @@
+import logger from "#logger";
+import { chapter_insert, searchChapterInfoForPath } from "#db";
+import cliProgress from 'cli-progress';
+
+export async function saveToc(epub, uploadPath, book_id, author_id) {
+    // Initialize the progress bar
+    const progressBar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
+    progressBar.start(epub.toc.length, 0);
+
+    for (const elm of epub.toc) {
+        try {
+            const match = `${elm.href}`.match(/\/(.*\.html).*/);
+            let chapterInfo = {
+                file_id: ''
+            };
+            let path = '';
+            if (match) {
+                path = match[1];
+                chapterInfo = await searchChapterInfoForPath(path, book_id);
+            }
+            logger.info(elm);
+            const name = `${elm.title}`;
+            elm.title = '';
+            const params = {
+                name: name,
+                book_id: book_id,
+                author_id: author_id,
+                content: JSON.stringify(elm),
+                level: elm.level,
+                order_index: elm.order,
+                order_id: chapterInfo.file_id,
+                old_path: elm.href,
+                path: `./base_files/${book_id}/Text/${chapterInfo.file_id}.html`,
+            };
+            logger.info(params);
+            await chapter_insert(params);
+        } catch (e) {
+            logger.error(e);
+        } finally {
+            // Update the progress bar
+            progressBar.increment();
+        }
+    }
+
+    // Stop the progress bar
+    progressBar.stop();
+}

+ 68 - 63
epub_node/router/epub/txt.js

@@ -10,10 +10,11 @@ import {
     book_mate_insert,
 } from "#db";
 import {calculateMD5} from "./image.js";
+import cliProgress from 'cli-progress';
 
 const imageExtensions = [".png", ".jpg", ".jpeg", ".svg"];
 
-async function processFiles(elmData, file_md5) {
+async function processFiles(elmData, file_md5, elmIndex) {
     const rows = elmData.toString().split(/\n/);
     const promises = rows.map(async (rowtext) => {
         if (
@@ -34,15 +35,16 @@ async function processFiles(elmData, file_md5) {
             }
         } else if (rowtext.includes(".css")) {
             const match = rowtext.match(/.*="(.*\/?(.*\.css))/);
-            const [elmPath, elmName] = `${rowtext}`.match(/.*\/(.*\.css)/);
-            if (match) {
+            if (rowtext) {
+                const [elmPath , elmName] = `${rowtext}`.match(/.*\/?(.*\.css)/);
+
                 const [, cssPath, cssSrc] = match;
                 // const imgRow = await searchFileByPath(elmName, file_md5);
                 const imgNameRow =  await searchFileByName(elmName, file_md5);
                 if (imgNameRow) {
                     return (
-                        rowtext.replace(cssPath, `/api/v1/epub/css/${imgNameRow.file_id}`) +
-                        "\n"
+                      rowtext.replace(cssPath, `/api/v1/epub/css/${imgNameRow.file_id}`) +
+                      "\n"
                     );
                 }
             }
@@ -54,12 +56,7 @@ async function processFiles(elmData, file_md5) {
                 try {
                     // 搜索数据库中是否存在该字体文件
                     const imgRow = await searchFileByPath(cssSrc, file_md5);
-                    if (imgRow) {
-                        // 如果找到,替换路径为 API 端点
-                        console.log(57, rowtext);
-                        console.log(58, cssPath, cssSrc);
-                        console.log(59, `/api/v1/epub/css/${imgRow.file_id}`);
-                        
+                    if (imgRow) {                       
                         return (
                             rowtext.replace(cssPath, `/api/v1/epub/css/${imgRow.file_id}`) +
                             "\n"
@@ -76,71 +73,79 @@ async function processFiles(elmData, file_md5) {
         return rowtext + "\n";
     });
     
-    const results = await Promise.all(promises);
+    const results = await Promise.allSettled(promises);
     return results.join("");
 }
 
 export async function htmlParser(epub, zipEpubExtract, file_md5, author_id) { // Rewrites each .html/.css entry of the EPUB via processFiles, stores the result under ./base_files/<file_md5>/ and records it in the DB
     const needSetImage = epub.zip.names.filter(
-        (elm) => elm.endsWith(".html") || elm.endsWith(".css")
+      (elm) => elm.endsWith(".html") || elm.endsWith(".css")
     );
-    
-    const needSetFont = epub.zip.names.filter((elm) => elm.endsWith(".ttf"));
+
     const basePath = path.join("./base_files", file_md5, "Text");
     const styleBasePath = path.join("./base_files", file_md5, "style");
     dirExists(basePath);
     dirExists(styleBasePath);
-    
-    await Promise.all(
-        needSetImage.map(async (elm, elmIndex) => {
-            console.log('needSetImage', elmIndex)
-            const filePath = path.join(zipEpubExtract, elm);
-            const elmData = fs.readFileSync(filePath);
-            const htmlStr = await processFiles(elmData, file_md5);
-            let file_path;
-            let source_id;
-            
-            if (htmlStr) {
-                fs.writeFileSync(filePath, htmlStr);
-                
-                const htmlMd5 = await calculateMD5(filePath);
-                const isCss = elm.endsWith(".css");
-                const newFilePath = path.join(
-                    isCss ? styleBasePath : basePath,
-                    `${htmlMd5}.${isCss ? "css" : "html"}`
-                );
-                
-                Object.keys(epub.manifest).forEach(m_key => {
-                    const mElm = epub.manifest[m_key];
-                    if (mElm.href.indexOf(elm) > -1 && !source_id) {
-                        source_id = mElm.id;
-                        file_path = mElm.href
-                    }
-                })
-                
-                const params = {
+
+    // Initialize the progress bar
+    const progressBar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
+    progressBar.start(needSetImage.length, 0);
+
+    for (let elmIndex = 0; elmIndex < needSetImage.length; elmIndex++) { // files are handled one at a time (the removed version ran them concurrently via Promise.all)
+        const elm = needSetImage[elmIndex];
+        const filePath = path.join(zipEpubExtract, elm);
+        const elmData = fs.readFileSync(filePath);
+        const htmlStr = await processFiles(elmData, file_md5, elmIndex);
+        let file_path;
+        let source_id;
+        if (htmlStr) {
+            // console.log('needSetImage', elmIndex);
+            // fs.writeFileSync(filePath, htmlStr);
+
+            const htmlMd5 = await calculateMD5(filePath); // NOTE(review): with the write-back above commented out, this presumably hashes the file as extracted rather than htmlStr - confirm intended
+            const isCss = elm.endsWith(".css");
+            const newFilePath = path.join(
+              isCss ? styleBasePath : basePath,
+              `${htmlMd5}.${isCss ? "css" : "html"}`
+            );
+
+            Object.keys(epub.manifest).forEach(m_key => {
+                const mElm = epub.manifest[m_key];
+                if (mElm.href.indexOf(elm) > -1 && !source_id) { // take id/href from a manifest item whose href contains this entry name, until source_id is set
+                    source_id = mElm.id;
+                    file_path = mElm.href;
+                }
+            });
+
+            const params = {
+                file_id: htmlMd5,
+                md5: htmlMd5,
+                mimetype: isCss ? "text/css" : "text/html",
+                size: Buffer.byteLength(htmlStr),
+                name: `${htmlMd5}.${isCss ? "css" : "html"}`,
+                path: file_path,
+                source_id: source_id,
+            };
+            await files_insert(params); // the file row is inserted first; the link row and the on-disk copy are then written concurrently
+            await Promise.all([
+                files_insert_link_epub({
                     file_id: htmlMd5,
-                    md5: htmlMd5,
-                    mimetype: isCss ? "text/css" : "text/html",
-                    size: Buffer.byteLength(htmlStr),
-                    name: `${htmlMd5}.${isCss ? "css" : "html"}`,
-                    path: file_path,
-                    source_id: source_id,
-                };
-                await files_insert(params);
-                await Promise.all([
-                    files_insert_link_epub({
-                        file_id: htmlMd5,
-                        book_id: file_md5,
-                        author_id,
-                    }),
-                    fs.promises.writeFile(newFilePath, htmlStr),
-                ]);
-            }
-        })
-    );
+                    book_id: file_md5,
+                    author_id,
+                }),
+                fs.promises.writeFile(newFilePath, htmlStr),
+            ]);
+        }
+        // Update the progress bar
+        progressBar.update(elmIndex + 1);
+    }
+    // Update the progress bar
+    progressBar.update(needSetImage.length);
+    // Stop the progress bar
+    progressBar.stop();
 }
 
+
 // saveMateInfo
 export async function saveMateInfo(epub, zipEpubExtract, file_md5, author_id) {
     // book_mate_insert

+ 157 - 0
epub_node/router/epub/txt_old.js

@@ -0,0 +1,157 @@
+import fs from "node:fs";
+import path from "node:path";
+import {dirExists, isFileSync, isDir, waittime} from "#utils";
+import {
+    getFileBymd5,
+    searchFileByPath,
+    searchFileByName,
+    files_insert_link_epub,
+    files_insert,
+    book_mate_insert,
+} from "#db";
+import {calculateMD5} from "./image.js";
+
+const imageExtensions = [".png", ".jpg", ".jpeg", ".svg"];
+
+async function processFiles(elmData, file_md5, elmIndex) {
+    const rows = elmData.toString().split(/\n/);
+    const promises = rows.map(async (rowtext) => {
+        if (
+            rowtext.includes("<img ") &&
+            imageExtensions.some((ext) => rowtext.includes(ext))
+        ) {
+            // const match = rowtext.match(/.*(..\/Images\/(.*\.(jpg|png|jpeg|svg))).*/);
+            const match = rowtext.match(/src=("|')(.*\/(.*\.[a-zA-Z]+))("|')/)
+            if (match) {
+                const [, , imgPath, imageSrc] = match;
+                const imgRow = await searchFileByPath(imageSrc);
+                if (imgRow) {
+                    return (
+                        rowtext.replace(imgPath, `/api/v1/epub/img/${imgRow.file_id}`) +
+                        "\n"
+                    );
+                }
+            }
+        } else if (rowtext.includes(".css")) {
+            const match = rowtext.match(/.*="(.*\/?(.*\.css))/);
+            if (rowtext) {
+                const [elmPath , elmName] = `${rowtext}`.match(/.*\/?(.*\.css)/);
+
+                const [, cssPath, cssSrc] = match;
+                // const imgRow = await searchFileByPath(elmName, file_md5);
+                const imgNameRow =  await searchFileByName(elmName, file_md5);
+                if (imgNameRow) {
+                    return (
+                      rowtext.replace(cssPath, `/api/v1/epub/css/${imgNameRow.file_id}`) +
+                      "\n"
+                    );
+                }
+            }
+        } else if (rowtext.includes(".ttf")) {
+            // 使用正则表达式匹配路径和文件名
+            const match = rowtext.match(/.*\((.*\/?(.*ttf))\)./);
+            if (match) {
+                const [, cssPath, cssSrc] = match;
+                try {
+                    // 搜索数据库中是否存在该字体文件
+                    const imgRow = await searchFileByPath(cssSrc, file_md5);
+                    if (imgRow) {                       
+                        return (
+                            rowtext.replace(cssPath, `/api/v1/epub/css/${imgRow.file_id}`) +
+                            "\n"
+                        );
+                    } else {
+                        console.warn(`Font file not found for path: ${cssSrc}`);
+                    }
+                } catch (error) {
+                    console.error("Error searching for font file:", error);
+                }
+            }
+        }
+        
+        return rowtext + "\n";
+    });
+    
+    const results = await Promise.allSettled(promises);
+    return results.join("");
+}
+
+export async function htmlParser(epub, zipEpubExtract, file_md5, author_id) {
+    const needSetImage = epub.zip.names.filter(
+        (elm) => elm.endsWith(".html") || elm.endsWith(".css")
+    );
+    
+    const needSetFont = epub.zip.names.filter((elm) => elm.endsWith(".ttf"));
+    const basePath = path.join("./base_files", file_md5, "Text");
+    const styleBasePath = path.join("./base_files", file_md5, "style");
+    dirExists(basePath);
+    dirExists(styleBasePath);
+    
+    await Promise.allSettled(
+        needSetImage.map(async (elm, elmIndex) => {
+            const filePath = path.join(zipEpubExtract, elm);
+            const elmData = fs.readFileSync(filePath);
+            const htmlStr = await processFiles(elmData, file_md5, elmIndex);
+            let file_path;
+            let source_id;
+            if (htmlStr) {
+                console.log('needSetImage', elmIndex)
+                fs.writeFileSync(filePath, htmlStr);
+                
+                const htmlMd5 = await calculateMD5(filePath);
+                const isCss = elm.endsWith(".css");
+                const newFilePath = path.join(
+                    isCss ? styleBasePath : basePath,
+                    `${htmlMd5}.${isCss ? "css" : "html"}`
+                );
+                
+                Object.keys(epub.manifest).forEach(m_key => {
+                    const mElm = epub.manifest[m_key];
+                    if (mElm.href.indexOf(elm) > -1 && !source_id) {
+                        source_id = mElm.order_id;
+                        file_path = mElm.href
+                    }
+                })
+                
+                const params = {
+                    file_id: htmlMd5,
+                    md5: htmlMd5,
+                    mimetype: isCss ? "text/css" : "text/html",
+                    size: Buffer.byteLength(htmlStr),
+                    name: `${htmlMd5}.${isCss ? "css" : "html"}`,
+                    path: file_path,
+                    source_id: source_id,
+                };
+                await files_insert(params);
+                await Promise.allSettled([
+                    files_insert_link_epub({
+                        file_id: htmlMd5,
+                        book_id: file_md5,
+                        author_id,
+                    }),
+                    fs.promises.writeFile(newFilePath, htmlStr),
+                ]);
+            }
+        })
+    );
+}
+
+// saveMateInfo
+export async function saveMateInfo(epub, zipEpubExtract, file_md5, author_id) {
+    // book_mate_insert
+    const params = {
+        book_name: epub.metadata.title,
+        book_id: file_md5,
+        book_md5: file_md5,
+        // language: "",
+        // date: "",
+        // creatorFileAs: "",
+        // UUID: "",
+        // ISBN: "",
+        author_id: author_id,
+        // category_id: "",
+        // Introduction: "",
+    };
+    
+    const res = await book_mate_insert(params);
+}