Bladeren bron

Dockerfile

John 7 maanden geleden
bovenliggende
commit
989a527897
5 gewijzigde bestanden met 120 toevoegingen en 89 verwijderingen
  1. 5 3
      epub_node/db/files.js
  2. 15 9
      epub_node/router/epub/image.js
  3. 31 5
      epub_node/router/epub/index.js
  4. 55 62
      epub_node/router/epub/style.js
  5. 14 10
      epub_node/router/epub/txt.js

+ 5 - 3
epub_node/db/files.js

@@ -55,11 +55,13 @@ export function getFileBymd5(md5Str) {
 }
 
 // 查询图片信息
-export function searchFileByPath(imgPath) {
+export function searchFileByPath(imgPath, file_md5) {
   return new Promise((resolve, reject) => {
+    console.log(606060, `SELECT * FROM files WHERE path like ? ${file_md5 ? 'and path like ?' : ''};`)
+    console.log(606060, [`%${imgPath}%`, `%${file_md5}%`])
     connection.query(
-      `SELECT * FROM files WHERE path like ?;`,
-      [`%${imgPath}%`],
+      `SELECT * FROM files WHERE path like ? ${file_md5 ? 'and path like ?' : ''};`,
+      [`%${imgPath}%`, `%${file_md5}%`],
       (err, rows) => {
         if (err) {
           console.log(62626262, err);

+ 15 - 9
epub_node/router/epub/image.js

@@ -59,17 +59,23 @@ export async function saveImgs(epub) {
   }
 }
 
-function calculateMD5(filePath) {
-  const hash = crypto.createHash("md5");
-  const stream = fs.createReadStream(filePath);
+export function calculateMD5(filePath) {
+  return new Promise((resolve, reject) => {
+    try {
+      const hash = crypto.createHash("md5");
+      const stream = fs.createReadStream(filePath);
 
-  stream.on("data", (chunk) => {
-    hash.update(chunk);
-  });
+      stream.on("data", (chunk) => {
+        hash.update(chunk);
+      });
 
-  stream.on("end", () => {
-    console.log("MD5 hash:", hash.digest("hex"));
-  });
+      stream.on("end", () => {
+        resolve(hash.digest("hex"));
+      });  
+    } catch ( err ) {
+      resolve('');
+    }
+  })
 }
 
 export async function calculateMD5FromStream(fileStream) {

+ 31 - 5
epub_node/router/epub/index.js

@@ -7,7 +7,7 @@ import express from "express";
 import { EPub } from "epub2";
 import { dirExists, isFileSync, isDir, waittime } from "#utils";
 
-import { saveImgs } from "./image.js";
+import { saveImgs, calculateMD5 } from "./image.js";
 import { htmlParser } from "./txt.js";
 import { saveAllCSS } from "./style.js";
 
@@ -32,7 +32,8 @@ router.get("/html", function (req, res) {
   // 构建文件的绝对路径
   const filePath = path.join(
     process.cwd(),
-    "./base_files/2a0b8153f3ede4bd43abb3b0e38ee857/epub-extract/OEBPS/Text/:::::::::::::::::::::*:*::**::*:.html"
+    // "./base_files/2a0b8153f3ede4bd43abb3b0e38ee857/epub-extract/OEBPS/Text/:::::::::::::::::::::*:*::**::*:.html"
+    "./base_files/5ae2d9158081faab184484ed1783e176/epub-extract/OEBPS/text00040.html"
   );
 
   // 发送文件
@@ -65,6 +66,28 @@ router.get("/img/:fileId", async function (req, res) {
   res.sendFile(filePath);
 });
 
+
+router.get("/css/:fileId", async function (req, res) {
+  const fileId = req.params.fileId; // 获取 fileId 参数
+  logger.info(`Found ${fileId}`);
+  const fileRow = await getFileBymd5(fileId);
+
+  if (!fileRow) {
+    return res.status(404).send("文件查询失败");
+  }
+
+  const filePath = path.resolve(fileRow.path);
+  console.log(79, filePath)
+  // 检查文件是否存在
+  if (!fs.existsSync(filePath)) {
+    return res.status(404).send("服务器中不存在该文件");
+  }
+
+  // 返回文件
+  res.setHeader("Content-Type", fileRow.mimetype);
+  res.sendFile(filePath);
+});
+
 // define the about route
 router.put("/", async function (req, res) {
   let sampleFile;
@@ -80,7 +103,7 @@ router.put("/", async function (req, res) {
 
   sampleFile = req.files.file;
 
-  const file_md5 = sampleFile.md5;
+  let file_md5 = sampleFile.md5;
   uploadPath = `./base_files/${file_md5}/`;
   epubFilePath = uploadPath + sampleFile.md5 + '.epub';
   zipEpubExtract = uploadPath + "epub-extract/";
@@ -99,6 +122,7 @@ router.put("/", async function (req, res) {
     epubData = sampleFile.data;
   } else {
     epubData = fs.readFileSync(epubFilePath);
+    file_md5 = await calculateMD5(epubFilePath)
   }
 
   /* 是否需要解压文件 */
@@ -131,11 +155,13 @@ router.put("/", async function (req, res) {
     3、存储html数据
     4、存储css数据
    */
-  // await saveImgs(epub, uploadPath);
+  await saveImgs(epub, uploadPath);
   await saveAllCSS(epub, uploadPath)
 
   // 存储html数据
-  // await htmlParser(epub, zipEpubExtract);
+  const test  = await htmlParser(epub, zipEpubExtract, file_md5);
+  
+  
   // console.log("\nSPINE:\n");
   // // console.log(epub.flow);
   // epub.flow.forEach((elm) => {

+ 55 - 62
epub_node/router/epub/style.js

@@ -1,73 +1,66 @@
-import { dirExists } from "#utils";
+import logger from "#logger";
 import { files_insert } from "#db";
-import crypto from "crypto";
+import { dirExists } from "#utils";
 import fs from "node:fs";
-import logger from "#logger";
+import { calculateMD5 } from './image.js'
+
 
-export async function saveAllCSS(epub, uploadPath) {
+// ./base_files/5ae2d9158081faab184484ed1783e176
+export async function saveAllCSS( epub,uploadPath ) {
   dirExists(uploadPath);
+  dirExists(`${ uploadPath }style/`);
 
   // 获取原始数据源
-  const getAllCss = epub.zip.names.filter(
-    (elm) => elm.indexOf("css") > -1
-  );
-  console.log(14, getAllCss);
-/*    if (getAllCss.length) {
-      const imgRes = await Promise.allSettled(
-        getAllCss.map((img) => {
-          return fs.readFileSync()
-        })
-      );*/
-  //     // 过滤数据
-  //   let imgs = epub.listImage();
-  //   if (imgs.length) {
-  //     const imgRes = await Promise.allSettled(
-  //       imgs.map((img) => {
-  //         return epub.getImageAsync(img.id);
-  //       })
-  //     );
-  //     // 过滤数据
+  const getAllCss = epub.zip.names.filter(( elm ) => elm.indexOf("css") > -1);
+  const base_path = `${ uploadPath }epub-extract/`
+  if ( getAllCss.length ) {
+    const cssRes = await Promise.allSettled(getAllCss.map(( img ) => {
+      return fs.readFileSync(base_path + img,"utf8");
+    }));
+    const allCss_fulfilled = cssRes
+      .map(( img,index ) => {
+        const img_fulfilled = cssRes[index];
+        const cssPath = getAllCss[index];
+        if ( img_fulfilled.status === "fulfilled" ) {
+          // const file_md5 = await calculateMD5(base_path + cssPath)
+          return {
+            ...img,...img_fulfilled,index,path: base_path + cssPath,cssPath,// md5: file_md5,
+            css_data: img_fulfilled.value,mimeType: 'text/css',
+          };
+        }
+        return false;
+      })
+      .filter(( elm ) => elm);
 
-  //     const imgs_fulfilled = imgs
-  //       .map((img, index) => {
-  //         const img_fulfilled = imgRes[index];
-  //         if (img_fulfilled.status === "fulfilled") {
-  //           const [img_data, img_mimeType] = img_fulfilled.value;
-  //           return {
-  //             ...img,
-  //             index,
-  //             img_data,
-  //             img_mimeType,
-  //           };
-  //         }
-  //         return false;
-  //       })
-  //       .filter((elm) => elm);
+    await Promise.allSettled(allCss_fulfilled.map(async ( elm ) => {
+      const md5 = await calculateMD5(elm.path)
+      const [elmPath,elmName] = `${ elm.path }`.match(/.*\/(.*\.css)/)
 
-  //     await Promise.allSettled(
-  //       imgs_fulfilled.map(async (elm) => {
-  //         const img_md5 = await calculateMD5FromBuffer(elm.img_data);
+      // 
+      // 移动文件
+      // './base_files/5ae2d9158081faab184484ed1783e176/epub-extract/OEBPS/flow0001.css'
+      // ./base_files/5ae2d9158081faab184484ed1783e176/style/flow0001.css
+      //
+      
+      fs.writeFile(`${ uploadPath }style/${ elmName }`,elm.css_data,( err ) => {
+        if ( err ) {
+          logger.error("Error writing Img file:",err);
+        } else {
+          logger.info("Img data saved to " + md5);
+        }
+      });
 
-  //         const uploadPath = "./base_files/" + img_md5;
-  //         const params = {
-  //           file_id: img_md5,
-  //           md5: img_md5,
-  //           mimetype: elm.img_mimeType,
-  //           size: elm.img_data.length,
-  //           name: elm.id,
-  //           path: elm.href,
-  //           source_id: elm.id,
-  //         };
 
-  //         fs.writeFile(uploadPath, elm.img_data, (err) => {
-  //           if (err) {
-  //             logger.error("Error writing Img file:", err);
-  //           } else {
-  //             logger.info("Img data saved to " + img_md5);
-  //           }
-  //         });
-  //         return await files_insert(params);
-  //       })
-  //     );
-  //   }
+      const params = {
+        file_id: md5,
+        md5: md5,
+        mimetype: elm.mimeType,
+        size: elm.css_data.length,
+        name: elmName,
+        path: `${ uploadPath }style/${ elmName }`,
+        source_id: md5,
+      };
+      return await files_insert(params);
+    }));
+  }
 }

+ 14 - 10
epub_node/router/epub/txt.js

@@ -1,9 +1,10 @@
 import fs from "node:fs";
 import { getFileBymd5, searchFileByPath } from "#db";
+
 import * as cheerio from "cheerio";
 
 // 置换文件中的图片路径
-async function processFiles(elmDate) {
+async function processFiles(elmDate, file_md5) {
   const rows = elmDate.toString().split(/\n/);
   let htmlStr = "";
   for (const rowtext of rows) {
@@ -14,9 +15,11 @@ async function processFiles(elmDate) {
         rowtext.includes(".jpeg"))
     ) {
       const match = rowtext.match(/.*(..\/Images\/(.*(jpg|png|jpeg))).*/);
+      console.log(18, match)
       if (match) {
         const [imgText, imgPath, imageSrc] = match;
         const imgRow = await searchFileByPath(imageSrc);
+        console.log(18, imgRow)
         if (imgRow) {
           const text = rowtext.replace(
             imgPath,
@@ -29,14 +32,16 @@ async function processFiles(elmDate) {
       } else {
         htmlStr += rowtext + "\n";
       }
-    } else if (rowtext.includes(".css")) {
-      const match = rowtext.match(/.*"(.*\/(.*\.css))/);
+      return 
+    }
+    if (rowtext.includes(".css")) {
+      const match = rowtext.match(/.*="(.*\/?(.*\.css))/);
       if (match) {
         const [cssText, cssPath, cssSrc] = match;
-        const imgRow = await searchFileByPath(imageSrc);
+        const imgRow = await searchFileByPath(cssSrc, file_md5);
         if (imgRow) {
           const text = rowtext.replace(
-            imgPath,
+            cssPath,
             `/api/v1/epub/css/${imgRow.file_id}`
           );
           htmlStr += text + "\n";
@@ -53,18 +58,17 @@ async function processFiles(elmDate) {
   return Promise.resolve(htmlStr);
 }
 
-export async function htmlParser(epub, zipEpubExtract) {
+export async function htmlParser(epub, zipEpubExtract, file_md5) {
   // 获取原始数据源
   const needSetImge = epub.zip.names.filter(
     (elm) => elm.indexOf("html") > -1 || elm.indexOf("css") > -1
   );
-  for (let i = 0; i < 1; i++) {
+  for (let i = 0; i < needSetImge.length; i++) {
     // 执行当前层的异步操作
-    const elm = needSetImge[60];
+    const elm = needSetImge[i];
     const elmDate = fs.readFileSync(zipEpubExtract + elm);
-    let htmlStr = await processFiles(elmDate);
+    let htmlStr = await processFiles(elmDate, file_md5);
     // 修改源数据
     fs.writeFileSync(zipEpubExtract + elm, htmlStr);
-    console.log(48, zipEpubExtract + elm);
   }
 }