|
@@ -1,74 +1,93 @@
|
|
|
import fs from "node:fs";
|
|
|
-import { getFileBymd5, searchFileByPath } from "#db";
|
|
|
+import path from "node:path";
|
|
|
+import { dirExists, isFileSync, isDir, waittime } from "#utils";
|
|
|
+import {
|
|
|
+ getFileBymd5,
|
|
|
+ searchFileByPath,
|
|
|
+ files_insert_link_epub,
|
|
|
+ files_insert,
|
|
|
+} from "#db";
|
|
|
+import { calculateMD5 } from "./image.js";
|
|
|
|
|
|
-import * as cheerio from "cheerio";
|
|
|
+const imageExtensions = [".png", ".jpg", ".jpeg"];
|
|
|
|
|
|
-// 置换文件中的图片路径
|
|
|
-async function processFiles(elmDate, file_md5) {
|
|
|
- const rows = elmDate.toString().split(/\n/);
|
|
|
- let htmlStr = "";
|
|
|
- for (const rowtext of rows) {
|
|
|
+async function processFiles(elmData, file_md5) {
|
|
|
+ const rows = elmData.toString().split(/\n/);
|
|
|
+ const promises = rows.map(async (rowtext) => {
|
|
|
if (
|
|
|
rowtext.includes("Images") &&
|
|
|
- (rowtext.includes(".png") ||
|
|
|
- rowtext.includes(".jpg") ||
|
|
|
- rowtext.includes(".jpeg"))
|
|
|
+ imageExtensions.some((ext) => rowtext.includes(ext))
|
|
|
) {
|
|
|
- const match = rowtext.match(/.*(..\/Images\/(.*(jpg|png|jpeg))).*/);
|
|
|
+ const match = rowtext.match(/.*(..\/Images\/(.*\.(jpg|png|jpeg))).*/);
|
|
|
if (match) {
|
|
|
- const [imgText, imgPath, imageSrc] = match;
|
|
|
+ const [, imgPath, imageSrc] = match;
|
|
|
const imgRow = await searchFileByPath(imageSrc);
|
|
|
if (imgRow) {
|
|
|
- const text = rowtext.replace(
|
|
|
- imgPath,
|
|
|
- `/api/v1/epub/img/${imgRow.file_id}`
|
|
|
+ return (
|
|
|
+ rowtext.replace(imgPath, `/api/v1/epub/img/${imgRow.file_id}`) +
|
|
|
+ "\n"
|
|
|
);
|
|
|
- htmlStr += text + "\n";
|
|
|
- } else {
|
|
|
- htmlStr += rowtext + "\n";
|
|
|
}
|
|
|
- } else {
|
|
|
- htmlStr += rowtext + "\n";
|
|
|
}
|
|
|
- return;
|
|
|
- }
|
|
|
- if (rowtext.includes(".css")) {
|
|
|
+ } else if (rowtext.includes(".css")) {
|
|
|
const match = rowtext.match(/.*="(.*\/?(.*\.css))/);
|
|
|
if (match) {
|
|
|
- const [cssText, cssPath, cssSrc] = match;
|
|
|
+ const [, cssPath, cssSrc] = match;
|
|
|
const imgRow = await searchFileByPath(cssSrc, file_md5);
|
|
|
if (imgRow) {
|
|
|
- const text = rowtext.replace(
|
|
|
- cssPath,
|
|
|
- `/api/v1/epub/css/${imgRow.file_id}`
|
|
|
+ return (
|
|
|
+ rowtext.replace(cssPath, `/api/v1/epub/css/${imgRow.file_id}`) +
|
|
|
+ "\n"
|
|
|
);
|
|
|
- htmlStr += text + "\n";
|
|
|
- } else {
|
|
|
- htmlStr += rowtext + "\n";
|
|
|
}
|
|
|
- } else {
|
|
|
- htmlStr += rowtext + "\n";
|
|
|
}
|
|
|
- } else {
|
|
|
- htmlStr += rowtext + "\n";
|
|
|
}
|
|
|
- }
|
|
|
- return Promise.resolve(htmlStr);
|
|
|
+ return rowtext + "\n";
|
|
|
+ });
|
|
|
+
|
|
|
+ const results = await Promise.all(promises);
|
|
|
+ return results.join("");
|
|
|
}
|
|
|
|
|
|
-export async function htmlParser(epub, zipEpubExtract, file_md5) {
|
|
|
- // 获取原始数据源
|
|
|
- const needSetImge = epub.zip.names.filter(
|
|
|
- (elm) => elm.indexOf(".html") > -1 || elm.indexOf(".css") > -1
|
|
|
+export async function htmlParser(epub, zipEpubExtract, file_md5, author_id) {
|
|
|
+ const needSetImage = epub.zip.names.filter(
|
|
|
+ (elm) => elm.endsWith(".html") || elm.endsWith(".css")
|
|
|
+ );
|
|
|
+
|
|
|
+ const basePath = path.join("./base_files", file_md5, "Text");
|
|
|
+ dirExists(basePath);
|
|
|
+
|
|
|
+ await Promise.all(
|
|
|
+ needSetImage.map(async (elm) => {
|
|
|
+ const filePath = path.join(zipEpubExtract, elm);
|
|
|
+ const elmData = fs.readFileSync(filePath);
|
|
|
+ const htmlStr = await processFiles(elmData, file_md5);
|
|
|
+
|
|
|
+ if (htmlStr) {
|
|
|
+ fs.writeFileSync(filePath, htmlStr);
|
|
|
+
|
|
|
+ const htmlMd5 = await calculateMD5(filePath);
|
|
|
+ const newFilePath = path.join(basePath, `${htmlMd5}.html`);
|
|
|
+
|
|
|
+ const params = {
|
|
|
+ file_id: htmlMd5,
|
|
|
+ md5: htmlMd5,
|
|
|
+ mimetype: "text/html",
|
|
|
+ size: Buffer.byteLength(htmlStr),
|
|
|
+ name: `${htmlMd5}.html`,
|
|
|
+ path: newFilePath,
|
|
|
+ source_id: elm,
|
|
|
+ };
|
|
|
+ await files_insert(params);
|
|
|
+ await Promise.all([
|
|
|
+ files_insert_link_epub({
|
|
|
+ file_id: htmlMd5,
|
|
|
+ book_id: file_md5,
|
|
|
+ author_id,
|
|
|
+ }),
|
|
|
+ fs.promises.writeFile(newFilePath, htmlStr),
|
|
|
+ ]);
|
|
|
+ }
|
|
|
+ })
|
|
|
);
|
|
|
- for (let i = 0; i < needSetImge.length; i++) {
|
|
|
- // 执行当前层的异步操作
|
|
|
- const elm = needSetImge[i];
|
|
|
- const elmDate = fs.readFileSync(zipEpubExtract + elm);
|
|
|
- let htmlStr = await processFiles(elmDate, file_md5);
|
|
|
- if(htmlStr) {
|
|
|
- // 修改源数据
|
|
|
- fs.writeFileSync(zipEpubExtract + elm, htmlStr);
|
|
|
- }
|
|
|
- }
|
|
|
}
|