
Controllable data-hash computation process

John 1 year ago
parent commit
f969f43d43

+ 42 - 21
src-tauri/src/self_plugin/tauri_plugin_file/files.rs

@@ -37,10 +37,11 @@ pub struct FileInfo {
     pub id: Option<u32>,
     pub progress: Option<f32>,
     pub types: Option<Vec<String>>,
+    pub excluded_file_names: Option<Vec<String>>,
 }
 
 #[command]
-pub fn get_all_directory(file_info: FileInfo) -> Vec<PathBuf> {
+pub fn get_all_directory(file_info: FileInfo) -> Vec<FileInfos> {
     let mut files = Vec::new();
     if let Some(ref path) = file_info.path {
         println!("Processing directory: {}", path);
@@ -50,6 +51,7 @@ pub fn get_all_directory(file_info: FileInfo) -> Vec<PathBuf> {
             &mut files,
             &file_info.checked_size_values,
             &file_info.types,
+            &file_info.excluded_file_names,
         );
         files
     } else {
@@ -74,34 +76,44 @@ pub fn get_file_type_by_path(file_path: String) -> String {
 
 fn read_files_in_directory(
     dir: &Path,
-    files: &mut Vec<PathBuf>,
+    files: &mut Vec<FileInfos>,
     filters: &Option<Vec<FileSizeCategory>>,
     types: &Option<Vec<String>>,
+    excluded_file_names: &Option<Vec<String>>,
 ) {
     if dir.is_dir() {
-        // Try to read the directory, ignoring errors
         if let Ok(entries) = fs::read_dir(dir) {
-            for entry in entries {
-                if let Ok(entry) = entry {
-                    let path = entry.path();
-                    if path.is_dir() {
-                        // Recurse into the subdirectory, ignoring errors
-                        read_files_in_directory(&path, files, filters, types);
-                    } else {
-                        // Try to read file metadata, ignoring errors
-                        if let Ok(metadata) = fs::metadata(&path) {
-                            let size = metadata.len();
-                            let size_matches = filters.is_none()
-                                || file_size_matches(size, filters.as_ref().unwrap());
-                            let type_matches = types.is_none()
-                                || file_type_matches(&path, types.as_ref().unwrap());
-
-                            if size_matches && type_matches {
-                                files.push(path);
-                            }
+            for entry in entries.flatten() {
+                let path = entry.path();
+                if path.is_dir() {
+                    read_files_in_directory(&path, files, filters, types, excluded_file_names);
+                    continue;
+                }
+
+                if let Some(file_name) = path.file_name().and_then(|name| name.to_str()) {
+                    if let Some(excluded_names) = excluded_file_names {
+                        if excluded_file_names_matches(file_name, excluded_names) {
+                            continue;
                         }
                     }
                 }
+
+                let metadata = if let Ok(meta) = path.metadata() { meta } else { continue };
+                let size_matches = filters.as_ref().map_or(true, |f| file_size_matches(metadata.len(), f));
+                let type_matches = types.as_ref().map_or(true, |t| file_type_matches(&path, t));
+                if size_matches && type_matches {
+                    if let Some(path_str) = path.to_str() {
+                        // path_str is valid UTF-8 here; collect this file's info
+                        let path_info = get_file_info(path_str.to_string());
+                        files.push(path_info);
+                    } else {
+                        // Skip paths that are not valid UTF-8
+                        // eprintln!("Path is not valid UTF-8");
+                        continue;
+                    }
+                }
             }
         }
     }
@@ -129,6 +141,15 @@ fn file_type_matches(path: &Path, types: &Vec<String>) -> bool {
     false
 }
 
+fn excluded_file_names_matches(path_name: &str, excluded_file_names: &Vec<String>) -> bool {
+    for excluded_name in excluded_file_names {
+        if path_name == excluded_name {
+            return true;
+        }
+    }
+    false
+}
+
 #[command]
// Async command that computes a file's SHA-256 hash
 pub async fn calculate_file_hash(file_path: String) -> String {
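
The hash command above takes a path string and resolves to the file's SHA-256 digest. A minimal TypeScript sketch of invoking it directly over the Tauri bridge (the plugin:st-files prefix is taken from file.ts below; the camelCase filePath key assumes Tauri v1's default snake_case-to-camelCase argument conversion):

    import { invoke } from "@tauri-apps/api/tauri";

    // Sketch only: File.getHash in src/plugins/tauri-plugin-file/file.ts
    // presumably wraps this same command.
    async function hashFile(filePath: string): Promise<string> {
      return invoke<string>("plugin:st-files|calculate_file_hash", {
        filePath, // maps to the Rust argument file_path
      });
    }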

+ 85 - 43
src/pages/DuplicateFile/CalculateDuplicateFiles.tsx

@@ -1,11 +1,12 @@
 import {
-  get_info_by_id,
+  get_info_by_id, getFirstEmptyHashBySourceId,
   insertSearchFiles,
   updateSelectedFileHistoryFiles,
 } from "@/services";
 import { useEffect, useState } from "react";
-import { useNavigate, useParams } from "react-router-dom";
+import { useNavigate, useParams, useLocation } from "react-router-dom";
 import {
+  backFileInfoType,
   FileInfoType,
   insertSearchFilesPasamsType,
   stepsStatusType,
@@ -24,9 +25,12 @@ import get_progress_by_sourceId, {
 export default function CalculateDuplicateFiles() {
   let { fileId } = useParams();
   let navigate = useNavigate();
+  const location = useLocation();
+
   const [fileInfo, setFileInfo] = useState<FileInfoType>({});
   const [current, setCurrent] = useState(1);
   const [percent, setPercent] = useState(85);
+  const [duplicateFilesStep, setDuplicateFilesStep] = useState('');
   const [stepsStatus, setStepsStatus] = useState<stepsStatusType>({
     // 'wait' | 'process' | 'finish' | 'error';
     scanDir: "wait",
@@ -34,10 +38,34 @@ export default function CalculateDuplicateFiles() {
     duplicateFiles: "wait",
     done: "wait",
   });
+  const [isCancelled, setIsCancelled] = useState(false); // abort in-flight work when the user leaves the page
+  const [hasMounted, setHasMounted] = useState(false);
   useEffect(() => {
-    pageInit();
+    pageInit().then(r => console.log(r));
   }, []);
 
+  useEffect(() => {
+    // Runs on mount and again when hasMounted changes
+    console.log("component mounted");
+
+    console.log(location); // current location object
+    console.log(location.pathname); // current route path
+
+    setTimeout(() => {
+      // Flag that the component has finished mounting
+      setHasMounted(true);
+    }, 300);
+    // The cleanup below only treats this as a real unmount once the
+    // component has actually mounted, and only then sets isCancelled
+    return () => {
+      if (hasMounted) {
+        console.log(47, 'component unmounting, setting isCancelled to true');
+        setIsCancelled(true);
+      }
+    };
+  }, [hasMounted]);
+
   const waittime = (time = 100) => {
     return new Promise((resolve) => {
       setTimeout(() => {
@@ -68,18 +96,24 @@ export default function CalculateDuplicateFiles() {
   async function scanDirAll() {
     if (fileInfo.path) {
      // Scan the directory for files
-
      console.log("directory scan finished");
      const files = await scanAllFilesInDir();

      // Compute file metadata
      console.log("computing file metadata: start");
-      await computeFileMetadata(files);
+      await computeFileMetadata_v2(files);
      console.log("computing file metadata: finished");

      // Hash each file's contents
      console.log("computing per-file hashes: start");
-      await computeFileChecksums();
+      try {
+        await computeFileChecksums_2();
+      } catch (error) {
+        console.log(107, error);
+        if (error == 'terminated early') {
+          return;
+        }
+      }
      console.log("computing per-file hashes: finished");
 
       setStepsStatus({
@@ -107,7 +141,7 @@ export default function CalculateDuplicateFiles() {
   }
 
  // Scan directory files
-  async function scanAllFilesInDir(): Promise<string[]> {
+  async function scanAllFilesInDir(): Promise<backFileInfoType[]> {
     const [progressRes] = await get_progress_by_sourceId(`${fileId}`);
     if (progressRes.total_entries !== fileInfo.files || !fileInfo.files) {
       console.log("扫描目录文件 开始");
@@ -132,9 +166,12 @@ export default function CalculateDuplicateFiles() {
     return Promise.resolve([]);
   }
 
-  // Compute file metadata
-  async function computeFileMetadata(files: string[]) {
-    if(!files.length) {
+  /*
+   * Process the file metadata returned by the scan
+   */
+  async function computeFileMetadata_v2(files: backFileInfoType[]) {
+    const [progressRes] = await get_progress_by_sourceId(`${fileId}`);
+    if(!files.length || !progressRes.total_entries) {
       setStepsStatus({
         ...stepsStatus,
         scanDir: "finish",
@@ -143,7 +180,6 @@ export default function CalculateDuplicateFiles() {
       setPercent(100);
       return Promise.resolve(0)
     }
-    /* If the file count is 0, fall back to querying the database */
    // Update the total file count for the directory being scanned
     await updateSelectedFileHistoryFiles(`${fileInfo.path}`, files.length);
     setStepsStatus({
@@ -155,46 +191,40 @@ export default function CalculateDuplicateFiles() {
     let fileIndex = -1;
     let allFilesLength = files.length;
     await files.reduce(
-      async (prevPromise: any, currentFile: any) => {
-        // Wait for the previous Promise to finish
-        await prevPromise;
-        // ishaveFile: true means the file record already exists; false means it does not
-        const [ishaveFile, fileinfo] = await get_fileInfo_by_path(
-          currentFile,
-          `${fileId}`
-        );
-        if (!ishaveFile) {
-          // Get the file type and hash
-          const fileInfo = await File.getInfo(currentFile);
+        async (prevPromise: any, currentFile: any) => {
+          // Wait for the previous Promise to finish
+          await prevPromise;
           fileIndex++;
+          const file_info = files[fileIndex];
           setPercent(Math.floor((fileIndex / allFilesLength) * 100));
           return insertSearchFiles({
            // Assemble the record
             sourceId: `${fileId}`,
-            path: currentFile,
-            name: fileInfo.file_name,
-            creation_time: fileInfo.creation_time,
-            modified_time: fileInfo.modified_time,
-            file_size: fileInfo.file_size,
-            type: fileInfo.file_type,
-            // Hashing a single file takes a while, so a separate task handles it
+            path: `${file_info.file_path}`,
+            name: file_info.file_name,
+            creation_time: file_info.creation_time,
+            modified_time: file_info.modified_time,
+            file_size: file_info.file_size,
+            type: file_info.file_type,
             hash: "",
           });
-        }
-        return Promise.resolve(0);
-      },
-      Promise.resolve(0)
+        },
+        Promise.resolve(0)
     );
     setPercent(100);
     return waittime(300);
   }
 
  // Compute each file's hash
-  async function computeFileChecksums() {
-    const [allList, allListMsg] = await get_list_by_sourceid(`${fileId}`);
-    if (allList && Array.isArray(allList)) {
+  async function computeFileChecksums_2() {
+    const [progressRes] = await get_progress_by_sourceId(`${fileId}`);
+    console.log(178, progressRes);
+
+    // There are recorded files, and some of them still lack a computed hash
+    if (progressRes.hash_null_count && progressRes.total_entries) {
       let fileIndex = -1;
-      let allFilesLength = allList.length;
+      let allFilesLength = progressRes.hash_null_count;
+      const allList = [...Array(allFilesLength).keys()];
       setStepsStatus({
         ...stepsStatus,
         scanDir: "finish",
@@ -203,20 +233,32 @@ export default function CalculateDuplicateFiles() {
       });
       setPercent(0);
       await allList
-        .filter((currentFile: insertSearchFilesPasamsType) => !currentFile.hash)
         .reduce(
           async (
             prevPromise: any,
-            currentFile: insertSearchFilesPasamsType
+            index: number
           ) => {
             // 等待上一个 Promise 完成
             await prevPromise;
-            // Get the file type and hash
-            const hash = await File.getHash(currentFile.path);
+            if (isCancelled || window.location.href.indexOf(location.pathname) < 0) {
+              // Cancel flag set or the route changed: terminate early
+              throw 'terminated early';
+            }
+            const [fileinfo, error] = await getFirstEmptyHashBySourceId(`${fileId}`);
+            if(fileinfo) {
+              // Get the file type and hash
+              const hash = await File.getHash(fileinfo.path);
+              await updateFileHsah(fileinfo.path, hash, `${fileId}`);
+            }
+            console.clear();  // clear the console
+            // console.log(223, window.location.href, location.pathname, fileinfo);
+            console.log(223, window.location.href.indexOf(location.pathname), location.pathname);
             fileIndex++;
             await waittime();
             setPercent(Math.floor((fileIndex / allFilesLength) * 100));
-            return updateFileHsah(currentFile.path, hash, `${fileId}`);
+            setDuplicateFilesStep(`: ${fileIndex} / ${allFilesLength}`);
+            return Promise.resolve(0);
           },
           Promise.resolve(0)
         );
@@ -283,7 +325,7 @@ export default function CalculateDuplicateFiles() {
               status: stepsStatus.fileOptions,
             },
             {
-              title: "分析重复文件",
+              title: "分析重复文件" + duplicateFilesStep,
               status: stepsStatus.duplicateFiles,
             },
             {
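
Taken together, the component no longer loads every row up front and hashes in one long pass; each iteration pulls the next empty-hash row from SQLite, hashes it, and writes the result back, which is what makes the computation resumable and cancellable. A stripped-down sketch of that loop, assuming the getFirstEmptyHashBySourceId, File.getHash, and updateFileHsah helpers from this commit:

    // Hash pending files one at a time until none remain or the caller cancels.
    async function hashPendingFiles(sourceId: string, isCancelled: () => boolean) {
      for (;;) {
        if (isCancelled()) throw 'terminated early'; // caught by scanDirAll
        const [fileinfo] = await getFirstEmptyHashBySourceId(sourceId);
        if (!fileinfo) break; // no rows left with an empty hash
        const hash = await File.getHash(fileinfo.path);
        await updateFileHsah(fileinfo.path, hash, sourceId);
      }
    }

Because each pass re-queries the database for the next empty-hash row, an interrupted run picks up where it left off after a reload.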

+ 6 - 3
src/plugins/tauri-plugin-file/file.ts

@@ -1,7 +1,10 @@
 import { invoke } from "@tauri-apps/api/tauri";
 
 import Database from "tauri-plugin-sql-api";
-import {FileInfoType} from "@/types/files";
+import {
+  backFileInfoType,
+  fileInfoParamsType
+} from "@/types/files";
 
 export class File {
   path: string;
@@ -11,8 +14,8 @@ export class File {
   }
 
   // static async getAllList(fileInfo: FileInfoType): Promise<string[]> {
-  static async getAllList(fileInfo: any): Promise<string[]> {
-    return await invoke<string[]>("plugin:st-files|get_all_directory", {
+  static async getAllList(fileInfo: fileInfoParamsType): Promise<backFileInfoType[]> {
+    return await invoke<backFileInfoType[]>("plugin:st-files|get_all_directory", {
       fileInfo,
     });
   }
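
With the parameters and return value now typed, a call site looks like the sketch below (the path and exclusion values are hypothetical; fileInfoParamsType mirrors the Rust FileInfo struct, whose fields are all Option<...>, so omitted keys deserialize as None):

    // Hypothetical usage of the typed getAllList
    const files = await File.getAllList({
      path: "/Users/demo/Downloads",
      excluded_file_names: [".DS_Store"], // the filter added in this commit
    });
    console.log(files[0]?.file_name, files[0]?.file_size);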

+ 30 - 9
src/services/file-service.ts

@@ -269,21 +269,14 @@ export async function get_list_by_sourceid(
   sourceId: string
 ): Promise<[insertSearchFilesPasamsType[] | false, string]> {
   try {
-    // await table_init(FILE_DB_PATH, "select_history");
-    // const DB = await SQLite.open(FILE_DB_PATH);
     const DB = await Database.load(`sqlite:files_${sourceId}.db`);
    // Create the table
     await DB.execute(createSql.search_files);
     const res = await DB.select(
-      "SELECT * FROM search_files WHERE sourceId = $1",
+      "SELECT * FROM search_files WHERE sourceId = $1 AND (hash = '' OR hash IS NULL)",
       [sourceId]
     );
     console.log(969696, sourceId);
-
-    /* const res = await DB.queryWithArgs<Array<insertSearchFilesPasamsType>>(
-          "SELECT * FROM search_files WHERE sourceId = :sourceId GROUP BY hash HAVING COUNT(*) > 1",
-          { ":sourceId": sourceid }
-        ); */
     console.log(3434, res);
 
     if (Array.isArray(res)) {
@@ -347,7 +340,7 @@ export async function searchDuplicateFile({
     const DB = await Database.load(`sqlite:files_${sourceId}.db`);
    // Create the table
     await DB.execute(createSql.search_files);
-    /* 
+    /*
     select * from search_files where sourceId = $1 in (select sourceId from search_files group by hash having count(hash) > 1)
  */
     // const res = await DB.select("SELECT * from search_files WHERE sourceId = $1", [sourceId]);
@@ -489,3 +482,31 @@ export async function del_file_by_id(path: string, sourceId: string) {
     return Promise.resolve(error);
   }
 }
+
+
+/*
+ * Fetch the first record whose hash has not been computed yet
+ */
+export async function getFirstEmptyHashBySourceId(sourceId: string) {
+  try {
+    const DB = await Database.load(`sqlite:files_${sourceId}.db`);
+    // Create the table if it does not exist yet
+    await DB.execute(createSql.search_files);
+    const res = await DB.select(
+      "SELECT * FROM search_files WHERE sourceId = $1 AND (hash = '' OR hash IS NULL) LIMIT 1",
+      [sourceId]
+    );
+    if (Array.isArray(res) && res.length) {
+      return Promise.resolve([res[0], ""]);
+    }
+    return Promise.resolve([false, "no pending records"]);
+  } catch (error) {
+    if (error && `${error}`.indexOf("UNIQUE constraint failed") > -1) {
+      return Promise.resolve([false, "unexpected data format"]);
+    }
+    return Promise.resolve([false, error]);
+  }
+}
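
Callers consume the same [value, message] tuple convention used by the rest of file-service, for example:

    // Usage sketch; fileId comes from the calling component's route params
    const [row, msg] = await getFirstEmptyHashBySourceId(`${fileId}`);
    if (row) {
      console.log("next file to hash:", row.path);
    } else {
      console.log("nothing pending:", msg);
    }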

+ 19 - 1
src/types/files.d.ts

@@ -26,6 +26,7 @@ export type insertSearchFilesPasamsType = {
   id?: number;
   sourceId?: number | string | any;
   path: string;
+  file_path?: string;
   time?: string;
   // progress: number;
   type: string,
@@ -51,4 +52,21 @@ export type stepsStatusType = {
   fileOptions: StepProps.status;
   duplicateFiles: StepProps.status;
   done: StepProps.status;
-}
+}
+
+export type backFileInfoType = {
+  file_path: string,
+  file_name: string,
+  file_type: string,
+  file_size: string,
+  modified_time: string, // stored as a timestamp
+  creation_time: string,
+}
+
+
+export type fileInfoParamsType = {
+  path?: string,
+  checked_size_values?: string[],
+  types?: string[],
+  excluded_file_names?: string[]
+}