1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950 |
- import os
- import pandas as pd
def find_excel_files(directory, search_str):
    """Return paths of Excel workbooks under *directory* that contain *search_str*.

    The tree rooted at ``directory`` is walked with ``os.walk``. Every file
    whose name ends in ``.xls`` or ``.xlsx`` (case-insensitive) is opened and
    its sheets are searched one by one. A workbook matches when the text
    occurs, ignoring case, in a column header or in any non-empty cell.

    Args:
        directory: Root directory to search recursively.
        search_str: Literal text to look for (not a regular expression).

    Returns:
        A list of matching file paths, each listed at most once, in the
        order the walk encountered them.

    Workbooks that cannot be read are reported on stdout and skipped, so a
    single unreadable file does not abort the scan.
    """
    matched_files = []

    for root, _, files in os.walk(directory):
        for file in files:
            if not file.lower().endswith(('.xls', '.xlsx')):
                continue
            file_path = os.path.join(root, file)
            try:
                if _workbook_contains(file_path, search_str):
                    matched_files.append(file_path)
            except Exception as e:
                # Deliberate best effort: corrupt, locked or unsupported
                # workbooks are reported and the scan moves on.
                print(f"错误处理文件 {file_path}: {e}")

    return matched_files


def _workbook_contains(file_path, search_str):
    """Return True if any sheet of the workbook at *file_path* contains *search_str*."""
    # The context manager closes the workbook handle on every exit path
    # (match found, no match, or a read error part-way through).
    with pd.ExcelFile(file_path) as xls:
        # any() is lazy, so sheets after the first match are never parsed.
        return any(
            _sheet_contains(pd.read_excel(xls, sheet_name=sheet_name), search_str)
            for sheet_name in xls.sheet_names
        )


def _sheet_contains(df, search_str):
    """Return True if *search_str* occurs in a header or non-empty cell of *df*."""
    # Check the column names first.
    search_str_lower = search_str.lower()
    if any(search_str_lower in str(col).lower() for col in df.columns):
        return True

    # Then check the data cells, column by column. Missing values are dropped
    # before the string conversion; otherwise every empty cell would become
    # the text "nan" and falsely match searches such as "nan" or "an".
    for _, column in df.items():
        cells = column.dropna()
        if cells.empty:
            continue
        hits = cells.astype(str).str.contains(search_str, case=False, regex=False)
        if hits.any():
            return True
    return False
if __name__ == "__main__":
    # Ask for the root folder first, then for the text to look for.
    folder = input("请输入要搜索的目录路径:")
    needle = input("请输入要查找的字符串:")

    # Run the scan before printing the header, so any per-file error
    # messages emitted during the scan appear above the result list.
    hits = find_excel_files(folder, needle)

    # Report one matching path per line beneath the header.
    print("\n找到以下包含指定字符串的文件:")
    for hit in hits:
        print(hit)
|