# search_excel.py
import os

import pandas as pd
  3. def find_excel_files(directory, search_str):
  4. search_str_lower = search_str.lower()
  5. matched_files = []
  6. for root, _, files in os.walk(directory):
  7. for file in files:
  8. if file.lower().endswith(('.xls', '.xlsx')):
  9. file_path = os.path.join(root, file)
  10. try:
  11. xls = pd.ExcelFile(file_path)
  12. found = False
  13. for sheet_name in xls.sheet_names:
  14. df = pd.read_excel(xls, sheet_name=sheet_name)
  15. # 检查列名
  16. cols_lower = [str(col).lower() for col in df.columns]
  17. if any(search_str_lower in col for col in cols_lower):
  18. matched_files.append(file_path)
  19. found = True
  20. break
  21. # 检查数据单元格
  22. mask = df.astype(str).apply(
  23. lambda col: col.str.contains(search_str, case=False, regex=False, na=False)
  24. )
  25. if mask.any().any():
  26. matched_files.append(file_path)
  27. found = True
  28. break
  29. if found:
  30. break # 跳到下一个文件
  31. except Exception as e:
  32. print(f"错误处理文件 {file_path}: {e}")
  33. return matched_files
  34. if __name__ == "__main__":
  35. # 用户输入
  36. target_dir = input("请输入要搜索的目录路径:")
  37. search_text = input("请输入要查找的字符串:")
  38. # 查找文件
  39. results = find_excel_files(target_dir, search_text)
  40. # 输出结果
  41. print("\n找到以下包含指定字符串的文件:")
  42. for file in results:
  43. print(file)