# coding: utf-8
__author__ = "lau.wenbo"

from checksum import create_checksum
from diskwalk import diskwalk
from os.path import getsize
import csv
import os
import sys

# reload(sys)                       # Python 2 only; not needed on Python 3
# sys.setdefaultencoding('utf8')
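
# `diskwalk` and `checksum` are small helper modules that are not included in
# this listing. From the way they are used below, `diskwalk(path).paths()`
# appears to return every file path under `path`, and `create_checksum(path)`
# appears to return a content hash. The function below is only a minimal
# sketch of that assumed directory-walking behaviour, not the original module.
def _walk_paths(root):
    """Collect every file path under `root` (sketch of diskwalk(path).paths())."""
    collected = []
    for dirpath, _dirnames, filenames in os.walk(root):
        for name in filenames:
            collected.append(os.path.join(dirpath, name))
    return collected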
def findDupes(path):
    """Return a mapping of duplicate file -> the first file seen with the same key."""
    record = {}
    dup = {}
    d = diskwalk(path)
    files = d.paths()
    for file in files:
        try:
            # Files are compared by (size, file name). If you want an MD5-based
            # comparison instead, uncomment the line below.
            # compound_key = (getsize(file), create_checksum(file))
            compound_key = (getsize(file), os.path.basename(file))
            if compound_key in record:
                dup[file] = record[compound_key]
            else:
                record[compound_key] = file
        except OSError:
            # Skip files that can no longer be read (deleted, no permission, etc.).
            continue
    return dup
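
# The MD5 comparison mentioned in findDupes relies on `checksum.create_checksum`,
# which is not shown in the original listing. The helper below is only a sketch
# of what such a function typically does (hash the file contents in chunks and
# return the hex digest); the real module may differ.
import hashlib

def _md5_checksum(path, chunk_size=65536):
    """Sketch of an MD5 helper comparable to checksum.create_checksum (assumed)."""
    digest = hashlib.md5()
    with open(path, "rb") as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()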
if __name__ == '__main__':
    path = '/Volumes/16T/柚木'
    # path = './file/'
    csv_path = './file/'
    # if not os.path.isdir(path) or not os.path.isdir(csv_path) or csv_path[-1] != "/":
    #     print("The argument is not a valid folder!")
    #     exit()
    # else:
    #     path = path.decode("utf-8")  # Python 2 only; unnecessary on Python 3
    print("Folder to scan: {path}".format(path=path))
    # newline="" keeps the csv module from inserting blank lines on some platforms.
    with open("{csv_path}duplicates.csv".format(csv_path=csv_path), "w", newline="", encoding="utf-8") as csvfile:
        # Columns: the file kept as the source, its duplicate, and the duplicate's size in bytes.
        header = ["Source", "Duplicate", "Size"]
        writer = csv.DictWriter(csvfile, fieldnames=header)
        writer.writeheader()
        print("Walking the folder and looking for duplicates, please wait...")
        print("Writing results to the CSV file, please wait...")
        for duplicate, source in findDupes(path).items():
            print((duplicate, source))
            writer.writerow({"Source": source, "Duplicate": duplicate, "Size": getsize(duplicate)})
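
# Running the script (`python find_dupes.py` is an assumed file name) produces a
# CSV with one row per duplicate found. The paths below are purely hypothetical
# and only illustrate the column layout written above:
#
#   Source,Duplicate,Size
#   /Volumes/16T/photos/a/IMG_0001.jpg,/Volumes/16T/photos/b/IMG_0001.jpg,2048000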