Duplicate File Finder
This is kind of an improved version of the shell one-liner I made some years ago, except it does not remove the files, just lists them.
This new script uses Python, and it is almost too easy, since Python includes a file comparison module (filecmp) in its standard library. Take a look at the code:
#!/usr/bin/python
import os
import filecmp
class DupFinder(object):
    """Find files with identical content beneath a directory tree.

    Files are first grouped by size, which is cheap to obtain; only files
    that share a size are then compared byte-by-byte with ``filecmp.cmp``.
    Duplicates are printed to standard output; nothing is removed.
    """

    def __init__(self, dirname):
        """Remember the root of the tree to scan.

        dirname -- path of the directory to search for duplicates.
        """
        self.dirname = dirname
        # Maps a file size in bytes to the list of paths having that size.
        self.files = dict()

    def run(self):
        """Scan ``self.dirname`` and print each pair of duplicate files.

        Every pair is reported exactly once, as ``"<path1> == <path2>"``.
        Files that cannot be read during comparison are skipped.
        """
        # Start from an empty index so that calling run() a second time
        # does not register every path twice.
        self.files = dict()
        # os.walk is available on both Python 2 and 3 (os.path.walk was
        # removed in Python 3) and separates file names from directories.
        for dirpath, _dirnames, filenames in os.walk(self.dirname):
            self._walker(None, dirpath, filenames)
        for dupes in self.files.values():
            if len(dupes) < 2:
                continue
            # Compare each unordered pair once; iterating over all ordered
            # pairs would print "a == b" and then "b == a".
            for index, path1 in enumerate(dupes):
                for path2 in dupes[index + 1:]:
                    try:
                        same = filecmp.cmp(path1, path2, shallow=False)
                    except (IOError, OSError):
                        # Unreadable or vanished file: skip this pair
                        # instead of aborting the whole scan.
                        continue
                    if same:
                        print("%s == %s" % (path1, path2))

    def _walker(self, arg, dirname, names):
        """Index the regular, non-empty files among ``names`` by size.

        arg     -- unused; kept for compatibility with the visitor
                   signature expected by ``os.path.walk``.
        dirname -- directory that contains the entries.
        names   -- entry names (not full paths) inside ``dirname``.
        """
        for name in names:
            path = os.path.join(dirname, name)
            # Directories and other non-regular entries can never be
            # duplicate files, so keep them out of the size index.
            if not os.path.isfile(path):
                continue
            try:
                size = os.path.getsize(path)
            except OSError:
                continue
            if size == 0:
                continue  # Ignore empty files.
            self.files.setdefault(size, []).append(path)

    def _compare(self, path1, path2):
        """Print the two paths separated by a space (debugging helper)."""
        print("%s %s" % (path1, path2))
if __name__ == "__main__":
    import sys

    # Exactly one argument is expected: the directory to scan.
    if len(sys.argv) != 2:
        # Diagnostics go to stderr so they never mix with the duplicate
        # listing on stdout.
        sys.stderr.write("Usage: %s <directory>\n" % (sys.argv[0]))
        sys.exit(1)
    # Walking a missing path yields nothing, which would look exactly like
    # "no duplicates found"; report the mistake instead.
    if not os.path.isdir(sys.argv[1]):
        sys.stderr.write(
            "%s: not a directory: %s\n" % (sys.argv[0], sys.argv[1])
        )
        sys.exit(1)
    dp = DupFinder(sys.argv[1])
    dp.run()
    sys.exit(0)