Duplicate File Finder
This is kind of an improved version of the shell one-liner I made some years ago, except it does not remove the files, just lists them.
This new script uses Python, and it is almost too easy, since Python includes a file comparison module (`filecmp`) in its standard library. Take a look at the code:
#!/usr/bin/python
"""List duplicate files found under a directory tree.

Files are first grouped by size; files that share a size are then compared
by content with ``filecmp.cmp(..., shallow=False)``.  Nothing is deleted --
matching pairs are only printed.
"""
import filecmp
import itertools
import os
import sys


class DupFinder(object):
    """Find files with identical content below ``dirname``."""

    def __init__(self, dirname):
        """Remember the root directory and start with an empty size index.

        dirname -- directory whose tree will be scanned.
        """
        self.dirname = dirname
        # Maps file size in bytes -> list of paths having that size.
        self.files = dict()

    def find_duplicates(self):
        """Scan the tree and yield ``(path1, path2)`` for each duplicate pair.

        Each unordered pair is compared, and therefore yielded, at most once.
        The size index in ``self.files`` is rebuilt on every call, so calling
        this repeatedly does not accumulate stale entries.
        """
        self.files.clear()
        # os.walk replaces os.path.walk, which no longer exists in Python 3.
        for dirpath, _subdirs, filenames in os.walk(self.dirname):
            self._walker(None, dirpath, filenames)

        for dupes in self.files.values():
            # combinations() yields nothing for a single-element bucket and
            # never pairs a path with itself or repeats a pair in reverse.
            for path1, path2 in itertools.combinations(dupes, 2):
                try:
                    same = filecmp.cmp(path1, path2, shallow=False)
                except (IOError, OSError):
                    # File vanished or became unreadable since the scan;
                    # skip this pair instead of aborting the whole run.
                    continue
                if same:
                    yield path1, path2

    def run(self):
        """Print one ``path1 == path2`` line for every duplicate pair."""
        for path1, path2 in self.find_duplicates():
            print("%s == %s" % (path1, path2))

    def _walker(self, arg, dirname, names):
        """Add every non-empty entry of ``names`` to the size index.

        arg     -- unused; kept so the original callback signature still works.
        dirname -- directory that contains ``names``.
        names   -- entry names located directly inside ``dirname``.
        """
        for name in names:
            path = os.path.join(dirname, name)
            try:
                size = os.path.getsize(path)
            except OSError:
                # Unreadable entry (e.g. a dangling symlink): ignore it.
                continue
            if size == 0:
                continue  # Ignore empty files.
            self.files.setdefault(size, []).append(path)

    def _compare(self, path1, path2):
        """Print the two paths separated by a space."""
        print("%s %s" % (path1, path2))


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: %s <directory>" % (sys.argv[0]))
        sys.exit(1)

    dp = DupFinder(sys.argv[1])
    dp.run()

    sys.exit(0)