Kjetil's Information Center: A Blog About My Projects

File Batch Splitter

This is a Python script that takes a directory containing a lot of files and moves them into numbered subfolders ("batches"), each filled up to a certain total size. I've hard-coded the size of a standard CD (~700MB) into the script since this is a very typical use case.

Have a look:

#!/usr/bin/python

import os

class Batch(object):
    """A collection of file paths together with their combined size in bytes."""

    def __init__(self):
        self._size = 0
        self._files = list()

    def size_get(self):
        """Return the summed size, in bytes, of every file added so far."""
        return self._size

    def files_get(self):
        """Return the internal list of added paths, in insertion order."""
        return self._files

    def add(self, path):
        """Append ``path`` to the batch and add its on-disk size to the total.

        Raises:
            OSError: if ``path`` cannot be stat'ed. The batch is left
                unchanged in that case.
        """
        # Stat first: if this raises, neither the file list nor the size
        # total has been touched, so the two can never get out of sync.
        size = os.stat(path).st_size
        self._files.append(path)
        self._size += size

class BatchManager(object):
    """Group the regular files of a directory into size-limited batches and
    move each batch into its own numbered subfolder ("001", "002", ...)."""

    def __init__(self, max_size):
        """Create a manager whose batches hold at most ``max_size`` bytes.

        A single file larger than ``max_size`` still gets a batch of its own.
        """
        self._max_size = max_size
        self._batches = list()

    def create(self, directory):
        """Assign the regular files directly inside ``directory`` to batches.

        Files are visited in sorted name order and packed greedily: a new
        batch is started whenever the next file would push the current one
        over the size limit. Subdirectories and other non-regular entries
        are ignored. Nothing is moved on disk by this method.
        """
        batch = Batch()
        for filename in sorted(os.listdir(directory)):
            path = os.path.join(directory, filename)
            if not os.path.isfile(path):
                continue
            size = os.stat(path).st_size
            # Only close a batch that actually contains files; otherwise an
            # oversized first file would produce an empty leading batch and
            # shift all folder numbers by one.
            if batch.files_get() and (batch.size_get() + size) > self._max_size:
                self._batches.append(batch)
                batch = Batch()
            batch.add(path)
        # Skip the trailing batch when no regular file was found at all.
        if batch.files_get():
            self._batches.append(batch)

    def split(self, directory):
        """Move every batched file into its batch's numbered subfolder.

        The subfolder is created next to each source file (in the file's own
        parent directory) the first time it is needed. The ``directory``
        argument is accepted for interface compatibility but is not used.
        Progress is printed to standard output.
        """
        for batch_no, batch in enumerate(self._batches):
            # Single-argument parenthesised print behaves the same on
            # Python 2 (print statement) and Python 3 (print function).
            print("\nBatch #%03d, Size: %d" % (batch_no + 1, batch.size_get()))
            batch_dir = "%03d" % (batch_no + 1)
            for file_no, src_path in enumerate(batch.files_get()):
                print("%03d:%03d: %s" % (batch_no + 1, file_no + 1, src_path))
                dst_dir = os.path.join(os.path.dirname(src_path), batch_dir)
                dst_path = os.path.join(dst_dir, os.path.basename(src_path))
                if not os.path.isdir(dst_dir):
                    os.mkdir(dst_dir)
                os.rename(src_path, dst_path)

# Command-line entry point: split the files of the given directory into
# CD-sized batches.
if __name__ == "__main__":
    import sys

    if len(sys.argv) < 2:
        # Parenthesised single-argument print works on Python 2 and 3 alike.
        print("Usage: %s <directory>" % (sys.argv[0]))
        sys.exit(1)

    directory = sys.argv[1]

    # Fail with a readable message instead of an os.listdir() traceback when
    # the argument is missing on disk or is not a directory.
    if not os.path.isdir(directory):
        sys.stderr.write("%s: not a directory: %s\n" % (sys.argv[0], directory))
        sys.exit(1)

    bm = BatchManager(737280000) # Bytes on 80 min CD-ROM Mode 1
    bm.create(directory)
    bm.split(directory)

    sys.exit(0)
          


Topic: Scripts and Code, by Kjetil @ 16/07-2018, Article Link