File Batch Splitter
This is a Python script that takes a directory containing many files and moves them into numbered subfolders (batches), each holding at most a given total size. I've hard-coded the capacity of a standard CD (~700 MB) into the script, since that is a very typical use case.
Have a look:
#!/usr/bin/python
import os
class Batch(object):
    """A group of file paths together with their combined size in bytes."""

    def __init__(self):
        self._size = 0
        self._files = []

    def size_get(self):
        """Return the combined size, in bytes, of all files added so far."""
        return self._size

    def files_get(self):
        """Return the list of added paths, in insertion order.

        This is the internal list itself, not a copy.
        """
        return self._files

    def add(self, path):
        """Add the file at ``path`` to the batch and account for its size.

        Raises:
            OSError: if ``path`` cannot be stat'ed. The batch is left
                unchanged in that case.
        """
        # Stat before recording the path: if the stat fails, the batch must
        # not end up listing a file whose size was never counted.
        file_size = os.stat(path).st_size
        self._files.append(path)
        self._size += file_size
class BatchManager(object):
    """Group a directory's files into size-limited batches and move each
    batch into its own numbered subfolder.
    """

    def __init__(self, max_size):
        """Create a manager.

        Args:
            max_size: maximum combined size, in bytes, of the files that
                may share one batch.
        """
        self._max_size = max_size
        self._batches = []

    def create(self, directory):
        """Partition the regular files directly inside ``directory``.

        Files are visited in sorted name order and packed greedily: the
        current batch is closed as soon as the next file would push it
        past ``max_size``. Entries that are not regular files (for
        example subdirectories) are skipped. A single file larger than
        ``max_size`` still gets a batch of its own, which then exceeds
        the limit. Batches are appended to those of any earlier call.

        Args:
            directory: path of the directory to scan (not recursive).
        """
        batch = Batch()
        for filename in sorted(os.listdir(directory)):
            path = os.path.join(directory, filename)
            if not os.path.isfile(path):
                continue
            file_size = os.stat(path).st_size
            # Only close a batch that actually holds files; otherwise an
            # oversized first file would emit an empty batch and shift
            # the numbering of every following subfolder.
            if batch.files_get() and batch.size_get() + file_size > self._max_size:
                self._batches.append(batch)
                batch = Batch()
            batch.add(path)
        # Skip the trailing batch when no file was ever added to it
        # (e.g. the directory contains no regular files).
        if batch.files_get():
            self._batches.append(batch)

    def split(self, directory):
        """Move every batched file into its batch's numbered subfolder.

        Batch N (1-based) goes into a subfolder named with N zero-padded
        to three digits, created next to each source file if missing.
        A progress line is printed per batch and per file.

        Args:
            directory: unused; the destination is derived from each
                file's own location. Kept for interface compatibility.
        """
        for batch_no, batch in enumerate(self._batches, 1):
            # Single-argument print(...) behaves the same on Python 2
            # (parenthesized expression) and Python 3 (function call).
            print("\nBatch #%03d, Size: %d" % (batch_no, batch.size_get()))
            batch_dir = "%03d" % batch_no
            for file_no, src_path in enumerate(batch.files_get(), 1):
                print("%03d:%03d: %s" % (batch_no, file_no, src_path))
                dst_dir = os.path.join(os.path.dirname(src_path), batch_dir)
                dst_path = os.path.join(dst_dir, os.path.basename(src_path))
                if not os.path.isdir(dst_dir):
                    os.mkdir(dst_dir)
                os.rename(src_path, dst_path)
if __name__ == "__main__":
    # Command-line entry point: batch the files of the given directory
    # into CD-sized numbered subfolders.
    import sys

    # Bytes on an 80 min CD-ROM (Mode 1):
    # 80 min * 60 s/min * 75 sectors/s * 2048 bytes/sector.
    CD_CAPACITY_BYTES = 737280000

    if len(sys.argv) < 2:
        # Single-argument print(...) works on both Python 2 and Python 3.
        print("Usage: %s <directory>" % (sys.argv[0]))
        sys.exit(1)
    directory = sys.argv[1]
    # Fail with a readable message instead of an os.listdir traceback.
    if not os.path.isdir(directory):
        sys.stderr.write("%s: not a directory: %s\n" % (sys.argv[0], directory))
        sys.exit(1)
    bm = BatchManager(CD_CAPACITY_BYTES)
    bm.create(directory)
    bm.split(directory)
    sys.exit(0)