XSPF Coverage and Duplication Check
Two new XSPF playlist use cases have come to mind: checking for duplicate file references across playlists, and checking for coverage. By coverage, I mean checking whether all files within a directory structure are actually referenced by the playlist(s).
Both scripts are based on the XSPF integrity check script I made earlier, and the same parser is used.
Script for duplication check:
#!/usr/bin/python
import xml.dom.minidom
import re
import os.path
# Maps each track path to the first playlist that referenced it
# (populated by file_check).
xspf_files = {}
def xspf_parse(playlist_filename, handler):
    """Parse an XSPF playlist and report every track location to *handler*.

    Each ``<location>`` found under ``playlist/trackList/track`` is
    percent-decoded, interpreted as UTF-8 and stripped of a leading
    ``file://`` scheme.  The result is passed on as
    ``handler(playlist_filename, track_filename)``.

    Args:
        playlist_filename: Path of the XSPF file; handed to
            ``xml.dom.minidom.parse`` and forwarded to the handler unchanged.
        handler: Callable taking ``(playlist_filename, track_filename)``.

    Empty ``<location/>`` elements are skipped instead of raising.
    """
    def unescape(match):
        # One %XX escape -> the single raw octet it stands for.
        return bytes(bytearray([int(match.group(1), 16)]))

    xml_data = xml.dom.minidom.parse(playlist_filename)
    for playlist in xml_data.getElementsByTagName("playlist"):
        for tracklist in playlist.getElementsByTagName("trackList"):
            for track in tracklist.getElementsByTagName("track"):
                for location in track.getElementsByTagName("location"):
                    node = location.firstChild
                    if node is None:
                        # <location/> has no text child to read.
                        continue
                    # Surrounding whitespace (pretty-printed XML) is not part
                    # of the URI; real spaces in a path arrive as %20.
                    raw = node.data.strip().encode("utf-8")
                    if not raw:
                        continue
                    # Substitute on the UTF-8 octets so multi-byte characters
                    # escaped as several %XX groups decode correctly below.
                    decoded = re.sub(br"%([0-9a-fA-F]{2})", unescape, raw)
                    track_filename = decoded.decode("utf-8")
                    # Drop only the leading scheme; "file://" appearing later
                    # in the path belongs to the file name.
                    if track_filename.startswith("file://"):
                        track_filename = track_filename[len("file://"):]
                    handler(playlist_filename, track_filename)
def file_check(playlist_filename, track_filename):
if track_filename in xspf_files:
print track_filename, "-->", xspf_files[track_filename], "&", playlist_filename
else:
xspf_files[track_filename] = playlist_filename
# Command-line entry point: run the duplicate check over every playlist given.
if __name__ == "__main__":
    import sys

    playlists = sys.argv[1:]
    if not playlists:
        # At least one playlist is required.
        print("Usage: %s <xspf file> ... <xspf file>" % sys.argv[0])
        sys.exit(1)

    for playlist in playlists:
        xspf_parse(playlist, file_check)

    sys.exit(0)
Script for coverage check:
#!/usr/bin/python
import xml.dom.minidom
import re
import os
# xspf_files: track paths referenced by the parsed playlists.
# fs_files:   paths of the files found while walking the directory.
xspf_files, fs_files = set(), set()
def xspf_parse(playlist_filename, handler):
    """Parse an XSPF playlist and report every track location to *handler*.

    Each ``<location>`` found under ``playlist/trackList/track`` is
    percent-decoded, interpreted as UTF-8 and stripped of a leading
    ``file://`` scheme.  The result is passed on as
    ``handler(playlist_filename, track_filename)``.

    Args:
        playlist_filename: Path of the XSPF file; handed to
            ``xml.dom.minidom.parse`` and forwarded to the handler unchanged.
        handler: Callable taking ``(playlist_filename, track_filename)``.

    Empty ``<location/>`` elements are skipped instead of raising.
    """
    def unescape(match):
        # One %XX escape -> the single raw octet it stands for.
        return bytes(bytearray([int(match.group(1), 16)]))

    xml_data = xml.dom.minidom.parse(playlist_filename)
    for playlist in xml_data.getElementsByTagName("playlist"):
        for tracklist in playlist.getElementsByTagName("trackList"):
            for track in tracklist.getElementsByTagName("track"):
                for location in track.getElementsByTagName("location"):
                    node = location.firstChild
                    if node is None:
                        # <location/> has no text child to read.
                        continue
                    # Surrounding whitespace (pretty-printed XML) is not part
                    # of the URI; real spaces in a path arrive as %20.
                    raw = node.data.strip().encode("utf-8")
                    if not raw:
                        continue
                    # Substitute on the UTF-8 octets so multi-byte characters
                    # escaped as several %XX groups decode correctly below.
                    decoded = re.sub(br"%([0-9a-fA-F]{2})", unescape, raw)
                    track_filename = decoded.decode("utf-8")
                    # Drop only the leading scheme; "file://" appearing later
                    # in the path belongs to the file name.
                    if track_filename.startswith("file://"):
                        track_filename = track_filename[len("file://"):]
                    handler(playlist_filename, track_filename)
def add_xspf_file(playlist_filename, track_filename):
    """Record *track_filename* as referenced by a playlist.

    ``playlist_filename`` is accepted to match the handler signature used by
    ``xspf_parse`` but is not used here.
    """
    xspf_files.update((track_filename,))
# Command-line entry point: compare the files below a directory against the
# track locations of the given playlists and report coverage.
if __name__ == "__main__":
    import sys

    if len(sys.argv) < 3:
        print("Usage: %s <directory> <xspf file> ... <xspf file>" % sys.argv[0])
        sys.exit(1)

    # Collect every file below the directory.  Paths are compared to playlist
    # entries as plain strings, so the directory must be spelled the same way
    # the playlists spell it (e.g. as an absolute path).
    for root, dirs, files in os.walk(sys.argv[1]):
        for filename in files:
            path = os.path.join(root, filename)
            # NOTE(review): assumes file names on disk are ISO-8859-1 encoded;
            # confirm this matches the filesystem the script is run on.
            if isinstance(path, bytes):
                path = path.decode("iso-8859-1")
            fs_files.add(path)

    for filename in sys.argv[2:]:
        xspf_parse(filename, add_xspf_file)

    fs_total = float(len(fs_files))
    if fs_total:
        fs_covered = float(len(fs_files.intersection(xspf_files)))
        print("Coverage: %.2f%%" % ((fs_covered / fs_total) * 100))
    else:
        # An empty directory tree would otherwise divide by zero.
        print("Coverage: n/a (no files found)")

    print("Missing Files:")
    # Sorted so the report is stable between runs.
    for filename in sorted(fs_files.difference(xspf_files)):
        print(filename)

    sys.exit(0)