XSPF Integrity Check
This is a Python script to check the integrity of XSPF playlist files. By integrity check, I mean checking whether the files that the playlists reference actually exist.
You may notice that I use a regular expression with a lambda expression to decode the URL encoding instead of using the standard urllib.unquote() routine. There is a good reason for this: urllib.unquote() returns a "unicode" string rather than a regular "str" Python string. I happen to use Latin-1 encoding for my filenames, and in order to decode these properly, the built-in decode() method must be used, but that one only works on regular "str" strings!
Anyway, here's my script:
#!/usr/bin/python
import xml.dom.minidom
import re
import os.path
def xspf_parse(playlist_filename, handler):
    """Call *handler* for every track location found in an XSPF playlist.

    The playlist is parsed with ``xml.dom.minidom`` and each
    ``playlist/trackList/track/location`` element is visited.  The text of
    the location has its ``%XX`` escapes decoded and a leading ``file://``
    removed before it is handed to *handler*.

    Args:
        playlist_filename: Path of the XSPF file to read.
        handler: Callable invoked as
            ``handler(playlist_filename, track_filename)`` once for every
            non-empty ``<location>`` element.

    Raises:
        Whatever ``xml.dom.minidom.parse`` raises for an unreadable or
        malformed playlist, plus anything raised by *handler*.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import binascii

    # The escapes are decoded on the UTF-8 *bytes* of the URI so that the
    # resulting byte sequence can be decoded as a whole afterwards.  A bytes
    # pattern plus unhexlify() behaves the same on Python 2 and Python 3.
    escape = re.compile(br"%([0-9a-fA-F]{2})")

    def _unescape(match):
        # Turn the two hex digits of one %XX escape into the byte they name.
        return binascii.unhexlify(match.group(1))

    scheme = "file://"
    document = xml.dom.minidom.parse(playlist_filename)
    for playlist in document.getElementsByTagName("playlist"):
        for tracklist in playlist.getElementsByTagName("trackList"):
            for track in tracklist.getElementsByTagName("track"):
                for location in track.getElementsByTagName("location"):
                    # Collect all character data; an empty <location/> has
                    # no child nodes at all, so firstChild would be None.
                    uri = "".join(
                        node.data
                        for node in location.childNodes
                        if node.nodeType in (node.TEXT_NODE,
                                             node.CDATA_SECTION_NODE)
                    ).strip()
                    if not uri:
                        # Nothing to check for a location without a URI.
                        continue

                    raw = escape.sub(_unescape, uri.encode("utf-8"))
                    try:
                        track_filename = raw.decode("utf-8")
                    except UnicodeDecodeError:
                        # Escapes produced from Latin-1 file names are not
                        # valid UTF-8; Latin-1 maps every byte, so this
                        # fallback cannot fail.
                        track_filename = raw.decode("latin-1")

                    # Drop only the leading scheme; "file://" occurring
                    # later in the path is part of the file name.
                    if track_filename.startswith(scheme):
                        track_filename = track_filename[len(scheme):]
                    handler(playlist_filename, track_filename)
def file_check(playlist_filename, track_filename):
    """Report a playlist entry whose file does not exist.

    Intended as a handler for ``xspf_parse``.  When *track_filename* is not
    an existing regular file, one line of the form
    ``<playlist> --> <track>`` is printed to standard output; otherwise
    nothing is printed.

    Args:
        playlist_filename: Path of the playlist that references the track.
        track_filename: Path of the referenced track file.
    """
    if os.path.isfile(track_filename):
        return
    # A single pre-formatted argument keeps this line valid both as a
    # Python 2 print statement and as a Python 3 print() call.
    print("%s --> %s" % (playlist_filename, track_filename))
if __name__ == "__main__":
    # Command-line entry point: every argument is an XSPF playlist whose
    # track locations are checked with file_check().
    import sys

    if len(sys.argv) < 2:
        # Single-argument print() is valid on both Python 2 and Python 3.
        print("Usage: %s <xspf file> ... <xspf file>" % (sys.argv[0]))
        sys.exit(1)

    for filename in sys.argv[1:]:
        xspf_parse(filename, file_check)

    # The exit status is 0 even when missing files were reported.
    sys.exit(0)