I'd recommend against using `os.walk()`, since you have to do so much to massage its output. Instead, just use a recursive function that uses `os.listdir()`, `os.path.join()`, `os.path.isdir()`, etc.
import os
from xml.sax.saxutils import escape as xml_escape
def DirAsXML(path):
    """Return an indented XML description of the directory tree at *path*.

    The result is a ``<dir>`` element holding a ``<name>`` child, an optional
    ``<files>`` group with one ``<file><name>...</name></file>`` entry per
    regular file, and one nested ``<dir>`` element per sub-directory.  Each
    nesting level is indented by two spaces.  Entries that are neither a
    directory nor a regular file are skipped.

    Args:
        path: Directory to describe.

    Returns:
        The XML fragment as a string without a trailing newline.

    Raises:
        OSError: Propagated from ``os.listdir()`` if *path* cannot be listed.
    """
    # normpath() drops a trailing separator so basename() is not empty for
    # inputs such as 'some/dir/'.
    name = os.path.basename(os.path.normpath(path))
    lines = ['<dir>', '  <name>%s</name>' % xml_escape(name)]

    dirs = []
    files = []
    # os.listdir() returns entries in arbitrary order; sort for stable output.
    for item in sorted(os.listdir(path)):
        itempath = os.path.join(path, item)
        if os.path.isdir(itempath):
            dirs.append(item)
        elif os.path.isfile(itempath):
            files.append(item)

    if files:
        lines.append('  <files>')
        for f in files:
            lines.append('    <file>')
            lines.append('      <name>%s</name>' % xml_escape(f))
            lines.append('    </file>')
        lines.append('  </files>')

    for d in dirs:
        nested = DirAsXML(os.path.join(path, d))
        # Indent the child one level.  Collecting lines (rather than
        # concatenating text) guarantees the child's closing tag ends up on
        # its own line instead of being glued to whatever follows it.
        lines.extend('  ' + line for line in nested.split('\n'))

    lines.append('</dir>')
    return '\n'.join(lines)
if __name__ == '__main__':
    # Wrap the tree of the current working directory in one root element.
    # The single-argument call form of print works on Python 2 and 3 alike.
    print('<structure>\n' + DirAsXML(os.getcwd()) + '\n</structure>')
Personally, I'd recommend a much less verbose XML schema, putting names in attributes and getting rid of the `<files>` group:
import os
from xml.sax.saxutils import quoteattr as xml_quoteattr
def DirAsLessXML(path):
    """Return a compact, indented XML description of the tree at *path*.

    The result is a ``<dir name="...">`` element containing one
    ``<file name="..." />`` element per regular file and one nested ``<dir>``
    element per sub-directory, in sorted name order.  Each nesting level is
    indented by two spaces.  Entries that are neither a directory nor a
    regular file are skipped.

    Args:
        path: Directory to describe.

    Returns:
        The XML fragment as a string without a trailing newline.

    Raises:
        OSError: Propagated from ``os.listdir()`` if *path* cannot be listed.
    """
    # normpath() drops a trailing separator so basename() is not empty for
    # inputs such as 'some/dir/'.
    name = os.path.basename(os.path.normpath(path))
    lines = ['<dir name=%s>' % xml_quoteattr(name)]

    # os.listdir() returns entries in arbitrary order; sort for stable output.
    for item in sorted(os.listdir(path)):
        itempath = os.path.join(path, item)
        if os.path.isdir(itempath):
            nested = DirAsLessXML(itempath)
            # Indent the child one level.  Collecting lines (rather than
            # concatenating text) keeps the child's closing tag on its own
            # line instead of gluing it to the next sibling.
            lines.extend('  ' + line for line in nested.split('\n'))
        elif os.path.isfile(itempath):
            lines.append('  <file name=%s />' % xml_quoteattr(item))

    lines.append('</dir>')
    return '\n'.join(lines)
if __name__ == '__main__':
    # Wrap the tree of the current working directory in one root element.
    # The single-argument call form of print works on Python 2 and 3 alike.
    print('<structure>\n' + DirAsLessXML(os.getcwd()) + '\n</structure>')
This gives an output like:
<structure>
<dir name="local">
  <dir name=".hg">
    <file name="00changelog.i" />
    <file name="branch" />
    <file name="branch.cache" />
    <file name="dirstate" />
    <file name="hgrc" />
    <file name="requires" />
    <dir name="store">
      <file name="00changelog.i" />
      etc.
If `os.walk()` worked more like `expat`'s callbacks, you'd have an easier time of it.
与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…