#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import xml.sax
class Element(object):
    """In-memory node describing one XML element.

    Instance attributes set by the constructor:
      tag        -- element name, coerced with str()
      attributes -- the attributes object exactly as passed in
      cdata      -- character data of the element, initially ""
      children   -- list of child nodes, initially empty (one fresh list
                    per instance)
    """

    def __init__(self, tag, attributes):
        """Create a childless node named *tag* carrying *attributes*."""
        self.tag = str(tag)
        self.attributes = attributes
        self.cdata = ""
        self.children = []
class DataContentHandler(xml.sax.ContentHandler):
    """SAX content handler that builds a tree of Element nodes.

    While parsing, ``parents`` is the stack of currently open elements.
    Once parsing is done, ``root_node`` is the document's root element,
    or None if no element was ever opened.  An element's ``cdata`` is
    final once its closing tag has been handled.
    """

    def __init__(self):
        """Start with an empty element stack and no root node."""
        # Explicit base call (rather than super()) keeps this working
        # where ContentHandler is an old-style class (Python 2).
        xml.sax.ContentHandler.__init__(self)
        self.parents = []
        # One list of raw text chunks per entry in self.parents.
        self._text_parts = []
        self.root_node = None

    def startElement(self, tag, attributes):
        """Opening XML element tag: create a node and push it on the stack."""
        # The parser is allowed to reuse the attributes object between
        # callbacks, so the node keeps its own copy.
        element = Element(tag, attributes.copy())
        if self.parents:
            # Attach to the innermost element that is still open.
            self.parents[-1].children.append(element)
        else:
            # First element seen: this is the document root.
            self.root_node = element
        # The new element may itself receive children and text.
        self.parents.append(element)
        self._text_parts.append([])

    def endElement(self, tag):
        """Closing XML element tag: finalize its text and pop the stack."""
        if not self.parents:
            return
        element = self.parents.pop()
        if self._text_parts:
            # Strip only after joining, so whitespace that falls on a
            # chunk boundary inside the text is preserved.
            element.cdata = "".join(self._text_parts.pop()).strip()

    def characters(self, content):
        """Character data encountered: buffer it for the innermost element.

        A parser may deliver one run of text in several calls, so chunks
        are accumulated here and only combined in endElement().
        """
        if self.parents and self._text_parts:
            self._text_parts[-1].append(content)

    def dump(self, element=None, show_all=False):
        """Print the descendants of *element*, depth first.

        element  -- node whose children are printed; defaults to the
                    root node.  The node itself is not printed.
        show_all -- when true, print every descendant's tag and cdata;
                    otherwise print a descendant's tag only if it has
                    cdata or attributes.  Attributes are always listed.
        """
        if element is None:
            element = self.root_node
        if element is None:
            # Nothing has been parsed yet: nothing to show.
            return
        for child in element.children:
            _tag = child.tag.capitalize()
            _attrs = child.attributes
            if show_all or child.cdata:
                print("{0}: '{1}'".format(_tag, child.cdata))
            elif _attrs.getLength():
                # No text, but attributes follow: print a bare heading.
                print("{0}:".format(_tag))
            for attribute in _attrs.getNames():
                print(" |---> {0}: '{1}'".format(
                    attribute, _attrs.getValue(attribute)))
            # Then recursively dump the child's own children.
            self.dump(child, show_all)
if __name__ == "__main__":
    # Build the element tree from the sample document.
    handler = DataContentHandler()
    xml.sax.parse("example.xml", handler)
    # show_all=True prints every element below the root; pass False to
    # print only elements that have character data or attributes.
    handler.dump(show_all=True)
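# NOTE(review): "Partager" (French for "Share") followed the listing here; it looks like web-page residue rather than program code -- confirm and remove.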