# xml_sax.py
'''\
XML SAX Framework.
classes:
Property (name, value, dtype=None)
XmlHandlerBase (saxhandler.ContentHandler)
XmlHandlerTest (XmlHandlerBase)
XmlHandlerMain (XmlHandlerBase)
XmlHandlerText (XmlHandlerMain)
XmlHandlerList (XmlHandlerMain)
XmlHandlerConfigBase (XmlHandlerMain)
functions:
parse_xml_file (filename, config_handler)
Developer@Sonnack.com
November 2016
'''
from sys import stdout, stderr, argv
from xml.sax import parse
from xml.sax.handler import ContentHandler
from xml.sax.xmlreader import Locator
from xml.sax.saxutils import unescape
class Property (object):
    '''Property class. Features data typed name-value pairs.

    Attributes:
        name  -- the property name.
        value -- the property value; None means "no value".
        dtype -- optional type tag; left out of str() when falsy.

    Calling an instance returns its value.
    '''

    def __init__ (self, name, value, dtype=None):
        self.name = name
        self.value = value
        self.dtype = dtype

    def __repr__ (self):
        '''Return a debug string showing name, value and type.'''
        s = '{Property:{name:"%s", value:"%s", type:"%s"}}'
        t = (self.name, str(self.value), self.dtype)
        return s % t

    def __str__ (self):
        '''Return the property as "name[:dtype] = value".'''
        # Only a missing (None) value is rendered as empty. Legitimate falsy
        # values such as 0 or False must still show up in the output.
        v = '' if self.value is None else self.value
        t = (':%s' % self.dtype) if self.dtype else ''
        return '%s%s = %s' % (self.name, t, v)

    def __call__ (self):
        '''Return the stored value.'''
        return self.value
class XmlHandlerBase (ContentHandler):
    '''A basic XML parser framework using SAX. Strictly a base class.

    Tracks the element nesting depth, a state number for subclasses to use,
    and a character buffer holding the text seen since the most recent
    start tag (the buffer is cleared on every start tag and end tag).

    Arguments:
        name -- label for this handler (used by str(), repr() and logging).
        fout -- output stream, stored as self.out (default: sys.stdout).
        log  -- optional logger; this class calls its debug() and trace().
    '''

    def __init__ (self, name, fout=stdout, log=None):
        ContentHandler.__init__(self)
        self.setDocumentLocator(Locator())
        self.name = name
        self.out = fout
        self.log = log
        self.depth = 0
        self.state = 0
        self.buff = ''
        # Set here as well as in startDocument() so characters() is safe on
        # a handler that has not received startDocument() yet.
        self.buff_enable = False

    def __str__ (self):
        return self.name

    def __repr__ (self):
        s = '{XMLHandler:{name:"%s", state:%d, depth:%d, id:%d}}'
        t = (self.name, self.state, self.depth, id(self))
        return s % t

    def __cmp__ (self, other):
        '''Order handlers by name, then by object identity (Python 2 hook).'''
        # Tuple comparison gives the same name-then-id ordering as chained
        # cmp() calls, without depending on the Python 2-only cmp() builtin.
        mine = (self.name, id(self))
        theirs = (other.name, id(other))
        return (mine > theirs) - (mine < theirs)

    def __nonzero__ (self):
        '''A handler is always truthy, even if a subclass defines __len__.'''
        return True

    # Python 3 looks up __bool__ rather than __nonzero__; without this alias
    # a subclass with __len__ would be falsy whenever its length is zero.
    __bool__ = __nonzero__

    def get_attribute (self, attrs, name):
        '''Return attrs[name], or an empty string if the attribute is absent.'''
        return attrs[name] if name in attrs else ''

    def startDocument (self):
        '''Reset depth, state and the text buffer for a new document.'''
        if self.log: self.log.debug('document: START <%s>' % self.name)
        self.depth = 0
        self.state = 0
        self.buff = ''
        self.buff_enable = False

    def endDocument (self):
        '''Raise SyntaxError if any element is still open at end of document.'''
        if self.log: self.log.debug('document: END')
        if self.depth:
            raise SyntaxError('Element Nesting Error!')

    def characters (self, content):
        '''Append content to the text buffer while buffering is enabled.'''
        if self.buff_enable:
            # Whitespace-only chunks are buffered but not logged.
            if self.log and content.strip():
                self.log.trace('data: "%s"' % content.encode('UTF-8'))
            self.buff += content

    def startElement (self, name, attrs):
        '''Go one level deeper and start collecting text into a fresh buffer.'''
        if self.log: self.log.trace('element: <%s> %s' % (name, attrs.keys()))
        self.depth += 1
        self.buff = ''
        self.buff_enable = True

    def endElement (self, name):
        '''Go one level up, clear the buffer and stop collecting text.

        Raises SyntaxError if more elements were closed than opened.
        '''
        if self.log: self.log.trace('element: "%s" End' % name)
        self.depth -= 1
        if self.depth < 0:
            raise SyntaxError('Element Nesting Error!')
        self.buff = ''
        self.buff_enable = False
class XmlHandlerTest (XmlHandlerBase):
    '''Test handler adding the prefix-mapping and namespace-aware callbacks.

    Every callback reports through log.warn() when a logger is set. The
    namespace element callbacks also do the same depth and text-buffer
    bookkeeping as the plain element callbacks of the base class.
    '''

    def startPrefixMapping (self, prefix, uri):
        '''Report the start of a prefix-to-URI mapping.'''
        logger = self.log
        if logger:
            logger.warn('prefix: "%s":"%s"' % (prefix,uri))

    def endPrefixMapping (self, prefix):
        '''Report the end of a prefix mapping.'''
        logger = self.log
        if logger:
            logger.warn('prefix: "%s" End' % prefix)

    def startElementNS (self, name, qname, attrs):
        '''Report the element, go one level deeper and start buffering text.'''
        logger = self.log
        if logger:
            logger.warn('element: "%s" ("%s")' % (name,qname))
        self.depth += 1
        self.buff, self.buff_enable = '', True

    def endElementNS (self, name, qname):
        '''Report the element end, go one level up and stop buffering text.

        Raises SyntaxError if more elements were closed than opened.
        '''
        logger = self.log
        if logger:
            logger.warn('element: "%s" ("%s") End' % (name,qname))
        self.depth -= 1
        if self.depth < 0:
            raise SyntaxError('Element Nesting Error!')
        self.buff, self.buff_enable = '', False
class XmlHandlerMain (XmlHandlerBase):
    '''A generic XML parser base class. Does basic element text management.

    After each end tag, self.text holds the text buffered for that element
    (unescaped and UTF-8 encoded), or an empty string if nothing was
    buffered. Subclasses read self.text after calling endElement() here.
    '''

    def __init__ (self, name, fout=stdout, log=None):
        XmlHandlerBase.__init__(self, name, fout, log)
        self.text = ''

    def startElement (self, name, attrs):
        '''Start a new element and forget the previous element's text.'''
        XmlHandlerBase.startElement(self, name, attrs)
        self.text = ''

    def endElement (self, name):
        '''Capture the buffered text into self.text, then close the element.'''
        # The base class clears the buffer, so the text is taken first.
        # NOTE(review): SAX parsers normally resolve &amp; / &lt; / &gt;
        # before characters() is called, so this extra unescape() may decode
        # literal "&lt;" text a second time -- confirm this is intended.
        collected = self.buff
        self.text = unescape(collected).encode('UTF-8') if collected else ''
        XmlHandlerBase.endElement(self, name)
class XmlHandlerText (XmlHandlerMain):
    '''A simple XML file element text extraction app.

    Collects the text of every element that has any into self.text_content
    and, when an output stream is set, prints it as "name(text)".
    (Shows how to extend the Main class.)
    '''

    def __init__ (self, name, fout=stdout, log=None):
        XmlHandlerMain.__init__(self, name, fout, log)
        self.text_content = []

    def endElement (self, name):
        '''Record (and optionally print) the text of the element just closed.'''
        XmlHandlerMain.endElement(self, name)
        element_text = self.text
        if not element_text:
            return
        if self.out:
            line = '%s(%s)' % (name.encode('UTF-8'), element_text)
            print >> self.out, line
        self.text_content.append(element_text)
class XmlHandlerList (XmlHandlerMain):
    '''A simple XML file list app. (A more practical demo of Main.)

    Prints an indented outline of the document to self.out: one line per
    element, one " @ name=value" line per attribute (sorted by name) and
    one ' = "text"' line for each element that has text.

    Arguments:
        name -- label for this handler.
        fout -- output stream (default: sys.stdout, like the other handlers).
        log  -- optional logger.
    '''

    def __init__ (self, name, fout=stdout, log=None):
        XmlHandlerMain.__init__(self, name, fout, log)
        self.indent = ''

    def startElement (self, name, attrs):
        '''Print the element name and its attributes at the current depth.'''
        if self.out:
            # self.depth is still the parent's depth here; the base class
            # increments it in the startElement() call below.
            self.indent = '| ' * self.depth
            # The name is UTF-8 encoded like the attribute names and values,
            # so a non-ASCII element name is written the same way.
            print >> self.out, '%s%s' % (self.indent, name.encode('UTF-8'))
            for a in sorted(attrs.keys()):
                print >> self.out, '%s @ %s="%s"' % (self.indent, a.encode('UTF-8'), attrs[a].encode('UTF-8'))
        XmlHandlerMain.startElement(self, name, attrs)

    def endElement (self, name):
        '''Print the element's text, if any, using the current indent.'''
        XmlHandlerMain.endElement(self, name)
        if self.text and self.out:
            print >> self.out, '%s = "%s"' % (self.indent, self.text)
class XmlHandlerConfigBase (XmlHandlerMain):
    '''An XML Configuration file framework base class.

    Recognises <Configuration>, <PropertyList> and <Property> elements.
    Every closed <Configuration> adds the object made by
    new_config_object() to config_list; every <Property> inside a
    <PropertyList> adds a Property to props_list.

    The handler acts as a read-only sequence of its configurations
    (len(), iteration, indexing). Note that "in" tests whether an *index*
    is within range, not whether a configuration is present.
    '''

    def __init__ (self, name, fout=stdout, log=None):
        XmlHandlerMain.__init__(self, name, fout, log)
        self.props_list = []
        self.config_list = []
        self.curr_config = None

    def __len__ (self):
        return len(self.config_list)

    def __iter__(self):
        return iter(self.config_list)

    def __getitem__ (self, index):
        '''Return the configuration at index and make it the current one.'''
        selected = self.config_list[index]
        self.curr_config = selected
        return selected

    def __contains__ (self, index):
        '''Return True if index is a valid non-negative position.'''
        return 0 <= index < len(self)

    def __setitem__(self, name, value):
        raise NotImplementedError('Not allowed to modify the Configurations list!')

    def __delitem__ (self, ix):
        raise NotImplementedError('Not allowed to modify the Configurations list!')

    def startElement (self, name, attrs):
        '''Run the configuration state machine, then the normal start handling.'''
        self.localStart(name, attrs)
        XmlHandlerMain.startElement(self, name, attrs)

    def endElement (self, name):
        '''Run the normal end handling, then the configuration state machine.'''
        XmlHandlerMain.endElement(self, name)
        self.localEnd(name)

    def new_config_object (self, name):
        '''Client classes MUST override this method!

        Called with the "name" attribute of each <Configuration> element;
        whatever it returns is stored as the current configuration. This
        base version returns None.
        '''
        return None

    def find_configuration (self, name):
        '''Find a Config by name. (Sets current pointer.)

        Returns the first configuration whose .name equals name, or None
        (leaving the current pointer alone) when there is no match.
        '''
        found = next((cfg for cfg in self if cfg.name == name), None)
        if found is not None:
            self.curr_config = found
        return found

    def print_configuration (self):
        '''Client classes really SHOULD override this method!

        Prints the current configuration to self.out, if an output is set.
        '''
        if not self.out:
            return
        print >> self.out, 'Configuration: "%s"' % self.curr_config

    def print_configurations (self):
        '''Print all properties and configurations to self.out, if set.

        Each configuration is made current in turn and printed with
        print_configuration(); the current pointer is None afterwards.
        '''
        if not self.out:
            return
        print >> self.out, '::Configurations::'
        print >> self.out
        for prop in self.props_list:
            print >> self.out, '. prop "%s"' % prop
        print >> self.out
        for config in self.config_list:
            self.curr_config = config
            self.print_configuration()
        print >> self.out, '::::'
        self.curr_config = None

    def localStart (self, name, attrs):
        '''Start-tag half of the state machine.

        Transitions: 0 -> 100 on Configuration, 0 -> 200 on PropertyList,
        200 -> 201 on Property. Any other element leaves the state alone.
        '''
        current = self.state
        if current == 0 and name == 'Configuration':
            self.state = 100
            config_name = self.get_attribute(attrs, 'name')
            self.curr_config = self.new_config_object(config_name)
            if self.log:
                self.log.debug('Configuration: "%s"' % config_name)
        elif current == 0 and name == 'PropertyList':
            if self.log:
                self.log.trace('PropertyList:')
            self.state = 200
        elif current == 200 and name == 'Property':
            self.state = 201
            # Missing attributes come back as empty strings.
            self.props_list.append(Property(
                self.get_attribute(attrs,'name'),
                self.get_attribute(attrs,'value'),
                self.get_attribute(attrs,'type')))

    def localEnd (self, name):
        '''End-tag half of the state machine.

        Transitions: 100 -> 0 on Configuration (the current configuration is
        appended to config_list and the pointer cleared), 200 -> 0 on
        PropertyList, 201 -> 200 on Property.
        '''
        current = self.state
        if current == 100 and name == 'Configuration':
            self.state = 0
            if self.log:
                self.log.trace('Exit Configuration.')
            self.config_list.append(self.curr_config)
            self.curr_config = None
        elif current == 200 and name == 'PropertyList':
            self.state = 0
        elif current == 201 and name == 'Property':
            self.state = 200
def parse_xml_file (filename, config_handler):
    '''Given an XML filename and a config handler (extending XmlHandlerBase), parse the XML.

    Arguments:
        filename       -- path of the XML file to read.
        config_handler -- SAX content handler that receives the events.

    Any exception from opening or parsing the file propagates to the
    caller; the file is closed either way. Returns None.
    '''
    # Open in binary mode so the XML parser sees the raw bytes and can apply
    # the encoding named in the XML declaration itself (text mode would
    # decode or newline-translate the data before the parser gets it).
    with open(filename, 'rb') as fp:
        # NOTE(review): these are plain attributes set on the handler; the
        # parser created by xml.sax.parse() is not configured from them
        # here -- confirm whether parser.setFeature() was intended.
        config_handler.feature_namespaces = True
        config_handler.feature_namespace_prefixes = True
        parse(fp, config_handler)
'''eof'''