# xml_sax.py

'''\
XML SAX Framework.

classes:
    Property                (name, value, dtype=None)
    XmlHandlerBase          (saxhandler.ContentHandler)
    XmlHandlerTest          (XmlHandlerBase)
    XmlHandlerMain          (XmlHandlerBase)
    XmlHandlerText          (XmlHandlerMain)
    XmlHandlerList          (XmlHandlerMain)
    XmlHandlerConfigBase    (XmlHandlerMain)

functions:
    parse_xml_file (filename, config_handler)

Developer@Sonnack.com
November 2016
'''
####################################################################################################
from sys import stdout, stderr, argv
from xml.sax import parse
from xml.sax.handler import ContentHandler
from xml.sax.saxutils import unescape
from xml.sax.xmlreader import Locator
####################################################################################################



##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~##
class Property (object):
    '''A named value with an optional data-type tag.

    Attributes:
        name  -- the property name
        value -- the property value (stored exactly as given)
        dtype -- optional type tag; used only when formatting

    Calling an instance returns its value.
    '''
    def __init__ (self, name, value, dtype=None):
        self.name = name
        self.value = value
        self.dtype = dtype

    def __repr__ (self):
        '''Return a brace-delimited debug form; the value is passed through str().'''
        fmt = '{Property:{name:"%s", value:"%s", type:"%s"}}'
        return fmt % (self.name, str(self.value), self.dtype)

    def __str__ (self):
        '''Return "name[:dtype] = value"; a None value is shown as empty.'''
        # Only None means "no value"; falsy values such as 0 or False must
        # still be displayed rather than silently dropped.
        shown = '' if self.value is None else self.value
        suffix = (':%s' % self.dtype) if self.dtype else ''
        return '%s%s = %s' % (self.name, suffix, shown)

    def __call__ (self):
        '''Return the stored value.'''
        return self.value


##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~##
class XmlHandlerBase (ContentHandler):
    '''A basic XML parser framework using SAX. Strictly a base class.

    Attributes:
        name        -- label for this handler (used by str(), repr() and ordering)
        out         -- output stream for subclasses, or None for no output
        log         -- optional logger; must provide debug() and trace()
        depth       -- current element nesting depth
        state       -- parse state for subclasses; reset to 0 per document
        buff        -- text collected since the most recent start tag
        buff_enable -- True while character data is being collected
    '''

    def __init__ (self, name, fout=stdout, log=None):
        ContentHandler.__init__(self)
        self.setDocumentLocator(Locator())
        self.name = name
        self.out = fout
        self.log = log
        self.depth = 0
        self.state = 0
        self.buff = ''
        # Defined here as well as in startDocument() so that characters()
        # is safe to call on a freshly constructed handler.
        self.buff_enable = False

    def __str__ (self):
        return self.name

    def __repr__ (self):
        fmt = '{XMLHandler:{name:"%s", state:%d, depth:%d, id:%d}}'
        return fmt % (self.name, self.state, self.depth, id(self))

    def __cmp__ (self, other):
        '''Three-way compare by name, then by object identity.'''
        # Spelled out instead of calling cmp(), which Python 3 removed.
        def compare (a, b):
            return (a > b) - (a < b)
        return compare(self.name, other.name) or compare(id(self), id(other))

    def __lt__ (self, other):
        '''Order by __cmp__ so that sorted()/min()/max() also work on Python 3.'''
        return self.__cmp__(other) < 0

    def __nonzero__ (self):
        '''A handler is always true, even if a subclass defines __len__.'''
        return True

    def __bool__ (self):
        '''Python 3 spelling of __nonzero__.'''
        return self.__nonzero__()

    def get_attribute (self, attrs, name):
        '''Return the value of attribute name from attrs, or '' if absent.'''
        return attrs.get(name, '')

    # Document...
    def startDocument (self):
        '''Reset depth, state and the text buffer for a new document.'''
        if self.log: self.log.debug('document: START <%s>' % self.name)
        self.depth = 0
        self.state = 0
        self.buff = ''
        self.buff_enable = False

    def endDocument (self):
        '''Raise SyntaxError if any element is still open.'''
        if self.log: self.log.debug('document: END')
        if self.depth:
            raise SyntaxError('Element Nesting Error!')

    # Content...
    def characters (self, content):
        '''Append content to the buffer while collection is enabled.'''
        if not self.buff_enable:
            return
        if self.log and content.strip():
            # Python 2 hands over unicode, which is encoded for the log line;
            # a native str is logged unchanged.
            shown = content if isinstance(content, str) else content.encode('UTF-8')
            self.log.trace('data: "%s"' % shown)
        self.buff += content

    def startElement (self, name, attrs):
        '''Go one level deeper and start collecting text from scratch.'''
        if self.log: self.log.trace('element: <%s> %s' % (name, attrs.keys()))
        self.depth += 1
        self.buff = ''
        self.buff_enable = True

    def endElement (self, name):
        '''Go one level up and discard the buffer; SyntaxError on underflow.'''
        if self.log: self.log.trace('element: "%s" End' % name)
        self.depth -= 1
        if self.depth < 0:
            raise SyntaxError('Element Nesting Error!')
        self.buff = ''
        self.buff_enable = False


##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~##
class XmlHandlerTest (XmlHandlerBase):
    '''Implements the Prefix Mapping and NameSpace handler methods. A base class for testing.

    Each callback reports the event through log.warn() when a logger is set.
    The namespace-aware element callbacks maintain depth and the text buffer
    the same way the plain element callbacks of XmlHandlerBase do.
    '''

    # Prefix Mapping...
    def startPrefixMapping (self, prefix, uri):
        if self.log: self.log.warn('prefix: "%s":"%s"' % (prefix, uri))

    def endPrefixMapping (self, prefix):
        if self.log: self.log.warn('prefix: "%s" End' % prefix)

    # Element (NS)...
    def startElementNS (self, name, qname, attrs):
        '''Go one level deeper and start collecting text from scratch.'''
        if self.log: self.log.warn('element: "%s" ("%s")' % (name, qname))
        self.depth += 1
        self.buff = ''
        self.buff_enable = True

    def endElementNS (self, name, qname):
        '''Go one level up and discard the buffer; SyntaxError on underflow.'''
        if self.log: self.log.warn('element: "%s" ("%s") End' % (name, qname))
        self.depth -= 1
        if self.depth < 0:
            raise SyntaxError('Element Nesting Error!')
        self.buff = ''
        self.buff_enable = False


##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~##
class XmlHandlerMain (XmlHandlerBase):
    '''A generic XML parser base class. Does basic element text management. (Shows how to get started.)

    After endElement() returns, self.text holds the text collected for the
    element that just closed ('' if there was none). Subclasses read it after
    calling this class's endElement().
    '''

    def __init__ (self, name, fout=stdout, log=None):
        XmlHandlerBase.__init__(self, name, fout, log)
        self.text = ''

    # Element...
    def startElement (self, name, attrs):
        '''Clear the previous element's text, then do the base bookkeeping.'''
        XmlHandlerBase.startElement(self, name, attrs)
        self.text = ''

    def endElement (self, name):
        '''Publish the buffered text as self.text, then do the base bookkeeping.'''
        # The buffer must be read first: the base endElement() empties it.
        # NOTE(review): SAX parsers normally deliver character data with
        # entities already resolved, so unescape() here would also convert a
        # literal "&lt;" that was written as "&amp;lt;" -- confirm intended.
        text = unescape(self.buff) if self.buff else ''
        if not isinstance(text, str):
            # Python 2 unicode -> UTF-8 byte string (the native str there).
            # On Python 3 the text already is a native str and is kept as is.
            text = text.encode('UTF-8')
        self.text = text
        XmlHandlerBase.endElement(self, name)


##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~##
class XmlHandlerText (XmlHandlerMain):
    '''A simple XML file element text extraction app. (Shows how to extend the Main class.)

    Every non-empty element text is appended to self.text_content and, when
    an output stream is set, written to it as a "name(text)" line.
    '''

    def __init__ (self, name, fout=stdout, log=None):
        XmlHandlerMain.__init__(self, name, fout, log)
        self.text_content = []

    # End Element...
    def endElement (self, name):
        XmlHandlerMain.endElement(self, name)
        if not len(self.text):
            return
        if self.out:
            # Python 2 unicode names are encoded to UTF-8; a native str is kept.
            tag = name if isinstance(name, str) else name.encode('UTF-8')
            # write() with an explicit newline works on Python 2 and 3 alike,
            # unlike the Python 2 only "print >> stream" statement.
            self.out.write('%s(%s)\n' % (tag, self.text))
        self.text_content.append(self.text)


##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~##
class XmlHandlerList (XmlHandlerMain):
    '''A simple XML file list app. (A more practical demo of Main.)

    Writes an indented outline of the document to the output stream: one line
    per element name, one ' @ name="value"' line per attribute (sorted by
    name), and one ' = "text"' line for each non-empty element text.
    '''

    def __init__ (self, name, fout=stdout, log=None):
        # fout defaults to stdout like every other handler in this module.
        XmlHandlerMain.__init__(self, name, fout, log)
        self.indent = ''

    # Element...
    def startElement (self, name, attrs):
        if self.out:
            def native (text):
                # Python 2 unicode -> UTF-8 byte string; native str unchanged.
                return text if isinstance(text, str) else text.encode('UTF-8')
            # depth is still the parent's level here; it is incremented by
            # the XmlHandlerMain.startElement() call below.
            self.indent = '|   ' * self.depth
            self.out.write('%s%s\n' % (self.indent, native(name)))
            for key in sorted(attrs.keys()):
                line = '%s @ %s="%s"\n' % (self.indent, native(key), native(attrs[key]))
                self.out.write(line)
        XmlHandlerMain.startElement(self, name, attrs)

    def endElement (self, name):
        XmlHandlerMain.endElement(self, name)
        if len(self.text) and self.out:
            # self.indent is the one computed by the most recent startElement().
            self.out.write('%s = "%s"\n' % (self.indent, self.text))


##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~##
class XmlHandlerConfigBase (XmlHandlerMain):
    '''An XML Configuration file framework base class.

    Recognises three element names while parsing:
        Configuration -- creates an object via new_config_object() on the
                         start tag and appends it to config_list on the end tag
        PropertyList  -- container for Property elements
        Property      -- its name/value/type attributes become a Property
                         object appended to props_list

    The handler acts as a read-only sequence of its configurations
    (len(), iteration, indexing); item assignment and deletion raise
    NotImplementedError.

    Parse states (self.state):
          0 -- ground state
        100 -- inside a Configuration element
        200 -- inside a PropertyList element
        201 -- inside a Property element
    '''
    def __init__ (self, name, fout=stdout, log=None):
        XmlHandlerMain.__init__(self, name, fout, log)
        self.props_list = []
        self.config_list = []
        self.curr_config = None

    def __bool__ (self):
        # Because __len__ is defined, Python 3 would treat a handler with no
        # configurations as false; defer to the inherited __nonzero__ instead.
        return self.__nonzero__()

    def __len__ (self):
        return len(self.config_list)
    def __iter__(self):
        return iter(self.config_list)
    def __getitem__ (self, index):
        '''Return the configuration at index. (Sets current pointer.)'''
        self.curr_config = self.config_list[index]
        return self.curr_config
    def __contains__ (self, index):
        '''True if index is a valid non-negative position in the list.'''
        return 0 <= index < len(self)
    def __setitem__(self, name, value):
        raise NotImplementedError('Not allowed to modify the Configurations list!')
    def __delitem__ (self, ix):
        raise NotImplementedError('Not allowed to modify the Configurations list!')

    # Element...
    def startElement (self, name, attrs):
        self.localStart(name, attrs)
        XmlHandlerMain.startElement(self, name, attrs)

    def endElement (self, name):
        # The base call runs first so self.text is set before localEnd().
        XmlHandlerMain.endElement(self, name)
        self.localEnd(name)

    def new_config_object (self, name):
        '''Client classes MUST override this method!'''
        return None

    def find_configuration (self, name):
        '''Find a Config by name. (Sets current pointer.)'''
        for cn in self:
            # The default new_config_object() yields None entries; skip them.
            if cn is not None and cn.name == name:
                self.curr_config = cn
                return cn
        return None

    def print_configuration (self):
        '''Client classes really SHOULD override this method!'''
        if self.out:
            self.out.write('Configuration: "%s"\n' % (self.curr_config,))

    def print_configurations (self):
        '''Write all properties, then every configuration, to the output stream.'''
        out = self.out
        if not out:
            return
        # write() with explicit newlines replaces the Python 2 only
        # "print >> stream" statement; the text produced is the same.
        out.write('::Configurations::\n')
        out.write('\n')
        for prop in self.props_list:
            out.write('. prop "%s"\n' % (prop,))
        out.write('\n')
        for config in self.config_list:
            # print_configuration() reports whatever curr_config points at.
            self.curr_config = config
            self.print_configuration()
        out.write('::::\n')
        self.curr_config = None

    def localStart (self, name, attrs):
        '''Handles Configuration and PropertyList elements plus Property elements.

        State transitions on a start tag: 0 -> 100 (Configuration),
        0 -> 200 (PropertyList), 200 -> 201 (Property). Start tags seen in
        any other state are ignored.
        '''
        # Ground state...
        if self.state == 0:
            if name == 'Configuration':
                self.state = 100
                attr_name = self.get_attribute(attrs, 'name')
                self.curr_config = self.new_config_object(attr_name)
                if self.log: self.log.debug('Configuration: "%s"' % attr_name)
            elif name == 'PropertyList':
                if self.log: self.log.trace('PropertyList:')
                self.state = 200
            return
        # PropertyList state (200)...
        if self.state == 200 and name == 'Property':
            self.state = 201
            attr_name = self.get_attribute(attrs, 'name')
            attr_value = self.get_attribute(attrs, 'value')
            attr_type = self.get_attribute(attrs, 'type')
            self.props_list.append(Property(attr_name, attr_value, attr_type))

    def localEnd (self, name):
        '''Handles Configuration and PropertyList elements plus Property elements.

        State transitions on an end tag: 100 -> 0 (Configuration),
        200 -> 0 (PropertyList), 201 -> 200 (Property). The finished
        configuration is appended to config_list and the current pointer
        is cleared.
        '''
        # Configuration...
        if self.state == 100:
            if name == 'Configuration':
                self.state = 0
                if self.log: self.log.trace('Exit Configuration.')
                self.config_list.append(self.curr_config)
                self.curr_config = None
            return
        # PropertyList...
        if self.state == 200:
            if name == 'PropertyList':
                self.state = 0
            return
        # Property...
        if self.state == 201 and name == 'Property':
            self.state = 200



##================================================================================================##
def parse_xml_file (filename, config_handler):
    '''Given an XML filename and a config handler (extending XmlHandlerBase), parse the XML.

    filename       -- path of the XML file to read
    config_handler -- SAX content handler that receives the parse events

    Returns None. Errors from opening the file, from the SAX parser, or from
    the handler propagate to the caller; the file is closed in every case.
    '''
    # Binary mode hands the parser the raw bytes, so the encoding named in the
    # XML declaration is honoured instead of the platform's default text
    # encoding (and no newline translation is applied).
    with open(filename, 'rb') as fp:
        # SAX Handler...
        # NOTE(review): these are plain attributes set on the handler;
        # namespace processing is normally switched on with setFeature() on
        # the reader -- confirm these flags have the intended effect.
        config_handler.feature_namespaces = True
        config_handler.feature_namespace_prefixes = True

        # SAX Parse...
        parse(fp, config_handler)



####################################################################################################
'''eof'''