# template.py

'''Template to HTML.

Merge a template file with data to create an HTML output file.
The template file is an HTML file, but with special fields (which are cloaked in comments).
The HTML is written to the output unchanged; the fields (including the enclosing comment)
are replaced with strings from the provided data bag.

Special Fields:
    <!--@(field-spec)-->
    <!--@[loop-name]-->  ...  <!--@[/loop-name]-->

Fields refer by name to items in the data bag.
Field syntax:
    [group-name [:sub-name [:sub-name]]] :datum-name

No whitespace; most characters allowed (no colons); not case-sensitive.
The datum-name is required. It specifies a datum in the data bag.
The group-name specifies a top-level group in the bag.
It is optional ONLY inside a loop, where it defaults to the enclosing loop group.
Optional sub-names specify hierarchical groups within the bag.

Loops:
Loop name refers to top-level group in the bag, which is iterated over.
Each loop has a $# variable depending on loop level.
(The first loop has $1, an immediately nested loop has $2, etc.)
The loop variable is the group sub-bag key, if provided, else the loop index.
The loop variable can be used in field specifications (except as group-name).

Examples:
    (seasons:caption)           - caption datum from seasons sub-bag
    (halfs:1:caption)           - caption datum from halfs group, first sub-bag
    (halfs:2:caption)           - caption datum from halfs group, second sub-bag
    (months:apr:caption)        - caption datum from months group, sub-bag with 'apr' key
    (months:$1:caption)         - caption datum from months group, indexed by loop var $1
    (months:$2:$1)              - datum[$1] from bag[months][$2]
    (months:$2:weeks:$3:$1)     - datum[$1] from bag[months][$2][weeks][$3]

Data Bag:
    <bag>     := { <datums> <groups> }
    <datum>   := <name> : <value>
    <group>   := [ <bags> ]

    <datums>  := NULL | <datum> <datums>
    <groups>  := NULL | <group> <groups>
    <bags>    := NULL | <bag> <bags>

If the bags in a group all have a datum named 'key', the group can be indexed by
those keys as well as by numeric index (the default). (In point of fact, *any*
key datums allow name access, but iteration needs keys from all sub-bags to switch
to using those keys in the loop variable.)

Therefore, protocol allows (with sub-bags recursively defined):
    <bag> = { name:value,.. name:<bag>,.. name:[<bag>,<bag>,..],.. }

Developer@Sonnack.com
August 2017
'''
####################################################################################################
from __future__ import print_function
from sys import argv, stdout, stderr, path as syspath
from os import path
from datetime import datetime, date, timedelta
from re import compile
from xml_sax import XmlHandlerMain, parse_xml_file
####################################################################################################

# Delimiters of a template field; together they cloak the field in an HTML comment.
StartMarker = "<!--@"
EndMarker = "-->"


##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~##
class HtmlTemplateToken (object):
    '''A run of literal template text, written to the output unchanged.

    Also the base class of the field and loop tokens. Every token is a
    callable taking (fp, data_bag, toks): the output file object, the data
    bag, and the list of tokens still to be processed.
    '''
    # Formats of the per-loop entries a loop stores in the data bag.
    # Each is filled in with the loop's nesting level.
    Key0Format = '*%s'  # the current sub-bag
    Key1Format = '$%s'  # the current 1-based index (as a string)
    Key2Format = '@%s'  # the current sub-bag's 'key' datum (None if absent)

    def __init__ (self, text, log=None):
        '''Keep the token text and an optional logger (used via .trace()).'''
        self.text = text
        self.log = log

    def __len__ (self):
        '''Length of the token text. (An empty token is therefore falsy.)'''
        return len(self.text)

    def __str__ (self):
        # Show only the first 16 characters, flattened onto one line.
        s = self.text[0:16].replace('\n',' ')
        return 'TemplateToken["%s..."] bytes=%d' % (s, len(self.text))

    def __call__ (self, fp, data_bag, toks):
        '''Write the literal text to fp; data_bag and toks are not used.'''
        fp.write(self.text)
        if self.log: self.log.trace('TEXT (%d bytes)' % len(self.text))

##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~##
class HtmlTemplateField (HtmlTemplateToken):
    '''A <!--@(field-spec)--> token: replaced in the output by a datum.

    The field spec is split on ':' into parts. The last part names the
    datum; the first part (if there is more than one) names an entry of the
    data bag to start from; any parts in between select sub-bags.
    Any part but the first may be a loop variable ($1, $2, ...).
    '''

    def __init__ (self, text, log=None):
        super(HtmlTemplateField,self).__init__(text, log=log)
        self.parts = self.text.split(':')

    def __str__ (self):
        return 'TemplateField[%s]' % self.text

    def __call__ (self, fp, data_bag, toks):
        '''Resolve the field against data_bag and write its value to fp.

        Writes '?' when the datum is not present in the selected bag.
        toks is not used.
        '''
        bag = data_bag
        key = self.__resolve_key(self.parts[-1], data_bag)
        # If there's a group-name...
        if 1 < len(self.parts):
            # Get the group sub-bag...
            bag = bag[self.parts[0]]
            # If there are sub-names, walk down through them...
            for k in self.parts[1:-1]:
                k = self.__resolve_key(k, data_bag)
                bag = self.__find_sub_bag(k, bag)
        # Get the field's value
        fld = bag[key] if key in bag else '?'
        fp.write(fld)
        if self.log: self.log.trace('FIELD[%s] = "%s"' % (self.text, fld))

    def __find_sub_bag (self, key, group):
        '''Return the member of group selected by key.

        group may be a bag (dict), in which case key names one of its
        entries, or a list of bags, in which case key is either a 1-based
        index or the value of a bag's 'key' datum.
        Raises RuntimeError when nothing matches.
        '''
        # A bag holds its nested bags/groups by name...
        if isinstance(group, dict):
            if key in group:
                return group[key]
            raise RuntimeError('No sub-bag "%s" found!' % key)
        # A 1-based numeric index (0 must not wrap around to the last bag)...
        if key.isdigit() and (1 <= int(key) <= len(group)):
            return group[int(key)-1]
        # Otherwise look for a bag whose 'key' datum matches...
        for bag in group:
            if ('key' in bag) and (bag['key'] == key):
                return bag
        raise RuntimeError('No sub-bag "%s" found!' % key)

    def __resolve_key (self, key, data_bag):
        '''Replace a loop variable ($N) with its current value.

        The sub-bag key recorded by loop N is preferred; if that loop has
        no key for the current sub-bag, its index is used instead.
        Any other key is returned unchanged.
        '''
        # If key is a loop variable...
        if key.startswith('$'):
            # Look for it in the loop variables...
            k1 = self.Key1Format % key[1:]
            k2 = self.Key2Format % key[1:]
            # See if there's a name variable...
            key = data_bag.get(k2)
            # If there isn't, fall back to the index variable...
            if not key:
                key = data_bag[k1]
        return key

##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~##
class HtmlTemplateLoop (HtmlTemplateToken):
    '''A <!--@[name]--> token: loop start, loop end, or alt-loop marker.

    [name]      starts a loop over data_bag[name].
    [/name]     ends that loop.
    [#name:key] (inside the loop) starts an alternate body: the tokens from
                this marker up to the next marker (or the loop end) replace
                the normal loop body for a sub-bag whose 'key' datum == key.

    Only a loop-start token is meant to be called. When called it removes
    its body (and end token) from the remaining-token list and runs the
    body once per sub-bag of the group.
    '''

    def __init__ (self, text, log=None):
        super(HtmlTemplateLoop,self).__init__(text, log=log)
        self.tok_list = []
        self.alt_list = []
        self.group = {}
        self.level = 0
        self.key_name = None
        self.alt_loop = text.startswith('#')
        if self.alt_loop:
            # '#name:key' -> text '#name', key_name 'key'
            parts = text.split(':')
            if len(parts) != 2: raise SyntaxError('Invalid AltLoop Name: "%s"' % text)
            self.text = parts[0]
            self.key_name = parts[1]

    def __str__ (self):
        return 'TemplateLoop[%s]' % self.text

    def __call__ (self, fp, data_bag, toks):
        '''Run the loop body once for each sub-bag of the loop's group.

        toks is the list of tokens following this one; the loop body and
        its end token are removed from it. While a sub-bag is processed the
        data bag holds three loop entries (see the Key?Format formats).
        '''
        if self.log: self.log.trace('LOOP[%s]: BEGIN' % self.text)
        self.__push_context(data_bag)
        try:
            self.__find_loop_end(toks)
            k0 = self.Key0Format % self.level
            k1 = self.Key1Format % self.level
            k2 = self.Key2Format % self.level
            if self.log: self.log.trace('toks=%d, alts=%d, keys=(%s, %s, %s)' % (len(self.tok_list),len(self.alt_list),k0,k1,k2))
            # For each bag in the group...
            for rx,sub_bag in enumerate(self.group):
                bag_idx = str(1+rx)
                bag_key = sub_bag['key'] if 'key' in sub_bag else None
                if self.log: self.log.trace('LOOP[%s]: %s %s' % (self.text, bag_idx, bag_key))
                # Check for Alt-Loops (an empty alt body falls back to the normal body)...
                body = self.__test_alt_loops(bag_key)
                if body:
                    if self.log: self.log.trace('ALT-LOOP[%s]: %d tokens' % (self.text, len(body)))
                else:
                    # Copy the list of tokens to loop...
                    body = list(self.tok_list)
                self.__run_tokens(fp, data_bag, body, {k0:sub_bag, k1:bag_idx, k2:bag_key})
        finally:
            # Restore the nesting level even if a token failed.
            self.__pop_context(data_bag)
        if self.log: self.log.trace('LOOP[%s]: END' % self.text)

    def __run_tokens (self, fp, data_bag, toks, loop_vars):
        '''Consume and execute toks with loop_vars present in data_bag.'''
        data_bag.update(loop_vars)
        try:
            while toks:
                # Consume the token, then execute it. (A nested loop token
                # removes its own body from toks when it runs.)
                tok = toks.pop(0)
                tok(fp, data_bag, toks)
        finally:
            # Never leave this loop's variables behind in the data bag.
            for k in loop_vars:
                data_bag.pop(k, None)

    def __test_alt_loops (self, bag_key):
        '''Return a copy of the alt body for bag_key (minus its marker), or None.'''
        for alt_loop in self.alt_list:
            if alt_loop[0].key_name == bag_key:
                return list(alt_loop[1:])
        return None

    def __find_loop_end (self, toks):
        '''Move the loop body out of toks into self.tok_list.

        The body is everything before the first '/name' loop token; that
        end token is removed from toks as well. Alt-loop sections are then
        split out of the body. Raises RuntimeError if there is no end token.
        '''
        self.tok_list = []
        name = '/%s' % self.text
        for tx,tok in enumerate(toks):
            if isinstance(tok,HtmlTemplateLoop) and (tok.text == name):
                self.tok_list = toks[0:tx]
                del toks[0:tx+1]
                self.__extract_alt_loops()
                return self.tok_list
        raise RuntimeError('Invalid Loop: No Loop End! [%s]' % name)

    def __extract_alt_loops (self):
        '''Split '#name' alt-loop sections out of self.tok_list.

        Each section (starting with its marker token) goes to self.alt_list;
        self.tok_list keeps only the tokens before the first marker.
        '''
        self.alt_list = []
        name = '#%s' % self.text
        txs = [tx for tx,tok in enumerate(self.tok_list) if isinstance(tok,HtmlTemplateLoop) and (tok.text==name)]
        if txs:
            loop_toks = self.tok_list[0:txs[0]]
            # Each section runs from its marker to the next marker (or the end)...
            txs.append(len(self.tok_list))
            self.alt_list = [self.tok_list[a:b] for a,b in zip(txs, txs[1:])]
            self.tok_list = loop_toks
            if self.log:
                for ax,a in enumerate(self.alt_list):
                    self.log.debug('Alt-Loop[%d] %s' % (ax, a[0].key_name))
                    for xx,x in enumerate(a):
                        self.log.debug('Token[%d] %s' % (xx,x))

    def __push_context (self, data_bag):
        '''Fetch the loop's group and enter one loop nesting level.'''
        self.group = data_bag[self.text]
        # A ready-made data bag may not carry a 'level' entry yet.
        self.level = 1 + data_bag.get('level', 0)
        data_bag['level'] = self.level

    def __pop_context (self, data_bag):
        '''Leave this loop's nesting level.'''
        data_bag['level'] = self.level - 1

##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~##
class HtmlTemplate (object):
    '''An HTML template file, loaded and split into tokens.

    load() reads the file, tokenize() splits it into text, field and loop
    tokens, and emit() merges the tokens with a data bag into an output file.
    '''
    def __init__ (self, filename, log=None):
        self.log = log
        self.tokens = []
        self.load(filename)
        self.tokenize()

    def __len__ (self):
        '''Size of the loaded template text.'''
        return len(self.html)

    def __str__ (self):
        # The template's filename is kept in self.iname (set by load).
        return '%s (%d bytes)' % (self.iname, len(self))

    def tokenize (self):
        '''Split self.html into self.tokens.

        Raises SyntaxError for a field with no end marker, a field/loop
        with unbalanced brackets, or a field spec that is neither (...)
        nor [...] (or is empty).
        '''
        if self.log: self.log.trace('HtmlTemplate::tokenize:')
        txt_view = lambda s: (s[:20], '...' if 20 < len(s) else '')
        self.tokens = []
        txt = str(self.html)
        while txt:
            # See if the text begins with a field...
            if txt.startswith(StartMarker):
                # It does, so consume it and continue...
                cx = txt.find(EndMarker)
                if cx < 0:
                    raise SyntaxError('Bad Field; No End Marker: "%s%s"' % txt_view(txt))
                fld = txt[len(StartMarker):cx]
                txt = txt[cx+len(EndMarker):]
                tok = None
                #
                if fld.startswith('('):
                    if not fld.endswith(')'):
                        raise SyntaxError('Invalid Field Name: "%s"' % fld)
                    if self.log:
                        self.log.trace('HtmlTemplate::tokenize: Template Field: %s' % fld)
                    tok = HtmlTemplateField(fld[1:-1], log=self.log)
                elif fld.startswith('['):
                    if not fld.endswith(']'):
                        raise SyntaxError('Invalid Loop Name: "%s"' % fld)
                    if self.log:
                        self.log.trace('HtmlTemplate::tokenize: Template Loop: %s' % fld)
                    tok = HtmlTemplateLoop(fld[1:-1], log=self.log)
                # Tokens are falsy when their text is empty, so this rejects
                # '()' and '[]' as well as unrecognized field specs.
                if not tok:
                    raise SyntaxError('Invalid Field Spec: "%s"' % fld)
                #
                self.tokens.append(tok)
                continue

            # Text doesn't begin with field, so look for one somewhere in the text...
            cx = txt.find(StartMarker)
            if 0 <= cx:
                # Found one in the text, so consume text up to it and continue...
                if self.log:
                    self.log.trace('HtmlTemplate::tokenize: Template Text[0:%d]' % cx)
                tok = HtmlTemplateToken(txt[0:cx], log=self.log)
                txt = txt[cx:]
                self.tokens.append(tok)
                continue

            # None found, text to the end, so consume it all (and we're done)...
            if self.log:
                self.log.trace('HtmlTemplate::tokenize: Template Text[rest]')
            tok = HtmlTemplateToken(txt, log=self.log)
            txt = ''
            self.tokens.append(tok)

    def emit (self, filename, data_bag=None):
        '''Write the template, merged with data_bag, to the file filename.

        Each token is called with (fp, data_bag, remaining-tokens); tokens
        may consume further tokens from that list. self.tokens itself is
        not modified. Without a data_bag a fresh empty bag is used.
        '''
        if data_bag is None:
            data_bag = {}
        self.oname = filename
        with open(self.oname, 'w') as fp:
            # Work on a copy: the token list is consumed as it is processed.
            toks = list(self.tokens)
            while toks:
                tok = toks.pop(0)
                tok(fp, data_bag, toks)
        if self.log: self.log.debug('wrote: %s' % self.oname)

    def load (self, filename):
        '''Read the whole template file into self.html.'''
        self.iname = filename
        with open(self.iname, 'r') as fp:
            self.html = fp.read()
        if self.log: self.log.debug('read: %s' % self.iname)


##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~##
class XmlHandlerDataBag (XmlHandlerMain):
    '''XML handler that builds a data bag from <bag>, <group> and <datum> elements.

    A <bag> becomes a dict, a <group> becomes a list, and a <datum> becomes
    a name:value entry of the enclosing bag. The result is in self.bag.
    '''

    def __init__ (self, name, fout, log=None):
        XmlHandlerMain.__init__(self, name, fout, log)
        self.bag = {}
        # Stack of open containers; the innermost one is last.
        self.ctx = [self.bag]

    # Element...
    def startElement (self, name, attrs):
        '''Add the new element to the innermost open container.

        Raises SyntaxError for an unnamed <bag>/<group> directly inside a
        bag, and for a <datum> directly inside a group.
        '''
        cur_ctx = self.ctx[-1]
        #
        if name == 'bag':
            bag_name = self.get_attribute(attrs, 'name')
            new_bag = {'_name':bag_name}
            self.__attach(cur_ctx, 'bag', bag_name, new_bag)
            self.ctx.append(new_bag)
        #
        elif name == 'group':
            grp_name = self.get_attribute(attrs, 'name')
            new_grp = []
            self.__attach(cur_ctx, 'group', grp_name, new_grp)
            self.ctx.append(new_grp)
        #
        elif name == 'datum':
            dat_name = self.get_attribute(attrs, 'name')
            dat_value = self.get_attribute(attrs, 'value')
            if isinstance(cur_ctx,list):
                # Context is a Group...
                raise SyntaxError('<datum %s %s> not allowed in <group>!' % (dat_name,dat_value))
            # Context is a Bag...
            cur_ctx[dat_name] = dat_value
        #
        XmlHandlerMain.startElement(self, name, attrs)

    def endElement (self, name):
        '''Close the container opened by the matching start element.'''
        XmlHandlerMain.endElement(self, name)
        # Only bags and groups open a context; a datum has nothing to close.
        if name in ('bag', 'group'):
            self.ctx.pop()

    def __attach (self, cur_ctx, tag, item_name, item):
        '''Put a new bag or group into the current context.'''
        if isinstance(cur_ctx,list):
            # Context is a Group: members are positional...
            cur_ctx.append(item)
        else:
            # Context is a Bag: members are stored by name...
            if not item_name:
                # (The root bag has no '_name' entry.)
                raise SyntaxError('<%s ?> in <bag %s> requires a name!' % (tag, cur_ctx.get('_name')))
            cur_ctx[item_name] = item



##================================================================================================##
def xml_to_data_bag (xml_data_file, log=None):
    '''Parse an XML data file into a data bag (dict) for template merging.

    The bag starts with 'last-modified' and 'DCR' timestamps (the current
    local time) and the loop 'level' counter (0). The contents of every
    top-level bag in the XML are then merged directly into it.
    '''
    dt = datetime.now()
    #
    hndlr = XmlHandlerDataBag('root', stdout, log=log)
    parse_xml_file(xml_data_file, hndlr)
    #
    data = { 'last-modified':dt.strftime('%a %b %d %H:%M:%S %Y')
           , 'DCR':dt.strftime('%m-%d-%Y %H:%M')
           , 'level':0
           }
    for k in hndlr.bag:
        bag = hndlr.bag[k]
        if isinstance(bag, dict):
            # Flatten: the top-level bag's entries become top-level data.
            data.update(bag)
        else:
            # A top-level group (list) cannot be flattened; keep it by name.
            data[k] = bag
    return data

##================================================================================================##
def template_to_html (in_file, xml_data_file, out_file, dbag=None, log=None):
    '''Generate an HTML output file from an input HTML template plus a bag of data.

    in_file       - the HTML template file to read
    xml_data_file - XML file to build the data bag from; only read when
                    dbag is missing or empty
    out_file      - the HTML file to write
    dbag          - optional ready-made data bag (dict)
    log           - optional logger handed to the template and XML parser
    '''
    # Generate the data bag from an input XML file (if not provided ready-made)...
    data_bag = dbag if dbag else xml_to_data_bag(xml_data_file, log=log)
    # New template instance...
    tmplt = HtmlTemplate(in_file, log=log)
    # Write the HTML text to an output file...
    tmplt.emit(out_file, data_bag)



####################################################################################################
'''eof'''