# template.py
'''Template to HTML.
Merge a template file with data to create an HTML output file.
The template file is an HTML file, but with special fields (which are cloaked in comments).
The HTML is written to the output unchanged; the fields (including the enclosing comment)
are replaced with strings from the provided data bag.
Special Fields:
<!--@(field-spec)-->
<!--@[loop-name]--> ... <!--@[/loop-name]-->
Fields refer by name to items in the data bag.
Field syntax:
[group-name [:sub-name [:sub-name]]] :datum-name
No whitespace; most characters allowed (no colons); not case-sensitive.
The datum-name is required. It specifies a datum in the data bag.
The group-name specifies a top-level group in the bag.
It is optional ONLY inside a loop, where it defaults to the enclosing loop group.
Optional sub-names specify hierarchical groups within the bag.
Loops:
Loop name refers to top-level group in the bag, which is iterated over.
Each loop has a $# variable depending on loop level.
(The first loop has $1, an immediately nested loop has $2, etc.)
The loop variable is the group sub-bag key, if provided, else the loop index.
The loop variable can be used in field specifications (except as group-name).
Examples:
(seasons:caption) - caption datum from seasons sub-bag
(halfs:1:caption) - caption datum from halfs group, first sub-bag
(halfs:2:caption) - caption datum from halfs group, second sub-bag
(months:apr:caption) - caption datum from months group, sub-bag with 'apr' key
(months:$1:caption) - caption datum from months group, indexed by loop var $1
(months:$2:$1) - datum[$1] from bag[months][$2]
(months:$2:weeks:$3:$1) - datum[$1] from bag[months][$2][weeks][$3]
Data Bag:
<bag> := { <datums> <groups> }
<datum> := <name> : <value>
<group> := [ <bags> ]
<datums> := NULL | <datum> <datums>
<groups> := NULL | <group> <groups>
<bags> := NULL | <bag> <bags>
If the bags in a group all have a datum named 'key', the group can be indexed by
those keys as well as by numeric index (the default). (In point of fact, *any*
key datums allow name access, but iteration needs keys from all sub-bags to switch
to using those keys in the loop variable.)
Therefore, protocol allows (with sub-bags recursively defined):
<bag> = { name:value,.. name:<bag>,.. name:[<bag>,<bag>,..],.. }
Developer@Sonnack.com
August 2017
'''
from __future__ import print_function
from sys import argv, stdout, stderr, path as syspath
from os import path
from datetime import datetime, date, timedelta
from re import compile
from xml_sax import XmlHandlerMain, parse_xml_file
# Text that opens a template field or loop tag; the tokenizer splits the
# template wherever this marker occurs.
StartMarker = '<!--@'
# Text that closes a template field or loop tag (the HTML comment terminator).
EndMarker = '-->'
class HtmlTemplateToken (object):
    '''A run of literal template text; also the base class of the other tokens.

    Calling a token emits it: a plain text token simply copies its text to
    the output.  The three KeyNFormat strings build the data-bag key names
    under which a loop publishes its per-iteration state.
    '''
    Key0Format = '*%s'
    Key1Format = '$%s'
    Key2Format = '@%s'

    def __init__ (self, text, log=None):
        '''Remember the token text and the (optional) logger.'''
        self.text, self.log = text, log

    def __len__ (self):
        '''Length of the token text.'''
        return len(self.text)

    def __str__ (self):
        '''Short description: the first 16 characters (newlines flattened) and the size.'''
        preview = ' '.join(self.text[:16].split('\n'))
        size = len(self.text)
        return 'TemplateToken["%s..."] bytes=%d' % (preview, size)

    def __call__ (self, fp, data_bag, toks):
        '''Write the literal text to fp; data_bag and toks are not used here.'''
        fp.write(self.text)
        if not self.log:
            return
        self.log.trace('TEXT (%d bytes)' % len(self.text))
class HtmlTemplateField (HtmlTemplateToken):
    '''Template token for a <!--@(field-spec)--> field.

    The field spec is split on ':' into parts.  The last part names the datum.
    When there is more than one part, the first names a top-level item of the
    data bag and any parts in between select sub-bags: by name (inside a bag),
    or by 1-based index or 'key' datum (inside a group).  A part starting with
    '$' is a loop variable and is replaced by the current loop key or index.
    '''

    def __init__ (self, text, log=None):
        '''Store the field spec and pre-split it into its parts.'''
        super(HtmlTemplateField,self).__init__(text, log=log)
        self.parts = self.text.split(':')

    def __str__ (self):
        return 'TemplateField[%s]' % self.text

    def __call__ (self, fp, data_bag, toks):
        '''Resolve the field against data_bag and write its value to fp.

        Writes '?' when the datum is not found.  Raises KeyError when the
        named top-level item or a referenced loop variable does not exist,
        and RuntimeError when a sub-bag cannot be found.  toks is unused.
        '''
        bag = data_bag
        key = self.__resolve_key(self.parts[-1], data_bag)
        if 1 < len(self.parts):
            bag = bag[self.parts[0]]
            for k in self.parts[1:-1]:
                k = self.__resolve_key(k, data_bag)
                bag = self.__find_sub_bag(k, bag)
        elif key not in bag:
            # Bare datum name that is not a top-level datum: inside a loop the
            # group defaults to the enclosing loop's current sub-bag.
            bag = self.__enclosing_loop_bag(data_bag, bag)
        fld = bag[key] if key in bag else '?'
        # Datums are normally strings, but the bag may also hold other values
        # (e.g. the integer 'level'); format rather than crash in write().
        fp.write('%s' % (fld,))
        if self.log: self.log.trace('FIELD[%s] = "%s"' % (self.text, fld))

    def __enclosing_loop_bag (self, data_bag, default):
        '''Return the innermost loop's current sub-bag, or default outside a loop.'''
        level = data_bag.get('level', 0)
        loop_bag = data_bag.get(self.Key0Format % level) if level else None
        return loop_bag if isinstance(loop_bag, dict) else default

    def __find_sub_bag (self, key, group):
        '''Select the child of group identified by key.

        A dict (bag) is searched by name.  A list (group) is searched by
        1-based index when key is all digits, else by each sub-bag's 'key'
        datum.  Raises RuntimeError when nothing matches.
        '''
        if isinstance(group, dict):
            # Hierarchical names such as months:$2:weeks step through a bag.
            if key in group:
                return group[key]
            raise RuntimeError('No sub-bag "%s" found!' % key)
        if key.isdigit():
            index = int(key)
            # Index 0 must not wrap around to the last sub-bag via group[-1].
            if 1 <= index <= len(group):
                return group[index-1]
            raise RuntimeError('No sub-bag "%s" found!' % key)
        for bag in group:
            if ('key' in bag) and (bag['key'] == key):
                return bag
        raise RuntimeError('No sub-bag "%s" found!' % key)

    def __resolve_key (self, key, data_bag):
        '''Replace a '$N' loop variable by the loop's key, else its index.

        Any other key is returned unchanged.  Raises KeyError (naming the
        '$N' variable) when no loop of that level is active.
        '''
        if key.startswith('$'):
            k1 = self.Key1Format % key[1:]
            k2 = self.Key2Format % key[1:]
            # '@N' holds the sub-bag key (None when the sub-bag has no 'key'
            # datum); '$N' holds the 1-based iteration index as a string.
            key = data_bag.get(k2) or data_bag[k1]
        return key
class HtmlTemplateLoop (HtmlTemplateToken):
    '''Template token for a <!--@[loop-name]--> tag.

    The same class represents three kinds of tag, told apart by their text:
    'name' starts a loop, '/name' ends it, and '#name:key' starts an
    alternate body used for the sub-bag whose 'key' datum equals key.
    Only a loop-start token is meant to be called; it pulls its own body
    (up to the matching end tag) out of the pending token list.
    '''

    def __init__ (self, text, log=None):
        '''Store the loop name; for an alternate tag split off the key name.

        Raises SyntaxError when an alternate tag is not of the form '#name:key'.
        '''
        super(HtmlTemplateLoop,self).__init__(text, log=log)
        self.tok_list = []
        self.alt_list = []
        self.group = {}
        self.level = 0
        self.key_name = None
        self.alt_loop = text.startswith('#')
        if self.alt_loop:
            parts = text.split(':')
            if len(parts) != 2:
                raise SyntaxError('Invalid AltLoop Name: "%s"' % text)
            self.text = parts[0]
            self.key_name = parts[1]

    def __str__ (self):
        return 'TemplateLoop[%s]' % self.text

    def __call__ (self, fp, data_bag, toks):
        '''Emit the loop body once for every sub-bag of the named group.

        toks is the caller's list of pending tokens; the loop body and its
        end tag are removed from it.  During each iteration the data bag
        carries the current sub-bag ('*N'), its 1-based index ('$N') and its
        'key' datum or None ('@N'), where N is the loop nesting level.
        Raises KeyError when the group is not in the data bag and
        RuntimeError when the loop has no end tag.
        '''
        if self.log: self.log.trace('LOOP[%s]: BEGIN' % self.text)
        self.__push_context(data_bag)
        try:
            self.__find_loop_end(toks)
            keys = ( self.Key0Format % self.level
                   , self.Key1Format % self.level
                   , self.Key2Format % self.level
                   )
            if self.log: self.log.trace('toks=%d, alts=%d, keys=(%s, %s, %s)' % ((len(self.tok_list),len(self.alt_list)) + keys))
            for rx,sub_bag in enumerate(self.group):
                bag_idx = str(1+rx)
                bag_key = sub_bag['key'] if 'key' in sub_bag else None
                if self.log: self.log.trace('LOOP[%s]: %s %s' % (self.text, bag_idx, bag_key))
                body = self.__test_alt_loops(bag_key)
                # Compare with None: an alternate with an empty body must emit
                # nothing instead of falling back to the default body.
                if body is not None:
                    if self.log: self.log.trace('ALT-LOOP[%s]: %d tokens' % (self.text, len(body)))
                else:
                    body = list(self.tok_list)
                self.__run_tokens(fp, data_bag, body, keys, (sub_bag, bag_idx, bag_key))
        finally:
            # Restore the nesting level even when a token raises.
            self.__pop_context(data_bag)
        if self.log: self.log.trace('LOOP[%s]: END' % self.text)

    def __run_tokens (self, fp, data_bag, toks, keys, values):
        '''Emit toks for one iteration with the loop variables published.

        Tokens are popped from the front of toks and handed the remainder,
        so that a nested loop can claim its own body from the list.
        '''
        for k,v in zip(keys, values):
            data_bag[k] = v
        try:
            while toks:
                tok = toks.pop(0)
                tok(fp, data_bag, toks)
        finally:
            # Never leave this iteration's variables behind in the data bag.
            for k in keys:
                data_bag.pop(k, None)

    def __test_alt_loops (self, bag_key):
        '''Return a copy of the alternate body registered for bag_key, or None.'''
        for alt_loop in self.alt_list:
            if alt_loop[0].key_name == bag_key:
                return list(alt_loop[1:])
        return None

    def __find_loop_end (self, toks):
        '''Move this loop's body out of toks and into self.tok_list.

        The body is everything before the first '/name' tag; the body and
        the end tag are deleted from toks.  (The first matching end tag
        wins, so a loop nested inside a same-named loop is not supported.)
        Raises RuntimeError when there is no end tag.
        '''
        self.tok_list = []
        name = '/%s' % self.text
        for tx,tok in enumerate(toks):
            if isinstance(tok,HtmlTemplateLoop) and (tok.text == name):
                self.tok_list = toks[0:tx]
                del toks[0:tx+1]
                self.__extract_alt_loops()
                return self.tok_list
        raise RuntimeError('Invalid Loop: No Loop End! [%s]' % name)

    def __extract_alt_loops (self):
        '''Split the body into the default body and the '#name:key' alternates.

        Each entry of self.alt_list starts with its alternate tag token,
        followed by that alternate's body tokens.
        '''
        self.alt_list = []
        name = '#%s' % self.text
        txs = [tx for tx,tok in enumerate(self.tok_list) if isinstance(tok,HtmlTemplateLoop) and (tok.text==name)]
        if txs:
            # Each alternate runs from its tag to the next tag (or body end).
            stops = txs[1:] + [len(self.tok_list)]
            self.alt_list = [self.tok_list[a:b] for a,b in zip(txs, stops)]
            self.tok_list = self.tok_list[0:txs[0]]
        if self.log:
            for ax,a in enumerate(self.alt_list):
                self.log.debug('Alt-Loop[%d] %s' % (ax, a[0].key_name))
                for xx,x in enumerate(a):
                    self.log.debug('Token[%d] %s' % (xx,x))

    def __push_context (self, data_bag):
        '''Look up the loop group and enter one more nesting level.'''
        self.group = data_bag[self.text]
        # A bag without a 'level' entry is treated as being outside any loop.
        self.level = 1 + data_bag.get('level', 0)
        data_bag['level'] = self.level

    def __pop_context (self, data_bag):
        '''Leave this loop's nesting level.'''
        data_bag['level'] = self.level - 1
class HtmlTemplate (object):
    '''An HTML template file, loaded and split into tokens on construction.

    emit() merges the tokens with a data bag and writes the resulting HTML.
    '''

    def __init__ (self, filename, log=None):
        '''Load and tokenize the template file named filename.'''
        self.log = log
        self.tokens = []
        self.load(filename)
        self.tokenize()

    def __len__ (self):
        '''Length of the loaded template text.'''
        return len(self.html)

    def __str__ (self):
        # load() stores the template name as 'iname'; no 'filename' attribute exists.
        return '%s (%d bytes)' % (self.iname, len(self))

    def tokenize (self):
        '''Split the loaded template into text, field and loop tokens.

        The result replaces self.tokens.  Raises SyntaxError for a tag
        without an end marker or with a malformed or empty spec.
        '''
        if self.log: self.log.trace('HtmlTemplate::tokenize:')
        self.tokens = []
        txt = str(self.html)
        end = len(txt)
        # Scan with an offset instead of re-slicing the remaining text for
        # every token, which copied the rest of the template each time.
        pos = 0
        while pos < end:
            if txt.startswith(StartMarker, pos):
                spec_start = pos + len(StartMarker)
                cx = txt.find(EndMarker, spec_start)
                if cx < 0:
                    more = '...' if 20 < (end - pos) else ''
                    raise SyntaxError('Bad Field; No End Marker: "%s%s"' % (txt[pos:pos+20], more))
                self.tokens.append(self.__make_tag_token(txt[spec_start:cx]))
                pos = cx + len(EndMarker)
                continue
            cx = txt.find(StartMarker, pos)
            if cx < 0:
                cx = end
                if self.log:
                    self.log.trace('HtmlTemplate::tokenize: Template Text[rest]')
            elif self.log:
                self.log.trace('HtmlTemplate::tokenize: Template Text[0:%d]' % (cx - pos))
            self.tokens.append(HtmlTemplateToken(txt[pos:cx], log=self.log))
            pos = cx

    def __make_tag_token (self, fld):
        '''Build the field or loop token for the tag spec fld.

        fld is the text between the markers, e.g. '(name)' or '[name]'.
        Raises SyntaxError when the spec is malformed or its name is empty.
        '''
        tok = None
        if fld.startswith('('):
            if not fld.endswith(')'):
                raise SyntaxError('Invalid Field Name: "%s"' % fld)
            if self.log:
                self.log.trace('HtmlTemplate::tokenize: Template Field: %s' % fld)
            tok = HtmlTemplateField(fld[1:-1], log=self.log)
        elif fld.startswith('['):
            if not fld.endswith(']'):
                raise SyntaxError('Invalid Loop Name: "%s"' % fld)
            if self.log:
                self.log.trace('HtmlTemplate::tokenize: Template Loop: %s' % fld)
            tok = HtmlTemplateLoop(fld[1:-1], log=self.log)
        # Tokens report the length of their text, so a tag with an empty name
        # is rejected here together with tags of an unknown kind.
        if tok is None or len(tok) == 0:
            raise SyntaxError('Invalid Field Spec: "%s"' % fld)
        return tok

    def emit (self, filename, data_bag=None):
        '''Merge the tokens with data_bag and write the HTML to filename.

        data_bag defaults to a fresh empty bag.  Loop tokens store their
        working state in the bag while they run.  Each token is handed the
        list of tokens still pending so that a loop can claim its own body.
        '''
        if data_bag is None:
            data_bag = {}
        self.oname = filename
        with open(self.oname, 'w') as fp:
            toks = list(self.tokens)
            while toks:
                tok = toks.pop(0)
                tok(fp, data_bag, toks)
        if self.log: self.log.debug('wrote: %s' % self.oname)

    def load (self, filename):
        '''Read the template file named filename into self.html.'''
        self.iname = filename
        with open(self.iname, 'r') as fp:
            self.html = fp.read()
        if self.log: self.log.debug('read: %s' % self.iname)
class XmlHandlerDataBag (XmlHandlerMain):
    '''SAX handler that builds a data bag from <bag>, <group> and <datum> elements.

    A <bag> becomes a dict, a <group> becomes a list and a <datum> becomes a
    name/value entry of the enclosing bag.  self.bag is the root bag;
    self.ctx is the stack of containers currently open, innermost last.
    Attributes are read with get_attribute, which is not defined in this
    class (presumably inherited from XmlHandlerMain).
    '''

    def __init__ (self, name, fout, log=None):
        XmlHandlerMain.__init__(self, name, fout, log)
        self.bag = {}
        self.ctx = [self.bag]

    def startElement (self, name, attrs):
        '''Open a bag or group, or record a datum, in the current container.

        Raises SyntaxError for a <datum> directly inside a <group> and for
        an unnamed <bag> or <group> directly inside a bag.
        '''
        cur_ctx = self.ctx[-1]
        if name == 'bag':
            bag_name = self.get_attribute(attrs, 'name')
            self.__open_container(cur_ctx, 'bag', bag_name, {'_name':bag_name})
        elif name == 'group':
            grp_name = self.get_attribute(attrs, 'name')
            self.__open_container(cur_ctx, 'group', grp_name, [])
        elif name == 'datum':
            dat_name = self.get_attribute(attrs, 'name')
            dat_value = self.get_attribute(attrs, 'value')
            if isinstance(cur_ctx,list):
                raise SyntaxError('<datum %s %s> not allowed in <group>!' % (dat_name,dat_value))
            cur_ctx[dat_name] = dat_value
        XmlHandlerMain.startElement(self, name, attrs)

    def __open_container (self, cur_ctx, tag, child_name, child):
        '''Attach child (a new bag or group) to cur_ctx and make it current.

        Inside a group the child is appended; inside a bag it is stored
        under child_name, which is then required.
        '''
        if isinstance(cur_ctx,list):
            cur_ctx.append(child)
        else:
            if not child_name:
                # The root bag has no '_name' entry, hence get().
                raise SyntaxError('<%s ?> in <bag %s> requires a name!' % (tag, cur_ctx.get('_name')))
            cur_ctx[child_name] = child
        self.ctx.append(child)

    def endElement (self, name):
        '''Close the innermost bag or group; a datum leaves nothing open.'''
        XmlHandlerMain.endElement(self, name)
        if name in ('bag', 'group'):
            self.ctx.pop()
def xml_to_data_bag (xml_data_file, log=None):
    '''Parse an XML data file into a data bag for HtmlTemplate.emit.

    The bag starts with 'last-modified' and 'DCR' time stamps (current local
    time) and 'level' set to 0.  The contents of every top-level bag of the
    XML file are merged into it; a top-level group or datum is stored under
    its own name.  Parsed entries override the initial ones on a name clash.
    '''
    dt = datetime.now()
    hndlr = XmlHandlerDataBag('root', stdout, log=log)
    parse_xml_file(xml_data_file, hndlr)
    data = { 'last-modified':dt.strftime('%a %b %d %H:%M:%S %Y')
           , 'DCR':dt.strftime('%m-%d-%Y %H:%M')
           , 'level':0
           }
    for name in hndlr.bag:
        item = hndlr.bag[name]
        if isinstance(item, dict):
            # Flatten a top-level bag into the data bag.
            data.update(item)
        else:
            # A group (list) or datum value cannot be flattened; keep it by name.
            data[name] = item
    return data
def template_to_html (in_file, xml_data_file, out_file, dbag=None, log=None):
    '''Generate an HTML output file from an input HTML template plus a bag of data.

    in_file is the template and out_file the HTML file to write.  The data
    comes from dbag when it is given and non-empty; otherwise it is parsed
    from xml_data_file.  log, when given, is passed to both the XML parsing
    and the template processing.
    '''
    data_bag = dbag if dbag else xml_to_data_bag(xml_data_file, log=log)
    tmplt = HtmlTemplate(in_file, log=log)
    tmplt.emit(out_file, data_bag)
'''eof'''