# xml_grep.py
'''\
Python GREP implementation.
classes:
GrepConfig (object)
XmlHandlerCfgGrep (XmlHandlerConfigBase)
usage:
cfg_handler = XmlHandlerCfgGrep(filename, stdout, Log)
xml_sax.parse_xml_file(filename, cfg_handler)
cfg = cfg_handler[config_name]
if cfg:
cfg.execute()
Developer@Sonnack.com
November 2016
'''
from sys import stdin, stdout, stderr, argv
from datetime import date, datetime, timedelta
from os import path, listdir
import re
from xml_sax import XmlHandlerConfigBase
class GrepConfig (object):
    '''GREP Configuration object.

    Holds one named search configuration: a regular expression, the
    directories to scan, and optional file-type / file-name / path
    filters.  Call execute() to compile the patterns and run the scan;
    matching lines are written to the output stream given at construction.

    The implementation sticks to constructs that behave the same on
    Python 2 and Python 3 (stream .write() calls, list comprehensions).
    '''

    def __init__(self, name, fout, log=None):
        '''Create an empty configuration.

        name -- configuration name (returned by str()).
        fout -- writable stream for match output; if None, the module's
                stdout is used when matches are written.
        log  -- optional logger object providing trace/debug/info/error.
        '''
        self.name = name
        self.out = fout
        self.log = log
        # Raw settings, filled in by whoever builds the configuration.
        self.search_expression = None
        self.recurse_folders = False
        self.file_paths = []
        self.file_types = []
        self.file_includes = []
        self.file_excludes = []
        self.path_includes = []
        self.path_excludes = []
        # Compiled forms of the settings above; (re)built by execute().
        self.search_object = None
        self.file_include_filter = []
        self.file_exclude_filter = []
        self.path_include_filter = []
        self.path_exclude_filter = []
        # When set, scanFile() logs the file name but does not read it.
        self.no_scan_flag = False

    def __str__(self):
        '''Return the configuration name.'''
        return self.name

    def __call__(self):
        '''Return the raw (uncompiled) search expression.'''
        return self.search_expression

    @staticmethod
    def _compile_filters(patterns):
        '''Compile each pattern string as a case-insensitive regex.'''
        return [re.compile(pattern, re.IGNORECASE) for pattern in patterns]

    @staticmethod
    def _matches_any(filters, text):
        '''Return True if at least one compiled filter matches text.'''
        for flt in filters:
            if flt.search(text):
                return True
        return False

    def scanFile(self, filename):
        '''Scan one file and write every matching line to the output.

        Each line is stripped of surrounding whitespace before it is
        searched.  The file name is written once, before the first match,
        and a blank line is written after the last match.

        Returns the number of matching lines (0 when no_scan_flag is set).
        Raises whatever reading/searching raises; such errors are first
        written to stderr and to the log.  Errors raised by open() itself
        propagate without being reported here.
        '''
        if self.log: self.log.debug('File: %s' % filename)
        if self.no_scan_flag:
            return 0
        # The original "print >> None" form falls back to standard output.
        out = self.out if self.out is not None else stdout
        n_matches = 0
        with open(filename, 'r') as fp:
            try:
                for ln, raw in enumerate(fp, 1):
                    txt = raw.strip()
                    if not self.search_object.search(txt):
                        continue
                    if n_matches == 0:
                        # First hit in this file: emit the header line.
                        out.write('%s\n' % filename)
                    out.write('[%d] %s\n' % (ln, txt))
                    n_matches += 1
                if n_matches:
                    out.write('\n')
            except Exception as e:
                stderr.write('%s\n' % e)
                if self.log: self.log.error(e)
                raise
        return n_matches

    def scanDirectory(self, pathname):
        '''Scan the files of a directory, optionally recursing.

        Files are narrowed by file_types (name suffix), then by the
        compiled exclude filters, then by the compiled include filters;
        the survivors are scanned in sorted order.  When recurse_folders
        is set, sub-directories (filtered on their joined path by the
        path exclude/include filters) are scanned the same way.

        Returns the total number of matching lines found.
        '''
        log = self.log
        if log: log.debug('Directory: %s' % pathname)
        t_matches = 0
        entries = listdir(pathname)
        if not entries:
            if log: log.trace('No files found!')
            return t_matches
        if log: log.trace('Directory entries: %d' % len(entries))

        # Real lists (not filter objects) so len() also works on Python 3.
        fnames = [f for f in entries if path.isfile(path.join(pathname, f))]
        if log: log.trace('> files-found: %d' % len(fnames))
        if self.file_types:
            fnames = [fn for fn in fnames
                      if any(fn.endswith(ft) for ft in self.file_types)]
            if log: log.trace('> files-after-ftype: %d' % len(fnames))
        if self.file_exclude_filter:
            fnames = [fn for fn in fnames
                      if not self._matches_any(self.file_exclude_filter, fn)]
            if log: log.trace('> files-after-exclude: %d' % len(fnames))
        if self.file_include_filter:
            fnames = [fn for fn in fnames
                      if self._matches_any(self.file_include_filter, fn)]
            if log: log.trace('> files-after-include: %d' % len(fnames))

        if fnames:
            if log: log.trace('> files-to-scan: %d' % len(fnames))
            for fn in sorted(fnames):
                t_matches += self.scanFile(path.join(pathname, fn))
        else:
            if log: log.trace('No files to Scan!')

        if self.recurse_folders:
            # Directory filters see the joined path, not the bare name.
            candidates = [path.join(pathname, f) for f in entries]
            dnames = [d for d in candidates if path.isdir(d)]
            if log: log.trace('> subdirs-found: %d' % len(dnames))
            if self.path_exclude_filter:
                dnames = [d for d in dnames
                          if not self._matches_any(self.path_exclude_filter, d)]
                if log: log.trace('> subdirs-after-exclude: %d' % len(dnames))
            if self.path_include_filter:
                dnames = [d for d in dnames
                          if self._matches_any(self.path_include_filter, d)]
                if log: log.trace('> subdirs-after-include: %d' % len(dnames))
            if dnames:
                if log: log.trace('> subdirs-to-recurse: %d' % len(dnames))
                for subdirname in sorted(dnames):
                    t_matches += self.scanDirectory(subdirname)
        return t_matches

    def execute(self):
        '''Compile the configured patterns and scan every file path.

        The compiled filter lists are rebuilt from the pattern lists on
        each call, so running the same configuration again (possibly
        after editing its patterns) never keeps stale or duplicate
        filters from an earlier run.

        Raises whatever re.compile raises for an invalid or missing
        search expression or filter pattern.
        '''
        log = self.log
        if log:
            log.trace('search: /%s/' % self.search_expression)
            for fpath in self.file_paths:
                log.trace('directory: %s' % fpath)
            log.trace('recurse? %s' % self.recurse_folders)

        self.search_object = re.compile(self.search_expression)
        if log:
            log.trace('search-pattern: /%s/' % self.search_object.pattern)

        self.file_include_filter = self._compile_filters(self.file_includes)
        self.file_exclude_filter = self._compile_filters(self.file_excludes)
        self.path_include_filter = self._compile_filters(self.path_includes)
        self.path_exclude_filter = self._compile_filters(self.path_excludes)
        if log:
            for flt in self.file_include_filter:
                log.trace('file-include-filter: %s' % flt.pattern)
            for flt in self.file_exclude_filter:
                log.trace('file-exclude-filter: %s' % flt.pattern)
            for flt in self.path_include_filter:
                log.trace('path-include-filter: %s' % flt.pattern)
            for flt in self.path_exclude_filter:
                log.trace('path-exclude-filter: %s' % flt.pattern)

        tot_n = 0
        for fpath in self.file_paths:
            n = self.scanDirectory(fpath)
            if log:
                log.info('MATCHES: %d' % n)
            tot_n += n
        if log:
            log.info('TOTAL-MATCHES: %d' % tot_n)
class XmlHandlerCfgGrep (XmlHandlerConfigBase):
    '''XML SAX Handler for GREP Configuration files.

    Drives a small state machine over self.state.  State 100 is the state
    in which the child elements of a configuration are recognised; each
    child moves to its own state (101-108) and each list item inside a
    container moves to the container's state + 10 (113-118).

    NOTE(review): the source this was restored from had lost its
    indentation.  For states 103-118 the handler is assumed to return only
    when the element name matches, so unrecognised elements are passed on
    to XmlHandlerConfigBase -- confirm against the base class contract.
    '''

    # Attribute values of <RecurseFolders value="..."> that mean "true".
    # (A missing comma previously fused 't' and 'True' into 'tTrue', so
    # neither spelling was accepted; 'on' now pairs with 'On' like the rest.)
    _TRUE_VALUES = frozenset([
        '1',
        'T', 't',
        'True', 'true',
        'Y', 'y',
        'Yes', 'yes',
        'Yep', 'yep',
        'On', 'on',
        'Enable', 'enable',
        'GO FOR IT', 'go for it',
    ])

    # Element name -> state entered when that element starts in state 100.
    _SECTION_STATES = {
        'SearchExpression': 101,
        'RecurseFolders': 102,
        'FilePaths': 103,
        'FileTypes': 104,
        'FileIncludePatterns': 105,
        'FileExcludePatterns': 106,
        'PathIncludePatterns': 107,
        'PathExcludePatterns': 108,
    }

    # Container state -> (item element, item state,
    #                     config list attribute, XML attribute to read).
    _ITEM_RULES = {
        103: ('FilePath', 113, 'file_paths', 'folder'),
        104: ('FileType', 114, 'file_types', 'ext'),
        105: ('FileIncludePattern', 115, 'file_includes', 'match'),
        106: ('FileExcludePattern', 116, 'file_excludes', 'match'),
        107: ('PathIncludePattern', 117, 'path_includes', 'match'),
        108: ('PathExcludePattern', 118, 'path_excludes', 'match'),
    }

    # Current state -> (closing element name, state to return to).
    _END_TRANSITIONS = {
        101: ('SearchExpression', 100),
        102: ('RecurseFolders', 100),
        103: ('FilePaths', 100),
        104: ('FileTypes', 100),
        105: ('FileIncludePatterns', 100),
        106: ('FileExcludePatterns', 100),
        107: ('PathIncludePatterns', 100),
        108: ('PathExcludePatterns', 100),
        113: ('FilePath', 103),
        114: ('FileType', 104),
        115: ('FileIncludePattern', 105),
        116: ('FileExcludePattern', 106),
        117: ('PathIncludePattern', 107),
        118: ('PathExcludePattern', 108),
    }

    def __init__(self, name, fout, log=None):
        '''Pass name, output stream and optional log to the base handler.'''
        XmlHandlerConfigBase.__init__(self, name, fout, log)

    def new_config_object(self, name):
        '''Create a new Config object.'''
        return GrepConfig(name, self.out, self.log)

    def print_configuration(self):
        '''Print the current Config object.

        Writes nothing unless an output stream is set and the handler is
        in state 0.
        '''
        if not (self.out and (self.state == 0)):
            return
        cfg = self.curr_config
        write = self.out.write
        write('Configuration: "%s"\n' % cfg)
        write('. Search String: "%s"\n' % cfg.search_expression)
        write('. Do Sub-Dirs: "%s"\n' % cfg.recurse_folders)
        for s in cfg.file_paths:
            write('. dir "%s"\n' % s)
        for s in cfg.file_types:
            write('. ext "%s"\n' % s)
        for s in cfg.file_includes:
            write('. fin "%s"\n' % s)
        for s in cfg.file_excludes:
            write('. fex "%s"\n' % s)
        for s in cfg.path_includes:
            write('. din "%s"\n' % s)
        for s in cfg.path_excludes:
            write('. dex "%s"\n' % s)
        write('\n')

    def localStart(self, name, attrs):
        '''Handle an element start.

        In state 100 a recognised child element switches to its own state
        (RecurseFolders also reads its "value" attribute); any other
        element is ignored.  In a container state (103-108) the matching
        item element appends its attribute to the corresponding list of
        the current config.  Everything else goes to the base class.
        '''
        if self.state == 100:
            section_state = self._SECTION_STATES.get(name)
            if section_state is not None:
                self.state = section_state
                if name == 'RecurseFolders':
                    value = self.get_attribute(attrs, 'value')
                    self.curr_config.recurse_folders = value in self._TRUE_VALUES
            # Unknown children of a configuration are deliberately skipped.
            return

        rule = self._ITEM_RULES.get(self.state)
        if rule is not None:
            element, item_state, list_name, attr_name = rule
            if name == element:
                self.state = item_state
                target = getattr(self.curr_config, list_name)
                target.append(self.get_attribute(attrs, attr_name))
                return

        XmlHandlerConfigBase.localStart(self, name, attrs)

    def localEnd(self, name):
        '''Handle an element end.

        When the closing element is the one the current state is waiting
        for, step back to the enclosing state; closing SearchExpression
        also stores the collected text as the search expression.
        Everything else goes to the base class.
        '''
        leaving = self.state
        rule = self._END_TRANSITIONS.get(leaving)
        if rule is not None and name == rule[0]:
            self.state = rule[1]
            if leaving == 101:
                self.curr_config.search_expression = self.text
            return

        XmlHandlerConfigBase.localEnd(self, name)
'''eof'''