# filelist.py
'''\
Python File List classes and functions.
functions:
ListFiles (folder) - Returns a 'pathname/filename' list of strings. {not recursive}
GetFileList (folders) - Returns a 'pathname/filename' list of strings. {recursive}
ListFolder (folder) - Returns a (pathname,[filenames]) list of tuples. {recursive}
ListFolders (folders) - Returns a (pathname,[filenames]) list of tuples. {recursive}
classes:
fileobj - File Object.
filelist - File List.
filefinder - File Finder.
filefilter - File Filter.
Developer@Sonnack.com
November 2013
'''
from sys import stdout, stderr, argv
from os import listdir,path
from datetime import datetime, timedelta
from time import gmtime, localtime
from re import compile as RegEx, IGNORECASE
from env import plural, add_commas
# Stream that ListFiles and ListFolder write their '[D: ...]' / '[?: ...]' trace lines to.
Log = stderr
class fileobj (object):
    '''File Object class.

    Takes a snapshot of a path's name parts and file system metadata when
    the object is constructed; nothing is refreshed afterwards.

    properties:
        fullname - string   : Full path/name of file (as passed in).
        pathname - string   : File path (only).
        filename - string   : File name (only).
        name     - string   : File name (only - no path, no extension).
        ext      - string   : File extension (without the leading dot).
        fsize    - int      : File size (0 unless the path is a regular file).
        created  - datetime : path.getctime() as local time (None unless a regular file).
        updated  - datetime : path.getmtime() as local time (None unless a regular file).
        xflag    - boolean  : File exists flag (also readable as 'exists').
        fflag    - boolean  : File is a file flag (also readable as 'isfile').
        dflag    - boolean  : File is a directory flag (also readable as 'isdir').
    methods:
        get_text  - returns file contents (as a single string)
        get_lines - returns file contents (as a list of lines)
    '''

    def __init__ (self, pathfilename):
        '''Split {pathfilename} into its parts and record its current metadata.'''
        self.fullname = pathfilename
        self.pathname, self.filename = path.split(self.fullname)
        # Split the bare file name (not the full path) so that 'name' really
        # is "file name only", as the class documentation promises.
        stem, dotted_ext = path.splitext(self.filename)
        self.name = stem
        self.ext = dotted_ext[1:]
        self.fsize = 0
        self.created = None
        self.updated = None
        self.xflag = path.exists(self.fullname)
        # Both flags are always defined, so isfile/isdir/repr() also work for
        # a path that does not exist.
        self.dflag = self.xflag and path.isdir(self.fullname)
        self.fflag = self.xflag and path.isfile(self.fullname)
        if self.fflag:
            self.fsize = path.getsize(self.fullname)
            self.created = self._local_datetime(path.getctime(self.fullname))
            self.updated = self._local_datetime(path.getmtime(self.fullname))

    @staticmethod
    def _local_datetime (seconds):
        '''Convert epoch {seconds} to a naive local-time datetime (whole seconds).'''
        return datetime(*localtime(seconds)[:6])

    @property
    def exists (self):
        '''Alias for xflag.'''
        return self.xflag

    @property
    def isfile (self):
        '''Alias for fflag.'''
        return self.fflag

    @property
    def isdir (self):
        '''Alias for dflag.'''
        return self.dflag

    def get_text (self):
        '''Read and return file contents.'''
        with open(self.fullname, 'r') as fp:
            return fp.read()

    def get_lines (self):
        '''Read and return file contents (as a list of lines).'''
        with open(self.fullname, 'r') as fp:
            return fp.readlines()

    def _sort_key (self):
        '''Return the tuple used for ordering: fullname, then created, then updated.'''
        # Each timestamp is paired with an "is set" flag so that a missing
        # (None) timestamp sorts first and is never compared to a datetime.
        return (self.fullname,
                (self.created is not None, self.created),
                (self.updated is not None, self.updated))

    def __len__ (self):
        '''Return the file size.'''
        return self.fsize

    def __cmp__ (self, other):
        '''Compare by fullname, then created, then updated. Returns -1, 0 or 1.'''
        mine = self._sort_key()
        theirs = other._sort_key()
        return (mine > theirs) - (mine < theirs)

    def __str__ (self):
        '''Short form: file name, size and created date.'''
        s = '%s (%d byte%s) [%s]'
        t = (self.filename, self.fsize, plural(self.fsize), self.created)
        return s % t

    def __repr__ (self):
        '''JSON-like string.'''
        s = '{file:{name:"%s", fflag:%s, dflag:%s, xflag:%s, size:%d, created:/%s/, updated:/%s/}}'
        t = (self.fullname, self.fflag, self.dflag, self.xflag, self.fsize, self.created, self.updated)
        return s % t
class filefinder (object):
    '''File Finder class.

    Walks {folder} recursively and collects the full names of the files
    found. Four filefilter objects (item/folder, include/exclude) narrow the
    result; a filter with no patterns is ignored. Filters are matched against
    the joined path (folder + name), not the bare name.
    '''

    def __init__ (self, folder):
        '''Remember the start {folder}; nothing is scanned until scan() is called.'''
        self.folder = folder
        self.filenames = []
        self.item_include = filefilter()
        self.item_exclude = filefilter()
        self.folder_include = filefilter()
        self.folder_exclude = filefilter()

    def scan (self):
        '''Scan for files. Results are appended to self.filenames.'''
        self._scan(self.folder)

    def add_include_item (self, item):
        '''Item names in this list are (only) ones used for the list.'''
        self.item_include.add_item(item)

    def add_exclude_item (self, item):
        '''Item names in this list are excluded from the list.'''
        self.item_exclude.add_item(item)

    def add_include_folder (self, folder):
        '''Folder names in this list are (only) ones used for the list.'''
        self.folder_include.add_item(folder)

    def add_exclude_folder (self, folder):
        '''Folder names in this list are excluded from the list.'''
        self.folder_exclude.add_item(folder)

    def __str__ (self):
        '''String version: the start folder.'''
        return self.folder

    def __len__ (self):
        '''Return the number of files found so far.'''
        return len(self.filenames)

    def __getitem__ (self, ix):
        '''Return the Nth file name. Raises IndexError outside 0..len-1.'''
        if (ix < 0) or (len(self.filenames) <= ix):
            raise IndexError("Index out of range: %d" % ix)
        return self.filenames[ix]

    def __iter__ (self):
        '''Iterate over the file names found so far.'''
        return iter(self.filenames)

    def _scan (self, folder):
        '''Scan (recursive).'''
        names = listdir(folder)
        # Kept as an instance attribute because the original exposed it; it
        # holds the listing of whichever folder was entered most recently.
        self.ns = names
        # A real list: it is walked twice below (files, then folders).
        fullnames = [path.join(folder, n) for n in names]

        files = [fn for fn in fullnames if path.isfile(fn)]
        if len(self.item_include):
            files = self.item_include.include_names(files)
        if len(self.item_exclude):
            files = self.item_exclude.exclude_names(files)
        self.filenames.extend(files)

        subfolders = [fn for fn in fullnames if path.isdir(fn)]
        if len(self.folder_include):
            subfolders = self.folder_include.include_names(subfolders)
        if len(self.folder_exclude):
            subfolders = self.folder_exclude.exclude_names(subfolders)
        for sub in sorted(subfolders):
            self._scan(sub)
class filefilter (object):
    '''File Filter class.

    Holds a list of compiled, case-insensitive regular expressions. A name
    "passes" when at least one pattern is found anywhere in it (search, not
    full match).
    '''

    def __init__ (self, items=()):
        '''Create a filter, optionally pre-loaded with pattern strings {items}.'''
        self.items = []
        self.add_items(items)

    def include_names (self, names):
        '''Return the list of {names} matched by at least one pattern.'''
        return [n for n in names if self.test_name(n)]

    def exclude_names (self, names):
        '''Return the list of {names} matched by no pattern.'''
        return [n for n in names if not self.test_name(n)]

    def test_name (self, name):
        '''Return True if any pattern is found in {name}.'''
        return any(rx.search(name) for rx in self.items)

    def add_items (self, items):
        '''Compile and add every pattern string in {items}.'''
        for ii in items:
            self.add_item(ii)

    def add_item (self, item):
        '''Compile pattern string {item} (ignoring case) and add it.'''
        self.items.append(RegEx(item, IGNORECASE))

    def __str__ (self):
        '''String version: the pattern sources, one per line.'''
        return ''.join(rx.pattern + '\n' for rx in self.items)

    def __len__ (self):
        '''Return the number of patterns.'''
        return len(self.items)

    def __getitem__ (self, ix):
        '''Return the Nth compiled pattern. Raises IndexError outside 0..len-1.'''
        if (ix < 0) or (len(self.items) <= ix):
            raise IndexError("Index out of range: %d" % ix)
        return self.items[ix]

    def __iter__ (self):
        '''Iterate over the compiled patterns.'''
        return iter(self.items)
class filelist (object):
    '''File List class.

    Wraps the (pathname, [filenames]) records that ListFolder returns for
    {folder}. Note that len() counts files while indexing returns folder
    records.
    '''

    def __init__ (self, folder):
        '''Build the record list for {folder} (recursive, via ListFolder).'''
        self.folder = folder
        self.flist = ListFolder(folder)

    def prune (self, function):
        '''Apply {function} as a filter to all filenames.'''
        for ix, (pathname, names) in enumerate(self.flist):
            # list(): keep a real list so len() and repeated reads keep working.
            self.flist[ix] = (pathname, list(filter(function, names)))

    def execute (self, function):
        '''Apply {function} to all full filenames; return list of results.'''
        return [function(f) for f in self.get_filelist()]

    def get_filelist (self):
        '''Return a flat list of full filenames.'''
        file_list = []
        for pathname, names in self.flist:
            file_list.extend(path.join(pathname, n) for n in names)
        return file_list

    def get_pathlist (self):
        '''Return a flat list of pathnames.'''
        return [rcd[0] for rcd in self.flist]

    def __str__ (self):
        '''String version: just the base folder name.'''
        return self.folder

    def __repr__ (self):
        '''JSON-like string.'''
        t = (self.folder, len(self.flist), len(self))
        s = '{filelist:{path:"%s", folders:%d, files:%d}}'
        return s % t

    def __cmp__ (self, other):
        '''Compare: just uses the base folder name. Returns -1, 0 or 1.'''
        return (self.folder > other.folder) - (self.folder < other.folder)

    def __len__ (self):
        '''Return number of files. NOTE: This is in contrast to the index method!'''
        return sum(len(rcd[1]) for rcd in self.flist)

    def __getitem__ (self, ix):
        '''Return Nth record. (TODO: should probably index filenames)

        Raises TypeError for a negative index and IndexError past the end.
        '''
        if ix < 0:
            raise TypeError("Invalid Index: %s" % str(ix))
        if len(self.flist) <= ix:
            raise IndexError("Index out of range: %d" % ix)
        return self.flist[ix]
def ListFiles (folder):
    '''List Folder (not recursive). Returns a "pathname/filename" list of strings.

    Only regular files are returned, sorted by their joined name. A
    '[D: folder]' trace line is written to Log first.
    '''
    # Log.write emits the same text as "print >> Log" and is valid on Python 2 and 3.
    Log.write('[D: %s]\n' % folder)
    fullnames = [path.join(folder, n) for n in listdir(folder)]
    # sorted() accepts any iterable, so this does not depend on filter()
    # returning a list.
    return sorted(fn for fn in fullnames if path.isfile(fn))
def ListFolder (folder):
    '''Returns a (pathname,[filenames]) list of tuples. RECURSIVE!

    The first record is {folder} itself with its sorted file names; records
    for each sub-folder follow, visited in sorted order. Entries that are
    neither file nor folder are reported to Log as '[?: name]' and skipped.
    '''
    # Log.write emits the same text as "print >> Log" and is valid on Python 2 and 3.
    Log.write('[D: %s]\n' % folder)
    filenames = []
    subfolders = []
    for entry in listdir(folder):
        fullname = path.join(folder, entry)
        if path.isfile(fullname):
            filenames.append(entry)        # bare name; the path lives in the record
        elif path.isdir(fullname):
            subfolders.append(fullname)    # full name; needed for the recursion
        else:
            Log.write('[?: %s]\n' % fullname)
    folder_list = [(folder, sorted(filenames))]
    for sub in sorted(subfolders):
        folder_list.extend(ListFolder(sub))
    return folder_list
def ListFolders (folders):
    '''Return a (pathname,[filenames]) list of tuples for several start folders.

    Each entry of {folders} is expanded with the (recursive) ListFolder and
    the resulting records are concatenated in the order given.
    '''
    return [record for start in folders for record in ListFolder(start)]
def GetFileList (folders):
    '''Return a flat list of "pathname/filename" strings for {folders}.

    Built from the (pathname,[filenames]) records of the (recursive)
    ListFolders by joining every file name onto its record's pathname.
    '''
    joined = []
    for record in ListFolders(folders):
        pathname = record[0]
        for filename in record[1]:
            joined.append(path.join(pathname, filename))
    return joined
def do_test (*args):
    '''Command "test": list the files of one folder (not recursive).

    args[0], when given, is the folder; otherwise a built-in default path is
    used. Prints a count and one line per file, then returns the ListFiles
    result.
    '''
    # Single-argument print(...) gives identical output on Python 2 and 3;
    # print('') stands in for the bare "print" blank line.
    print('test/parameters: %d' % len(args))
    pathname = args[0] if 0 < len(args) else r'C:\Users\Chris\Pictures\lib\Cartoons'
    flist = ListFiles(pathname)
    print('')
    print('Total Files: %d' % len(flist))
    print('')
    for f in flist:
        print('File: %s' % f)
    print('')
    return flist
def do_demo (*args):
    '''Command "demo": summarise up to three folder trees.

    args[0..2], when given, replace the three built-in default paths. Prints
    folder/file totals and one line per folder, then returns the ListFolders
    result.
    '''
    # Single-argument print(...) gives identical output on Python 2 and 3;
    # print('') stands in for the bare "print" blank line.
    print('demo/parameters: %d' % len(args))
    pn1 = args[0] if 0 < len(args) else r'C:\CJS\prj\Python\lib'
    pn2 = args[1] if 1 < len(args) else r'C:\CJS\prj\Python\app'
    pn3 = args[2] if 2 < len(args) else r'C:\CJS\prj\Python\ws'
    flist = ListFolders([pn1, pn2, pn3])
    print('')
    print('Total Dirs: %d' % len(flist))
    print('Total Files: %d' % sum(len(t[1]) for t in flist))
    print('')
    for t in flist:
        print('Folder: %s (%d files)' % (t[0], len(t[1])))
    print('')
    return flist
def do_main (*args):
    '''Command "main": list every file under the folders given in {args}.

    Prints folder/file totals, then each folder followed by the full name of
    each of its files. Returns the ListFolders result.
    '''
    # Single-argument print(...) gives identical output on Python 2 and 3;
    # print('') stands in for the bare "print" blank line.
    print('main/parameters: %d' % len(args))
    flist = ListFolders(args)
    print('')
    print('Total Dirs: %d' % len(flist))
    print('Total Files: %d' % sum(len(t[1]) for t in flist))
    print('')
    for t in flist:
        print('Folder: %s (%d files)' % (t[0], len(t[1])))
        for f in t[1]:
            print('. File: %s' % path.join(t[0], f))
    print('')
    return flist
def dispatch (cmd, *args):
    '''Run the command named {cmd} ('test', 'demo' or 'main') with {args}.

    Returns whatever the command returns. For any other {cmd} nothing is run
    and the list [None, cmd, args] is returned instead.
    '''
    # Single-argument print(...) gives identical output on Python 2 and 3.
    print('command: %s' % cmd)
    print('arguments: %d' % len(args))
    if cmd == 'test': return do_test(*args)
    if cmd == 'demo': return do_demo(*args)
    if cmd == 'main': return do_main(*args)
    return [None, cmd, args]
if __name__ == '__main__':
    # Script entry: argv[1] selects the command (empty when absent) and the
    # remaining arguments are passed through to it.
    # Single-argument print(...) gives identical output on Python 2 and 3.
    print('autorun: %s' % argv[0])
    cmd = argv[1] if 1 < len(argv) else ''
    etc = argv[2:]
    obj = dispatch(cmd, *etc)
    print('exit/type: %s (length: %d)' % (type(obj), len(obj)))
'''eof'''