# filelist.py

'''\
Python File List classes and functions.

functions:
    ListFiles   (folder)    - Returns a 'pathname/filename' list of strings.   {not recursive}
    GetFileList (folders)   - Returns a 'pathname/filename' list of strings.   {recursive}

    ListFolder  (folder)    - Returns a (pathname,[filenames]) list of tuples. {recursive}
    ListFolders (folders)   - Returns a (pathname,[filenames]) list of tuples. {recursive}

classes:
    fileobj     - File Object.
    filelist    - File List.
    filefinder  - File Finder.
    filefilter  - File Filter.

Developer@Sonnack.com
November 2013
'''
####################################################################################################
from sys import stdout, stderr, argv
from os import listdir, path
from datetime import datetime, timedelta
from time import gmtime, localtime
from re import compile as RegEx, IGNORECASE
from env import plural, add_commas
####################################################################################################
#TODO: Add callback for found files.
####################################################################################################
# Stream that receives the diagnostic '[D: ...]' / '[?: ...]' messages written
# by ListFiles and ListFolder; bound to standard error.
Log = stderr

##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~##
class fileobj (object):
    '''File Object class.

    Captures the name parts and the basic file-system properties of one path
    at construction time. Nothing is refreshed afterwards.

    properties:
        fullname    - string    : Full path/name of file.
        pathname    - string    : File path (only).
        filename    - string    : File name (only).
        name        - string    : File name (only - no path, no extension).
        ext         - string    : File extension (without the leading dot).
        fsize       - int       : File size (0 unless a regular file).
        created     - datetime  : os.path.getctime as local time (None unless a regular file).
        updated     - datetime  : os.path.getmtime as local time (None unless a regular file).
        xflag       - boolean   : File exists flag          (read-only alias: exists)
        fflag       - boolean   : File is a file flag       (read-only alias: isfile)
        dflag       - boolean   : File is a directory flag  (read-only alias: isdir)

    methods:
        get_text    - returns file contents (as one string)
        get_lines   - returns file contents (as a list of lines)

    Objects order and compare by (fullname, created, updated); a missing
    timestamp (None) sorts before any real timestamp. len() is the file size.
    '''

    def __init__ (self, pathfilename):
        '''Parse {pathfilename} and, if it exists, read its properties.'''
        self.fullname = pathfilename
        # Parse the full name into parts...
        self.pathname, self.filename = path.split(self.fullname)
        # Split the bare file name (not the full path) so that 'name' really
        # is the name only, as documented.
        stem, dotted_ext = path.splitext(self.filename)
        self.name = stem
        self.ext = dotted_ext[1:]
        # Default file properties...
        self.fsize = 0
        self.created = None
        self.updated = None
        # The flags always exist, so the exists/isfile/isdir aliases are safe
        # to read even for a path that is not there.
        self.fflag = False
        self.dflag = False
        # If the file exists, get its properties...
        self.xflag = path.exists(self.fullname)
        if self.xflag:
            self.dflag = path.isdir(self.fullname)
            self.fflag = path.isfile(self.fullname)
            if self.fflag:
                self.fsize = path.getsize(self.fullname)
                self.created = self._timestamp(path.getctime(self.fullname))
                self.updated = self._timestamp(path.getmtime(self.fullname))

    @staticmethod
    def _timestamp (seconds):
        '''Convert seconds-since-epoch to a local datetime (whole seconds).'''
        return datetime(*localtime(seconds)[:6])

    @property
    def exists (self):
        '''Alias for xflag.'''
        return self.xflag

    @property
    def isfile (self):
        '''Alias for fflag.'''
        return self.fflag

    @property
    def isdir (self):
        '''Alias for dflag.'''
        return self.dflag

    def get_text (self):
        '''Read and return file contents. I/O errors propagate to the caller.'''
        with open(self.fullname, 'r') as fp:
            return fp.read()

    def get_lines (self):
        '''Read and return file contents (as a list of lines). I/O errors propagate.'''
        with open(self.fullname, 'r') as fp:
            return fp.readlines()

    def __len__ (self):
        '''Return the file size (so an empty or missing file is falsy).'''
        return self.fsize

    def _sort_key (self):
        '''Return the tuple that defines ordering and equality.'''
        def ranked (stamp):
            # Pair each timestamp with a "has value" flag so that None never
            # gets compared directly with a datetime.
            return (stamp is not None, stamp)
        return (self.fullname, ranked(self.created), ranked(self.updated))

    def __cmp__ (self, other):
        '''Three-way compare (only consulted by Python 2).'''
        mine, theirs = self._sort_key(), other._sort_key()
        return (mine > theirs) - (mine < theirs)

    def __eq__ (self, other):
        if not isinstance(other, fileobj): return NotImplemented
        return self._sort_key() == other._sort_key()

    def __ne__ (self, other):
        if not isinstance(other, fileobj): return NotImplemented
        return self._sort_key() != other._sort_key()

    def __lt__ (self, other):
        if not isinstance(other, fileobj): return NotImplemented
        return self._sort_key() < other._sort_key()

    def __le__ (self, other):
        if not isinstance(other, fileobj): return NotImplemented
        return self._sort_key() <= other._sort_key()

    def __gt__ (self, other):
        if not isinstance(other, fileobj): return NotImplemented
        return self._sort_key() > other._sort_key()

    def __ge__ (self, other):
        if not isinstance(other, fileobj): return NotImplemented
        return self._sort_key() >= other._sort_key()

    def __hash__ (self):
        # Defined together with __eq__ so that equal objects hash alike.
        return hash(self._sort_key())

    def __str__ (self):
        '''Short description: name, size and created date.'''
        # plural() comes from the env module; presumably it returns the
        # suffix that pluralizes "byte" for the given count - verify there.
        s = '%s (%d byte%s) [%s]'
        t = (self.filename, self.fsize, plural(self.fsize), self.created)
        return s % t

    def __repr__ (self):
        '''JSON-like string with every property.'''
        s = '{file:{name:"%s", fflag:%s, dflag:%s, xflag:%s, size:%d, created:/%s/, updated:/%s/}}'
        t = (self.fullname, self.fflag, self.dflag, self.xflag, self.fsize, self.created, self.updated)
        return s % t


##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~##
class filefinder (object):
    '''File Finder class.

    Recursively collects full file names below a base folder. Four
    filefilter objects control what is collected; an empty filter is
    ignored. Filters are tested against the joined folder/name path, not
    against the bare name.

    Found names accumulate in self.filenames; scan() does not clear the
    list, so scanning twice appends the results twice.
    '''

    def __init__ (self, folder):
        '''Remember the base folder and start with empty filters.'''
        self.folder = folder
        self.filenames = []
        self.item_include = filefilter()
        self.item_exclude = filefilter()
        self.folder_include = filefilter()
        self.folder_exclude = filefilter()

    def scan (self):
        '''Scan for files.'''
        self._scan(self.folder)

    def add_include_item (self, item):
        '''Item names in this list are (only) ones used for the list.'''
        self.item_include.add_item(item)

    def add_exclude_item (self, item):
        '''Item names in this list are excluded from the list.'''
        self.item_exclude.add_item(item)

    def add_include_folder (self, folder):
        '''Folder names in this list are (only) ones used for the list.'''
        self.folder_include.add_item(folder)

    def add_exclude_folder (self, folder):
        '''Folder names in this list are excluded from the list.'''
        self.folder_exclude.add_item(folder)

    def __str__ (self):
        '''String version: just the base folder name.'''
        return self.folder

    def __len__ (self):
        '''Return the number of files found so far.'''
        return len(self.filenames)

    def __getitem__ (self, ix):
        '''Return the Nth found file name. Negative indexes are rejected.'''
        if ix < 0 or len(self.filenames) <= ix:
            raise IndexError("Index out of range: %d" % ix)
        return self.filenames[ix]

    def __iter__ (self):
        '''Iterate over the found file names.'''
        return iter(self.filenames)

    def _scan (self, folder):
        '''Scan (recursive).'''
        # Get list of (file) names in this folder...
        # (Stored on the instance, as before; it holds the listing of the
        # most recently visited folder only.)
        self.ns = listdir(folder)
        # Make it a real list of full filenames: it is walked twice below,
        # which a lazy map object would not survive.
        entries = [path.join(folder, n) for n in self.ns]
        # Extract the files, sorted so the result does not depend on the
        # order in which the operating system lists the folder...
        files = sorted(e for e in entries if path.isfile(e))
        # Filter files...
        if len(self.item_include):
            files = self.item_include.include_names(files)
        if len(self.item_exclude):
            files = self.item_exclude.exclude_names(files)
        # Add this list to main filelist...
        self.filenames.extend(files)
        # Extract list of sub-folder names...
        subfolders = [e for e in entries if path.isdir(e)]
        # Filter list...
        if len(self.folder_include):
            subfolders = self.folder_include.include_names(subfolders)
        if len(self.folder_exclude):
            subfolders = self.folder_exclude.exclude_names(subfolders)
        # Recurse sub-folders...
        for d in sorted(subfolders):
            self._scan(d)

##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~##
class filefilter (object):
    '''File Filter class.

    Holds a list of compiled, case-insensitive regular expressions. A name
    matches the filter when at least one expression is found anywhere in it
    (regex search, not a full match).
    '''

    def __init__ (self, items=()):
        '''Create a filter from an optional iterable of regex pattern strings.'''
        self.items = []
        self.add_items(items)

    def include_names (self, names):
        '''Return the list of {names} that match the filter.'''
        return [n for n in names if self.test_name(n)]

    def exclude_names (self, names):
        '''Return the list of {names} that do not match the filter.'''
        return [n for n in names if not self.test_name(n)]

    def test_name (self, name):
        '''Return True if any expression is found in {name}.'''
        return any(ff.search(name) for ff in self.items)

    def add_items (self, items):
        '''Add every pattern string in {items}.'''
        for ii in items:
            self.add_item(ii)

    def add_item (self, item):
        '''Compile {item} (case-insensitive) and add it to the filter.'''
        self.items.append(RegEx(item, IGNORECASE))

    def __str__ (self):
        '''String version: the pattern texts, one per line.'''
        return ''.join(ii.pattern + '\n' for ii in self.items)

    def __len__ (self):
        '''Return the number of expressions in the filter.'''
        return len(self.items)

    def __getitem__ (self, ix):
        '''Return the Nth compiled expression. Negative indexes are rejected.'''
        if ix < 0 or len(self.items) <= ix:
            raise IndexError("Index out of range: %d" % ix)
        return self.items[ix]

    def __iter__ (self):
        '''Iterate over the compiled expressions.'''
        return iter(self.items)


##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~##
class filelist (object):
    '''File List class.

    Wraps the (pathname,[filenames]) records that ListFolder returns for a
    base folder. Indexing returns records (one per folder) whereas len()
    counts files. Objects compare by base folder name only.
    '''

    def __init__ (self, folder):
        '''Scan {folder} (recursively, via ListFolder) and keep the records.'''
        self.folder = folder
        self.flist = ListFolder(folder)

    def prune (self, function):
        '''Apply {function} as a filter to all filenames.'''
        for ix, rcd in enumerate(self.flist):
            # Materialize the result: __len__ needs a real list in each record.
            self.flist[ix] = (rcd[0], list(filter(function, rcd[1])))

    def execute (self, function):
        '''Apply {function} to all full filenames; return list of results.'''
        return [function(f) for f in self.get_filelist()]

    def get_filelist (self):
        '''Return a flat list of full filenames.'''
        return [path.join(rcd[0], n) for rcd in self.flist for n in rcd[1]]

    def get_pathlist (self):
        '''Return a flat list of pathnames.'''
        return [rcd[0] for rcd in self.flist]

    def __str__ (self):
        '''String version: just the base folder name.'''
        return self.folder

    def __repr__ (self):
        '''JSON-like string.'''
        t = (self.folder, len(self.flist), len(self))
        s = '{filelist:{path:"%s", folders:%d, files:%d}}'
        return s % t

    def __cmp__ (self, other):
        '''Compare: just uses the base folder name. (Only consulted by Python 2.)'''
        return (self.folder > other.folder) - (self.folder < other.folder)

    def __eq__ (self, other):
        if not isinstance(other, filelist): return NotImplemented
        return self.folder == other.folder

    def __ne__ (self, other):
        if not isinstance(other, filelist): return NotImplemented
        return self.folder != other.folder

    def __lt__ (self, other):
        if not isinstance(other, filelist): return NotImplemented
        return self.folder < other.folder

    def __le__ (self, other):
        if not isinstance(other, filelist): return NotImplemented
        return self.folder <= other.folder

    def __gt__ (self, other):
        if not isinstance(other, filelist): return NotImplemented
        return self.folder > other.folder

    def __ge__ (self, other):
        if not isinstance(other, filelist): return NotImplemented
        return self.folder >= other.folder

    def __hash__ (self):
        # Defined together with __eq__ so that equal objects hash alike.
        return hash(self.folder)

    def __len__ (self):
        '''Return number of files. NOTE: This is in contrast to the index method!'''
        return sum(len(rcd[1]) for rcd in self.flist)

    def __getitem__ (self, ix):
        '''Return Nth record. (TODO: should probably index filenames)

        Raises TypeError for a negative index and IndexError past the end.
        '''
        if ix < 0:
            raise TypeError("Invalid Index: %s" % str(ix))
        if len(self.flist) <= ix:
            raise IndexError("Index out of range: %d" % ix)
        return self.flist[ix]



##================================================================================================##
def ListFiles (folder):
    '''List Folder (not recursive). Returns a "pathname/filename" list of strings.

    Only regular files are returned (sub-folders are skipped); the list is
    sorted. The folder name is reported on the Log stream.
    '''
    Log.write('[D: %s]\n' % folder)
    # Build the full filenames, then keep only those that are files...
    fullnames = (path.join(folder, n) for n in listdir(folder))
    return sorted(fn for fn in fullnames if path.isfile(fn))


##================================================================================================##
def ListFolder (folder):
    '''Returns a (pathname,[filenames]) list of tuples. RECURSIVE!

    The first record is {folder} itself with its sorted (bare) file names;
    the records of each sub-folder follow, sub-folders visited in sorted
    order. Every visited folder is reported on the Log stream, as is any
    entry that is neither a file nor a folder.
    '''
    Log.write('[D: %s]\n' % folder)
    # Divide the names into file and sub-folder lists...
    files = []       # bare names of the files in this folder
    subfolders = []  # full names of the sub-folders
    for n in listdir(folder):
        fn = path.join(folder, n)
        if path.isfile(fn):
            files.append(n)
        elif path.isdir(fn):
            subfolders.append(fn)
        else:
            Log.write('[?: %s]\n' % fn)
    folder_list = [(folder, sorted(files))]
    # Recurse Directories...
    for d in sorted(subfolders):
        folder_list.extend(ListFolder(d))
    return folder_list

##================================================================================================##
def ListFolders (folders):
    '''Return a (pathname,[filenames]) list of tuples. Uses (recursive) ListFolder.

    The records of each entry in {folders} are concatenated in the order
    the folders are given.
    '''
    # ListFolder does the heavy lifting; just flatten its results...
    return [record for top in folders for record in ListFolder(top)]


##================================================================================================##
def GetFileList (folders):
    '''Return a "pathname/filename" (flat) list of strings. Uses (recursive) ListFolders.'''
    # Each record is (pathname, [filenames]); join every name to its path...
    return [path.join(record[0], filename)
            for record in ListFolders(folders)
            for filename in record[1]]



##================================================================================================##
def do_test (*args):
    '''Test command: print the files of one folder (not recursive).

    args[0], if given, is the folder to list; otherwise a built-in default
    path is used. Returns the list produced by ListFiles.
    '''
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print('test/parameters: %d' % len(args))
    pathname = args[0] if args else r'C:\Users\Chris\Pictures\lib\Cartoons'
    flist = ListFiles(pathname)
    print('')
    print('Total Files: %d' % len(flist))
    print('')
    for f in flist:
        print('File: %s' % f)
    print('')
    return flist
##================================================================================================##
def do_demo (*args):
    '''Demo command: print a per-folder summary for three folder trees.

    Up to three folders may be given as arguments; each missing one falls
    back to its built-in default path. Returns the list produced by
    ListFolders.
    '''
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print('demo/parameters: %d' % len(args))
    pn1 = args[0] if 0 < len(args) else r'C:\CJS\prj\Python\lib'
    pn2 = args[1] if 1 < len(args) else r'C:\CJS\prj\Python\app'
    pn3 = args[2] if 2 < len(args) else r'C:\CJS\prj\Python\ws'
    ps = [pn1, pn2, pn3]
    flist = ListFolders(ps)
    print('')
    print('Total Dirs: %d' % len(flist))
    print('Total Files: %d' % sum(len(t[1]) for t in flist))
    print('')
    for t in flist:
        print('Folder: %s  (%d files)' % (t[0], len(t[1])))
    print('')
    return flist
##================================================================================================##
def do_main (*args):
    '''Main command: print every folder and file below the given folders.

    Each argument is a folder to scan recursively. Returns the list
    produced by ListFolders.
    '''
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print('main/parameters: %d' % len(args))
    flist = ListFolders(args)
    print('')
    print('Total Dirs: %d' % len(flist))
    print('Total Files: %d' % sum(len(t[1]) for t in flist))
    print('')
    for t in flist:
        print('Folder: %s  (%d files)' % (t[0], len(t[1])))
        for f in t[1]:
            print('. File: %s' % path.join(t[0], f))
    print('')
    return flist
####################################################################################################
def dispatch (cmd, *args):
    '''Run the command named {cmd} with {args} and return its result.

    Known commands are 'test', 'demo' and 'main'. Any other command runs
    nothing and returns the list [None, cmd, args].
    '''
    print('command: %s' % cmd)
    print('arguments: %d' % len(args))
    if cmd == 'test': return do_test(*args)
    if cmd == 'demo': return do_demo(*args)
    if cmd == 'main': return do_main(*args)
    return [None, cmd, args]
####################################################################################################
if __name__ == '__main__':
    # Script entry point: the first argument names the command (empty when
    # absent), the remaining arguments are handed to that command.
    print('autorun: %s' % argv[0])
    cmd = argv[1] if 1 < len(argv) else ''
    etc = argv[2:]
    obj = dispatch(cmd, *etc)
    print('exit/type: %s (length: %d)' % (type(obj), len(obj)))
####################################################################################################
'''eof'''