#!/usr/bin/env python
# coding=utf-8
#
# FileTypeList.py
#
# Copyright © 2008 Steven Brown
#
# This file is part of File List Applet.
#
# FileListApplet is free software: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# FileListApplet is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with FileListApplet.  If not, see <http://www.gnu.org/licenses/>.
"""Snapshot of one directory's entries, grouped by MIME media and subtype.

The listing is held in parallel lists (one entry per file) plus a nested
dictionary ``MEDIA[media][subtype][filename] -> row index``.  MIME types are
obtained from ``xdg.Mime`` (pyxdg).
"""

import os
import sys
import time

try:
    import xdg.Mime
except ImportError:
    # pyxdg is only needed when a MIME type is actually looked up (add_file).
    # Deferring the failure keeps the pure bookkeeping/query code importable.
    xdg = None

#todo:
# - let subtypes of large collections bubble up to be counted as Media.
# - media == "application" --> use Comment as title.


def _cmp(a, b):
    """Three-way comparison returning -1, 0 or 1 (the Python 2 ``cmp``)."""
    return (a > b) - (a < b)


def _tally(values):
    """Return a dict mapping each distinct value to its occurrence count."""
    counts = {}
    for value in values:
        counts[value] = counts.get(value, 0) + 1
    return counts


def _ranked(counts, sortbyqty, limit):
    """Turn a ``{name: count}`` dict into a list of ``(name, count)`` pairs.

    With ``sortbyqty`` the pairs are ordered from most to least frequent;
    a truthy ``limit`` truncates the list to that many pairs.
    """
    pairs = list(counts.items())
    if sortbyqty:
        pairs.sort(key=lambda pair: pair[1], reverse=True)
    if limit:
        pairs = pairs[:limit]
    return pairs


def index_all(thelist, thevalue):
    """Return a list of every index at which ``thevalue`` occurs in ``thelist``."""
    return [i for i, item in enumerate(thelist) if item == thevalue]


# Positions of the fields inside the row tuples built by the class below.
FILENAME, MEDIA, SUBTYPE, COMMENT, MODTIME = range(5)


class FileTypeList(object):
    """Listing of the files in ``path`` together with their MIME information.

    Attributes (all rebuilt by ``update``):
      files, media, subtypes, modtimes -- parallel lists, one entry per file.
      comments -- dict of type descriptions keyed by ``media + subtype``.
      MEDIA    -- nested dict ``MEDIA[media][subtype][filename]`` holding the
                  file's index into the parallel lists.
    """

    def __init__(self, path="~"):
        """Scan ``path`` (default: the user's home directory).

        Prints the time taken by the initial scan to standard output.
        """
        self.path = os.path.expanduser(path)
        sys.stdout.write("update: ")
        before = time.time()
        self.update()
        after = time.time()
        print("%f seconds" % (after - before))

    def update(self, hidden=False):
        """Rebuild the whole listing from disk.

        Entries whose name starts with '.' are skipped unless ``hidden`` is
        true.
        """
        self.files = []      # all filenames
        self.media = []      # media of each file
        self.subtypes = []   # subtype of each file
        self.modtimes = []   # modification time of each file
        self.comments = {}   # xdg comments, keyed by media + subtype
        self.MEDIA = {}      # media -> subtype -> filename -> row index
        for name in os.listdir(self.path):
            if hidden or not name.startswith('.'):
                self.add_file(name)

    def add_file(self, filename):
        """Append ``filename`` (relative to ``self.path``) to the listing.

        Raises ImportError when pyxdg is not installed, and whatever
        ``os.stat`` raises when the file cannot be examined.  Everything that
        can fail is done before the first list is touched, so a failure never
        leaves the parallel lists with different lengths.
        """
        if xdg is None:
            raise ImportError(
                "pyxdg (xdg.Mime) is required to detect file types")
        fullpath = os.path.join(self.path, filename)

        before = time.time()
        # THIS IS SLOW!! :( -- TODO: cache media/subtype with extensions
        mtype = xdg.Mime.get_type(fullpath)
        after = time.time()
        print("add_file_xdg.Mime.get_type():  %s seconds" % (after - before))

        mtime = os.stat(fullpath).st_mtime
        key = mtype.media + mtype.subtype
        if key not in self.comments:
            self.comments[key] = mtype.get_comment()

        self.files.append(filename)
        self.media.append(mtype.media)
        self.subtypes.append(mtype.subtype)
        self.modtimes.append(mtime)

        by_subtype = self.MEDIA.setdefault(mtype.media, {})
        by_name = by_subtype.setdefault(mtype.subtype, {})
        if filename not in by_name:
            # store the index of this file in self.files
            by_name[filename] = len(self.files) - 1

    def remove_file(self, filename):
        """Drop ``filename`` from the listing.

        Raises ValueError if the file is not listed.  Media/subtype groups
        that become empty are pruned from ``self.MEDIA``.
        """
        i = self.files.index(filename)
        media, subtype = self.media[i], self.subtypes[i]

        by_subtype = self.MEDIA.get(media)
        if by_subtype is not None:
            by_name = by_subtype.get(subtype)
            if by_name is not None:
                by_name.pop(filename, None)
                if not by_name:
                    del by_subtype[subtype]
            if not by_subtype:
                del self.MEDIA[media]

        del self.files[i]
        del self.media[i]
        del self.subtypes[i]
        del self.modtimes[i]

        # Every row after the deleted one moved up by one position, so the
        # indexes recorded in self.MEDIA have to follow.
        for subtype_map in self.MEDIA.values():
            for name_map in subtype_map.values():
                for name, index in list(name_map.items()):
                    if index > i:
                        name_map[name] = index - 1

    def update_file(self, filename):
        """Refresh the details (modification time, type) of ``filename``.

        Implemented as remove + add, so the file moves to the end of the
        listing.  Raises ValueError if the file is not listed.
        """
        # FIXME - Lazy implementation, but it shouldn't affect performance.
        self.remove_file(filename)
        self.add_file(filename)

    def get_file_by_name(self, fn):
        """Return the row tuple of the file named ``fn``.

        Format of tuple: (FILENAME, MEDIA, SUBTYPE, COMMENT, MODTIME).
        Raises ValueError if ``fn`` is not listed.
        """
        return self.__get_row(self.files.index(fn))

    def get_subtype(self, fn):
        """Return the MIME subtype recorded for file ``fn``."""
        return self.get_file_by_name(fn)[SUBTYPE]

    def get_file_media(self, fn):
        """Return the MIME media recorded for file ``fn``.

        This used to be a first ``get_media(fn)`` definition that was
        silently replaced by the later ``get_media(sortbyqty, max)``; it is
        kept reachable under this name.
        """
        return self.get_file_by_name(fn)[MEDIA]

    def get_comment(self, fn):
        """Return the type description recorded for file ``fn``."""
        return self.get_file_by_name(fn)[COMMENT]

    def get_modtime(self, fn):
        """Return the modification time recorded for file ``fn``."""
        return self.get_file_by_name(fn)[MODTIME]

    def get_files_by_media(self, media, sortbymodtime=True, max=0,
                           subtype=None):
        """Return the names of the files of ``media`` (and ``subtype``, if set).

        With ``sortbymodtime`` the most recently modified file comes first;
        otherwise listing order is kept.  A non-zero ``max`` truncates the
        result to that many names.
        """
        rows = self.__get_rows(
            self.__get_indexes(media=media, subtype=subtype))
        if sortbymodtime:
            rows.sort(key=lambda row: row[MODTIME])
            rows.reverse()
        names = [row[FILENAME] for row in rows]
        if max != 0:
            names = names[:max]
        return names

    def __get_row(self, i):
        """Return (filename, media, subtype, comment, modtime) for row ``i``."""
        return (self.files[i],
                self.media[i],
                self.subtypes[i],
                self.comments[self.media[i] + self.subtypes[i]],
                self.modtimes[i])

    def __get_rows(self, l):
        """Like __get_row, but returns a list of rows for all indices in ``l``."""
        return [self.__get_row(i) for i in l]

    def __get_indexes(self, filename=None, media=None, subtype=None,
                      comment=None, modtimestart=None, modtimeend=None):
        """Return the row indexes that satisfy every given criterion.

        A criterion left at a false value (None, '', 0) is not applied.
        String criteria are matched by equality; ``modtimestart`` and
        ``modtimeend`` are inclusive bounds on the recorded modification
        time.
        """
        hits = list(range(len(self.files)))
        # Start with the most specific, to cut the list down to size as
        # quickly as possible.
        if filename:
            # there will be either one hit, or none
            if filename in self.files:
                hits = [self.files.index(filename)]
            else:
                hits = []
        if subtype:
            hits = [i for i in hits if self.subtypes[i] == subtype]
        if media:
            hits = [i for i in hits if self.media[i] == media]
        if comment:
            # comments are keyed by media + subtype, not by row number
            hits = [i for i in hits
                    if self.comments[self.media[i] + self.subtypes[i]]
                    == comment]
        if modtimestart:
            hits = [i for i in hits if self.modtimes[i] >= modtimestart]
        if modtimeend:
            hits = [i for i in hits if self.modtimes[i] <= modtimeend]
        return hits

    def _mtime_on_disk(self, name):
        """Return the current modification time of ``name`` inside self.path."""
        return os.stat(os.path.join(self.path, name)).st_mtime

    def get_dirs(self, sortbymodtime=True, max=0):
        """Return the names of the listed directories (inode/directory).

        If sortbymodtime=True, the MOST RECENT are first, judged by a fresh
        ``os.stat`` of each directory; otherwise the names are sorted
        alphabetically.  A non-zero ``max`` truncates the result.
        """
        names = [self.files[i] for i in
                 self.__get_indexes(media="inode", subtype="directory")]
        if sortbymodtime:
            # key= stats every directory exactly once
            names.sort(key=self._mtime_on_disk)
            names.reverse()
        else:
            names.sort()
        if max != 0:
            names = names[:max]
        return names

    def get_media(self, sortbyqty=True, max=0):
        """Return the distinct media names present in the listing.

        With ``sortbyqty`` the media holding the most files comes first.
        A non-zero ``max`` truncates the result.
        """
        return [name for name, _ in self.get_media_counts(sortbyqty, max)]

    def get_media_counts(self, sortbyqty=True, max=0):
        """Return ``(media, number of files)`` pairs, most frequent first
        when ``sortbyqty`` is true.  A non-zero ``max`` truncates the result.
        """
        return _ranked(_tally(self.media), sortbyqty, max)

    def get_subtypes_counts(self, media, sortbyqty=True, max=0):
        """Return ``(subtype, number of files)`` pairs for the given media.

        Only files of ``media`` are counted, so a subtype name that also
        exists under another media does not inflate the figure.
        """
        subtypes = [self.subtypes[i]
                    for i in self.__get_indexes(media=media)]
        return _ranked(_tally(subtypes), sortbyqty, max)

    def get_subtypes(self, media, sortbyqty=True, max=0):
        """Return the distinct subtypes found under ``media``.

        With ``sortbyqty`` the subtype holding the most files comes first.
        A non-zero ``max`` truncates the result.
        """
        return [name for name, _ in
                self.get_subtypes_counts(media, sortbyqty, max)]

    def sort_by_mtime(self, a, b):
        """Comparator: order file names ``a`` and ``b`` by on-disk mtime."""
        return _cmp(self._mtime_on_disk(a), self._mtime_on_disk(b))

    def sort_by_qty(self, a, b):
        """Comparator: order media names ``a`` and ``b`` by number of files.

        Least to Most - reverse the results for Most to Least.
        """
        return _cmp(self.media.count(a), self.media.count(b))


# Documents > Type (7/32)> Document.ext1..7   For 3 or more of one type
# Documents > Other >
# Documents > Recent > MostRecent1..7
# Documents > Folders > Recent > MostRecentFolder1..7
# Documents > Hidden > Folders > Recent   # Disabled by default
# filename, mimetype general, mimetype detailed, icon, ...

def _main(argv):
    """Print a media/subtype breakdown of a directory.

    The directory is ``argv[1]`` when given; otherwise the original
    hard-coded test directory is used.
    """
    target = argv[1] if len(argv) > 1 else '/home/steve/Downloads'
    l = FileTypeList(target)
    print('------------------------------------------------')

    # Build the main display
    dirs = l.get_dirs(max=7)
    print("Directories (%i): %s" % (len(dirs), "\n\t".join(dirs)))
    for m, c in l.get_media_counts():
        if m == "inode":
            continue  # don't list directories again
        print("%s (%i)" % (m.title(), c))
        for s, cc in l.get_subtypes_counts(m):
            print("\t%s (%i)" % (s.title(), cc))
            for f in l.get_files_by_media(m, subtype=s, max=7):
                # TODO: get comment method?
                print("\t\t%s (%s)" % (f, l.comments[m + s]))


if __name__ == "__main__":
    _main(sys.argv)