#
# pkpgcounter : a generic Page Description Language parser
#
# (c) 2003, 2004, 2005, 2006 Jerome Alet
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# $Id$
#

import sys
import tempfile

import version, pdlparser, postscript, pdf, pcl345, pclxl, \
       escp2, dvi, tiff, ooo, zjstream


class NoOption :
    """Fallback options object used when PDLAnalyzer is given no options.

    It exposes the three attributes 'debug', 'colorspace' and 'resolution',
    all set to None.
    """
    debug = None
    colorspace = None
    resolution = None


class PDLAnalyzer :
    """Class for PDL autodetection."""
    def __init__(self, filename, options=None) :
        """Initializes the PDL analyzer.

        filename is the name of the file or '-' for stdin.
        filename can also be a file-like object which
        supports read() and seek().

        options is any object with 'debug', 'colorspace' and 'resolution'
        attributes ; when omitted (or None) a fresh NoOption instance is used.
        """
        # A new NoOption per analyzer : a default instance created in the
        # signature would be shared by every analyzer built without options.
        if options is None :
            options = NoOption()
        self.options = options
        self.filename = filename
        self.infile = None  # work file, set by openFile()
        self.mustclose = 0  # true while self.infile is open and owned by us

    def getJobSize(self) :
        """Returns the job's size, as computed by the detected PDL handler.

        Raises pdlparser.PDLParserError if the format can't be recognized.
        The work file is closed before returning, whatever happens.
        """
        self.openFile()
        try :
            try :
                pdlhandler = self.detectPDLHandler()
            except pdlparser.PDLParserError :
                # sys.exc_info() is used instead of binding the exception
                # in the except clause, because the binding syntax is not
                # the same in all Python versions.
                msg = sys.exc_info()[1]
                raise pdlparser.PDLParserError("Unknown file format for %s (%s)" % (self.filename, msg))
            return pdlhandler.getJobSize()
        finally :
            # Runs on every exit path, including unexpected exceptions
            # raised during detection or parsing.
            self.closeFile()

    def openFile(self) :
        """Opens the job's data stream for reading.

        A named file is opened directly in binary mode.  Standard input
        and file-like objects are first copied into a temporary file,
        which is always seekable.  In both cases the result is stored
        in self.infile, positioned at its start.
        """
        self.mustclose = 0
        if hasattr(self.filename, "read") and hasattr(self.filename, "seek") :
            # filename is in fact a file-like object
            source = self.filename
        elif self.filename == "-" :
            # we must read from stdin
            source = sys.stdin
        else :
            # normal file
            self.infile = open(self.filename, "rb")
            self.mustclose = 1
            return

        # Use a temporary file, always seekable contrary to standard input.
        workfile = tempfile.TemporaryFile(mode="w+b")
        try :
            while True :
                data = source.read(pdlparser.MEGABYTE)
                if not data :
                    break
                workfile.write(data)
            workfile.flush()
            workfile.seek(0)
        except :
            # Don't leak the temporary file if the copy fails ;
            # the original error is propagated untouched.
            workfile.close()
            raise
        self.infile = workfile
        self.mustclose = 1  # the temporary copy belongs to us as well

    def closeFile(self) :
        """Closes the job's work file if it is currently open.

        Does nothing if openFile() was not called or if the work file
        was already closed.
        """
        if self.mustclose :
            self.infile.close()
            self.mustclose = 0

    def detectPDLHandler(self) :
        """Tries to autodetect the document format.

        Returns an instance of the first parser which accepts the data.
        Raises pdlparser.PDLParserError if the input is empty or if no
        parser accepts it.
        """
        # Try to detect file type by reading first and last blocks of datas
        # Each parser can read them automatically, but here we do this only once.
        self.infile.seek(0)
        firstblock = self.infile.read(pdlparser.FIRSTBLOCKSIZE)
        try :
            self.infile.seek(-pdlparser.LASTBLOCKSIZE, 2)
            lastblock = self.infile.read(pdlparser.LASTBLOCKSIZE)
        except IOError :
            # The seek from the end failed (presumably the file is shorter
            # than LASTBLOCKSIZE) : parsers get an empty last block.
            lastblock = ""
        self.infile.seek(0)

        if not firstblock :
            raise pdlparser.PDLParserError("input file %s is empty !" % str(self.filename))

        # The order matters : the first parser which doesn't complain wins.
        candidates = (postscript,
                      pclxl,
                      pdf,
                      pcl345,
                      escp2,
                      dvi,
                      tiff,
                      zjstream,
                      ooo)
        for module in candidates :
            try :
                return module.Parser(self.infile, self.options.debug, firstblock, lastblock)
            except pdlparser.PDLParserError :
                pass # try next parser
        raise pdlparser.PDLParserError("Analysis of first data block failed.")


def main() :
    """Entry point for PDL Analyzer.

    Parses the command line, then prints on standard output the sum of
    the sizes of all the files given as arguments.  Files which can't be
    read or recognized are reported on standard error and skipped.
    """
    import optparse
    from copy import copy

    def check_cichoice(option, opt, value) :
        """To add a CaseIgnore Choice option type.

        Returns the lowercased value if it matches one of the option's
        'cichoices' regardless of case, else raises OptionValueError.
        """
        valower = value.lower()
        if valower in [v.lower() for v in option.cichoices] :
            return valower
        choices = ", ".join(map(repr, option.cichoices))
        raise optparse.OptionValueError(
            "option %s: invalid choice: %r (choose from %s)"
            % (opt, value, choices))

    class MyOption(optparse.Option) :
        """New Option class, with CaseIgnore Choice type."""
        TYPES = optparse.Option.TYPES + ("cichoice",)
        ATTRS = optparse.Option.ATTRS + ["cichoices"]
        # Work on a copy so that the stock Option class is left untouched.
        TYPE_CHECKER = copy(optparse.Option.TYPE_CHECKER)
        TYPE_CHECKER["cichoice"] = check_cichoice

    parser = optparse.OptionParser(option_class=MyOption,
                                   usage="python analyzer.py [options] file1 [file2 ...]")
    parser.add_option("-v", "--version",
                      action="store_true",
                      dest="version",
                      help="Show pkpgcounter's version number and exit.")
    parser.add_option("-d", "--debug",
                      action="store_true",
                      dest="debug",
                      help="Activate debug mode.")
    parser.add_option("-c", "--colorspace",
                      dest="colorspace",
                      type="cichoice",
                      cichoices=["bw", "cmyk", "cmy", "all"],
                      help="Activate the computation of ink usage, and defines the colorspace to use. "
                           "Supported values are 'BW', 'CMYK', 'CMY' and 'ALL'.")
    parser.add_option("-r", "--resolution",
                      type="int",
                      default=150,
                      dest="resolution",
                      help="The resolution in DPI to use when checking ink usage. "
                           "Lower resolution is faster. Default is 150.")
    (options, arguments) = parser.parse_args()

    if options.version :
        print("%s" % version.__version__)
        return

    # Read standard input when no file is named, or when something
    # is piped to us and '-' was not explicitly listed.
    if (not arguments) or ((not sys.stdin.isatty()) and ("-" not in arguments)) :
        arguments.append("-")

    totalsize = 0
    try :
        for arg in arguments :
            try :
                analyzer = PDLAnalyzer(arg, options)
                totalsize += analyzer.getJobSize()
            except (IOError, pdlparser.PDLParserError) :
                # Report the problem and go on with the next file.
                sys.stderr.write("ERROR: %s\n" % sys.exc_info()[1])
                sys.stderr.flush()
    except KeyboardInterrupt :
        sys.stderr.write("WARN: Aborted at user's request.\n")
        sys.stderr.flush()
    # The total computed so far is printed even after an interruption.
    print("%s" % totalsize)


if __name__ == "__main__" :
    main()