Changeset 199
- Timestamp: 04/04/05 13:59:39 (20 years ago)
- Location: pkpgcounter/trunk
- Files: 3 modified
Legend:
- Unmodified
- Added
- Removed
pkpgcounter/trunk/bin/pkpgcounter
r195 r199 23 23 # 24 24 25 import sys 26 import tempfile 27 28 from pdlanalyzer import version 25 from pdlanalyzer import pdlparser 29 26 30 KILOBYTE = 1024 31 MEGABYTE = 1024 * KILOBYTE 32 LASTBLOCKSIZE = int(KILOBYTE / 4) 27 if __name__ == "__main__" : 28 pdlparser.main() 33 29 34 class PDLAnalyzer :35 """Generic PDL Analyzer class."""36 def __init__(self, filename, debug=0) :37 """Initializes the PDL analyzer.38 39 filename is the name of the file or '-' for stdin.40 filename can also be a file-like object which41 supports read() and seek().42 """43 self.debug = debug44 self.filename = filename45 try :46 import psyco47 except ImportError :48 sys.stderr.write("pkpgcounter : you should install psyco if possible, this would greatly speedup parsing.\n")49 pass # Psyco is not installed50 else :51 # Psyco is installed, tell it to compile52 # the CPU intensive methods : PCL and PCLXL53 # parsing will greatly benefit from this,54 # for PostScript and PDF the difference is55 # barely noticeable since they are already56 # almost optimal, and much more speedy anyway.57 psyco.bind(PostScriptAnalyzer.getJobSize)58 psyco.bind(PDFAnalyzer.getJobSize)59 psyco.bind(ESCP2Analyzer.getJobSize)60 psyco.bind(PCLAnalyzer.getJobSize)61 psyco.bind(PCLXLAnalyzer.getJobSize)62 63 def getJobSize(self) :64 """Returns the job's size."""65 self.openFile()66 try :67 pdlhandler = self.detectPDLHandler()68 except PDLAnalyzerError, msg :69 self.closeFile()70 raise PDLAnalyzerError, "ERROR : Unknown file format for %s (%s)" % (self.filename, msg)71 else :72 try :73 size = pdlhandler(self.infile, self.debug).getJobSize()74 finally :75 self.closeFile()76 return size77 78 def openFile(self) :79 """Opens the job's data stream for reading."""80 self.mustclose = 0 # by default we don't want to close the file when finished81 if hasattr(self.filename, "read") and hasattr(self.filename, "seek") :82 # filename is in fact a file-like object83 infile = self.filename84 elif self.filename == "-" :85 # we must 
read from stdin86 infile = sys.stdin87 else :88 # normal file89 self.infile = open(self.filename, "rb")90 self.mustclose = 191 return92 93 # Use a temporary file, always seekable contrary to standard input.94 self.infile = tempfile.TemporaryFile(mode="w+b")95 while 1 :96 data = infile.read(MEGABYTE)97 if not data :98 break99 self.infile.write(data)100 self.infile.flush()101 self.infile.seek(0)102 103 def closeFile(self) :104 """Closes the job's data stream if we can close it."""105 if self.mustclose :106 self.infile.close()107 else :108 # if we don't have to close the file, then109 # ensure the file pointer is reset to the110 # start of the file in case the process wants111 # to read the file again.112 try :113 self.infile.seek(0)114 except :115 pass # probably stdin, which is not seekable116 117 def isPostScript(self, sdata, edata) :118 """Returns 1 if data is PostScript, else 0."""119 if sdata.startswith("%!") or \120 sdata.startswith("\004%!") or \121 sdata.startswith("\033%-12345X%!PS") or \122 ((sdata[:128].find("\033%-12345X") != -1) and \123 ((sdata.find("LANGUAGE=POSTSCRIPT") != -1) or \124 (sdata.find("LANGUAGE = POSTSCRIPT") != -1) or \125 (sdata.find("LANGUAGE = Postscript") != -1))) or \126 (sdata.find("%!PS-Adobe") != -1) :127 if self.debug :128 sys.stderr.write("%s is a PostScript file\n" % str(self.filename))129 return 1130 else :131 return 0132 133 def isPDF(self, sdata, edata) :134 """Returns 1 if data is PDF, else 0."""135 if sdata.startswith("%PDF-") or \136 sdata.startswith("\033%-12345X%PDF-") or \137 ((sdata[:128].find("\033%-12345X") != -1) and (sdata.upper().find("LANGUAGE=PDF") != -1)) or \138 (sdata.find("%PDF-") != -1) :139 if self.debug :140 sys.stderr.write("%s is a PDF file\n" % str(self.filename))141 return 1142 else :143 return 0144 145 def isPCL(self, sdata, edata) :146 """Returns 1 if data is PCL, else 0."""147 if sdata.startswith("\033E\033") or \148 (sdata.startswith("\033*rbC") and (not edata[-3:] == "\f\033@")) or \149 
sdata.startswith("\033%8\033") or \150 (sdata.find("\033%-12345X") != -1) :151 if self.debug :152 sys.stderr.write("%s is a PCL3/4/5 file\n" % str(self.filename))153 return 1154 else :155 return 0156 157 def isPCLXL(self, sdata, edata) :158 """Returns 1 if data is PCLXL aka PCL6, else 0."""159 if ((sdata[:128].find("\033%-12345X") != -1) and \160 (sdata.find(" HP-PCL XL;") != -1) and \161 ((sdata.find("LANGUAGE=PCLXL") != -1) or \162 (sdata.find("LANGUAGE = PCLXL") != -1))) :163 if self.debug :164 sys.stderr.write("%s is a PCLXL (aka PCL6) file\n" % str(self.filename))165 return 1166 else :167 return 0168 169 def isESCP2(self, sdata, edata) :170 """Returns 1 if data is ESC/P2, else 0."""171 if sdata.startswith("\033@") or \172 sdata.startswith("\033*") or \173 sdata.startswith("\n\033@") or \174 sdata.startswith("\0\0\0\033\1@EJL") : # ESC/P Raster ??? Seen on Stylus Photo 1284175 if self.debug :176 sys.stderr.write("%s is an ESC/P2 file\n" % str(self.filename))177 return 1178 else :179 return 0180 181 def detectPDLHandler(self) :182 """Tries to autodetect the document format.183 184 Returns the correct PDL handler class or None if format is unknown185 """186 # Try to detect file type by reading first block of datas187 self.infile.seek(0)188 firstblock = self.infile.read(4 * KILOBYTE)189 try :190 self.infile.seek(-LASTBLOCKSIZE, 2)191 lastblock = self.infile.read(LASTBLOCKSIZE)192 except IOError :193 lastblock = ""194 195 self.infile.seek(0)196 if self.isPostScript(firstblock, lastblock) :197 return PostScriptAnalyzer198 elif self.isPCLXL(firstblock, lastblock) :199 return PCLXLAnalyzer200 elif self.isPDF(firstblock, lastblock) :201 return PDFAnalyzer202 elif self.isPCL(firstblock, lastblock) :203 return PCLAnalyzer204 elif self.isESCP2(firstblock, lastblock) :205 return ESCP2Analyzer206 else :207 raise PDLAnalyzerError, "Analysis of first data block failed."208 209 def main() :210 """Entry point for PDL Analyzer."""211 212 sys.stderr.write("Big changes are 
currently occuring in the developpment version, please use the stable version for now !\n")213 sys.exit(-1)214 215 if (len(sys.argv) < 2) or ((not sys.stdin.isatty()) and ("-" not in sys.argv[1:])) :216 sys.argv.append("-")217 218 if ("-h" in sys.argv[1:]) or ("--help" in sys.argv[1:]) :219 print "usage : pkpgcounter file1 file2 ... fileN"220 elif ("-v" in sys.argv[1:]) or ("--version" in sys.argv[1:]) :221 print "%s" % version.__version__222 else :223 totalsize = 0224 debug = 0225 minindex = 1226 if sys.argv[1] == "--debug" :227 minindex = 2228 debug = 1229 for arg in sys.argv[minindex:] :230 try :231 parser = PDLAnalyzer(arg, debug)232 totalsize += parser.getJobSize()233 except PDLAnalyzerError, msg :234 sys.stderr.write("ERROR: %s\n" % msg)235 sys.stderr.flush()236 print "%s" % totalsize237 238 if __name__ == "__main__" :239 main() -
pkpgcounter/trunk/pdlanalyzer/pdlparser.py
r193 r199 19 19 # $Id$ 20 20 # 21 22 import sys 23 import tempfile 24 25 from pdlanalyzer.postscript import PostScriptParser 26 from pdlanalyzer.pdf import PDFParser 27 from pdlanalyzer.pcl345 import PCL345Parser 28 from pdlanalyzer.pclxl import PCLXLParser 29 from pdlanalyzer.escp2 import ESCP2Parser 30 31 KILOBYTE = 1024 32 MEGABYTE = 1024 * KILOBYTE 33 LASTBLOCKSIZE = int(KILOBYTE / 4) 21 34 22 35 class PDLParserError(Exception): … … 39 52 """Counts pages in the document.""" 40 53 raise RuntimeError, "Not implemented !" 54 55 class PDLAnalyzer : 56 """Class for PDL autodetection.""" 57 def __init__(self, filename, debug=0) : 58 """Initializes the PDL analyzer. 59 60 filename is the name of the file or '-' for stdin. 61 filename can also be a file-like object which 62 supports read() and seek(). 63 """ 64 self.debug = debug 65 self.filename = filename 66 try : 67 import psyco 68 except ImportError : 69 sys.stderr.write("pkpgcounter : you should install psyco if possible, this would greatly speedup parsing.\n") 70 pass # Psyco is not installed 71 else : 72 # Psyco is installed, tell it to compile 73 # the CPU intensive methods : PCL and PCLXL 74 # parsing will greatly benefit from this, 75 # for PostScript and PDF the difference is 76 # barely noticeable since they are already 77 # almost optimal, and much more speedy anyway. 
78 psyco.bind(PostScriptParser.getJobSize) 79 psyco.bind(PDFParser.getJobSize) 80 psyco.bind(ESCP2Parser.getJobSize) 81 psyco.bind(PCLParser.getJobSize) 82 psyco.bind(PCLXLParser.getJobSize) 83 84 def getJobSize(self) : 85 """Returns the job's size.""" 86 self.openFile() 87 try : 88 pdlhandler = self.detectPDLHandler() 89 except PDLParserError, msg : 90 self.closeFile() 91 raise PDLParserError, "ERROR : Unknown file format for %s (%s)" % (self.filename, msg) 92 else : 93 try : 94 size = pdlhandler(self.infile, self.debug).getJobSize() 95 finally : 96 self.closeFile() 97 return size 98 99 def openFile(self) : 100 """Opens the job's data stream for reading.""" 101 self.mustclose = 0 # by default we don't want to close the file when finished 102 if hasattr(self.filename, "read") and hasattr(self.filename, "seek") : 103 # filename is in fact a file-like object 104 infile = self.filename 105 elif self.filename == "-" : 106 # we must read from stdin 107 infile = sys.stdin 108 else : 109 # normal file 110 self.infile = open(self.filename, "rb") 111 self.mustclose = 1 112 return 113 114 # Use a temporary file, always seekable contrary to standard input. 115 self.infile = tempfile.TemporaryFile(mode="w+b") 116 while 1 : 117 data = infile.read(MEGABYTE) 118 if not data : 119 break 120 self.infile.write(data) 121 self.infile.flush() 122 self.infile.seek(0) 123 124 def closeFile(self) : 125 """Closes the job's data stream if we can close it.""" 126 if self.mustclose : 127 self.infile.close() 128 else : 129 # if we don't have to close the file, then 130 # ensure the file pointer is reset to the 131 # start of the file in case the process wants 132 # to read the file again. 
133 try : 134 self.infile.seek(0) 135 except : 136 pass # probably stdin, which is not seekable 137 138 def isPostScript(self, sdata, edata) : 139 """Returns 1 if data is PostScript, else 0.""" 140 if sdata.startswith("%!") or \ 141 sdata.startswith("\004%!") or \ 142 sdata.startswith("\033%-12345X%!PS") or \ 143 ((sdata[:128].find("\033%-12345X") != -1) and \ 144 ((sdata.find("LANGUAGE=POSTSCRIPT") != -1) or \ 145 (sdata.find("LANGUAGE = POSTSCRIPT") != -1) or \ 146 (sdata.find("LANGUAGE = Postscript") != -1))) or \ 147 (sdata.find("%!PS-Adobe") != -1) : 148 if self.debug : 149 sys.stderr.write("%s is a PostScript file\n" % str(self.filename)) 150 return 1 151 else : 152 return 0 153 154 def isPDF(self, sdata, edata) : 155 """Returns 1 if data is PDF, else 0.""" 156 if sdata.startswith("%PDF-") or \ 157 sdata.startswith("\033%-12345X%PDF-") or \ 158 ((sdata[:128].find("\033%-12345X") != -1) and (sdata.upper().find("LANGUAGE=PDF") != -1)) or \ 159 (sdata.find("%PDF-") != -1) : 160 if self.debug : 161 sys.stderr.write("%s is a PDF file\n" % str(self.filename)) 162 return 1 163 else : 164 return 0 165 166 def isPCL(self, sdata, edata) : 167 """Returns 1 if data is PCL, else 0.""" 168 if sdata.startswith("\033E\033") or \ 169 (sdata.startswith("\033*rbC") and (not edata[-3:] == "\f\033@")) or \ 170 sdata.startswith("\033%8\033") or \ 171 (sdata.find("\033%-12345X") != -1) : 172 if self.debug : 173 sys.stderr.write("%s is a PCL3/4/5 file\n" % str(self.filename)) 174 return 1 175 else : 176 return 0 177 178 def isPCLXL(self, sdata, edata) : 179 """Returns 1 if data is PCLXL aka PCL6, else 0.""" 180 if ((sdata[:128].find("\033%-12345X") != -1) and \ 181 (sdata.find(" HP-PCL XL;") != -1) and \ 182 ((sdata.find("LANGUAGE=PCLXL") != -1) or \ 183 (sdata.find("LANGUAGE = PCLXL") != -1))) : 184 if self.debug : 185 sys.stderr.write("%s is a PCLXL (aka PCL6) file\n" % str(self.filename)) 186 return 1 187 else : 188 return 0 189 190 def isESCP2(self, sdata, edata) : 191 
"""Returns 1 if data is ESC/P2, else 0.""" 192 if sdata.startswith("\033@") or \ 193 sdata.startswith("\033*") or \ 194 sdata.startswith("\n\033@") or \ 195 sdata.startswith("\0\0\0\033\1@EJL") : # ESC/P Raster ??? Seen on Stylus Photo 1284 196 if self.debug : 197 sys.stderr.write("%s is an ESC/P2 file\n" % str(self.filename)) 198 return 1 199 else : 200 return 0 201 202 def detectPDLHandler(self) : 203 """Tries to autodetect the document format. 204 205 Returns the correct PDL handler class or None if format is unknown 206 """ 207 # Try to detect file type by reading first block of datas 208 self.infile.seek(0) 209 firstblock = self.infile.read(4 * KILOBYTE) 210 try : 211 self.infile.seek(-LASTBLOCKSIZE, 2) 212 lastblock = self.infile.read(LASTBLOCKSIZE) 213 except IOError : 214 lastblock = "" 215 self.infile.seek(0) 216 if self.isPostScript(firstblock, lastblock) : 217 return PostScriptParser 218 elif self.isPCLXL(firstblock, lastblock) : 219 return PCLXLParser 220 elif self.isPDF(firstblock, lastblock) : 221 return PDFParser 222 elif self.isPCL(firstblock, lastblock) : 223 return PCLParser 224 elif self.isESCP2(firstblock, lastblock) : 225 return ESCP2Parser 226 else : 227 raise PDLParserError, "Analysis of first data block failed." 228 229 def main() : 230 """Entry point for PDL Analyzer.""" 231 if (len(sys.argv) < 2) or ((not sys.stdin.isatty()) and ("-" not in sys.argv[1:])) : 232 sys.argv.append("-") 233 234 if ("-h" in sys.argv[1:]) or ("--help" in sys.argv[1:]) : 235 print "usage : pkpgcounter file1 file2 ... 
fileN" 236 elif ("-v" in sys.argv[1:]) or ("--version" in sys.argv[1:]) : 237 print "%s" % version.__version__ 238 else : 239 totalsize = 0 240 debug = 0 241 minindex = 1 242 if sys.argv[1] == "--debug" : 243 minindex = 2 244 debug = 1 245 for arg in sys.argv[minindex:] : 246 try : 247 parser = PDLAnalyzer(arg, debug) 248 totalsize += parser.getJobSize() 249 except PDLParserError, msg : 250 sys.stderr.write("ERROR: %s\n" % msg) 251 sys.stderr.flush() 252 print "%s" % totalsize 253 254 if __name__ == "__main__" : 255 main() -
pkpgcounter/trunk/setup.py
r198 r199
45 45
46 46   docdir = "share/doc/pkpgcounter"
47    - docfiles = ["README", " FAQ", "SECURITY", "COPYING", "LICENSE", "CREDITS", "TODO", "NEWS"]
   47 + docfiles = ["README", "COPYING", "BUGS", "CREDITS", "NEWS"]
48 48   data_files.append((docdir, docfiles))
49 49