Changeset 220 for pkpgcounter/trunk/pdlanalyzer
- Timestamp:
- 07/02/05 15:41:30 (19 years ago)
- Location:
- pkpgcounter/trunk/pdlanalyzer
- Files:
-
- 10 modified
Legend:
- Unmodified
- Added
- Removed
-
pkpgcounter/trunk/pdlanalyzer/analyzer.py
r217 r220 25 25 from pdlanalyzer import version, pdlparser, postscript, pdf, pcl345, pclxl, escp2, dvi, tiff 26 26 27 KILOBYTE = 102428 MEGABYTE = 1024 * KILOBYTE29 LASTBLOCKSIZE = int(KILOBYTE / 4)30 31 27 class PDLAnalyzer : 32 28 """Class for PDL autodetection.""" … … 40 36 self.debug = debug 41 37 self.filename = filename 42 try :43 import psyco44 except ImportError :45 sys.stderr.write("pkpgcounter : you should install psyco if possible, this would greatly speedup parsing.\n")46 pass # Psyco is not installed47 else :48 # Psyco is installed, tell it to compile49 # the CPU intensive methods : PCL and PCLXL50 # parsing will greatly benefit from this,51 # for PostScript and PDF the difference is52 # barely noticeable since they are already53 # almost optimal, and much more speedy anyway.54 psyco.bind(postscript.PostScriptParser.getJobSize)55 psyco.bind(pdf.PDFParser.getJobSize)56 psyco.bind(escp2.ESCP2Parser.getJobSize)57 psyco.bind(pcl345.PCL345Parser.getJobSize)58 psyco.bind(pclxl.PCLXLParser.getJobSize)59 psyco.bind(dvi.DVIParser.getJobSize)60 psyco.bind(tiff.TIFFParser.getJobSize)61 38 62 39 def getJobSize(self) : … … 67 44 except pdlparser.PDLParserError, msg : 68 45 self.closeFile() 69 raise pdlparser.PDLParserError, " ERROR :Unknown file format for %s (%s)" % (self.filename, msg)46 raise pdlparser.PDLParserError, "Unknown file format for %s (%s)" % (self.filename, msg) 70 47 else : 71 48 try : 72 size = pdlhandler (self.infile, self.debug).getJobSize()49 size = pdlhandler.getJobSize() 73 50 finally : 74 51 self.closeFile() … … 93 70 self.infile = tempfile.TemporaryFile(mode="w+b") 94 71 while 1 : 95 data = infile.read( MEGABYTE)72 data = infile.read(pdlparser.MEGABYTE) 96 73 if not data : 97 74 break … … 114 91 pass # probably stdin, which is not seekable 115 92 116 def isPostScript(self, sdata, edata) :117 """Returns 1 if data is PostScript, else 0."""118 if sdata.startswith("%!") or \119 sdata.startswith("\004%!") or \120 
sdata.startswith("\033%-12345X%!PS") or \121 ((sdata[:128].find("\033%-12345X") != -1) and \122 ((sdata.find("LANGUAGE=POSTSCRIPT") != -1) or \123 (sdata.find("LANGUAGE = POSTSCRIPT") != -1) or \124 (sdata.find("LANGUAGE = Postscript") != -1))) or \125 (sdata.find("%!PS-Adobe") != -1) :126 if self.debug :127 sys.stderr.write("%s is a PostScript file\n" % str(self.filename))128 return 1129 else :130 return 0131 132 def isPDF(self, sdata, edata) :133 """Returns 1 if data is PDF, else 0."""134 if sdata.startswith("%PDF-") or \135 sdata.startswith("\033%-12345X%PDF-") or \136 ((sdata[:128].find("\033%-12345X") != -1) and (sdata.upper().find("LANGUAGE=PDF") != -1)) or \137 (sdata.find("%PDF-") != -1) :138 if self.debug :139 sys.stderr.write("%s is a PDF file\n" % str(self.filename))140 return 1141 else :142 return 0143 144 def isPCL(self, sdata, edata) :145 """Returns 1 if data is PCL, else 0."""146 if sdata.startswith("\033E\033") or \147 (sdata.startswith("\033*rbC") and (not edata[-3:] == "\f\033@")) or \148 sdata.startswith("\033%8\033") or \149 (sdata.find("\033%-12345X") != -1) :150 if self.debug :151 sys.stderr.write("%s is a PCL3/4/5 file\n" % str(self.filename))152 return 1153 else :154 return 0155 156 def isPCLXL(self, sdata, edata) :157 """Returns 1 if data is PCLXL aka PCL6, else 0."""158 if ((sdata[:128].find("\033%-12345X") != -1) and \159 (sdata.find(" HP-PCL XL;") != -1) and \160 ((sdata.find("LANGUAGE=PCLXL") != -1) or \161 (sdata.find("LANGUAGE = PCLXL") != -1))) :162 if self.debug :163 sys.stderr.write("%s is a PCLXL (aka PCL6) file\n" % str(self.filename))164 return 1165 else :166 return 0167 168 def isESCP2(self, sdata, edata) :169 """Returns 1 if data is ESC/P2, else 0."""170 if sdata.startswith("\033@") or \171 sdata.startswith("\033*") or \172 sdata.startswith("\n\033@") or \173 sdata.startswith("\0\0\0\033\1@EJL") : # ESC/P Raster ??? 
Seen on Stylus Photo 1284174 if self.debug :175 sys.stderr.write("%s is an ESC/P2 file\n" % str(self.filename))176 return 1177 else :178 return 0179 180 def isDVI(self, sdata, edata) :181 """Returns 1 if data is DVI, else 0."""182 if (ord(sdata[0]) == 0xf7) and (ord(edata[-1]) == 0xdf) :183 if self.debug :184 sys.stderr.write("%s is a DVI file\n" % str(self.filename))185 return 1186 else :187 return 0188 189 def isTIFF(self, sdata, edata) :190 """Returns 1 if data is TIFF, else 0."""191 littleendian = (chr(0x49)*2) + chr(0x2a) + chr(0)192 bigendian = (chr(0x4d)*2) + chr(0) + chr(0x2a)193 if sdata[:4] in (littleendian, bigendian) :194 if self.debug :195 sys.stderr.write("%s is a TIFF file\n" % str(self.filename))196 return 1197 else :198 return 0199 200 93 def detectPDLHandler(self) : 201 94 """Tries to autodetect the document format. … … 203 96 Returns the correct PDL handler class or None if format is unknown 204 97 """ 205 # Try to detect file type by reading first block of datas 98 # Try to detect file type by reading first and last blocks of datas 99 # Each parser can read them automatically, but here we do this only once. 206 100 self.infile.seek(0) 207 firstblock = self.infile.read( 16 * KILOBYTE)101 firstblock = self.infile.read(pdlparser.FIRSTBLOCKSIZE) 208 102 try : 209 self.infile.seek(- LASTBLOCKSIZE, 2)210 lastblock = self.infile.read( LASTBLOCKSIZE)103 self.infile.seek(-pdlparser.LASTBLOCKSIZE, 2) 104 lastblock = self.infile.read(pdlparser.LASTBLOCKSIZE) 211 105 except IOError : 212 106 lastblock = "" 213 107 self.infile.seek(0) 214 108 if not firstblock : 215 sys.stderr.write("ERROR: input file %s is empty !\n" % str(self.filename))109 raise pdlparser.PDLParserError, "input file %s is empty !" 
% str(self.filename) 216 110 else : 217 if self.isPostScript(firstblock, lastblock) : 218 return postscript.PostScriptParser 219 elif self.isPCLXL(firstblock, lastblock) : 220 return pclxl.PCLXLParser 221 elif self.isPDF(firstblock, lastblock) : 222 return pdf.PDFParser 223 elif self.isPCL(firstblock, lastblock) : 224 return pcl345.PCL345Parser 225 elif self.isESCP2(firstblock, lastblock) : 226 return escp2.ESCP2Parser 227 elif self.isDVI(firstblock, lastblock) : 228 return dvi.DVIParser 229 elif self.isTIFF(firstblock, lastblock) : 230 return tiff.TIFFParser 111 for module in (postscript, \ 112 pclxl, \ 113 pdf, \ 114 pcl345, \ 115 escp2, \ 116 dvi, \ 117 tiff) : 118 try : 119 return getattr(module, "Parser")(self.infile, self.debug, firstblock, lastblock) 120 except pdlparser.PDLParserError : 121 pass # try next parser 231 122 raise pdlparser.PDLParserError, "Analysis of first data block failed." 232 123 -
pkpgcounter/trunk/pdlanalyzer/dvi.py
r218 r220
 29  29     from pdlanalyzer import pdlparser
 30  30
 31      -  class DVIParser(pdlparser.PDLParser) :
     31  +  class Parser(pdlparser.PDLParser) :
 32  32         """A parser for DVI documents."""
     33  +      def isValid(self) :
     34  +          """Returns 1 if data is DVI, else 0."""
     35  +          try :
     36  +              if (ord(self.firstblock[0]) == 0xf7) and (ord(self.lastblock[-1]) == 0xdf) :
     37  +                  if self.debug :
     38  +                      sys.stderr.write("DEBUG: Input file is in the DVI format.\n")
     39  +                  return 1
     40  +              else :
     41  +                  return 0
     42  +          except IndexError :
     43  +              return 0
     44  +
 33  45         def getJobSize(self) :
 34  46             """Counts pages in a DVI document.
  …   …
 74  86                 mustclose = 1
 75  87             try :
 76      -              parser = DVIParser(infile, debug=1)
     88  +              parser = Parser(infile, debug=1)
 77  89                 totalsize += parser.getJobSize()
 78  90             except pdlparser.PDLParserError, msg :
-
pkpgcounter/trunk/pdlanalyzer/escp2.py
r211 r220
 26  26     from pdlanalyzer import pdlparser
 27  27
 28      -  class ESCP2Parser(pdlparser.PDLParser) :
     28  +  class Parser(pdlparser.PDLParser) :
 29  29         """A parser for ESC/P2 documents."""
     30  +      def isValid(self) :
     31  +          """Returns 1 if data is ESC/P2, else 0."""
     32  +          if self.firstblock.startswith("\033@") or \
     33  +             self.firstblock.startswith("\033*") or \
     34  +             self.firstblock.startswith("\n\033@") or \
     35  +             self.firstblock.startswith("\0\0\0\033\1@EJL") : # ESC/P Raster ??? Seen on Stylus Photo 1284
     36  +              if self.debug :
     37  +                  sys.stderr.write("DEBUG: Input file is in the ESC/P2 format.\n")
     38  +              return 1
     39  +          else :
     40  +              return 0
     41  +
 30  42         def getJobSize(self) :
 31  43             """Counts pages in an ESC/P2 document."""
  …   …
 76  88                 mustclose = 1
 77  89             try :
 78      -              parser = ESCP2Parser(infile, debug=1)
     90  +              parser = Parser(infile, debug=1)
 79  91                 totalsize += parser.getJobSize()
 80  92             except pdlparser.PDLParserError, msg :
-
pkpgcounter/trunk/pdlanalyzer/pcl345.py
r211 r220 28 28 from pdlanalyzer import pdlparser 29 29 30 class P CL345Parser(pdlparser.PDLParser) :30 class Parser(pdlparser.PDLParser) : 31 31 """A parser for PCL3, PCL4, PCL5 documents.""" 32 32 mediasizes = { # ESC&l####A … … 77 77 4 : "Transparent", 78 78 } 79 80 def isValid(self) : 81 """Returns 1 if data is PCL, else 0.""" 82 if self.firstblock.startswith("\033E\033") or \ 83 (self.firstblock.startswith("\033*rbC") and (not self.lastblock[-3:] == "\f\033@")) or \ 84 self.firstblock.startswith("\033%8\033") or \ 85 (self.firstblock.find("\033%-12345X") != -1) : 86 if self.debug : 87 sys.stderr.write("DEBUG: Input file is in the PCL3/4/5 format.\n") 88 return 1 89 else : 90 return 0 79 91 80 92 def setPageDict(self, pages, number, attribute, value) : … … 346 358 mustclose = 1 347 359 try : 348 parser = P CL345Parser(infile, debug=1)360 parser = Parser(infile, debug=1) 349 361 totalsize += parser.getJobSize() 350 362 except pdlparser.PDLParserError, msg : -
pkpgcounter/trunk/pdlanalyzer/pclxl.py
r211 r220 29 29 from pdlanalyzer import pdlparser 30 30 31 class P CLXLParser(pdlparser.PDLParser) :31 class Parser(pdlparser.PDLParser) : 32 32 """A parser for PCLXL (aka PCL6) documents.""" 33 33 mediasizes = { … … 70 70 } 71 71 72 def isValid(self) : 73 """Returns 1 if data is PCLXL aka PCL6, else 0.""" 74 if ((self.firstblock[:128].find("\033%-12345X") != -1) and \ 75 (self.firstblock.find(" HP-PCL XL;") != -1) and \ 76 ((self.firstblock.find("LANGUAGE=PCLXL") != -1) or \ 77 (self.firstblock.find("LANGUAGE = PCLXL") != -1))) : 78 if self.debug : 79 sys.stderr.write("DEBUG: Input file is in the PCLXL (aka PCL6) format.\n") 80 return 1 81 else : 82 return 0 83 72 84 def beginPage(self) : 73 85 """Indicates the beginning of a new page, and extracts media information.""" … … 371 383 mustclose = 1 372 384 try : 373 parser = P CLXLParser(infile, debug=1)385 parser = Parser(infile, debug=1) 374 386 totalsize += parser.getJobSize() 375 387 except pdlparser.PDLParserError, msg : -
pkpgcounter/trunk/pdlanalyzer/pdf.py
r211 r220
 27  27     from pdlanalyzer import pdlparser
 28  28
 29      -  class PDFParser(pdlparser.PDLParser) :
     29  +  class Parser(pdlparser.PDLParser) :
 30  30         """A parser for PDF documents."""
     31  +      def isValid(self) :
     32  +          """Returns 1 if data is PDF, else 0."""
     33  +          if self.firstblock.startswith("%PDF-") or \
     34  +             self.firstblock.startswith("\033%-12345X%PDF-") or \
     35  +             ((self.firstblock[:128].find("\033%-12345X") != -1) and (self.firstblock.upper().find("LANGUAGE=PDF") != -1)) or \
     36  +             (self.firstblock.find("%PDF-") != -1) :
     37  +              if self.debug :
     38  +                  sys.stderr.write("DEBUG: Input file is in the PDF format.\n")
     39  +              return 1
     40  +          else :
     41  +              return 0
     42  +
 31  43         def getJobSize(self) :
 32  44             """Counts pages in a PDF document."""
  …   …
 56  68                 mustclose = 1
 57  69             try :
 58      -              parser = PDFParser(infile, debug=1)
     70  +              parser = Parser(infile, debug=1)
 59  71                 totalsize += parser.getJobSize()
 60  72             except pdlparser.PDLParserError, msg :
-
pkpgcounter/trunk/pdlanalyzer/pdlparser.py
r211 r220 21 21 22 22 import sys 23 import psyco 24 25 KILOBYTE = 1024 26 MEGABYTE = 1024 * KILOBYTE 27 FIRSTBLOCKSIZE = 16 * KILOBYTE 28 LASTBLOCKSIZE = int(KILOBYTE / 4) 23 29 24 30 class PDLParserError(Exception): … … 33 39 class PDLParser : 34 40 """Generic PDL parser.""" 35 def __init__(self, infile, debug=0 ) :41 def __init__(self, infile, debug=0, firstblock=None, lastblock=None) : 36 42 """Initialize the generic parser.""" 43 self.infile = infile 37 44 self.debug = debug 38 self.infile = infile 39 40 def getJobSize(self) : 41 """Counts pages in the document.""" 45 if firstblock is None : 46 self.infile.seek(0) 47 firstblock = self.infile.read(FIRSTBLOCKSIZE) 48 try : 49 self.infile.seek(-LASTBLOCKSIZE, 2) 50 lastblock = self.infile.read(LASTBLOCKSIZE) 51 except IOError : 52 lastblock = "" 53 self.infile.seek(0) 54 self.firstblock = firstblock 55 self.lastblock = lastblock 56 if not self.isValid() : 57 raise PDLParserError, "Invalid file format !" 58 try : 59 import psyco 60 except ImportError : 61 sys.stderr.write("WARN: you should install psyco if possible, this would greatly speedup parsing.\n") 62 pass # Psyco is not installed 63 else : 64 # Psyco is installed, tell it to compile 65 # the CPU intensive methods : PCL and PCLXL 66 # parsing will greatly benefit from this, 67 # for PostScript and PDF the difference is 68 # barely noticeable since they are already 69 # almost optimal, and much more speedy anyway. 70 psyco.bind(self.getJobSize) 71 72 def isValid(self) : 73 """Returns 1 if data is in the expected format, else 0.""" 42 74 raise RuntimeError, "Not implemented !" 75 76 def getJobSize(self) : 77 """Counts pages in a document.""" 78 raise RuntimeError, "Not implemented !" -
pkpgcounter/trunk/pdlanalyzer/postscript.py
r211 r220 27 27 from pdlanalyzer import pdlparser 28 28 29 class P ostScriptParser(pdlparser.PDLParser) :29 class Parser(pdlparser.PDLParser) : 30 30 """A parser for PostScript documents.""" 31 def isValid(self) : 32 """Returns 1 if data is PostScript, else 0.""" 33 if self.firstblock.startswith("%!") or \ 34 self.firstblock.startswith("\004%!") or \ 35 self.firstblock.startswith("\033%-12345X%!PS") or \ 36 ((self.firstblock[:128].find("\033%-12345X") != -1) and \ 37 ((self.firstblock.find("LANGUAGE=POSTSCRIPT") != -1) or \ 38 (self.firstblock.find("LANGUAGE = POSTSCRIPT") != -1) or \ 39 (self.firstblock.find("LANGUAGE = Postscript") != -1))) or \ 40 (self.firstblock.find("%!PS-Adobe") != -1) : 41 if self.debug : 42 sys.stderr.write("DEBUG: Input file is in the PostScript format.\n") 43 return 1 44 else : 45 return 0 46 31 47 def throughGhostScript(self) : 32 48 """Get the count through GhostScript, useful for non-DSC compliant PS files.""" … … 112 128 mustclose = 1 113 129 try : 114 parser = P ostScriptParser(infile, debug=1)130 parser = Parser(infile, debug=1) 115 131 totalsize += parser.getJobSize() 116 132 except pdlparser.PDLParserError, msg : -
pkpgcounter/trunk/pdlanalyzer/tiff.py
r219 r220
 29  29     from pdlanalyzer import pdlparser
 30  30
 31      -  class TIFFParser(pdlparser.PDLParser) :
     31  +  class Parser(pdlparser.PDLParser) :
 32  32         """A parser for TIFF documents."""
     33  +      def isValid(self) :
     34  +          """Returns 1 if data is TIFF, else 0."""
     35  +          littleendian = (chr(0x49)*2) + chr(0x2a) + chr(0)
     36  +          bigendian = (chr(0x4d)*2) + chr(0) + chr(0x2a)
     37  +          if self.firstblock[:4] in (littleendian, bigendian) :
     38  +              if self.debug :
     39  +                  sys.stderr.write("DEBUG: Input file is in the TIFF format.\n")
     40  +              return 1
     41  +          else :
     42  +              return 0
     43  +
 33  44         def getJobSize(self) :
 34  45             """Counts pages in a TIFF document.
  …   …
 77  88                 mustclose = 1
 78  89             try :
 79      -              parser = TIFFParser(infile, debug=1)
     90  +              parser = Parser(infile, debug=1)
 80  91                 totalsize += parser.getJobSize()
 81  92             except pdlparser.PDLParserError, msg :
-
pkpgcounter/trunk/pdlanalyzer/version.py
r217 r220
 20  20     #
 21  21
 22      -  __version__ = "1.52"
     22  +  __version__ = "1.53"
 23  23
 24  24     __doc__ = """pkpgcounter : a generic Page Description Languages parser."""