Changeset 220 for pkpgcounter
- Timestamp:
- 07/02/05 15:41:30 (19 years ago)
- Location:
- pkpgcounter/trunk
- Files:
-
- 12 modified
Legend:
- Unmodified
- Added
- Removed
-
pkpgcounter/trunk/NEWS
r217 r220 22 22 pkpgcounter News : 23 23 24 * 1.53 : 25 26 - Moved code around to improve maintainability. 27 24 28 * 1.52 : 25 29 26 - Basic support for the TIFF format. 30 - Added support for the TIFF format. 27 31 28 32 * 1.51 : 29 33 30 - Basic support for the DVI format. 34 - Added support for the DVI format. 31 35 32 36 * 1.50 : -
pkpgcounter/trunk/pdlanalyzer/analyzer.py
r217 r220 25 25 from pdlanalyzer import version, pdlparser, postscript, pdf, pcl345, pclxl, escp2, dvi, tiff 26 26 27 KILOBYTE = 102428 MEGABYTE = 1024 * KILOBYTE29 LASTBLOCKSIZE = int(KILOBYTE / 4)30 31 27 class PDLAnalyzer : 32 28 """Class for PDL autodetection.""" … … 40 36 self.debug = debug 41 37 self.filename = filename 42 try :43 import psyco44 except ImportError :45 sys.stderr.write("pkpgcounter : you should install psyco if possible, this would greatly speedup parsing.\n")46 pass # Psyco is not installed47 else :48 # Psyco is installed, tell it to compile49 # the CPU intensive methods : PCL and PCLXL50 # parsing will greatly benefit from this,51 # for PostScript and PDF the difference is52 # barely noticeable since they are already53 # almost optimal, and much more speedy anyway.54 psyco.bind(postscript.PostScriptParser.getJobSize)55 psyco.bind(pdf.PDFParser.getJobSize)56 psyco.bind(escp2.ESCP2Parser.getJobSize)57 psyco.bind(pcl345.PCL345Parser.getJobSize)58 psyco.bind(pclxl.PCLXLParser.getJobSize)59 psyco.bind(dvi.DVIParser.getJobSize)60 psyco.bind(tiff.TIFFParser.getJobSize)61 38 62 39 def getJobSize(self) : … … 67 44 except pdlparser.PDLParserError, msg : 68 45 self.closeFile() 69 raise pdlparser.PDLParserError, " ERROR :Unknown file format for %s (%s)" % (self.filename, msg)46 raise pdlparser.PDLParserError, "Unknown file format for %s (%s)" % (self.filename, msg) 70 47 else : 71 48 try : 72 size = pdlhandler (self.infile, self.debug).getJobSize()49 size = pdlhandler.getJobSize() 73 50 finally : 74 51 self.closeFile() … … 93 70 self.infile = tempfile.TemporaryFile(mode="w+b") 94 71 while 1 : 95 data = infile.read( MEGABYTE)72 data = infile.read(pdlparser.MEGABYTE) 96 73 if not data : 97 74 break … … 114 91 pass # probably stdin, which is not seekable 115 92 116 def isPostScript(self, sdata, edata) :117 """Returns 1 if data is PostScript, else 0."""118 if sdata.startswith("%!") or \119 sdata.startswith("\004%!") or \120 
sdata.startswith("\033%-12345X%!PS") or \121 ((sdata[:128].find("\033%-12345X") != -1) and \122 ((sdata.find("LANGUAGE=POSTSCRIPT") != -1) or \123 (sdata.find("LANGUAGE = POSTSCRIPT") != -1) or \124 (sdata.find("LANGUAGE = Postscript") != -1))) or \125 (sdata.find("%!PS-Adobe") != -1) :126 if self.debug :127 sys.stderr.write("%s is a PostScript file\n" % str(self.filename))128 return 1129 else :130 return 0131 132 def isPDF(self, sdata, edata) :133 """Returns 1 if data is PDF, else 0."""134 if sdata.startswith("%PDF-") or \135 sdata.startswith("\033%-12345X%PDF-") or \136 ((sdata[:128].find("\033%-12345X") != -1) and (sdata.upper().find("LANGUAGE=PDF") != -1)) or \137 (sdata.find("%PDF-") != -1) :138 if self.debug :139 sys.stderr.write("%s is a PDF file\n" % str(self.filename))140 return 1141 else :142 return 0143 144 def isPCL(self, sdata, edata) :145 """Returns 1 if data is PCL, else 0."""146 if sdata.startswith("\033E\033") or \147 (sdata.startswith("\033*rbC") and (not edata[-3:] == "\f\033@")) or \148 sdata.startswith("\033%8\033") or \149 (sdata.find("\033%-12345X") != -1) :150 if self.debug :151 sys.stderr.write("%s is a PCL3/4/5 file\n" % str(self.filename))152 return 1153 else :154 return 0155 156 def isPCLXL(self, sdata, edata) :157 """Returns 1 if data is PCLXL aka PCL6, else 0."""158 if ((sdata[:128].find("\033%-12345X") != -1) and \159 (sdata.find(" HP-PCL XL;") != -1) and \160 ((sdata.find("LANGUAGE=PCLXL") != -1) or \161 (sdata.find("LANGUAGE = PCLXL") != -1))) :162 if self.debug :163 sys.stderr.write("%s is a PCLXL (aka PCL6) file\n" % str(self.filename))164 return 1165 else :166 return 0167 168 def isESCP2(self, sdata, edata) :169 """Returns 1 if data is ESC/P2, else 0."""170 if sdata.startswith("\033@") or \171 sdata.startswith("\033*") or \172 sdata.startswith("\n\033@") or \173 sdata.startswith("\0\0\0\033\1@EJL") : # ESC/P Raster ??? 
Seen on Stylus Photo 1284174 if self.debug :175 sys.stderr.write("%s is an ESC/P2 file\n" % str(self.filename))176 return 1177 else :178 return 0179 180 def isDVI(self, sdata, edata) :181 """Returns 1 if data is DVI, else 0."""182 if (ord(sdata[0]) == 0xf7) and (ord(edata[-1]) == 0xdf) :183 if self.debug :184 sys.stderr.write("%s is a DVI file\n" % str(self.filename))185 return 1186 else :187 return 0188 189 def isTIFF(self, sdata, edata) :190 """Returns 1 if data is TIFF, else 0."""191 littleendian = (chr(0x49)*2) + chr(0x2a) + chr(0)192 bigendian = (chr(0x4d)*2) + chr(0) + chr(0x2a)193 if sdata[:4] in (littleendian, bigendian) :194 if self.debug :195 sys.stderr.write("%s is a TIFF file\n" % str(self.filename))196 return 1197 else :198 return 0199 200 93 def detectPDLHandler(self) : 201 94 """Tries to autodetect the document format. … … 203 96 Returns the correct PDL handler class or None if format is unknown 204 97 """ 205 # Try to detect file type by reading first block of datas 98 # Try to detect file type by reading first and last blocks of datas 99 # Each parser can read them automatically, but here we do this only once. 206 100 self.infile.seek(0) 207 firstblock = self.infile.read( 16 * KILOBYTE)101 firstblock = self.infile.read(pdlparser.FIRSTBLOCKSIZE) 208 102 try : 209 self.infile.seek(- LASTBLOCKSIZE, 2)210 lastblock = self.infile.read( LASTBLOCKSIZE)103 self.infile.seek(-pdlparser.LASTBLOCKSIZE, 2) 104 lastblock = self.infile.read(pdlparser.LASTBLOCKSIZE) 211 105 except IOError : 212 106 lastblock = "" 213 107 self.infile.seek(0) 214 108 if not firstblock : 215 sys.stderr.write("ERROR: input file %s is empty !\n" % str(self.filename))109 raise pdlparser.PDLParserError, "input file %s is empty !" 
% str(self.filename) 216 110 else : 217 if self.isPostScript(firstblock, lastblock) : 218 return postscript.PostScriptParser 219 elif self.isPCLXL(firstblock, lastblock) : 220 return pclxl.PCLXLParser 221 elif self.isPDF(firstblock, lastblock) : 222 return pdf.PDFParser 223 elif self.isPCL(firstblock, lastblock) : 224 return pcl345.PCL345Parser 225 elif self.isESCP2(firstblock, lastblock) : 226 return escp2.ESCP2Parser 227 elif self.isDVI(firstblock, lastblock) : 228 return dvi.DVIParser 229 elif self.isTIFF(firstblock, lastblock) : 230 return tiff.TIFFParser 111 for module in (postscript, \ 112 pclxl, \ 113 pdf, \ 114 pcl345, \ 115 escp2, \ 116 dvi, \ 117 tiff) : 118 try : 119 return getattr(module, "Parser")(self.infile, self.debug, firstblock, lastblock) 120 except pdlparser.PDLParserError : 121 pass # try next parser 231 122 raise pdlparser.PDLParserError, "Analysis of first data block failed." 232 123 -
pkpgcounter/trunk/pdlanalyzer/dvi.py
r218 r220 29 29 from pdlanalyzer import pdlparser 30 30 31 class DVIParser(pdlparser.PDLParser) : 31 class Parser(pdlparser.PDLParser) : 32 32 """A parser for DVI documents.""" 33 def isValid(self) : 34 """Returns 1 if data is DVI, else 0.""" 35 try : 36 if (ord(self.firstblock[0]) == 0xf7) and (ord(self.lastblock[-1]) == 0xdf) : 37 if self.debug : 38 sys.stderr.write("DEBUG: Input file is in the DVI format.\n") 39 return 1 40 else : 41 return 0 42 except IndexError : 43 return 0 44 33 45 def getJobSize(self) : 34 46 """Counts pages in a DVI document. … … 74 86 mustclose = 1 75 87 try : 76 parser = DVIParser(infile, debug=1) 88 parser = Parser(infile, debug=1) 77 89 totalsize += parser.getJobSize() 78 90 except pdlparser.PDLParserError, msg : -
pkpgcounter/trunk/pdlanalyzer/escp2.py
r211 r220 26 26 from pdlanalyzer import pdlparser 27 27 28 class ESCP2Parser(pdlparser.PDLParser) :28 class Parser(pdlparser.PDLParser) : 29 29 """A parser for ESC/P2 documents.""" 30 def isValid(self) : 31 """Returns 1 if data is ESC/P2, else 0.""" 32 if self.firstblock.startswith("\033@") or \ 33 self.firstblock.startswith("\033*") or \ 34 self.firstblock.startswith("\n\033@") or \ 35 self.firstblock.startswith("\0\0\0\033\1@EJL") : # ESC/P Raster ??? Seen on Stylus Photo 1284 36 if self.debug : 37 sys.stderr.write("DEBUG: Input file is in the ESC/P2 format.\n") 38 return 1 39 else : 40 return 0 41 30 42 def getJobSize(self) : 31 43 """Counts pages in an ESC/P2 document.""" … … 76 88 mustclose = 1 77 89 try : 78 parser = ESCP2Parser(infile, debug=1)90 parser = Parser(infile, debug=1) 79 91 totalsize += parser.getJobSize() 80 92 except pdlparser.PDLParserError, msg : -
pkpgcounter/trunk/pdlanalyzer/pcl345.py
r211 r220 28 28 from pdlanalyzer import pdlparser 29 29 30 class P CL345Parser(pdlparser.PDLParser) :30 class Parser(pdlparser.PDLParser) : 31 31 """A parser for PCL3, PCL4, PCL5 documents.""" 32 32 mediasizes = { # ESC&l####A … … 77 77 4 : "Transparent", 78 78 } 79 80 def isValid(self) : 81 """Returns 1 if data is PCL, else 0.""" 82 if self.firstblock.startswith("\033E\033") or \ 83 (self.firstblock.startswith("\033*rbC") and (not self.lastblock[-3:] == "\f\033@")) or \ 84 self.firstblock.startswith("\033%8\033") or \ 85 (self.firstblock.find("\033%-12345X") != -1) : 86 if self.debug : 87 sys.stderr.write("DEBUG: Input file is in the PCL3/4/5 format.\n") 88 return 1 89 else : 90 return 0 79 91 80 92 def setPageDict(self, pages, number, attribute, value) : … … 346 358 mustclose = 1 347 359 try : 348 parser = P CL345Parser(infile, debug=1)360 parser = Parser(infile, debug=1) 349 361 totalsize += parser.getJobSize() 350 362 except pdlparser.PDLParserError, msg : -
pkpgcounter/trunk/pdlanalyzer/pclxl.py
r211 r220 29 29 from pdlanalyzer import pdlparser 30 30 31 class P CLXLParser(pdlparser.PDLParser) :31 class Parser(pdlparser.PDLParser) : 32 32 """A parser for PCLXL (aka PCL6) documents.""" 33 33 mediasizes = { … … 70 70 } 71 71 72 def isValid(self) : 73 """Returns 1 if data is PCLXL aka PCL6, else 0.""" 74 if ((self.firstblock[:128].find("\033%-12345X") != -1) and \ 75 (self.firstblock.find(" HP-PCL XL;") != -1) and \ 76 ((self.firstblock.find("LANGUAGE=PCLXL") != -1) or \ 77 (self.firstblock.find("LANGUAGE = PCLXL") != -1))) : 78 if self.debug : 79 sys.stderr.write("DEBUG: Input file is in the PCLXL (aka PCL6) format.\n") 80 return 1 81 else : 82 return 0 83 72 84 def beginPage(self) : 73 85 """Indicates the beginning of a new page, and extracts media information.""" … … 371 383 mustclose = 1 372 384 try : 373 parser = P CLXLParser(infile, debug=1)385 parser = Parser(infile, debug=1) 374 386 totalsize += parser.getJobSize() 375 387 except pdlparser.PDLParserError, msg : -
pkpgcounter/trunk/pdlanalyzer/pdf.py
r211 r220 27 27 from pdlanalyzer import pdlparser 28 28 29 class P DFParser(pdlparser.PDLParser) :29 class Parser(pdlparser.PDLParser) : 30 30 """A parser for PDF documents.""" 31 def isValid(self) : 32 """Returns 1 if data is PDF, else 0.""" 33 if self.firstblock.startswith("%PDF-") or \ 34 self.firstblock.startswith("\033%-12345X%PDF-") or \ 35 ((self.firstblock[:128].find("\033%-12345X") != -1) and (self.firstblock.upper().find("LANGUAGE=PDF") != -1)) or \ 36 (self.firstblock.find("%PDF-") != -1) : 37 if self.debug : 38 sys.stderr.write("DEBUG: Input file is in the PDF format.\n") 39 return 1 40 else : 41 return 0 42 31 43 def getJobSize(self) : 32 44 """Counts pages in a PDF document.""" … … 56 68 mustclose = 1 57 69 try : 58 parser = P DFParser(infile, debug=1)70 parser = Parser(infile, debug=1) 59 71 totalsize += parser.getJobSize() 60 72 except pdlparser.PDLParserError, msg : -
pkpgcounter/trunk/pdlanalyzer/pdlparser.py
r211 r220 21 21 22 22 import sys 23 import psyco 24 25 KILOBYTE = 1024 26 MEGABYTE = 1024 * KILOBYTE 27 FIRSTBLOCKSIZE = 16 * KILOBYTE 28 LASTBLOCKSIZE = int(KILOBYTE / 4) 23 29 24 30 class PDLParserError(Exception): … … 33 39 class PDLParser : 34 40 """Generic PDL parser.""" 35 def __init__(self, infile, debug=0 ) :41 def __init__(self, infile, debug=0, firstblock=None, lastblock=None) : 36 42 """Initialize the generic parser.""" 43 self.infile = infile 37 44 self.debug = debug 38 self.infile = infile 39 40 def getJobSize(self) : 41 """Counts pages in the document.""" 45 if firstblock is None : 46 self.infile.seek(0) 47 firstblock = self.infile.read(FIRSTBLOCKSIZE) 48 try : 49 self.infile.seek(-LASTBLOCKSIZE, 2) 50 lastblock = self.infile.read(LASTBLOCKSIZE) 51 except IOError : 52 lastblock = "" 53 self.infile.seek(0) 54 self.firstblock = firstblock 55 self.lastblock = lastblock 56 if not self.isValid() : 57 raise PDLParserError, "Invalid file format !" 58 try : 59 import psyco 60 except ImportError : 61 sys.stderr.write("WARN: you should install psyco if possible, this would greatly speedup parsing.\n") 62 pass # Psyco is not installed 63 else : 64 # Psyco is installed, tell it to compile 65 # the CPU intensive methods : PCL and PCLXL 66 # parsing will greatly benefit from this, 67 # for PostScript and PDF the difference is 68 # barely noticeable since they are already 69 # almost optimal, and much more speedy anyway. 70 psyco.bind(self.getJobSize) 71 72 def isValid(self) : 73 """Returns 1 if data is in the expected format, else 0.""" 42 74 raise RuntimeError, "Not implemented !" 75 76 def getJobSize(self) : 77 """Counts pages in a document.""" 78 raise RuntimeError, "Not implemented !" -
pkpgcounter/trunk/pdlanalyzer/postscript.py
r211 r220 27 27 from pdlanalyzer import pdlparser 28 28 29 class P ostScriptParser(pdlparser.PDLParser) :29 class Parser(pdlparser.PDLParser) : 30 30 """A parser for PostScript documents.""" 31 def isValid(self) : 32 """Returns 1 if data is PostScript, else 0.""" 33 if self.firstblock.startswith("%!") or \ 34 self.firstblock.startswith("\004%!") or \ 35 self.firstblock.startswith("\033%-12345X%!PS") or \ 36 ((self.firstblock[:128].find("\033%-12345X") != -1) and \ 37 ((self.firstblock.find("LANGUAGE=POSTSCRIPT") != -1) or \ 38 (self.firstblock.find("LANGUAGE = POSTSCRIPT") != -1) or \ 39 (self.firstblock.find("LANGUAGE = Postscript") != -1))) or \ 40 (self.firstblock.find("%!PS-Adobe") != -1) : 41 if self.debug : 42 sys.stderr.write("DEBUG: Input file is in the PostScript format.\n") 43 return 1 44 else : 45 return 0 46 31 47 def throughGhostScript(self) : 32 48 """Get the count through GhostScript, useful for non-DSC compliant PS files.""" … … 112 128 mustclose = 1 113 129 try : 114 parser = P ostScriptParser(infile, debug=1)130 parser = Parser(infile, debug=1) 115 131 totalsize += parser.getJobSize() 116 132 except pdlparser.PDLParserError, msg : -
pkpgcounter/trunk/pdlanalyzer/tiff.py
r219 r220 29 29 from pdlanalyzer import pdlparser 30 30 31 class TIFFParser(pdlparser.PDLParser) :31 class Parser(pdlparser.PDLParser) : 32 32 """A parser for TIFF documents.""" 33 def isValid(self) : 34 """Returns 1 if data is TIFF, else 0.""" 35 littleendian = (chr(0x49)*2) + chr(0x2a) + chr(0) 36 bigendian = (chr(0x4d)*2) + chr(0) + chr(0x2a) 37 if self.firstblock[:4] in (littleendian, bigendian) : 38 if self.debug : 39 sys.stderr.write("DEBUG: Input file is in the TIFF format.\n") 40 return 1 41 else : 42 return 0 43 33 44 def getJobSize(self) : 34 45 """Counts pages in a TIFF document. … … 77 88 mustclose = 1 78 89 try : 79 parser = TIFFParser(infile, debug=1)90 parser = Parser(infile, debug=1) 80 91 totalsize += parser.getJobSize() 81 92 except pdlparser.PDLParserError, msg : -
pkpgcounter/trunk/pdlanalyzer/version.py
r217 r220 20 20 # 21 21 22 __version__ = "1.52" 22 __version__ = "1.53" 23 23 24 24 __doc__ = """pkpgcounter : a generic Page Description Languages parser.""" -
pkpgcounter/trunk/tests/runtest.sh
r201 r220 3 3 echo -n "Generating testsuite..." 4 4 gunzip <master.ps.gz >master2.ps 5 for device in lj250 lj4dithp ljet2p ljet4pjl ljetplus laserjet ljet3 ljet4 lj5gray lj5mono pxlmono pxlcolor pdfwrite pswrite psgray psmono psrgb epson epsonc eps9mid eps9high stcolor st800 escp escpc pcl3 cdeskjet cdj1600 cdj500 cdj550 cdj670 cdj850 cdj880 cdj890 cdj970 cdjcolor cdjmono dj505j djet500 djet500c hpdj1120c hpdj310 hpdj320 hpdj340 hpdj400 hpdj500 hpdj500c hpdj510 hpdj520 hpdj540 hpdj550c hpdj560c hpdj600 hpdj660c hpdj670c hpdj680c hpdj690c hpdj850c hpdj855c hpdj870c hpdj890c hpdjplus hpdjportable ; do5 for device in lj250 lj4dithp ljet2p ljet4pjl ljetplus laserjet ljet3 ljet4 lj5gray lj5mono pxlmono pxlcolor pdfwrite pswrite psgray psmono psrgb epson epsonc eps9mid eps9high stcolor st800 escp escpc pcl3 cdeskjet cdj1600 cdj500 cdj550 cdj670 cdj850 cdj880 cdj890 cdj970 cdjcolor cdjmono dj505j djet500 djet500c hpdj1120c hpdj310 hpdj320 hpdj340 hpdj400 hpdj500 hpdj500c hpdj510 hpdj520 hpdj540 hpdj550c hpdj560c hpdj600 hpdj660c hpdj670c hpdj680c hpdj690c hpdj850c hpdj855c hpdj870c hpdj890c hpdjplus hpdjportable tiff12nc tiff24nc tiffcrle tiffg3 tiffg32d tiffg4 tifflzw tiffpack ; do 6 6 if ! [ -f "testsuite.$device" ] ; then 7 7 gs -dQUIET -dBATCH -dNOPAUSE -sOutputFile="testsuite.$device" -sDEVICE="$device" master2.ps ;