Changeset 200
- Timestamp:
- 04/04/05 14:18:36 (19 years ago)
- Location:
- pkpgcounter/trunk
- Files:
-
- 1 added
- 7 modified
Legend:
- Unmodified
- Added
- Removed
-
pkpgcounter/trunk/pdlanalyzer/escp2.py
r196 r200 24 24 import sys 25 25 26 from pdlanalyzer .pdlparser import PDLParser, PDLParserError26 from pdlanalyzer import pdlparser 27 27 28 class ESCP2Parser( PDLParser) :28 class ESCP2Parser(pdlparser.PDLParser) : 29 29 """A parser for ESC/P2 documents.""" 30 30 def getJobSize(self) : … … 78 78 parser = ESCP2Parser(infile, debug=1) 79 79 totalsize += parser.getJobSize() 80 except PDLParserError, msg :80 except pdlparser.PDLParserError, msg : 81 81 sys.stderr.write("ERROR: %s\n" % msg) 82 82 sys.stderr.flush() -
pkpgcounter/trunk/pdlanalyzer/pcl345.py
r196 r200 25 25 import mmap 26 26 27 from pdlanalyzer .pdlparser import PDLParser, PDLParserError27 from pdlanalyzer import pdlparser 28 28 29 class PCL Parser(PDLParser) :29 class PCL345Parser(pdlparser.PDLParser) : 30 30 """A parser for PCL3, PCL4, PCL5 documents.""" 31 31 mediasizes = { # ESC&l####A … … 345 345 parser = PCL345Parser(infile, debug=1) 346 346 totalsize += parser.getJobSize() 347 except PDLParserError, msg :347 except pdlparser.PDLParserError, msg : 348 348 sys.stderr.write("ERROR: %s\n" % msg) 349 349 sys.stderr.flush() -
pkpgcounter/trunk/pdlanalyzer/pclxl.py
r196 r200 26 26 from struct import unpack 27 27 28 from pdlanalyzer .pdlparser import PDLParser, PDLParserError28 from pdlanalyzer import pdlparser 29 29 30 class PCLXLParser( PDLParser) :30 class PCLXLParser(pdlparser.PDLParser) : 31 31 """A parser for PCLXL (aka PCL6) documents.""" 32 32 mediasizes = { … … 150 150 return unpack(self.endianness + "I", self.minfile[pos:posl])[0] 151 151 else : 152 raise PDLParserError, "Error on array size at %s" % self.pos152 raise pdlparser.PDLParserError, "Error on array size at %s" % self.pos 153 153 154 154 def array_16(self) : … … 171 171 return 2 * unpack(self.endianness + "I", self.minfile[pos:posl])[0] 172 172 else : 173 raise PDLParserError, "Error on array size at %s" % self.pos173 raise pdlparser.PDLParserError, "Error on array size at %s" % self.pos 174 174 175 175 def array_32(self) : … … 192 192 return 4 * unpack(self.endianness + "I", self.minfile[pos:posl])[0] 193 193 else : 194 raise PDLParserError, "Error on array size at %s" % self.pos194 raise pdlparser.PDLParserError, "Error on array size at %s" % self.pos 195 195 196 196 def embeddedDataSmall(self) : … … 245 245 # 246 246 else : 247 raise PDLParserError, "Unknown endianness marker 0x%02x at start !" % endian247 raise pdlparser.PDLParserError, "Unknown endianness marker 0x%02x at start !" % endian 248 248 if not found : 249 raise PDLParserError, "This file doesn't seem to be PCLXL (aka PCL6)"249 raise pdlparser.PDLParserError, "This file doesn't seem to be PCLXL (aka PCL6)" 250 250 251 251 # Initialize table of tags … … 345 345 parser = PCLXLParser(infile, debug=1) 346 346 totalsize += parser.getJobSize() 347 except PDLParserError, msg :347 except pdlparser.PDLParserError, msg : 348 348 sys.stderr.write("ERROR: %s\n" % msg) 349 349 sys.stderr.flush() -
pkpgcounter/trunk/pdlanalyzer/pdf.py
r196 r200 25 25 import re 26 26 27 from pdlanalyzer .pdlparser import PDLParser, PDLParserError27 from pdlanalyzer import pdlparser 28 28 29 class PDFParser( PDLParser) :29 class PDFParser(pdlparser.PDLParser) : 30 30 """A parser for PDF documents.""" 31 31 def getJobSize(self) : … … 52 52 parser = PDFParser(infile, debug=1) 53 53 totalsize += parser.getJobSize() 54 except PDLParserError, msg :54 except pdlparser.PDLParserError, msg : 55 55 sys.stderr.write("ERROR: %s\n" % msg) 56 56 sys.stderr.flush() -
pkpgcounter/trunk/pdlanalyzer/pdlparser.py
r199 r200 21 21 22 22 import sys 23 import tempfile24 25 from pdlanalyzer.postscript import PostScriptParser26 from pdlanalyzer.pdf import PDFParser27 from pdlanalyzer.pcl345 import PCL345Parser28 from pdlanalyzer.pclxl import PCLXLParser29 from pdlanalyzer.escp2 import ESCP2Parser30 31 KILOBYTE = 102432 MEGABYTE = 1024 * KILOBYTE33 LASTBLOCKSIZE = int(KILOBYTE / 4)34 23 35 24 class PDLParserError(Exception): … … 52 41 """Counts pages in the document.""" 53 42 raise RuntimeError, "Not implemented !" 54 55 class PDLAnalyzer :56 """Class for PDL autodetection."""57 def __init__(self, filename, debug=0) :58 """Initializes the PDL analyzer.59 60 filename is the name of the file or '-' for stdin.61 filename can also be a file-like object which62 supports read() and seek().63 """64 self.debug = debug65 self.filename = filename66 try :67 import psyco68 except ImportError :69 sys.stderr.write("pkpgcounter : you should install psyco if possible, this would greatly speedup parsing.\n")70 pass # Psyco is not installed71 else :72 # Psyco is installed, tell it to compile73 # the CPU intensive methods : PCL and PCLXL74 # parsing will greatly benefit from this,75 # for PostScript and PDF the difference is76 # barely noticeable since they are already77 # almost optimal, and much more speedy anyway.78 psyco.bind(PostScriptParser.getJobSize)79 psyco.bind(PDFParser.getJobSize)80 psyco.bind(ESCP2Parser.getJobSize)81 psyco.bind(PCLParser.getJobSize)82 psyco.bind(PCLXLParser.getJobSize)83 84 def getJobSize(self) :85 """Returns the job's size."""86 self.openFile()87 try :88 pdlhandler = self.detectPDLHandler()89 except PDLParserError, msg :90 self.closeFile()91 raise PDLParserError, "ERROR : Unknown file format for %s (%s)" % (self.filename, msg)92 else :93 try :94 size = pdlhandler(self.infile, self.debug).getJobSize()95 finally :96 self.closeFile()97 return size98 99 def openFile(self) :100 """Opens the job's data stream for reading."""101 self.mustclose = 0 # by default we don't want to close the file when finished102 if hasattr(self.filename, "read") and hasattr(self.filename, "seek") :103 # filename is in fact a file-like object104 infile = self.filename105 elif self.filename == "-" :106 # we must read from stdin107 infile = sys.stdin108 else :109 # normal file110 self.infile = open(self.filename, "rb")111 self.mustclose = 1112 return113 114 # Use a temporary file, always seekable contrary to standard input.115 self.infile = tempfile.TemporaryFile(mode="w+b")116 while 1 :117 data = infile.read(MEGABYTE)118 if not data :119 break120 self.infile.write(data)121 self.infile.flush()122 self.infile.seek(0)123 124 def closeFile(self) :125 """Closes the job's data stream if we can close it."""126 if self.mustclose :127 self.infile.close()128 else :129 # if we don't have to close the file, then130 # ensure the file pointer is reset to the131 # start of the file in case the process wants132 # to read the file again.133 try :134 self.infile.seek(0)135 except :136 pass # probably stdin, which is not seekable137 138 def isPostScript(self, sdata, edata) :139 """Returns 1 if data is PostScript, else 0."""140 if sdata.startswith("%!") or \141 sdata.startswith("\004%!") or \142 sdata.startswith("\033%-12345X%!PS") or \143 ((sdata[:128].find("\033%-12345X") != -1) and \144 ((sdata.find("LANGUAGE=POSTSCRIPT") != -1) or \145 (sdata.find("LANGUAGE = POSTSCRIPT") != -1) or \146 (sdata.find("LANGUAGE = Postscript") != -1))) or \147 (sdata.find("%!PS-Adobe") != -1) :148 if self.debug :149 sys.stderr.write("%s is a PostScript file\n" % str(self.filename))150 return 1151 else :152 return 0153 154 def isPDF(self, sdata, edata) :155 """Returns 1 if data is PDF, else 0."""156 if sdata.startswith("%PDF-") or \157 sdata.startswith("\033%-12345X%PDF-") or \158 ((sdata[:128].find("\033%-12345X") != -1) and (sdata.upper().find("LANGUAGE=PDF") != -1)) or \159 (sdata.find("%PDF-") != -1) :160 if self.debug :161 sys.stderr.write("%s is a PDF file\n" % str(self.filename))162 return 1163 else :164 return 0165 166 def isPCL(self, sdata, edata) :167 """Returns 1 if data is PCL, else 0."""168 if sdata.startswith("\033E\033") or \169 (sdata.startswith("\033*rbC") and (not edata[-3:] == "\f\033@")) or \170 sdata.startswith("\033%8\033") or \171 (sdata.find("\033%-12345X") != -1) :172 if self.debug :173 sys.stderr.write("%s is a PCL3/4/5 file\n" % str(self.filename))174 return 1175 else :176 return 0177 178 def isPCLXL(self, sdata, edata) :179 """Returns 1 if data is PCLXL aka PCL6, else 0."""180 if ((sdata[:128].find("\033%-12345X") != -1) and \181 (sdata.find(" HP-PCL XL;") != -1) and \182 ((sdata.find("LANGUAGE=PCLXL") != -1) or \183 (sdata.find("LANGUAGE = PCLXL") != -1))) :184 if self.debug :185 sys.stderr.write("%s is a PCLXL (aka PCL6) file\n" % str(self.filename))186 return 1187 else :188 return 0189 190 def isESCP2(self, sdata, edata) :191 """Returns 1 if data is ESC/P2, else 0."""192 if sdata.startswith("\033@") or \193 sdata.startswith("\033*") or \194 sdata.startswith("\n\033@") or \195 sdata.startswith("\0\0\0\033\1@EJL") : # ESC/P Raster ??? Seen on Stylus Photo 1284196 if self.debug :197 sys.stderr.write("%s is an ESC/P2 file\n" % str(self.filename))198 return 1199 else :200 return 0201 202 def detectPDLHandler(self) :203 """Tries to autodetect the document format.204 205 Returns the correct PDL handler class or None if format is unknown206 """207 # Try to detect file type by reading first block of datas208 self.infile.seek(0)209 firstblock = self.infile.read(4 * KILOBYTE)210 try :211 self.infile.seek(-LASTBLOCKSIZE, 2)212 lastblock = self.infile.read(LASTBLOCKSIZE)213 except IOError :214 lastblock = ""215 self.infile.seek(0)216 if self.isPostScript(firstblock, lastblock) :217 return PostScriptParser218 elif self.isPCLXL(firstblock, lastblock) :219 return PCLXLParser220 elif self.isPDF(firstblock, lastblock) :221 return PDFParser222 elif self.isPCL(firstblock, lastblock) :223 return PCLParser224 elif self.isESCP2(firstblock, lastblock) :225 return ESCP2Parser226 else :227 raise PDLParserError, "Analysis of first data block failed."228 229 def main() :230 """Entry point for PDL Analyzer."""231 if (len(sys.argv) < 2) or ((not sys.stdin.isatty()) and ("-" not in sys.argv[1:])) :232 sys.argv.append("-")233 234 if ("-h" in sys.argv[1:]) or ("--help" in sys.argv[1:]) :235 print "usage : pkpgcounter file1 file2 ... fileN"236 elif ("-v" in sys.argv[1:]) or ("--version" in sys.argv[1:]) :237 print "%s" % version.__version__238 else :239 totalsize = 0240 debug = 0241 minindex = 1242 if sys.argv[1] == "--debug" :243 minindex = 2244 debug = 1245 for arg in sys.argv[minindex:] :246 try :247 parser = PDLAnalyzer(arg, debug)248 totalsize += parser.getJobSize()249 except PDLParserError, msg :250 sys.stderr.write("ERROR: %s\n" % msg)251 sys.stderr.flush()252 print "%s" % totalsize253 254 if __name__ == "__main__" :255 main() -
pkpgcounter/trunk/pdlanalyzer/postscript.py
r196 r200 25 25 import popen2 26 26 27 from pdlanalyzer .pdlparser import PDLParser, PDLParserError27 from pdlanalyzer import pdlparser 28 28 29 class PostScriptParser( PDLParser) :29 class PostScriptParser(pdlparser.PDLParser) : 30 30 """A parser for PostScript documents.""" 31 31 def throughGhostScript(self) : … … 44 44 child.tochild.close() 45 45 except (IOError, OSError), msg : 46 raise PDLParserError, "Problem during analysis of Binary PostScript document : %s" % msg46 raise pdlparser.PDLParserError, "Problem during analysis of Binary PostScript document : %s" % msg 47 47 48 48 pagecount = 0 … … 50 50 pagecount = int(child.fromchild.readline().strip()) 51 51 except (IOError, OSError, AttributeError, ValueError), msg : 52 raise PDLParserError, "Problem during analysis of Binary PostScript document : %s" % msg52 raise pdlparser.PDLParserError, "Problem during analysis of Binary PostScript document : %s" % msg 53 53 child.fromchild.close() 54 54 … … 56 56 child.wait() 57 57 except OSError, msg : 58 raise PDLParserError, "Problem during analysis of Binary PostScript document : %s" % msg58 raise pdlparser.PDLParserError, "Problem during analysis of Binary PostScript document : %s" % msg 59 59 return pagecount * self.copies 60 60 … … 113 113 parser = PostScriptParser(infile, debug=1) 114 114 totalsize += parser.getJobSize() 115 except PDLParserError, msg :115 except pdlparser.PDLParserError, msg : 116 116 sys.stderr.write("ERROR: %s\n" % msg) 117 117 sys.stderr.flush() -
pkpgcounter/trunk/tests/runtest.sh
r197 r200 11 11 12 12 echo -n "File master.ps should be 16 pages long, result is : " 13 python ../pdlanalyzer/pdl analyzer.py master2.ps13 python ../pdlanalyzer/pdlparser.py master2.ps 14 14 15 15 echo "Analyzing testsuite..." 16 16 for file in testsuite.* ; do 17 echo -n "$file ===> " && python ../pdlanalyzer/pdl analyzer.py "$file" ;17 echo -n "$file ===> " && python ../pdlanalyzer/pdlparser.py "$file" ; 18 18 done