Changeset 1701 for pykota/trunk/pykota/pdlanalyzer.py
- Timestamp:
- 09/04/04 16:01:47 (20 years ago)
- Files:
-
- 1 modified
Legend:
- Unmodified
- Added
- Removed
-
pykota/trunk/pykota/pdlanalyzer.py
r1700 r1701 22 22 # 23 23 # $Log$ 24 # Revision 1.40 2004/09/04 14:01:47 jalet 25 # Support for PCL3 (HP Deskjets) added to generic PDL parser 26 # 24 27 # Revision 1.39 2004/09/02 23:30:05 jalet 25 28 # Comments … … 158 161 KILOBYTE = 1024 159 162 MEGABYTE = 1024 * KILOBYTE 163 LASTBLOCKSIZE = int(KILOBYTE / 4) 160 164 161 165 class PDLAnalyzerError(Exception): … … 325 329 "&a" : "G", # TODO : 0 means next side, 1 front side, 2 back side 326 330 "*g" : "W", 331 "*r" : "sbABC", 327 332 # "*b" : "VW", # treated specially because it occurs very often 328 333 } 329 pagecount = resets = ejects = backsides = 0 334 pagecount = resets = ejects = backsides = startgfx = endgfx = strangegfx = 0 335 starb = ispcl3 = 0 330 336 tag = None 331 337 copies = {} … … 337 343 pagecount += 1 338 344 elif char == "\033" : 345 starb = 0 339 346 # 347 # <ESC>*b###y#m###v###w... -> PCL3 raster graphics 340 348 # <ESC>*b###W -> Start of a raster data row/block 341 349 # <ESC>*b###V -> Start of a raster data plane … … 345 353 # <ESC>*m###W -> Start of a download dither matrix block 346 354 # <ESC>*v###W -> Start of a configure image data block 355 # <ESC>*r1A -> Start Gfx 347 356 # <ESC>(s###W -> Start of a characters description block 348 357 # <ESC>)s###W -> Start of a fonts description block … … 363 372 tag = tagstart + minfile[pos] ; pos += 1 364 373 if tag == "*b" : 374 starb = 1 365 375 tagend = "VW" 366 376 else : … … 381 391 elif (tag == "&l") and (char == "H") and (size == 0) : 382 392 ejects += 1 # Eject 393 elif (tag == "*r") : 394 # Special tests for PCL3 395 if (char == "s") and size : 396 while 1 : 397 char = minfile[pos] ; pos += 1 398 if char == "A" : 399 break 400 elif (char == "b") and (minfile[pos] == "C") and not size : 401 ispcl3 = 1 # Certainely a PCL3 file 402 startgfx += (char == "A") and (minfile[pos - 2] in ("0", "1", "2", "3")) # Start Gfx 403 endgfx += (not size) and (char in ("C", "B")) # End Gfx 383 404 elif (tag == "&a") and (size == 2) : 384 405 backsides += 1 # Back side in duplex mode … … 390 411 size += 1 391 412 pos += size 413 else : 414 if starb : 415 # special handling of PCL3 in which 416 # *b introduces combined ESCape sequences 417 size = 0 418 while 1 : 419 char = minfile[pos] ; pos += 1 420 if not char.isdigit() : 421 break 422 size = (size * 10) + int(char) 423 if char in ("w", "v") : 424 ispcl3 = 1 # certainely a PCL3 document 425 pos += size - 1 426 elif char in ("y", "m") : 427 ispcl3 = 1 # certainely a PCL3 document 428 pos -= 1 # fix position : we were ahead 392 429 except IndexError : # EOF ? 393 430 minfile.close() # reached EOF … … 415 452 nb = copies.get(pnum, copies.get(pnum-1, copies.get(0, 1))) 416 453 pagecount += (nb - 1) 417 return pagecount 418 419 class PCL3GUIAnalyzer : 420 def __init__(self, infile) : 421 """Initialize PCL3GUI Analyzer.""" 422 self.infile = infile 423 424 def getJobSize(self) : 425 """Count pages in a PCL3GUI document. 426 427 Not much documentation available, so we will count occurences 428 of <ESC>*r1A which is start of graphical data. 429 430 This is FAR from being accurate. PCL3 ressembles PCL5 in fact, 431 and PCL parser should be made better, but some documentation 432 definitely lacks. 433 """ 434 data = self.infile.read() 435 pagecount = data.count("\033*r1A") # TODO : Allowed values 0, 1, 2, 3 after *r 454 455 # in PCL3 files, there's one Start Gfx tag per page 456 if ispcl3 : 457 if endgfx == int(startgfx / 2) : # special case for cdj1600 458 pagecount = endgfx 459 elif startgfx : 460 pagecount = startgfx 461 elif endgfx : 462 pagecount = endgfx 463 436 464 return pagecount 437 465 … … 733 761 pass # probably stdin, which is not seekable 734 762 735 def isPostScript(self, data) :763 def isPostScript(self, sdata, edata) : 736 764 """Returns 1 if data is PostScript, else 0.""" 737 if data.startswith("%!") or \738 data.startswith("\004%!") or \739 data.startswith("\033%-12345X%!PS") or \740 (( data[:128].find("\033%-12345X") != -1) and \741 (( data.find("LANGUAGE=POSTSCRIPT") != -1) or \742 ( data.find("LANGUAGE = POSTSCRIPT") != -1) or \743 ( data.find("LANGUAGE = Postscript") != -1))) or \744 ( data.find("%!PS-Adobe") != -1) :765 if sdata.startswith("%!") or \ 766 sdata.startswith("\004%!") or \ 767 sdata.startswith("\033%-12345X%!PS") or \ 768 ((sdata[:128].find("\033%-12345X") != -1) and \ 769 ((sdata.find("LANGUAGE=POSTSCRIPT") != -1) or \ 770 (sdata.find("LANGUAGE = POSTSCRIPT") != -1) or \ 771 (sdata.find("LANGUAGE = Postscript") != -1))) or \ 772 (sdata.find("%!PS-Adobe") != -1) : 745 773 return 1 746 774 else : 747 775 return 0 748 776 749 def isPDF(self, data) :777 def isPDF(self, sdata, edata) : 750 778 """Returns 1 if data is PDF, else 0.""" 751 if data.startswith("%PDF-") or \752 data.startswith("\033%-12345X%PDF-") or \753 (( data[:128].find("\033%-12345X") != -1) and (data.upper().find("LANGUAGE=PDF") != -1)) or \754 ( data.find("%PDF-") != -1) :779 if sdata.startswith("%PDF-") or \ 780 sdata.startswith("\033%-12345X%PDF-") or \ 781 ((sdata[:128].find("\033%-12345X") != -1) and (sdata.upper().find("LANGUAGE=PDF") != -1)) or \ 782 (sdata.find("%PDF-") != -1) : 755 783 return 1 756 784 else : 757 785 return 0 758 786 759 def isPCL(self, data) :787 def isPCL(self, sdata, edata) : 760 788 """Returns 1 if data is PCL, else 0.""" 761 if data.startswith("\033E\033") or \ 762 (data[:128].find("\033%-12345X") != -1) : 789 if sdata.startswith("\033E\033") or \ 790 (sdata.startswith("\033*rbC") and (not edata[-3:] == "\f\033@")) or \ 791 (sdata.find("\033%-12345X") != -1) : 763 792 return 1 764 793 else : 765 794 return 0 766 795 767 def isPCL3GUI(self, data) : 768 """Returns 1 if data is PCL3GUI, else 0.""" 769 if data.find("@PJL ENTER LANGUAGE=PCL3GUI") != -1 : 796 def isPCLXL(self, sdata, edata) : 797 """Returns 1 if data is PCLXL aka PCL6, else 0.""" 798 if ((sdata[:128].find("\033%-12345X") != -1) and \ 799 (sdata.find(" HP-PCL XL;") != -1) and \ 800 ((sdata.find("LANGUAGE=PCLXL") != -1) or \ 801 (sdata.find("LANGUAGE = PCLXL") != -1))) : 770 802 return 1 771 803 else : 772 804 return 0 773 774 def isPCLXL(self, data) : 775 """Returns 1 if data is PCLXL aka PCL6, else 0.""" 776 if ((data[:128].find("\033%-12345X") != -1) and \ 777 (data.find(" HP-PCL XL;") != -1) and \ 778 ((data.find("LANGUAGE=PCLXL") != -1) or \ 779 (data.find("LANGUAGE = PCLXL") != -1))) : 780 return 1 781 else : 782 return 0 783 784 def isESCP2(self, data) : 805 806 def isESCP2(self, sdata, edata) : 785 807 """Returns 1 if data is ESC/P2, else 0.""" 786 if data.startswith("\033@") or \787 data.startswith("\033*") or \788 data.startswith("\n\033@") :808 if sdata.startswith("\033@") or \ 809 sdata.startswith("\033*") or \ 810 sdata.startswith("\n\033@") : 789 811 return 1 790 812 else : … … 799 821 self.infile.seek(0) 800 822 firstblock = self.infile.read(4 * KILOBYTE) 823 try : 824 self.infile.seek(-LASTBLOCKSIZE, 2) 825 except IOError : 826 lastblock = "" 827 else : 828 lastblock = self.infile.read(LASTBLOCKSIZE) 801 829 self.infile.seek(0) 802 if self.isPostScript(firstblock ) :830 if self.isPostScript(firstblock, lastblock) : 803 831 return PostScriptAnalyzer 804 elif self.isPCLXL(firstblock ) :832 elif self.isPCLXL(firstblock, lastblock) : 805 833 return PCLXLAnalyzer 806 elif self.isPDF(firstblock ) :834 elif self.isPDF(firstblock, lastblock) : 807 835 return PDFAnalyzer 808 elif self.isPCL3GUI(firstblock) : 809 return PCL3GUIAnalyzer 810 elif self.isPCL(firstblock) : 836 elif self.isPCL(firstblock, lastblock) : 811 837 return PCLAnalyzer 812 elif self.isESCP2(firstblock ) :838 elif self.isESCP2(firstblock, lastblock) : 813 839 return ESCP2Analyzer 814 840 else :