- Timestamp:
- 11/13/04 21:52:26 (20 years ago)
- Files:
-
- 1 modified
Legend:
- Unmodified
- Added
- Removed
-
pykota/trunk/pykota/pdlanalyzer.py
r1743 r1912 22 22 # 23 23 # $Log$ 24 # Revision 1.43 2004/11/13 20:52:26 jalet 25 # Heavy work done on PCL3/4/5 and PCLXL job analyzis 26 # 24 27 # Revision 1.42 2004/09/24 21:19:48 jalet 25 28 # Did a pass of PyChecker … … 179 182 180 183 class PostScriptAnalyzer : 184 """A class to parse PostScript documents.""" 181 185 def __init__(self, infile) : 182 186 """Initialize PostScript Analyzer.""" … … 244 248 245 249 class PDFAnalyzer : 250 """A class to parse PDF documents.""" 246 251 def __init__(self, infile) : 247 252 """Initialize PDF Analyzer.""" … … 257 262 258 263 class ESCP2Analyzer : 264 """A class to parse ESC/P2 documents.""" 259 265 def __init__(self, infile) : 260 266 """Initialize ESC/P2 Analyzer.""" … … 297 303 298 304 class PCLAnalyzer : 305 """A class to parse PCL3, PCL4, PCL5 documents.""" 306 mediasizes = { # ESC&l####A 307 0 : "Default", 308 1 : "Executive", 309 2 : "Letter", 310 3 : "Legal", 311 6 : "Ledger", 312 25 : "A5", 313 26 : "A4", 314 27 : "A3", 315 45 : "JB5", 316 46 : "JB4", 317 71 : "HagakiPostcard", 318 72 : "OufukuHagakiPostcard", 319 80 : "MonarchEnvelope", 320 81 : "COM10Envelope", 321 90 : "DLEnvelope", 322 91 : "C5Envelope", 323 100 : "B5Envelope", 324 101 : "Custom", 325 } 326 327 mediasources = { # ESC&l####H 328 0 : "Default", 329 1 : "Main", 330 2 : "Manual", 331 3 : "ManualEnvelope", 332 4 : "Alternate", 333 5 : "OptionalLarge", 334 6 : "EnvelopeFeeder", 335 7 : "Auto", 336 8 : "Tray1", 337 } 338 339 orientations = { # ESC&l####O 340 0 : "Portrait", 341 1 : "Landscape", 342 2 : "ReversePortrait", 343 3 : "ReverseLandscape", 344 } 345 346 mediatypes = { # ESC&l####M 347 0 : "Plain", 348 1 : "Bond", 349 2 : "Special", 350 3 : "Glossy", 351 4 : "Transparent", 352 } 353 354 299 355 def __init__(self, infile) : 300 356 """Initialize PCL Analyzer.""" 301 357 self.infile = infile 358 359 def setPageDict(self, pages, number, attribute, value) : 360 """Initializes a page dictionnary.""" 361 dict = pages.setdefault(number, { "copies" : 1, "mediasource" : "Main", "mediasize" : "Default", "mediatype" : "Plain", "orientation" : "Portrait"}) 362 dict[attribute] = value 302 363 303 364 def getJobSize(self) : … … 332 393 ")s" : "W", 333 394 "&p" : "X", 334 "&l" : "XH",395 # "&l" : "XHAOM", # treated specially 335 396 "&a" : "G", # TODO : 0 means next side, 1 front side, 2 back side 336 397 "*g" : "W", … … 339 400 } 340 401 pagecount = resets = ejects = backsides = startgfx = endgfx = 0 341 starb = ispcl3 = 0402 starb = ampl = ispcl3 = 0 342 403 tag = None 343 copies = {}404 pages = {} 344 405 pos = 0 345 406 try : … … 349 410 pagecount += 1 350 411 elif char == "\033" : 351 starb = 0412 starb = ampl = 0 352 413 # 353 414 # <ESC>*b###y#m###v###w... -> PCL3 raster graphics … … 369 430 # <ESC>&a2G -> Back side when duplex mode as generated by rastertohp 370 431 # <ESC>*g###W -> Needed for planes in PCL3 output 371 # <ESC>&l0H -> Eject if NumPlanes > 1, as generated by rastertohp 432 # <ESC>&l###H (or only 0 ?) -> Eject if NumPlanes > 1, as generated by rastertohp. Also defines mediasource 433 # <ESC>&l###A -> mediasize 434 # <ESC>&l###O -> orientation 435 # <ESC>&l###M -> mediatype 372 436 # 373 437 tagstart = minfile[pos] ; pos += 1 … … 380 444 starb = 1 381 445 tagend = "VW" 446 elif tag == "&l" : 447 ampl = 1 448 tagend = "XHAOM" 382 449 else : 383 450 try : … … 393 460 size = (size * 10) + int(char) 394 461 if char in tagend : 395 if (tag == "&l") and (char == "X") : # copies for current page 396 copies[pagecount] = size 397 elif (tag == "&l") and (char == "H") and (size == 0) : 398 ejects += 1 # Eject 399 elif (tag == "*r") : 462 if tag == "&l" : 463 if char == "X" : 464 self.setPageDict(pages, pagecount, "copies", size) 465 elif char == "H" : 466 self.setPageDict(pages, pagecount, "mediasource", self.mediasources.get(size, str(size))) 467 ejects += 1 468 elif char == "A" : 469 self.setPageDict(pages, pagecount, "mediasize", self.mediasizes.get(size, str(size))) 470 elif char == "O" : 471 self.setPageDict(pages, pagecount, "orientation", self.orientations.get(size, str(size))) 472 elif char == "M" : 473 self.setPageDict(pages, pagecount, "mediatype", self.mediatypes.get(size, str(size))) 474 elif tag == "*r" : 400 475 # Special tests for PCL3 401 476 if (char == "s") and size : … … 433 508 ispcl3 = 1 # certainely a PCL3 document 434 509 pos -= 1 # fix position : we were ahead 510 elif ampl : 511 # special handling of PCL3 in which 512 # &l introduces combined ESCape sequences 513 size = 0 514 while 1 : 515 char = minfile[pos] ; pos += 1 516 if not char.isdigit() : 517 break 518 size = (size * 10) + int(char) 519 if char in ("a", "o", "h", "m") : 520 ispcl3 = 1 # certainely a PCL3 document 521 pos -= 1 # fix position : we were ahead 522 if char == "h" : 523 self.setPageDict(pages, pagecount, "mediasource", self.mediasources.get(size, str(size))) 524 elif char == "a" : 525 self.setPageDict(pages, pagecount, "mediasize", self.mediasizes.get(size, str(size))) 526 elif char == "o" : 527 self.setPageDict(pages, pagecount, "orientation", self.orientations.get(size, str(size))) 528 elif char == "m" : 529 self.setPageDict(pages, pagecount, "mediatype", self.mediatypes.get(size, str(size))) 435 530 except IndexError : # EOF ? 436 531 minfile.close() # reached EOF … … 456 551 for pnum in range(pagecount) : 457 552 # if no number of copies defined, take the preceding one else the one set before any page else 1. 458 nb = copies.get(pnum, copies.get(pnum-1, copies.get(0, 1)))459 pagecount += ( nb- 1)553 page = pages.get(pnum, pages.get(pnum - 1, pages.get(0, { "copies" : 1 }))) 554 pagecount += (page["copies"] - 1) 460 555 461 556 # in PCL3 files, there's one Start Gfx tag per page … … 467 562 elif endgfx : 468 563 pagecount = endgfx 564 565 #for pnum in range(pagecount) : 566 # # if no number of copies defined, take the preceding one else the one set before any page else 1. 567 # page = pages.get(pnum, pages.get(pnum - 1, pages.get(0, { "copies" : 1, "mediasource" : "Main", "mediasize" : "Default", "mediatype" : "Plain", "orientation" : "Portrait"}))) 568 # print "%s*%s*%s*%s*%s" % (page["copies"], page["mediatype"], page["mediasize"], page["orientation"], page["mediasource"]) 469 569 470 570 return pagecount 471 571 472 572 class PCLXLAnalyzer : 573 """A class to parse PCL6 (aka XL) documents.""" 574 mediasizes = { 575 0 : "Letter", 576 1 : "Legal", 577 2 : "A4", 578 3 : "Executive", 579 4 : "Ledger", 580 5 : "A3", 581 6 : "COM10Envelope", 582 7 : "MonarchEnvelope", 583 8 : "C5Envelope", 584 9 : "DLEnvelope", 585 10 : "JB4", 586 11 : "JB5", 587 12 : "B5Envelope", 588 14 : "JPostcard", 589 15 : "JDoublePostcard", 590 16 : "A5", 591 17 : "A6", 592 18 : "JB6", 593 } 594 595 mediasources = { 596 0 : "Default", 597 1 : "Auto", 598 2 : "Manual", 599 3 : "MultiPurpose", 600 4 : "UpperCassette", 601 5 : "LowerCassette", 602 6 : "EnvelopeTray", 603 7 : "ThirdCassette", 604 } 605 606 orientations = { 607 0 : "Portrait", 608 1 : "Landscape", 609 2 : "ReversePortrait", 610 3 : "ReverseLandscape", 611 } 612 473 613 def __init__(self, infile) : 474 614 """Initialize PCLXL Analyzer.""" … … 487 627 elif endian == 0x28 : 488 628 self.bigEndian() 489 # elif endian == 0x27 : TODO : What can we do here ?629 # elif endian == 0x27 : # TODO : This is the ESC code : parse it for PJL statements ! 490 630 # 491 631 else : … … 540 680 541 681 def beginPage(self) : 542 """Indicates the beginning of a new page ."""682 """Indicates the beginning of a new page, and extracts media information.""" 543 683 self.pagecount += 1 684 685 # Default values 686 mediatypelabel = "Plain" 687 mediasourcelabel = "Main" 688 mediasizelabel = "Default" 689 orientationlabel = "Portrait" 690 691 # Now go upstream to decode media type, size, source, and orientation 692 # this saves time because we don't need a complete parser ! 693 minfile = self.minfile 694 pos = self.pos - 2 695 while pos > 0 : # safety check : don't go back to far ! 696 val = ord(minfile[pos]) 697 if val in (0x44, 0x48, 0x41) : # if previous endPage or openDataSource or beginSession (first page) 698 break 699 if val == 0x26 : 700 mediasource = ord(minfile[pos - 2]) 701 mediasourcelabel = self.mediasources.get(mediasource, str(mediasource)) 702 pos = pos - 4 703 elif val == 0x25 : 704 mediasize = ord(minfile[pos - 2]) 705 mediasizelabel = self.mediasizes.get(mediasize, str(mediasize)) 706 pos = pos - 4 707 elif val == 0x28 : 708 orientation = ord(minfile[pos - 2]) 709 orienationlabel = self.orientations.get(orientation, str(orientation)) 710 pos = pos - 4 711 elif val == 0x27 : 712 savepos = pos 713 pos = pos - 1 714 while pos > 0 : # safety check : don't go back to far ! 715 val = ord(minfile[pos]) 716 pos -= 1 717 if val == 0xc8 : 718 break 719 mediatypelabel = minfile[pos:savepos] # TODO : INCORRECT, WE HAVE TO STRIP OUT THE UBYTE ARRAY'S LENGTH !!! 720 # else : TODO : CUSTOM MEDIA SIZE AND UNIT ! 721 else : 722 pos = pos - 2 # ignored 723 self.pages[self.pagecount] = { "copies" : 1, 724 "orientation" : orientationlabel, 725 "mediatype" : mediatypelabel, 726 "mediasize" : mediasizelabel, 727 "mediasource" : mediasourcelabel, 728 } 544 729 return 0 545 730 … … 549 734 minfile = self.minfile 550 735 if (ord(minfile[pos-3]) == 0xf8) and (ord(minfile[pos-2]) == 0x31) : 551 # The EndPage operator ispreceded by a PageCopies attribute736 # The EndPage operator may be preceded by a PageCopies attribute 552 737 # So set number of copies for current page. 553 738 # From what I read in PCLXL documentation, the number 554 739 # of copies is an unsigned 16 bits integer 555 self. copies[self.pagecount] = unpack(self.endianness + "H", minfile[pos-5:pos-3])[0]740 self.pages[self.pagecount]["copies"] = unpack(self.endianness + "H", minfile[pos-5:pos-3])[0] 556 741 return 0 557 742 … … 655 840 """ 656 841 infileno = self.infile.fileno() 657 self. copies = {}842 self.pages = {} 658 843 self.minfile = minfile = mmap.mmap(infileno, os.fstat(infileno)[6], prot=mmap.PROT_READ, flags=mmap.MAP_SHARED) 659 844 tags = self.tags … … 682 867 # but the formula below is still correct : we want 683 868 # to decrease the total number of pages in this case. 684 self.pagecount += (self.copies.get(pnum, 1) - 1) 869 page = self.pages.get(pnum, 1) 870 copies = page["copies"] 871 self.pagecount += (copies - 1) 872 # print "%s*%s*%s*%s*%s" % (copies, page["mediatype"], page["mediasize"], page["orientation"], page["mediasource"]) 685 873 686 874 return self.pagecount