class PDLAnalyzerError(Exception):
    """Error raised when a document can't be analyzed by a PDL analyzer.

    The text given at construction time is kept in the ``message``
    attribute and is returned verbatim by both str() and repr().
    """

    def __init__(self, message=""):
        # Keep our own copy of the text, then let the base class
        # record it in ``args`` as usual.
        self.message = message
        Exception.__init__(self, message)

    def __str__(self):
        return self.message

    # repr() deliberately shows the bare message too.
    __repr__ = __str__
47 | | |
class PostScriptAnalyzer:
    """A class to parse PostScript documents.

    Pages are first counted natively from the DSC comments; when that
    yields nothing the document is piped through GhostScript instead.
    """

    # Message used for every failure of the GhostScript fallback.
    gserror = "Problem during analysis of Binary PostScript document : %s"

    def __init__(self, infile, debug=0):
        """Initialize PostScript Analyzer.

        infile is a seekable file object containing the document,
        debug is a flag enabling diagnostics on stderr.
        """
        self.debug = debug
        self.infile = infile
        self.copies = 1

    def throughGhostScript(self):
        """Get the count through GhostScript, useful for non-DSC compliant PS files.

        The whole document is fed to a 'gs | grep -c' pipeline whose only
        output line is the number of bounding boxes, i.e. of pages.

        Returns the number of pages multiplied by the number of copies.
        Raises PDLAnalyzerError if the pipeline can't be fed, read or reaped.
        """
        if self.debug:
            sys.stderr.write("Internal parser sucks, using GhostScript instead...\n")
        self.infile.seek(0)
        command = 'gs -sDEVICE=bbox -dNOPAUSE -dBATCH -dQUIET - 2>&1 | grep -c "%%HiResBoundingBox:" 2>/dev/null'
        child = popen2.Popen4(command)
        try:
            try:
                data = self.infile.read(MEGABYTE)
                while data:
                    child.tochild.write(data)
                    data = self.infile.read(MEGABYTE)
                child.tochild.flush()
                # closing the child's stdin is what lets gs terminate
                child.tochild.close()
            except (IOError, OSError) as msg:
                raise PDLAnalyzerError(self.gserror % msg)

            try:
                pagecount = int(child.fromchild.readline().strip())
            except (IOError, OSError, AttributeError, ValueError) as msg:
                raise PDLAnalyzerError(self.gserror % msg)
        finally:
            # Whatever happened above, don't leak the pipes to the child.
            # Closing an already closed pipe is harmless.
            for pipe in (child.tochild, child.fromchild):
                try:
                    pipe.close()
                except (IOError, OSError):
                    pass

        try:
            child.wait()
        except OSError as msg:
            raise PDLAnalyzerError(self.gserror % msg)
        return pagecount * self.copies

    def copiesFromLine(self, line):
        """Extract the number of copies announced by a line of PostScript.

        Returns the number as an integer, or None if the line doesn't
        set the number of copies or if the number can't be parsed.
        """
        try:
            if line.startswith("%%Requirements: numcopies("):
                return int(line.strip().split('(')[1].split(')')[0])
            if line.startswith("%%BeginNonPPDFeature: NumCopies "):
                # handle # of copies set by some Windows printer driver
                return int(line.strip().split()[2])
            if line.startswith("1 dict dup /NumCopies "):
                # handle # of copies set by mozilla/kprinter
                return int(line.strip().split()[4])
        except (ValueError, IndexError):
            # malformed number : ignore the line, as before, but don't
            # swallow unrelated exceptions anymore.
            pass
        return None

    def natively(self):
        """Count pages in a DSC compliant PostScript document.

        Each '%%Page: ' comment counts for one page. The highest number
        of copies announced anywhere in the document is remembered in
        self.copies.

        Returns the number of pages multiplied by the number of copies,
        which is 0 if no '%%Page: ' comment was found.
        """
        self.infile.seek(0)
        pagecount = 0
        # iterate on the file itself : xreadlines() is deprecated
        for line in self.infile:
            if line.startswith("%%Page: "):
                pagecount += 1
                continue
            number = self.copiesFromLine(line)
            if (number is not None) and (number > self.copies):
                self.copies = number
        return pagecount * self.copies

    def getJobSize(self):
        """Count pages in PostScript document.

        Falls back to GhostScript when the native parser finds no page.
        """
        return self.natively() or self.throughGhostScript()
124 | | |
class PDFAnalyzer:
    """A class to parse PDF documents."""

    def __init__(self, infile, debug=0):
        """Initialize PDF Analyzer.

        infile is a file object containing the document,
        debug is a flag enabling diagnostics.
        """
        self.debug = debug
        self.infile = infile

    def getJobSize(self):
        """Counts pages in a PDF document.

        A page is any '/Type /Page' (or '/Type/Page') entry which is
        immediately followed by a delimiter, so that '/Type /Pages'
        entries are not counted. The document is read line by line from
        the current position of the file.

        Returns the number of such entries.
        """
        regexp = re.compile(r"(/Type) ?(/Page)[/ \t\r\n]")
        # iterate on the file itself : xreadlines() is deprecated and
        # doesn't exist on other file-like objects.
        return sum(len(regexp.findall(line)) for line in self.infile)
139 | | |
class ESCP2Analyzer:
    """A class to parse ESC/P2 documents."""

    def __init__(self, infile, debug=0):
        """Initialize ESC/P2 Analyzer."""
        self.infile = infile
        self.debug = debug

    def getJobSize(self):
        """Counts pages in an ESC/P2 document.

        The whole document is read, then several page markers are
        tried, the first one giving a usable count wins :

          1. "\\r\\f" or "\\r\\n\\f" followed by ESC, whichever is the
             most frequent, as output by some drivers ;
          2. ESC @ \\f, output by ghostscript's stcolor once per page
             plus one ;
          3. \\f ESC @, output by ghostscript's escp driver ;
          4. ESC @ (Reset Printer), output twice per page by Gimpprint
             at least.
        """
        data = self.infile.read()

        formfeeds = max(data.count("\r\f\033"), data.count("\r\n\f\033"))
        if formfeeds:
            return formfeeds

        resetsthenff = data.count("\033@\f")
        if resetsthenff > 1:
            return resetsthenff - 1  # there's one more than pages

        ffthenresets = data.count("\f\033@")
        if ffthenresets:
            return ffthenresets

        # two Reset Printer sequences per page
        return data.count("\033@") // 2
181 | | |
class PCLAnalyzer :
    """A class to parse PCL3, PCL4, PCL5 documents.

    The class level dictionaries below translate the numeric argument of
    the <ESC>&l###A, <ESC>&l###H, <ESC>&l###O and <ESC>&l###M commands
    into readable labels. Unknown values are reported as the number itself.
    """
    # Page sizes.
    mediasizes = { # ESC&l####A
        0 : "Default",
        1 : "Executive",
        2 : "Letter",
        3 : "Legal",
        6 : "Ledger",
        25 : "A5",
        26 : "A4",
        27 : "A3",
        45 : "JB5",
        46 : "JB4",
        71 : "HagakiPostcard",
        72 : "OufukuHagakiPostcard",
        80 : "MonarchEnvelope",
        81 : "COM10Envelope",
        90 : "DLEnvelope",
        91 : "C5Envelope",
        100 : "B5Envelope",
        101 : "Custom",
    }

    # Paper sources.
    mediasources = { # ESC&l####H
        0 : "Default",
        1 : "Main",
        2 : "Manual",
        3 : "ManualEnvelope",
        4 : "Alternate",
        5 : "OptionalLarge",
        6 : "EnvelopeFeeder",
        7 : "Auto",
        8 : "Tray1",
    }

    # Page orientations.
    orientations = { # ESC&l####O
        0 : "Portrait",
        1 : "Landscape",
        2 : "ReversePortrait",
        3 : "ReverseLandscape",
    }

    # Media types.
    mediatypes = { # ESC&l####M
        0 : "Plain",
        1 : "Bond",
        2 : "Special",
        3 : "Glossy",
        4 : "Transparent",
    }


    def __init__(self, infile, debug=0) :
        """Initialize PCL Analyzer.

        infile must be a real file : getJobSize() memory maps it through
        its file descriptor. debug enables diagnostics on stderr.
        """
        self.debug = debug
        self.infile = infile

    def setPageDict(self, pages, number, attribute, value) :
        """Sets an attribute of a page, initializing the page's dictionary if needed.

        pages is the dictionary of all pages, indexed by page number.
        If page 'number' is still unknown it is first created with
        default attributes, then 'attribute' is set to 'value'.
        """
        dict = pages.setdefault(number, { "copies" : 1, "mediasource" : "Main", "mediasize" : "Default", "mediatype" : "Plain", "orientation" : "Portrait"})
        dict[attribute] = value

    def getJobSize(self) :
        """Count pages in a PCL5 document.

        Should also work for PCL3 and PCL4 documents.

        Algorithm from pclcount
        (c) 2003, by Eduardo Gielamo Oliveira & Rodolfo Broco Manin
        published under the terms of the GNU General Public Licence v2.

        Backported from C to Python by Jerome Alet, then enhanced
        with more PCL tags detected. I think all the necessary PCL tags
        are recognized to correctly handle PCL5 files wrt their number
        of pages. The documentation used for this was :

        HP PCL/PJL Reference Set
        PCL5 Printer Language Technical Quick Reference Guide
        http://h20000.www2.hp.com/bc/docs/support/SupportManual/bpl13205/bpl13205.pdf

        The file is memory mapped read-only and scanned one byte at a
        time until indexing past its end raises IndexError. Form feeds
        and several escape sequences are counted along the way, then the
        final page count is chosen from these counters.

        Returns the number of pages. The number of copies is recorded
        for each page but is only used in the debug output.
        """
        infileno = self.infile.fileno()
        # os.fstat(...)[6] is st_size : the whole file is mapped.
        minfile = mmap.mmap(infileno, os.fstat(infileno)[6], prot=mmap.PROT_READ, flags=mmap.MAP_SHARED)
        # For each two characters tag, the characters which may end its
        # numeric argument and which we want to handle.
        tagsends = { "&n" : "W",
                     "&b" : "W",
                     "*i" : "W",
                     "*l" : "W",
                     "*m" : "W",
                     "*v" : "W",
                     "*c" : "W",
                     "(f" : "W",
                     "(s" : "W",
                     ")s" : "W",
                     "&p" : "X",
                     # "&l" : "XHAOM", # treated specially
                     "&a" : "G", # TODO : 0 means next side, 1 front side, 2 back side
                     "*g" : "W",
                     "*r" : "sbABC",
                     "*t" : "R",
                     # "*b" : "VW", # treated specially because it occurs very often
                   }
        pagecount = resets = ejects = backsides = startgfx = endgfx = 0
        # starb and ampl tell if the last escape sequence seen was
        # <ESC>*b or <ESC>&l : they are reset at each new <ESC>.
        starb = ampl = ispcl3 = escstart = 0
        mediasourcecount = mediasizecount = orientationcount = mediatypecount = 0
        tag = None
        pages = {}
        pos = 0
        try :
            # The comparisons below expect minfile[pos] to be a one
            # character string.
            while 1 :
                char = minfile[pos] ; pos += 1
                if char == "\014" :
                    # Form feed
                    pagecount += 1
                elif char == "\033" :
                    starb = ampl = 0
                    #
                    # <ESC>*b###y#m###v###w... -> PCL3 raster graphics
                    # <ESC>*b###W -> Start of a raster data row/block
                    # <ESC>*b###V -> Start of a raster data plane
                    # <ESC>*c###W -> Start of a user defined pattern
                    # <ESC>*i###W -> Start of a viewing illuminant block
                    # <ESC>*l###W -> Start of a color lookup table
                    # <ESC>*m###W -> Start of a download dither matrix block
                    # <ESC>*v###W -> Start of a configure image data block
                    # <ESC>*r1A -> Start Gfx
                    # <ESC>(s###W -> Start of a characters description block
                    # <ESC>)s###W -> Start of a fonts description block
                    # <ESC>(f###W -> Start of a symbol set block
                    # <ESC>&b###W -> Start of configuration data block
                    # <ESC>&l###X -> Number of copies for current page
                    # <ESC>&n###W -> Starts an alphanumeric string ID block
                    # <ESC>&p###X -> Start of a non printable characters block
                    # <ESC>&a2G -> Back side when duplex mode as generated by rastertohp
                    # <ESC>*g###W -> Needed for planes in PCL3 output
                    # <ESC>&l###H (or only 0 ?) -> Eject if NumPlanes > 1, as generated by rastertohp. Also defines mediasource
                    # <ESC>&l###A -> mediasize
                    # <ESC>&l###O -> orientation
                    # <ESC>&l###M -> mediatype
                    # <ESC>*t###R -> gfx resolution
                    #
                    tagstart = minfile[pos] ; pos += 1
                    if tagstart in "E9=YZ" : # one byte PCL tag
                        if tagstart == "E" :
                            resets += 1
                        continue # skip to next tag
                    tag = tagstart + minfile[pos] ; pos += 1
                    if tag == "*b" :
                        starb = 1
                        tagend = "VW"
                    elif tag == "&l" :
                        ampl = 1
                        tagend = "XHAOM"
                    else :
                        try :
                            tagend = tagsends[tag]
                        except KeyError :
                            continue # Unsupported PCL tag
                    # Now read the numeric argument
                    size = 0
                    while 1 :
                        char = minfile[pos] ; pos += 1
                        if not char.isdigit() :
                            break
                        size = (size * 10) + int(char)
                    # char is now the character which ended the number
                    if char in tagend :
                        if tag == "&l" :
                            if char == "X" :
                                self.setPageDict(pages, pagecount, "copies", size)
                            elif char == "H" :
                                self.setPageDict(pages, pagecount, "mediasource", self.mediasources.get(size, str(size)))
                                mediasourcecount += 1
                                ejects += 1
                            elif char == "A" :
                                self.setPageDict(pages, pagecount, "mediasize", self.mediasizes.get(size, str(size)))
                                mediasizecount += 1
                            elif char == "O" :
                                self.setPageDict(pages, pagecount, "orientation", self.orientations.get(size, str(size)))
                                orientationcount += 1
                            elif char == "M" :
                                self.setPageDict(pages, pagecount, "mediatype", self.mediatypes.get(size, str(size)))
                                mediatypecount += 1
                        elif tag == "*r" :
                            # Special tests for PCL3
                            if (char == "s") and size :
                                # skip everything up to the next "A"
                                while 1 :
                                    char = minfile[pos] ; pos += 1
                                    if char == "A" :
                                        break
                            elif (char == "b") and (minfile[pos] == "C") and not size :
                                ispcl3 = 1 # Certainly a PCL3 file
                            # the booleans below are added as 0 or 1
                            startgfx += (char == "A") and (minfile[pos - 2] in ("0", "1", "2", "3")) # Start Gfx
                            endgfx += (not size) and (char in ("C", "B")) # End Gfx
                        elif tag == "*t" :
                            escstart += 1
                        elif (tag == "&a") and (size == 2) :
                            backsides += 1 # Back side in duplex mode
                        else :
                            # we just ignore the block.
                            if tag == "&n" :
                                # we have to take care of the operation id byte
                                # which is before the string itself
                                size += 1
                            pos += size
                else :
                    # Neither a form feed nor an escape character.
                    # NOTE(review): this branch is attached to the test on
                    # the current character, so it runs for each ordinary
                    # character following an <ESC>*b or <ESC>&l sequence
                    # until the next <ESC> -- confirm against the original
                    # indentation.
                    if starb :
                        # special handling of PCL3 in which
                        # *b introduces combined ESCape sequences
                        size = 0
                        while 1 :
                            char = minfile[pos] ; pos += 1
                            if not char.isdigit() :
                                break
                            size = (size * 10) + int(char)
                        if char in ("w", "v") :
                            ispcl3 = 1 # certainly a PCL3 document
                            # NOTE(review): size - 1 and not size bytes are
                            # skipped here, the reason is not visible in
                            # this code -- confirm.
                            pos += size - 1
                        elif char in ("y", "m") :
                            ispcl3 = 1 # certainly a PCL3 document
                            pos -= 1 # fix position : we were ahead
                    elif ampl :
                        # special handling of PCL3 in which
                        # &l introduces combined ESCape sequences
                        size = 0
                        while 1 :
                            char = minfile[pos] ; pos += 1
                            if not char.isdigit() :
                                break
                            size = (size * 10) + int(char)
                        if char in ("a", "o", "h", "m") :
                            ispcl3 = 1 # certainly a PCL3 document
                            pos -= 1 # fix position : we were ahead
                            if char == "h" :
                                self.setPageDict(pages, pagecount, "mediasource", self.mediasources.get(size, str(size)))
                                mediasourcecount += 1
                            elif char == "a" :
                                self.setPageDict(pages, pagecount, "mediasize", self.mediasizes.get(size, str(size)))
                                mediasizecount += 1
                            elif char == "o" :
                                self.setPageDict(pages, pagecount, "orientation", self.orientations.get(size, str(size)))
                                orientationcount += 1
                            elif char == "m" :
                                self.setPageDict(pages, pagecount, "mediatype", self.mediatypes.get(size, str(size)))
                                mediatypecount += 1
        except IndexError : # EOF ?
            minfile.close() # reached EOF

        # if pagecount is still 0, we will use the number
        # of resets instead of the number of form feed characters.
        # but the number of resets is always at least 2 with a valid
        # pcl file : one at the very start and one at the very end
        # of the job's data. So we subtract 2 from the number of
        # resets. And since on our test data we needed to subtract
        # 1 more, we finally subtract 3, and will test several
        # PCL files with this. If resets < 2, then the file is
        # probably not a valid PCL file, so we use 0
        #
        # NOTE(review): the computation described above only exists in
        # the commented out code below : the number of resets is
        # currently only written to the debug output.

        if self.debug :
            sys.stderr.write("pagecount : %s\n" % pagecount)
            sys.stderr.write("resets : %s\n" % resets)
            sys.stderr.write("ejects : %s\n" % ejects)
            sys.stderr.write("backsides : %s\n" % backsides)
            sys.stderr.write("startgfx : %s\n" % startgfx)
            sys.stderr.write("endgfx : %s\n" % endgfx)
            sys.stderr.write("mediasourcecount : %s\n" % mediasourcecount)
            sys.stderr.write("mediasizecount : %s\n" % mediasizecount)
            sys.stderr.write("orientationcount : %s\n" % orientationcount)
            sys.stderr.write("mediatypecount : %s\n" % mediatypecount)
            sys.stderr.write("escstart : %s\n" % escstart)

        # if not pagecount :
        # pagecount = (pagecount or ((resets - 3) * (resets > 2)))
        # else :
        # # here we add counters for other ways new pages may have
        # # been printed and ejected by the printer
        # pagecount += ejects + backsides
        #
        # # now handle number of copies for each page (may differ).
        # # in duplex mode, number of copies may be sent only once.
        # for pnum in range(pagecount) :
        # # if no number of copies defined, take the preceding one else the one set before any page else 1.
        # page = pages.get(pnum, pages.get(pnum - 1, pages.get(0, { "copies" : 1 })))
        # pagecount += (page["copies"] - 1)
        #
        # # in PCL3 files, there's one Start Gfx tag per page
        # if ispcl3 :
        # if endgfx == int(startgfx / 2) : # special case for cdj1600
        # pagecount = endgfx
        # elif startgfx :
        # pagecount = startgfx
        # elif endgfx :
        # pagecount = endgfx


        # Choose the final page count from the counters : the number of
        # form feeds is kept when it agrees with the other counters, else
        # the ejects or the graphics markers are used.
        if pagecount == mediasourcecount == escstart :
            pass # should be OK.
        elif (not startgfx) and (not endgfx) :
            pagecount = ejects or pagecount
        elif startgfx == endgfx :
            pagecount = startgfx
        elif startgfx == (endgfx - 1) :
            pagecount = startgfx
        else :
            pagecount = abs(startgfx - endgfx)

        if self.debug :
            for pnum in range(pagecount) :
                # if no number of copies defined, take the preceding one else the one set before any page else 1.
                page = pages.get(pnum, pages.get(pnum - 1, pages.get(0, { "copies" : 1, "mediasource" : "Main", "mediasize" : "Default", "mediatype" : "Plain", "orientation" : "Portrait"})))
                sys.stderr.write("%s*%s*%s*%s*%s\n" % (page["copies"], page["mediatype"], page["mediasize"], page["orientation"], page["mediasource"]))

        return pagecount
490 | | |
class PCLXLAnalyzer:
    """A class to parse PCL6 (aka XL) documents.

    The binary stream is walked tag by tag thanks to self.tags, a table
    indexed by tag value which contains either :

      - 0 : the tag is ignored ;
      - an integer : the number of bytes to skip after the tag ;
      - a method : called with self.pos set just after the tag, it may
        move self.pos and returns the number of bytes to skip.
    """
    mediasizes = {
        0: "Letter",
        1: "Legal",
        2: "A4",
        3: "Executive",
        4: "Ledger",
        5: "A3",
        6: "COM10Envelope",
        7: "MonarchEnvelope",
        8: "C5Envelope",
        9: "DLEnvelope",
        10: "JB4",
        11: "JB5",
        12: "B5Envelope",
        14: "JPostcard",
        15: "JDoublePostcard",
        16: "A5",
        17: "A6",
        18: "JB6",
    }

    mediasources = {
        0: "Default",
        1: "Auto",
        2: "Manual",
        3: "MultiPurpose",
        4: "UpperCassette",
        5: "LowerCassette",
        6: "EnvelopeTray",
        7: "ThirdCassette",
    }

    orientations = {
        0: "Portrait",
        1: "Landscape",
        2: "ReversePortrait",
        3: "ReverseLandscape",
    }

    def __init__(self, infile, debug=0):
        """Initialize PCLXL Analyzer.

        Reads infile line by line up to the ' HP-PCL XL;' stream header,
        whose first character gives the endianness of the stream, and
        leaves the file positioned just after this line.

        Raises PDLAnalyzerError if no such header is found or if its
        endianness marker is unknown.
        """
        self.debug = debug
        self.infile = infile
        self.endianness = None
        found = 0
        while not found:
            line = self.infile.readline()
            if not line:
                break
            if line[1:12] == " HP-PCL XL;":
                found = 1
                endian = ord(line[0])
                if endian == 0x29:
                    self.littleEndian()
                elif endian == 0x28:
                    self.bigEndian()
                # TODO : marker 0x27 is not handled yet, it is rejected
                # like any other unknown marker.
                else:
                    raise PDLAnalyzerError("Unknown endianness marker 0x%02x at start !" % endian)
        if not found:
            raise PDLAnalyzerError("This file doesn't seem to be PCLXL (aka PCL6)")

        # Initialize table of tags
        self.tags = [0] * 256

        # GhostScript's sources tell us that HP printers
        # only accept little endianness, but we can handle both.
        self.tags[0x28] = self.bigEndian  # BigEndian
        self.tags[0x29] = self.littleEndian  # LittleEndian

        self.tags[0x43] = self.beginPage  # BeginPage
        self.tags[0x44] = self.endPage  # EndPage

        self.tags[0xc0] = 1  # ubyte
        self.tags[0xc1] = 2  # uint16
        self.tags[0xc2] = 4  # uint32
        self.tags[0xc3] = 2  # sint16
        self.tags[0xc4] = 4  # sint32
        self.tags[0xc5] = 4  # real32

        self.tags[0xc8] = self.array_8  # ubyte_array
        self.tags[0xc9] = self.array_16  # uint16_array
        self.tags[0xca] = self.array_32  # uint32_array
        self.tags[0xcb] = self.array_16  # sint16_array
        self.tags[0xcc] = self.array_32  # sint32_array
        self.tags[0xcd] = self.array_32  # real32_array

        self.tags[0xd0] = 2  # ubyte_xy
        self.tags[0xd1] = 4  # uint16_xy
        self.tags[0xd2] = 8  # uint32_xy
        self.tags[0xd3] = 4  # sint16_xy
        self.tags[0xd4] = 8  # sint32_xy
        self.tags[0xd5] = 8  # real32_xy

        self.tags[0xe0] = 4  # ubyte_box
        self.tags[0xe1] = 8  # uint16_box
        self.tags[0xe2] = 16  # uint32_box
        self.tags[0xe3] = 8  # sint16_box
        self.tags[0xe4] = 16  # sint32_box
        self.tags[0xe5] = 16  # real32_box

        self.tags[0xf8] = 1  # attr_ubyte
        self.tags[0xf9] = 2  # attr_uint16

        self.tags[0xfa] = self.embeddedData  # dataLength
        self.tags[0xfb] = self.embeddedDataSmall  # dataLengthByte

    def mediaTypeAt(self, arraypos):
        """Decode the media type stored in the ubyte array tagged at arraypos.

        arraypos is the position of the 0xc8 (ubyte_array) tag. It is
        followed by the tag giving the type of the array's length, by
        the length itself, then by the characters.

        Returns the characters, or None if the type of the length is
        not an unsigned integer tag.
        """
        minfile = self.minfile
        lengthtag = ord(minfile[arraypos + 1])
        if lengthtag == 0xc0:  # ubyte
            start = arraypos + 3
            size = ord(minfile[arraypos + 2])
        elif lengthtag == 0xc1:  # uint16
            start = arraypos + 4
            size = unpack(self.endianness + "H", minfile[arraypos + 2:start])[0]
        elif lengthtag == 0xc2:  # uint32
            start = arraypos + 6
            size = unpack(self.endianness + "I", minfile[arraypos + 2:start])[0]
        else:
            return None
        return minfile[start:start + size]

    def beginPage(self):
        """Indicates the beginning of a new page, and extracts media information.

        Increments self.pagecount and records the new page, with one
        copy and the media attributes found, in self.pages.

        Returns 0 : there's nothing to skip after a BeginPage tag.
        """
        self.pagecount += 1

        # Default values
        mediatypelabel = "Plain"
        mediasourcelabel = "Main"
        mediasizelabel = "Default"
        orientationlabel = "Portrait"

        # Now go upstream to decode media type, size, source, and orientation
        # this saves time because we don't need a complete parser !
        # Single byte attributes are laid out as : type tag, value,
        # attr_ubyte tag, attribute id, so the value is 2 bytes before
        # the id and the previous id 4 bytes before.
        minfile = self.minfile
        pos = self.pos - 2
        while pos > 0:  # safety check : don't go back to far !
            val = ord(minfile[pos])
            if val in (0x44, 0x48, 0x41):  # if previous endPage or openDataSource or beginSession (first page)
                break
            if val == 0x26:
                mediasource = ord(minfile[pos - 2])
                mediasourcelabel = self.mediasources.get(mediasource, str(mediasource))
                pos = pos - 4
            elif val == 0x25:
                mediasize = ord(minfile[pos - 2])
                mediasizelabel = self.mediasizes.get(mediasize, str(mediasize))
                pos = pos - 4
            elif val == 0x28:
                orientation = ord(minfile[pos - 2])
                # the label used to be stored under a misspelled name
                # and was lost.
                orientationlabel = self.orientations.get(orientation, str(orientation))
                pos = pos - 4
            elif val == 0x27:
                # the media type is a string : look for the start of
                # its ubyte array.
                pos = pos - 1
                while pos > 0:  # safety check : don't go back to far !
                    val = ord(minfile[pos])
                    pos -= 1
                    if val == 0xc8:
                        # pos was already decremented : the tag is at pos + 1
                        label = self.mediaTypeAt(pos + 1)
                        if label is not None:
                            mediatypelabel = label
                        break
            # else : TODO : CUSTOM MEDIA SIZE AND UNIT !
            else:
                pos = pos - 2  # ignored
        self.pages[self.pagecount] = {
            "copies": 1,
            "orientation": orientationlabel,
            "mediatype": mediatypelabel,
            "mediasize": mediasizelabel,
            "mediasource": mediasourcelabel,
        }
        return 0

    def endPage(self):
        """Indicates the end of a page.

        Returns 0 : there's nothing to skip after an EndPage tag.
        """
        pos = self.pos
        minfile = self.minfile
        if (ord(minfile[pos - 3]) == 0xf8) and (ord(minfile[pos - 2]) == 0x31):
            # The EndPage operator may be preceded by a PageCopies attribute
            # So set number of copies for current page.
            # From what I read in PCLXL documentation, the number
            # of copies is an unsigned 16 bits integer
            self.pages[self.pagecount]["copies"] = unpack(self.endianness + "H", minfile[pos - 5:pos - 3])[0]
        return 0

    def arrayByteCount(self, itemsize):
        """Computes the size in bytes of the array starting at self.pos.

        self.pos must be just after the array's tag : the tag giving the
        type of the number of items is read there, followed by this
        number. self.pos is left just after it, on the first item.

        itemsize is the size in bytes of each item.

        Returns the number of items multiplied by itemsize.
        Raises PDLAnalyzerError if the number of items is not stored on
        1, 2 or 4 bytes.
        """
        pos = self.pos
        datatype = self.minfile[pos]
        pos += 1
        length = self.tags[ord(datatype)]
        if callable(length):
            self.pos = pos
            length = length()
            pos = self.pos
        posl = pos + length
        self.pos = posl
        if length == 1:
            fmt = "B"
        elif length == 2:
            fmt = self.endianness + "H"
        elif length == 4:
            fmt = self.endianness + "I"
        else:
            raise PDLAnalyzerError("Error on array size at %s" % self.pos)
        return itemsize * unpack(fmt, self.minfile[pos:posl])[0]

    def array_8(self):
        """Handles arrays of 1 byte items, returns their size in bytes."""
        return self.arrayByteCount(1)

    def array_16(self):
        """Handles arrays of 2 bytes items, returns their size in bytes."""
        return self.arrayByteCount(2)

    def array_32(self):
        """Handles arrays of 4 bytes items, returns their size in bytes."""
        return self.arrayByteCount(4)

    def embeddedDataSmall(self):
        """Handle small amounts of data.

        Returns the length of the data, read from the single byte found
        at self.pos, and moves self.pos after this byte.
        """
        pos = self.pos
        length = ord(self.minfile[pos])
        self.pos = pos + 1
        return length

    def embeddedData(self):
        """Handle normal amounts of data.

        Returns the length of the data, read from the 4 bytes unsigned
        integer found at self.pos, and moves self.pos after it.
        """
        pos = self.pos
        pos4 = pos + 4
        self.pos = pos4
        return unpack(self.endianness + "I", self.minfile[pos:pos4])[0]

    def littleEndian(self):
        """Toggles to little endianness."""
        self.endianness = "<"  # little endian
        return 0

    def bigEndian(self):
        """Toggles to big endianness."""
        self.endianness = ">"  # big endian
        return 0

    def getJobSize(self):
        """Counts pages in a PCLXL (PCL6) document.

        Algorithm by Jerome Alet.

        The documentation used for this was :

        HP PCL XL Feature Reference
        Protocol Class 2.0
        http://www.hpdevelopersolutions.com/downloads/64/358/xl_ref20r22.pdf

        The file is memory mapped read-only and walked tag by tag from
        the current position of self.infile, i.e. just after the stream
        header, until indexing past its end raises IndexError.

        Returns the number of pages, each page counting as many times
        as its number of copies.
        """
        infileno = self.infile.fileno()
        self.pages = {}
        self.minfile = minfile = mmap.mmap(infileno, os.fstat(infileno)[6], prot=mmap.PROT_READ, flags=mmap.MAP_SHARED)
        tags = self.tags
        self.pagecount = 0
        self.pos = pos = self.infile.tell()
        try:
            try:
                while 1:
                    char = minfile[pos]
                    pos += 1
                    length = tags[ord(char)]
                    if not length:
                        continue
                    if callable(length):
                        # handlers work on self.pos and may move it
                        self.pos = pos
                        length = length()
                        pos = self.pos
                    pos += length
            except IndexError:  # EOF ?
                pass  # reached EOF
        finally:
            # also release the mapping when a handler fails
            self.minfile.close()

        # A page which was not recorded counts for one copy.
        defaultpage = {
            "copies": 1,
            "orientation": "Portrait",
            "mediatype": "Plain",
            "mediasize": "Default",
            "mediasource": "Main",
        }

        # now handle number of copies for each page (may differ).
        # The range is computed once, so modifying self.pagecount in
        # the loop is safe.
        for pnum in range(1, self.pagecount + 1):
            # if no number of copies defined, take 1, as explained
            # in PCLXL documentation.
            # NB : is number of copies is 0, the page won't be output
            # but the formula below is still correct : we want
            # to decrease the total number of pages in this case.
            page = self.pages.get(pnum, defaultpage)
            copies = page["copies"]
            self.pagecount += (copies - 1)
            if self.debug:
                sys.stderr.write("%s*%s*%s*%s*%s\n" % (copies, page["mediatype"], page["mediasize"], page["orientation"], page["mediasource"]))

        return self.pagecount
796 | | |