| 23 | |
import mmap
import os
import sys

from pdlanalyzer.pdlparser import PDLParser
| 28 | |
class PCLParser(PDLParser) :
    """A parser for PCL3, PCL4, PCL5 documents.

    The class level tables translate the numeric argument of the
    ESC&l sequences into readable names. getJobSize() falls back to
    the number itself, as a string, for values which are not listed.
    """
    mediasizes = {  # ESC&l####A
        0 : "Default",
        1 : "Executive",
        2 : "Letter",
        3 : "Legal",
        6 : "Ledger",
        25 : "A5",
        26 : "A4",
        27 : "A3",
        45 : "JB5",
        46 : "JB4",
        71 : "HagakiPostcard",
        72 : "OufukuHagakiPostcard",
        80 : "MonarchEnvelope",
        81 : "COM10Envelope",
        90 : "DLEnvelope",
        91 : "C5Envelope",
        100 : "B5Envelope",
        101 : "Custom",
    }

    mediasources = {  # ESC&l####H
        0 : "Default",
        1 : "Main",
        2 : "Manual",
        3 : "ManualEnvelope",
        4 : "Alternate",
        5 : "OptionalLarge",
        6 : "EnvelopeFeeder",
        7 : "Auto",
        8 : "Tray1",
    }

    orientations = {  # ESC&l####O
        0 : "Portrait",
        1 : "Landscape",
        2 : "ReversePortrait",
        3 : "ReverseLandscape",
    }

    mediatypes = {  # ESC&l####M
        0 : "Plain",
        1 : "Bond",
        2 : "Special",
        3 : "Glossy",
        4 : "Transparent",
    }

    # Attributes a page starts with until a PCL sequence overrides them.
    # Always copy this dictionary before modifying it.
    _defaultpage = {
        "copies" : 1,
        "mediasource" : "Main",
        "mediasize" : "Default",
        "mediatype" : "Plain",
        "orientation" : "Portrait",
    }

    def setPageDict(self, pages, number, attribute, value) :
        """Sets an attribute of a page, creating the page entry if needed.

        pages     -- dictionary mapping page numbers to attribute dictionaries
        number    -- key of the page to update
        attribute -- name of the attribute to set
        value     -- new value for this attribute

        A page seen for the first time starts from a private copy of
        the default attributes.
        """
        page = pages.setdefault(number, self._defaultpage.copy())
        page[attribute] = value

    def _readNumber(self, data, pos) :
        """Reads the unsigned decimal number which starts at data[pos].

        Returns a (value, terminator, newpos) tuple : value is 0 when
        there's no digit at all, terminator is the first character which
        is not a digit, and newpos is the position just after it.

        IndexError is not caught here : callers use it to detect the
        end of the data.
        """
        value = 0
        while 1 :
            char = data[pos]
            pos += 1
            if not char.isdigit() :
                return (value, char, pos)
            value = (value * 10) + int(char)

    def getJobSize(self) :
        """Count pages in a PCL5 document.

        Should also work for PCL3 and PCL4 documents.

        Algorithm from pclcount
        (c) 2003, by Eduardo Gielamo Oliveira & Rodolfo Broco Manin
        published under the terms of the GNU General Public Licence v2.

        Backported from C to Python by Jerome Alet, then enhanced
        with more PCL tags detected. I think all the necessary PCL tags
        are recognized to correctly handle PCL5 files wrt their number
        of pages. The documentation used for this was :

        HP PCL/PJL Reference Set
        PCL5 Printer Language Technical Quick Reference Guide
        http://h20000.www2.hp.com/bc/docs/support/SupportManual/bpl13205/bpl13205.pdf

        The whole of self.infile is memory mapped read-only and scanned
        one character at a time. Returns the number of pages.
        """
        infileno = self.infile.fileno()
        minfile = mmap.mmap(infileno, os.fstat(infileno).st_size, prot=mmap.PROT_READ, flags=mmap.MAP_SHARED)

        # Maps each two characters tag to the characters which may
        # terminate its numeric argument.
        tagsends = { "&n" : "W",
                     "&b" : "W",
                     "*i" : "W",
                     "*l" : "W",
                     "*m" : "W",
                     "*v" : "W",
                     "*c" : "W",
                     "(f" : "W",
                     "(s" : "W",
                     ")s" : "W",
                     "&p" : "X",
                     # "&l" : "XHAOM", # treated specially
                     "&a" : "G", # TODO : 0 means next side, 1 front side, 2 back side
                     "*g" : "W",
                     "*r" : "sbABC",
                     "*t" : "R",
                     # "*b" : "VW", # treated specially because it occurs very often
                   }
        pagecount = resets = ejects = backsides = startgfx = endgfx = 0
        starb = ampl = escstart = 0
        mediasourcecount = mediasizecount = orientationcount = mediatypecount = 0
        pages = {}
        pos = 0
        #
        # <ESC>*b###y#m###v###w... -> PCL3 raster graphics
        # <ESC>*b###W -> Start of a raster data row/block
        # <ESC>*b###V -> Start of a raster data plane
        # <ESC>*c###W -> Start of a user defined pattern
        # <ESC>*i###W -> Start of a viewing illuminant block
        # <ESC>*l###W -> Start of a color lookup table
        # <ESC>*m###W -> Start of a download dither matrix block
        # <ESC>*v###W -> Start of a configure image data block
        # <ESC>*r1A -> Start Gfx
        # <ESC>(s###W -> Start of a characters description block
        # <ESC>)s###W -> Start of a fonts description block
        # <ESC>(f###W -> Start of a symbol set block
        # <ESC>&b###W -> Start of configuration data block
        # <ESC>&l###X -> Number of copies for current page
        # <ESC>&n###W -> Starts an alphanumeric string ID block
        # <ESC>&p###X -> Start of a non printable characters block
        # <ESC>&a2G -> Back side when duplex mode as generated by rastertohp
        # <ESC>*g###W -> Needed for planes in PCL3 output
        # <ESC>&l###H (or only 0 ?) -> Eject if NumPlanes > 1, as generated by rastertohp. Also defines mediasource
        # <ESC>&l###A -> mediasize
        # <ESC>&l###O -> orientation
        # <ESC>&l###M -> mediatype
        # <ESC>*t###R -> gfx resolution
        #
        try :
            try :
                while 1 :
                    char = minfile[pos]
                    pos += 1
                    if char == "\014" : # form feed
                        pagecount += 1
                    elif char == "\033" : # escape : a new PCL tag begins
                        starb = ampl = 0
                        tagstart = minfile[pos]
                        pos += 1
                        if tagstart in "E9=YZ" : # one byte PCL tag
                            if tagstart == "E" :
                                resets += 1
                            continue # skip to next tag
                        tag = tagstart + minfile[pos]
                        pos += 1
                        if tag == "*b" :
                            starb = 1
                            tagend = "VW"
                        elif tag == "&l" :
                            ampl = 1
                            tagend = "XHAOM"
                        else :
                            tagend = tagsends.get(tag)
                            if tagend is None :
                                continue # Unsupported PCL tag
                        # Now read the numeric argument
                        (size, char, pos) = self._readNumber(minfile, pos)
                        if char not in tagend :
                            # Not a terminator we know : starb and ampl stay
                            # set, so what follows is handled below as a
                            # combined escape sequence.
                            continue
                        if tag == "&l" :
                            if char == "X" :
                                self.setPageDict(pages, pagecount, "copies", size)
                            elif char == "H" :
                                self.setPageDict(pages, pagecount, "mediasource", self.mediasources.get(size, str(size)))
                                mediasourcecount += 1
                                ejects += 1
                            elif char == "A" :
                                self.setPageDict(pages, pagecount, "mediasize", self.mediasizes.get(size, str(size)))
                                mediasizecount += 1
                            elif char == "O" :
                                self.setPageDict(pages, pagecount, "orientation", self.orientations.get(size, str(size)))
                                orientationcount += 1
                            elif char == "M" :
                                self.setPageDict(pages, pagecount, "mediatype", self.mediatypes.get(size, str(size)))
                                mediatypecount += 1
                        elif tag == "*r" :
                            # Special tests for PCL3
                            if (char == "s") and size :
                                # move forward to the "A" which ends the sequence
                                while 1 :
                                    char = minfile[pos]
                                    pos += 1
                                    if char == "A" :
                                        break
                            # Start Gfx : an "A" directly preceded by 0, 1, 2 or 3
                            if (char == "A") and (minfile[pos - 2] in ("0", "1", "2", "3")) :
                                startgfx += 1
                            # End Gfx : "B" or "C" with a null or missing argument
                            if (not size) and (char in ("C", "B")) :
                                endgfx += 1
                        elif tag == "*t" :
                            escstart += 1
                        elif (tag == "&a") and (size == 2) :
                            backsides += 1 # Back side in duplex mode
                        else :
                            # we just ignore the block.
                            # NOTE(review) : <ESC>&a#G with an argument other
                            # than 2 also ends up here, so that many bytes are
                            # skipped after it. Kept as is, please confirm
                            # this is intended.
                            if tag == "&n" :
                                # we have to take care of the operation id byte
                                # which is before the string itself
                                size += 1
                            pos += size
                    elif starb :
                        # special handling of PCL3 in which
                        # *b introduces combined ESCape sequences
                        (size, char, pos) = self._readNumber(minfile, pos)
                        if char in ("w", "v") :
                            pos += size - 1
                        elif char in ("y", "m") :
                            pos -= 1 # fix position : we were ahead
                    elif ampl :
                        # special handling of PCL3 in which
                        # &l introduces combined ESCape sequences
                        (size, char, pos) = self._readNumber(minfile, pos)
                        if char in ("a", "o", "h", "m") :
                            pos -= 1 # fix position : we were ahead
                            if char == "h" :
                                self.setPageDict(pages, pagecount, "mediasource", self.mediasources.get(size, str(size)))
                                mediasourcecount += 1
                            elif char == "a" :
                                self.setPageDict(pages, pagecount, "mediasize", self.mediasizes.get(size, str(size)))
                                mediasizecount += 1
                            elif char == "o" :
                                self.setPageDict(pages, pagecount, "orientation", self.orientations.get(size, str(size)))
                                orientationcount += 1
                            elif char == "m" :
                                self.setPageDict(pages, pagecount, "mediatype", self.mediatypes.get(size, str(size)))
                                mediatypecount += 1
            except IndexError :
                pass # reading past the last byte is how the scan ends
        finally :
            # release the mapping however the scan ended
            minfile.close()

        if self.debug :
            for (label, counter) in (("pagecount", pagecount),
                                     ("resets", resets),
                                     ("ejects", ejects),
                                     ("backsides", backsides),
                                     ("startgfx", startgfx),
                                     ("endgfx", endgfx),
                                     ("mediasourcecount", mediasourcecount),
                                     ("mediasizecount", mediasizecount),
                                     ("orientationcount", orientationcount),
                                     ("mediatypecount", mediatypecount),
                                     ("escstart", escstart)) :
                sys.stderr.write("%s : %s\n" % (label, counter))

        # Turn the counters into a number of pages. The number of resets,
        # the back sides and the number of copies are only collected :
        # an earlier heuristic based on them is not used anymore.
        if pagecount == mediasourcecount == escstart :
            pass # should be OK.
        elif (not startgfx) and (not endgfx) :
            # no raster graphics marker at all : the number of <ESC>&l###H
            # sequences wins over the form feeds if there's at least one.
            pagecount = ejects or pagecount
        elif startgfx in (endgfx, endgfx - 1) :
            pagecount = startgfx
        else :
            pagecount = abs(startgfx - endgfx)

        if self.debug :
            for pnum in range(pagecount) :
                # if a page has no attributes of its own, take the ones of
                # the preceding page, else the ones set before any page,
                # else the defaults.
                page = pages.get(pnum, pages.get(pnum - 1, pages.get(0, self._defaultpage)))
                sys.stderr.write("%s*%s*%s*%s*%s\n" % (page["copies"], page["mediatype"], page["mediasize"], page["orientation"], page["mediasource"]))

        return pagecount
| 331 | |
| 332 | def test() : |
| 333 | """Test function.""" |
| 334 | raise RuntimeError, "Not implemented !" |
| 335 | |
| 336 | if __name__ == "__main__" : |
| 337 | test() |