| 23 | |
import mmap
import os
import sys
from struct import unpack

from pdlanalyzer.pdlparser import PDLParser, PDLParserError
| 29 | |
class PCLXLParser(PDLParser):
    """A parser for PCLXL (aka PCL6) documents.

    The parser is not a complete PCLXL interpreter: getJobSize() only
    skips over tagged values, and calls beginPage() / endPage() when the
    matching operator tags are met.  This is enough to count the pages
    and to recover the media settings of each of them.

    The code targets Python 2, where indexing the memory mapped file
    yields one-character strings (hence the calls to ord()).
    """

    # Labels for the values of the attribute 0x25 (media size).
    mediasizes = {
        0: "Letter",
        1: "Legal",
        2: "A4",
        3: "Executive",
        4: "Ledger",
        5: "A3",
        6: "COM10Envelope",
        7: "MonarchEnvelope",
        8: "C5Envelope",
        9: "DLEnvelope",
        10: "JB4",
        11: "JB5",
        12: "B5Envelope",
        14: "JPostcard",
        15: "JDoublePostcard",
        16: "A5",
        17: "A6",
        18: "JB6",
    }

    # Labels for the values of the attribute 0x26 (media source).
    mediasources = {
        0: "Default",
        1: "Auto",
        2: "Manual",
        3: "MultiPurpose",
        4: "UpperCassette",
        5: "LowerCassette",
        6: "EnvelopeTray",
        7: "ThirdCassette",
    }

    # Labels for the values of the attribute 0x28 (orientation).
    orientations = {
        0: "Portrait",
        1: "Landscape",
        2: "ReversePortrait",
        3: "ReverseLandscape",
    }

    def beginPage(self):
        """Register a new page and extract its media information.

        Called with self.pos just after a BeginPage tag (0x43).  The
        attributes of the page are searched backwards from there, which
        saves time because no complete parser is needed.

        Returns 0 : no additional byte has to be skipped by the caller.
        """
        self.pagecount += 1

        # Default values
        mediatypelabel = "Plain"
        mediasourcelabel = "Main"
        mediasizelabel = "Default"
        orientationlabel = "Portrait"

        # Now go upstream to decode media type, size, source, and
        # orientation.  pos is the byte just before the BeginPage tag,
        # i.e. the identifier of the last attribute of the operator.
        minfile = self.minfile
        pos = self.pos - 2
        while pos > 0:  # safety check : don't go back too far !
            val = ord(minfile[pos])
            if val in (0x44, 0x48, 0x41):
                # previous endPage or openDataSource or beginSession
                # (first page) : nothing more to find for this page.
                break
            if val == 0x26:
                # The value of an ubyte attribute is located two bytes
                # before its identifier (the attribute tag is in between).
                mediasource = ord(minfile[pos - 2])
                mediasourcelabel = self.mediasources.get(mediasource,
                                                         str(mediasource))
                pos -= 4
            elif val == 0x25:
                mediasize = ord(minfile[pos - 2])
                mediasizelabel = self.mediasizes.get(mediasize,
                                                     str(mediasize))
                pos -= 4
            elif val == 0x28:
                orientation = ord(minfile[pos - 2])
                orientationlabel = self.orientations.get(orientation,
                                                         str(orientation))
                pos -= 4
            elif val == 0x27:
                (label, pos) = self._mediaTypeBefore(pos)
                if label is not None:
                    mediatypelabel = label
            # else : TODO : CUSTOM MEDIA SIZE AND UNIT !
            else:
                pos -= 2  # ignored
        self.pages[self.pagecount] = {
            "copies": 1,
            "orientation": orientationlabel,
            "mediatype": mediatypelabel,
            "mediasize": mediasizelabel,
            "mediasource": mediasourcelabel,
        }
        return 0

    def _mediaTypeBefore(self, attrpos):
        """Decode the ubyte array which precedes the attribute at attrpos.

        attrpos is the position of the 0x27 (media type) attribute
        identifier.  The array is searched backwards : a candidate is an
        ubyte array tag (0xc8) followed by a length given either as an
        ubyte (0xc0) or as an uint16 (0xc1), and whose contents end exactly
        on the attribute tag.  Only the contents are returned, without the
        array tag nor the length.

        Returns a tuple (label, pos) where pos is the position from which
        the backward scan must go on.  label is None if no consistent
        array was found, in which case the attribute is simply ignored.
        """
        minfile = self.minfile
        dataend = attrpos - 1  # position of the attribute tag
        # The length is at most an uint16, so the array tag can't be
        # further than this : don't scan the whole file for nothing.
        lowest = max(0, dataend - 0xffff - 5)
        pos = dataend - 1
        while pos > lowest:
            if ord(minfile[pos]) == 0xc8:
                lengthtag = ord(minfile[pos + 1])
                datastart = None
                if lengthtag == 0xc0:
                    length = ord(minfile[pos + 2])
                    datastart = pos + 3
                elif lengthtag == 0xc1:
                    length = unpack(self.endianness + "H",
                                    minfile[pos + 2:pos + 4])[0]
                    datastart = pos + 4
                if (datastart is not None) \
                   and (datastart + length == dataend):
                    return (minfile[datastart:dataend], pos - 1)
            pos -= 1
        return (None, attrpos - 2)

    def endPage(self):
        """Handle the end of a page, and set its number of copies.

        Called with self.pos just after an EndPage tag (0x44).

        Returns 0 : no additional byte has to be skipped by the caller.
        """
        pos = self.pos
        minfile = self.minfile
        if (ord(minfile[pos - 3]) == 0xf8) \
           and (ord(minfile[pos - 2]) == 0x31):
            # The EndPage operator may be preceded by a PageCopies
            # attribute, so set number of copies for current page.
            # From what I read in PCLXL documentation, the number
            # of copies is an unsigned 16 bits integer.
            page = self.pages.get(self.pagecount)
            if page is not None:  # no BeginPage was seen : nothing to set
                page["copies"] = unpack(self.endianness + "H",
                                        minfile[pos - 5:pos - 3])[0]
        return 0

    def _arraySize(self, itemsize):
        """Return the size in bytes of the array starting at self.pos.

        self.pos must be located on the tag which gives the datatype of
        the number of items.  This number is read, self.pos is moved just
        after it, and the number of items multiplied by itemsize is
        returned, so that the caller can skip the contents of the array.

        Raises PDLParserError if the number of items is not stored
        on 1, 2 or 4 bytes.
        """
        minfile = self.minfile
        pos = self.pos
        length = self.tags[ord(minfile[pos])]
        pos += 1
        if callable(length):
            self.pos = pos
            length = length()
            pos = self.pos
        posl = pos + length
        self.pos = posl
        if length == 1:
            fmt = "B"
        elif length == 2:
            fmt = self.endianness + "H"
        elif length == 4:
            fmt = self.endianness + "I"
        else:
            raise PDLParserError("Error on array size at %s" % self.pos)
        return itemsize * unpack(fmt, minfile[pos:posl])[0]

    def array_8(self):
        """Handles arrays of 8 bits items : returns their size in bytes."""
        return self._arraySize(1)

    def array_16(self):
        """Handles arrays of 16 bits items : returns their size in bytes."""
        return self._arraySize(2)

    def array_32(self):
        """Handles arrays of 32 bits items : returns their size in bytes."""
        return self._arraySize(4)

    def embeddedDataSmall(self):
        """Handle small amounts of data.

        The length is stored on a single byte at self.pos : returns it
        after having moved self.pos just after it.
        """
        pos = self.pos
        length = ord(self.minfile[pos])
        self.pos = pos + 1
        return length

    def embeddedData(self):
        """Handle normal amounts of data.

        The length is stored as an unsigned 32 bits integer at self.pos :
        returns it after having moved self.pos just after it.
        """
        pos = self.pos
        pos4 = pos + 4
        self.pos = pos4
        return unpack(self.endianness + "I", self.minfile[pos:pos4])[0]

    def littleEndian(self):
        """Toggles to little endianness."""
        self.endianness = "<"  # little endian
        return 0

    def bigEndian(self):
        """Toggles to big endianness."""
        self.endianness = ">"  # big endian
        return 0

    def _initTags(self):
        """Build self.tags, the table which drives the main loop.

        The table is indexed by tag value.  An entry is either the number
        of bytes to skip after the tag (0 for unknown tags), or a method
        which is called with self.pos just after the tag, and which
        returns the number of bytes to skip after its own work.
        """
        tags = [0] * 256

        # GhostScript's sources tell us that HP printers
        # only accept little endianness, but we can handle both.
        tags[0x28] = self.bigEndian     # BigEndian
        tags[0x29] = self.littleEndian  # LittleEndian

        tags[0x43] = self.beginPage     # BeginPage
        tags[0x44] = self.endPage       # EndPage

        tags[0xc0] = 1  # ubyte
        tags[0xc1] = 2  # uint16
        tags[0xc2] = 4  # uint32
        tags[0xc3] = 2  # sint16
        tags[0xc4] = 4  # sint32
        tags[0xc5] = 4  # real32

        tags[0xc8] = self.array_8   # ubyte_array
        tags[0xc9] = self.array_16  # uint16_array
        tags[0xca] = self.array_32  # uint32_array
        tags[0xcb] = self.array_16  # sint16_array
        tags[0xcc] = self.array_32  # sint32_array
        tags[0xcd] = self.array_32  # real32_array

        tags[0xd0] = 2  # ubyte_xy
        tags[0xd1] = 4  # uint16_xy
        tags[0xd2] = 8  # uint32_xy
        tags[0xd3] = 4  # sint16_xy
        tags[0xd4] = 8  # sint32_xy
        tags[0xd5] = 8  # real32_xy

        tags[0xe0] = 4   # ubyte_box
        tags[0xe1] = 8   # uint16_box
        tags[0xe2] = 16  # uint32_box
        tags[0xe3] = 8   # sint16_box
        tags[0xe4] = 16  # sint32_box
        tags[0xe5] = 16  # real32_box

        tags[0xf8] = 1  # attr_ubyte
        tags[0xf9] = 2  # attr_uint16

        tags[0xfa] = self.embeddedData       # dataLength
        tags[0xfb] = self.embeddedDataSmall  # dataLengthByte

        self.tags = tags

    def getJobSize(self):
        """Counts pages in a PCLXL (PCL6) document.

        Algorithm by Jerome Alet.

        The documentation used for this was :

        HP PCL XL Feature Reference
        Protocol Class 2.0
        http://www.hpdevelopersolutions.com/downloads/64/358/xl_ref20r22.pdf

        Returns the number of pages, each page being counted as many
        times as its number of copies.

        Raises PDLParserError if the PCLXL stream header is not found,
        or if its endianness marker is unknown.
        """
        self.endianness = None

        # Look for the stream header : its first character is
        # the endianness marker.
        while 1:
            line = self.infile.readline()
            if not line:
                raise PDLParserError(
                    "This file doesn't seem to be PCLXL (aka PCL6)")
            if line[1:12] == " HP-PCL XL;":
                break
        endian = ord(line[0])
        if endian == 0x29:
            self.littleEndian()
        elif endian == 0x28:
            self.bigEndian()
        # elif endian == 0x27 : # TODO : This is the ESC code : parse it
        #                       # for PJL statements !
        else:
            raise PDLParserError(
                "Unknown endianness marker 0x%02x at start !" % endian)

        # Initialize table of tags
        self._initTags()

        infileno = self.infile.fileno()
        self.pages = {}
        self.minfile = minfile = mmap.mmap(infileno,
                                           os.fstat(infileno).st_size,
                                           prot=mmap.PROT_READ,
                                           flags=mmap.MAP_SHARED)
        tags = self.tags
        self.pagecount = 0
        # The binary stream begins just after the header line.
        self.pos = pos = self.infile.tell()
        try:
            try:
                while 1:
                    char = minfile[pos]
                    pos += 1
                    length = tags[ord(char)]
                    if not length:
                        continue
                    if callable(length):
                        self.pos = pos
                        length = length()
                        pos = self.pos
                    pos += length
            except IndexError:  # EOF ?
                pass  # reached EOF
        finally:
            # Always release the mapping, even on an unexpected error.
            minfile.close()

        # now handle number of copies for each page (may differ).
        nopage = {
            "copies": 1,
            "orientation": "Portrait",
            "mediatype": "Plain",
            "mediasize": "Default",
            "mediasource": "Main",
        }
        for pnum in range(1, self.pagecount + 1):
            # if no number of copies defined, take 1, as explained
            # in PCLXL documentation.
            # NB : if number of copies is 0, the page won't be output
            # but the formula below is still correct : we want
            # to decrease the total number of pages in this case.
            page = self.pages.get(pnum, nopage)
            copies = page["copies"]
            self.pagecount += (copies - 1)
            if self.debug:
                sys.stderr.write("%s*%s*%s*%s*%s\n" % (copies,
                                                       page["mediatype"],
                                                       page["mediasize"],
                                                       page["orientation"],
                                                       page["mediasource"]))

        return self.pagecount
| 331 | |
| 332 | def test() : |
| 333 | """Test function.""" |
| 334 | raise RuntimeError, "Not implemented !" |
| 335 | |
# When run as a script, launch the test function (which currently
# raises RuntimeError because it is not implemented).
if __name__ == "__main__" :
    test()