#! /usr/bin/env python
# -*- coding: ISO-8859-15 -*-
#
# pkpgcounter : a generic Page Description Language parser
#
# (c) 2003,2004,2005 Jerome Alet <alet@librelogiciel.com>
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
#
# $Id$
#

import mmap
import os
import sys
from struct import unpack

from pdlanalyzer import pdlparser

class PCLXLParser(pdlparser.PDLParser):
    """A parser for PCLXL (aka PCL6) documents.

    The parser does not interpret the whole stream: it walks over the
    tagged values, skipping their payload, and only reacts to the
    endianness markers and to the BeginPage / EndPage operators.
    """

    # Human readable names for the numeric values of the media size attribute.
    mediasizes = {
        0: "Letter",
        1: "Legal",
        2: "A4",
        3: "Executive",
        4: "Ledger",
        5: "A3",
        6: "COM10Envelope",
        7: "MonarchEnvelope",
        8: "C5Envelope",
        9: "DLEnvelope",
        10: "JB4",
        11: "JB5",
        12: "B5Envelope",
        14: "JPostcard",
        15: "JDoublePostcard",
        16: "A5",
        17: "A6",
        18: "JB6",
    }

    # Human readable names for the numeric values of the media source attribute.
    mediasources = {
        0: "Default",
        1: "Auto",
        2: "Manual",
        3: "MultiPurpose",
        4: "UpperCassette",
        5: "LowerCassette",
        6: "EnvelopeTray",
        7: "ThirdCassette",
    }

    # Human readable names for the numeric values of the orientation attribute.
    orientations = {
        0: "Portrait",
        1: "Landscape",
        2: "ReversePortrait",
        3: "ReverseLandscape",
    }

    # struct format codes for the unsigned integer widths used to encode
    # array lengths.
    _LENGTH_FORMATS = {1: "B", 2: "H", 4: "I"}

    def _unpackUnsigned(self, width, data):
        """Decodes an unsigned integer of the given width in bytes.

        Returns None when width is not 1, 2 or 4.
        """
        code = self._LENGTH_FORMATS.get(width)
        if code is None:
            return None
        if width > 1:
            # Byte order only matters for multi-byte values.
            code = self.endianness + code
        return unpack(code, data)[0]

    def _readMediaType(self, attrpos):
        """Extracts the media type string which precedes the attribute at attrpos.

        attrpos is the index of the media type attribute identifier (0x27),
        so the byte just before it is the attribute tag and the array data
        ends there.  The method scans backward for an ubyte array tag (0xc8)
        whose declared length ends exactly at that attribute tag, which also
        strips the array's tag and length header from the result.

        Returns a (label, position) tuple : label is the array contents, or
        None if no matching array was found, and position is the index from
        which the caller should continue its backward scan.
        """
        minfile = self.minfile
        dataend = attrpos - 1
        pos = dataend - 1
        while pos > 0:  # safety check : don't go back too far !
            if ord(minfile[pos]) == 0xc8:
                width = self.tags[ord(minfile[pos + 1])]
                if width in self._LENGTH_FORMATS:
                    datastart = pos + 2 + width
                    if datastart <= dataend:
                        declared = self._unpackUnsigned(width, minfile[pos + 2:datastart])
                        if datastart + declared == dataend:
                            return (minfile[datastart:dataend], pos - 1)
            pos -= 1
        return (None, pos)

    def beginPage(self):
        """Indicates the beginning of a new page, and extracts media information.

        Registers the new page in self.pages with its number of copies set
        to 1, then always returns 0 so that the caller skips no extra data.
        """
        self.pagecount += 1

        # Default values
        mediatypelabel = "Plain"
        mediasourcelabel = "Main"
        mediasizelabel = "Default"
        orientationlabel = "Portrait"

        # Now go upstream to decode media type, size, source, and orientation
        # this saves time because we don't need a complete parser !
        minfile = self.minfile
        pos = self.pos - 2
        while pos > 0:  # safety check : don't go back too far !
            val = ord(minfile[pos])
            if val in (0x44, 0x48, 0x41):
                # previous endPage or openDataSource or beginSession (first page)
                break
            if val == 0x26:
                mediasource = ord(minfile[pos - 2])
                mediasourcelabel = self.mediasources.get(mediasource, str(mediasource))
                pos = pos - 4
            elif val == 0x25:
                mediasize = ord(minfile[pos - 2])
                mediasizelabel = self.mediasizes.get(mediasize, str(mediasize))
                pos = pos - 4
            elif val == 0x28:
                orientation = ord(minfile[pos - 2])
                orientationlabel = self.orientations.get(orientation, str(orientation))
                pos = pos - 4
            elif val == 0x27:
                (label, pos) = self._readMediaType(pos)
                if label is not None:
                    mediatypelabel = label
            # else : TODO : CUSTOM MEDIA SIZE AND UNIT !
            else:
                pos = pos - 2  # ignored
        self.pages[self.pagecount] = {
            "copies": 1,
            "orientation": orientationlabel,
            "mediatype": mediatypelabel,
            "mediasize": mediasizelabel,
            "mediasource": mediasourcelabel,
        }
        return 0

    def endPage(self):
        """Indicates the end of a page, and records its number of copies.

        Always returns 0 so that the caller skips no extra data.
        """
        pos = self.pos
        minfile = self.minfile
        if (ord(minfile[pos - 3]) == 0xf8) and (ord(minfile[pos - 2]) == 0x31):
            # The EndPage operator may be preceded by a PageCopies attribute
            # So set number of copies for current page.
            # From what I read in PCLXL documentation, the number
            # of copies is an unsigned 16 bits integer
            page = self.pages.get(self.pagecount)
            if page is not None:
                # Only update pages registered by beginPage : a stray
                # byte sequence seen before the first page is ignored.
                page["copies"] = unpack(self.endianness + "H", minfile[pos - 5:pos - 3])[0]
        return 0

    def _arraySize(self, itemsize):
        """Reads an array's length and returns the size in bytes of its contents.

        The byte at self.pos is the tag describing how the number of
        elements is encoded.  self.pos is left just after the encoded
        length, and the returned value is that number of elements
        multiplied by itemsize.

        Raises pdlparser.PDLParserError if the length is not encoded on
        1, 2 or 4 bytes.
        """
        pos = self.pos
        datatype = self.minfile[pos]
        pos += 1
        length = self.tags[ord(datatype)]
        if callable(length):
            self.pos = pos
            length = length()
            pos = self.pos
        posl = pos + length
        self.pos = posl
        count = self._unpackUnsigned(length, self.minfile[pos:posl])
        if count is None:
            raise pdlparser.PDLParserError("Error on array size at %s" % self.pos)
        return itemsize * count

    def array_8(self):
        """Handles arrays of one byte long elements."""
        return self._arraySize(1)

    def array_16(self):
        """Handles arrays of two bytes long elements."""
        return self._arraySize(2)

    def array_32(self):
        """Handles arrays of four bytes long elements."""
        return self._arraySize(4)

    def embeddedDataSmall(self):
        """Handle small amounts of data : the length is stored on a single byte."""
        pos = self.pos
        length = ord(self.minfile[pos])
        self.pos = pos + 1
        return length

    def embeddedData(self):
        """Handle normal amounts of data : the length is stored on four bytes."""
        pos = self.pos
        pos4 = pos + 4
        self.pos = pos4
        return unpack(self.endianness + "I", self.minfile[pos:pos4])[0]

    def littleEndian(self):
        """Toggles to little endianness."""
        self.endianness = "<"  # little endian
        return 0

    def bigEndian(self):
        """Toggles to big endianness."""
        self.endianness = ">"  # big endian
        return 0

    def _findHeader(self):
        """Reads lines until the PCLXL stream header, and sets the endianness.

        Raises pdlparser.PDLParserError if the header is never found or if
        its endianness marker is unknown.
        """
        while 1:
            line = self.infile.readline()
            if not line:
                raise pdlparser.PDLParserError("This file doesn't seem to be PCLXL (aka PCL6)")
            if line[1:12] == " HP-PCL XL;":
                break
        endian = ord(line[0])
        if endian == 0x29:
            self.littleEndian()
        elif endian == 0x28:
            self.bigEndian()
        # elif endian == 0x27 : # TODO : This is the ESC code : parse it for PJL statements !
        else:
            raise pdlparser.PDLParserError("Unknown endianness marker 0x%02x at start !" % endian)

    def _buildTags(self):
        """Returns the table which maps each tag byte to what follows it.

        An entry is either the number of bytes to skip after the tag
        (0 for tags we don't care about) or a method to call, which
        returns that number of bytes.
        """
        tags = [0] * 256

        # GhostScript's sources tell us that HP printers
        # only accept little endianness, but we can handle both.
        tags[0x28] = self.bigEndian     # BigEndian
        tags[0x29] = self.littleEndian  # LittleEndian

        tags[0x43] = self.beginPage     # BeginPage
        tags[0x44] = self.endPage       # EndPage

        tags[0xc0] = 1  # ubyte
        tags[0xc1] = 2  # uint16
        tags[0xc2] = 4  # uint32
        tags[0xc3] = 2  # sint16
        tags[0xc4] = 4  # sint32
        tags[0xc5] = 4  # real32

        tags[0xc8] = self.array_8   # ubyte_array
        tags[0xc9] = self.array_16  # uint16_array
        tags[0xca] = self.array_32  # uint32_array
        tags[0xcb] = self.array_16  # sint16_array
        tags[0xcc] = self.array_32  # sint32_array
        tags[0xcd] = self.array_32  # real32_array

        tags[0xd0] = 2  # ubyte_xy
        tags[0xd1] = 4  # uint16_xy
        tags[0xd2] = 8  # uint32_xy
        tags[0xd3] = 4  # sint16_xy
        tags[0xd4] = 8  # sint32_xy
        tags[0xd5] = 8  # real32_xy

        tags[0xe0] = 4   # ubyte_box
        tags[0xe1] = 8   # uint16_box
        tags[0xe2] = 16  # uint32_box
        tags[0xe3] = 8   # sint16_box
        tags[0xe4] = 16  # sint32_box
        tags[0xe5] = 16  # real32_box

        tags[0xf8] = 1  # attr_ubyte
        tags[0xf9] = 2  # attr_uint16

        tags[0xfa] = self.embeddedData       # dataLength
        tags[0xfb] = self.embeddedDataSmall  # dataLengthByte
        return tags

    def getJobSize(self):
        """Counts pages in a PCLXL (PCL6) document.

        Algorithm by Jerome Alet.

        The documentation used for this was :

        HP PCL XL Feature Reference
        Protocol Class 2.0
        http://www.hpdevelopersolutions.com/downloads/64/358/xl_ref20r22.pdf

        Returns the number of pages, each page being counted as many
        times as its number of copies.

        Raises pdlparser.PDLParserError if the input is not recognized
        as PCLXL.
        """
        self.endianness = None
        self._findHeader()

        # Initialize table of tags
        self.tags = tags = self._buildTags()

        infileno = self.infile.fileno()
        self.pages = {}
        self.minfile = minfile = mmap.mmap(infileno, os.fstat(infileno).st_size, prot=mmap.PROT_READ, flags=mmap.MAP_SHARED)
        self.pagecount = 0
        self.pos = pos = self.infile.tell()
        try:
            try:
                while 1:
                    char = minfile[pos]
                    pos += 1
                    length = tags[ord(char)]
                    if not length:
                        continue
                    if callable(length):
                        self.pos = pos
                        length = length()
                        pos = self.pos
                    pos += length
            except IndexError:
                pass  # reached EOF : this is the only normal way out of the loop
        finally:
            # Release the mapping even when an unexpected error goes through.
            minfile.close()

        # now handle number of copies for each page (may differ).
        for pnum in range(1, self.pagecount + 1):
            # if no number of copies defined, take 1, as explained
            # in PCLXL documentation.
            # NB : if number of copies is 0, the page won't be output
            # but the formula below is still correct : we want
            # to decrease the total number of pages in this case.
            page = self.pages.get(pnum)
            if page is None:
                continue  # unknown page : counts as a single copy
            copies = page["copies"]
            self.pagecount += (copies - 1)
            if self.debug:
                sys.stderr.write("%s*%s*%s*%s*%s\n" % (copies, page["mediatype"], page["mediasize"], page["orientation"], page["mediasource"]))

        return self.pagecount

def test():
    """Test function.

    Counts the pages of each file named on the command line, "-" meaning
    the standard input, and prints the total.  The standard input is used
    when no file is named, or when it is not a terminal and "-" was not
    given explicitly.
    """
    if (len(sys.argv) < 2) or ((not sys.stdin.isatty()) and ("-" not in sys.argv[1:])):
        sys.argv.append("-")
    totalsize = 0
    for arg in sys.argv[1:]:
        if arg == "-":
            infile = sys.stdin
            mustclose = 0
        else:
            infile = open(arg, "rb")
            mustclose = 1
        try:
            try:
                parser = PCLXLParser(infile, debug=1)
                totalsize += parser.getJobSize()
            except pdlparser.PDLParserError:
                # sys.exc_info() is used instead of a version specific
                # "except ... , msg" or "except ... as msg" clause.
                msg = sys.exc_info()[1]
                sys.stderr.write("ERROR: %s\n" % msg)
                sys.stderr.flush()
        finally:
            # Close the files we opened ourselves, even when an
            # unexpected exception goes through.
            if mustclose:
                infile.close()
    print("%s" % totalsize)


if __name__ == "__main__":
    test()