Changeset 193 for pkpgcounter
- Timestamp:
- 04/04/05 00:28:37 (19 years ago)
- Location:
- pkpgcounter/trunk/pdlanalyzer
- Files:
-
- 6 modified
Legend:
- Unmodified
- Added
- Removed
-
pkpgcounter/trunk/pdlanalyzer/escp2.py
#! /usr/bin/env python
# -*- coding: ISO-8859-15 -*-
#
# pkpgcounter : a generic Page Description Language parser
# [...] (unchanged header lines are elided in the changeset view)
# $Id$
#

import sys

from pdlanalyzer.pdlparser import PDLParser

class ESCP2Parser(PDLParser) :
    """Page counter for ESC/P2 documents."""
    def getJobSize(self) :
        """Returns the number of pages of an ESC/P2 document.

           The whole input is read in memory, then four byte sequences
           are counted. The first sequence with a usable count wins,
           in the order tested below.
        """
        contents = self.infile.read()

        # with other software or printer driver, we
        # may prefer to search for "\r\n\fESCAPE"
        # or "\r\fESCAPE"
        formfeeds = max(contents.count("\r\f\033"), contents.count("\r\n\f\033"))
        if formfeeds :
            return formfeeds

        # ghostscript's stcolor for example seems to
        # output ESC + @ + \f for each page plus one
        stcolormarks = contents.count("\033@\f")
        if stcolormarks > 1 :
            return stcolormarks - 1

        # while ghostscript's escp driver outputs instead
        # \f + ESC + @
        escpmarks = contents.count("\f\033@")
        if escpmarks :
            return escpmarks

        # with Gimpprint, at least, for each page there
        # are two Reset Printer sequences (ESC + @)
        return contents.count("\033@") // 2

def test() :
    """Placeholder test entry point : always fails."""
    raise RuntimeError("Not implemented !")

if __name__ == "__main__" :
    test()
pkpgcounter/trunk/pdlanalyzer/pcl345.py
#! /usr/bin/env python
# -*- coding: ISO-8859-15 -*-
#
# pkpgcounter : a generic Page Description Language parser
# [...] (unchanged header lines are elided in the changeset view)
# $Id$
#

import sys
import os       # needed for os.fstat() in getJobSize()
import mmap

from pdlanalyzer.pdlparser import PDLParser

class PCLParser(PDLParser) :
    """A parser for PCL3, PCL4, PCL5 documents."""
    mediasizes = {  # ESC&l####A
                    0 : "Default",
                    1 : "Executive",
                    2 : "Letter",
                    3 : "Legal",
                    6 : "Ledger",
                    25 : "A5",
                    26 : "A4",
                    27 : "A3",
                    45 : "JB5",
                    46 : "JB4",
                    71 : "HagakiPostcard",
                    72 : "OufukuHagakiPostcard",
                    80 : "MonarchEnvelope",
                    81 : "COM10Envelope",
                    90 : "DLEnvelope",
                    91 : "C5Envelope",
                    100 : "B5Envelope",
                    101 : "Custom",
                 }

    mediasources = { # ESC&l####H
                     0 : "Default",
                     1 : "Main",
                     2 : "Manual",
                     3 : "ManualEnvelope",
                     4 : "Alternate",
                     5 : "OptionalLarge",
                     6 : "EnvelopeFeeder",
                     7 : "Auto",
                     8 : "Tray1",
                   }

    orientations = { # ESC&l####O
                     0 : "Portrait",
                     1 : "Landscape",
                     2 : "ReversePortrait",
                     3 : "ReverseLandscape",
                   }

    mediatypes = { # ESC&l####M
                     0 : "Plain",
                     1 : "Bond",
                     2 : "Special",
                     3 : "Glossy",
                     4 : "Transparent",
                 }

    def setPageDict(self, pages, number, attribute, value) :
        """Sets an attribute of a page, creating the page's dictionary if needed.

           pages is the mapping of page numbers to page dictionaries.
           A page which is not yet known is first created with default
           values for copies, mediasource, mediasize, mediatype and
           orientation, then attribute is set to value.
        """
        page = pages.setdefault(number, { "copies" : 1, "mediasource" : "Main", "mediasize" : "Default", "mediatype" : "Plain", "orientation" : "Portrait"})
        page[attribute] = value

    def getJobSize(self) :
        """Count pages in a PCL5 document.

           Should also work for PCL3 and PCL4 documents.

           Algorithm from pclcount
           (c) 2003, by Eduardo Gielamo Oliveira & Rodolfo Broco Manin
           published under the terms of the GNU General Public Licence v2.

           Backported from C to Python by Jerome Alet, then enhanced
           with more PCL tags detected. I think all the necessary PCL tags
           are recognized to correctly handle PCL5 files wrt their number
           of pages. The documentation used for this was :

           HP PCL/PJL Reference Set
           PCL5 Printer Language Technical Quick Reference Guide
           http://h20000.www2.hp.com/bc/docs/support/SupportManual/bpl13205/bpl13205.pdf

           The input file is memory mapped and scanned byte by byte until
           an IndexError signals the end of the data. Form feeds, resets,
           ejects, back sides and start/end of graphics are counted, and
           the returned page count is derived from these counters at the
           end of the method.
        """
        infileno = self.infile.fileno()
        minfile = mmap.mmap(infileno, os.fstat(infileno)[6], prot=mmap.PROT_READ, flags=mmap.MAP_SHARED)
        tagsends = { "&n" : "W",
                     "&b" : "W",
                     "*i" : "W",
                     "*l" : "W",
                     "*m" : "W",
                     "*v" : "W",
                     "*c" : "W",
                     "(f" : "W",
                     "(s" : "W",
                     ")s" : "W",
                     "&p" : "X",
                     # "&l" : "XHAOM", # treated specially
                     "&a" : "G", # TODO : 0 means next side, 1 front side, 2 back side
                     "*g" : "W",
                     "*r" : "sbABC",
                     "*t" : "R",
                     # "*b" : "VW", # treated specially because it occurs very often
                   }
        pagecount = resets = ejects = backsides = startgfx = endgfx = 0
        starb = ampl = ispcl3 = escstart = 0
        mediasourcecount = mediasizecount = orientationcount = mediatypecount = 0
        tag = None
        pages = {}
        pos = 0
        try :
            # the only way out of this loop is the IndexError raised
            # when we try to read past the end of the mapped file.
            while 1 :
                char = minfile[pos] ; pos += 1
                if char == "\014" :
                    pagecount += 1
                elif char == "\033" :
                    starb = ampl = 0
                    #
                    #     <ESC>*b###y#m###v###w... -> PCL3 raster graphics
                    #     <ESC>*b###W -> Start of a raster data row/block
                    #     <ESC>*b###V -> Start of a raster data plane
                    #     <ESC>*c###W -> Start of a user defined pattern
                    #     <ESC>*i###W -> Start of a viewing illuminant block
                    #     <ESC>*l###W -> Start of a color lookup table
                    #     <ESC>*m###W -> Start of a download dither matrix block
                    #     <ESC>*v###W -> Start of a configure image data block
                    #     <ESC>*r1A -> Start Gfx
                    #     <ESC>(s###W -> Start of a characters description block
                    #     <ESC>)s###W -> Start of a fonts description block
                    #     <ESC>(f###W -> Start of a symbol set block
                    #     <ESC>&b###W -> Start of configuration data block
                    #     <ESC>&l###X -> Number of copies for current page
                    #     <ESC>&n###W -> Starts an alphanumeric string ID block
                    #     <ESC>&p###X -> Start of a non printable characters block
                    #     <ESC>&a2G -> Back side when duplex mode as generated by rastertohp
                    #     <ESC>*g###W -> Needed for planes in PCL3 output
                    #     <ESC>&l###H (or only 0 ?) -> Eject if NumPlanes > 1, as generated by rastertohp. Also defines mediasource
                    #     <ESC>&l###A -> mediasize
                    #     <ESC>&l###O -> orientation
                    #     <ESC>&l###M -> mediatype
                    #     <ESC>*t###R -> gfx resolution
                    #
                    tagstart = minfile[pos] ; pos += 1
                    if tagstart in "E9=YZ" : # one byte PCL tag
                        if tagstart == "E" :
                            resets += 1
                        continue             # skip to next tag
                    tag = tagstart + minfile[pos] ; pos += 1
                    if tag == "*b" :
                        starb = 1
                        tagend = "VW"
                    elif tag == "&l" :
                        ampl = 1
                        tagend = "XHAOM"
                    else :
                        try :
                            tagend = tagsends[tag]
                        except KeyError :
                            continue # Unsupported PCL tag
                    # Now read the numeric argument
                    size = 0
                    while 1 :
                        char = minfile[pos] ; pos += 1
                        if not char.isdigit() :
                            break
                        size = (size * 10) + int(char)
                    if char in tagend :
                        if tag == "&l" :
                            if char == "X" :
                                self.setPageDict(pages, pagecount, "copies", size)
                            elif char == "H" :
                                self.setPageDict(pages, pagecount, "mediasource", self.mediasources.get(size, str(size)))
                                mediasourcecount += 1
                                ejects += 1
                            elif char == "A" :
                                self.setPageDict(pages, pagecount, "mediasize", self.mediasizes.get(size, str(size)))
                                mediasizecount += 1
                            elif char == "O" :
                                self.setPageDict(pages, pagecount, "orientation", self.orientations.get(size, str(size)))
                                orientationcount += 1
                            elif char == "M" :
                                self.setPageDict(pages, pagecount, "mediatype", self.mediatypes.get(size, str(size)))
                                mediatypecount += 1
                        elif tag == "*r" :
                            # Special tests for PCL3
                            if (char == "s") and size :
                                while 1 :
                                    char = minfile[pos] ; pos += 1
                                    if char == "A" :
                                        break
                            elif (char == "b") and (minfile[pos] == "C") and not size :
                                ispcl3 = 1 # Certainly a PCL3 file
                            startgfx += (char == "A") and (minfile[pos - 2] in ("0", "1", "2", "3")) # Start Gfx
                            endgfx += (not size) and (char in ("C", "B")) # End Gfx
                        elif tag == "*t" :
                            escstart += 1
                        elif (tag == "&a") and (size == 2) :
                            backsides += 1      # Back side in duplex mode
                        else :
                            # we just ignore the block.
                            if tag == "&n" :
                                # we have to take care of the operation id byte
                                # which is before the string itself
                                size += 1
                            pos += size
                else :
                    if starb :
                        # special handling of PCL3 in which
                        # *b introduces combined ESCape sequences
                        size = 0
                        while 1 :
                            char = minfile[pos] ; pos += 1
                            if not char.isdigit() :
                                break
                            size = (size * 10) + int(char)
                        if char in ("w", "v") :
                            ispcl3 = 1  # certainly a PCL3 document
                            pos += size - 1
                        elif char in ("y", "m") :
                            ispcl3 = 1  # certainly a PCL3 document
                            pos -= 1    # fix position : we were ahead
                    elif ampl :
                        # special handling of PCL3 in which
                        # &l introduces combined ESCape sequences
                        size = 0
                        while 1 :
                            char = minfile[pos] ; pos += 1
                            if not char.isdigit() :
                                break
                            size = (size * 10) + int(char)
                        if char in ("a", "o", "h", "m") :
                            ispcl3 = 1  # certainly a PCL3 document
                            pos -= 1    # fix position : we were ahead
                            if char == "h" :
                                self.setPageDict(pages, pagecount, "mediasource", self.mediasources.get(size, str(size)))
                                mediasourcecount += 1
                            elif char == "a" :
                                self.setPageDict(pages, pagecount, "mediasize", self.mediasizes.get(size, str(size)))
                                mediasizecount += 1
                            elif char == "o" :
                                self.setPageDict(pages, pagecount, "orientation", self.orientations.get(size, str(size)))
                                orientationcount += 1
                            elif char == "m" :
                                self.setPageDict(pages, pagecount, "mediatype", self.mediatypes.get(size, str(size)))
                                mediatypecount += 1
        except IndexError : # EOF ?
            minfile.close() # reached EOF

        # if pagecount is still 0, we will use the number
        # of resets instead of the number of form feed characters.
        # but the number of resets is always at least 2 with a valid
        # pcl file : one at the very start and one at the very end
        # of the job's data. So we subtract 2 from the number of
        # resets. And since on our test data we needed to subtract
        # 1 more, we finally subtract 3, and will test several
        # PCL files with this. If resets < 2, then the file is
        # probably not a valid PCL file, so we use 0

        if self.debug :
            sys.stderr.write("pagecount : %s\n" % pagecount)
            sys.stderr.write("resets : %s\n" % resets)
            sys.stderr.write("ejects : %s\n" % ejects)
            sys.stderr.write("backsides : %s\n" % backsides)
            sys.stderr.write("startgfx : %s\n" % startgfx)
            sys.stderr.write("endgfx : %s\n" % endgfx)
            sys.stderr.write("mediasourcecount : %s\n" % mediasourcecount)
            sys.stderr.write("mediasizecount : %s\n" % mediasizecount)
            sys.stderr.write("orientationcount : %s\n" % orientationcount)
            sys.stderr.write("mediatypecount : %s\n" % mediatypecount)
            sys.stderr.write("escstart : %s\n" % escstart)

#        if not pagecount :
#            pagecount = (pagecount or ((resets - 3) * (resets > 2)))
#        else :
#            # here we add counters for other ways new pages may have
#            # been printed and ejected by the printer
#            pagecount += ejects + backsides
#
#        # now handle number of copies for each page (may differ).
#        # in duplex mode, number of copies may be sent only once.
#        for pnum in range(pagecount) :
#            # if no number of copies defined, take the preceding one else the one set before any page else 1.
#            page = pages.get(pnum, pages.get(pnum - 1, pages.get(0, { "copies" : 1 })))
#            pagecount += (page["copies"] - 1)
#
#        # in PCL3 files, there's one Start Gfx tag per page
#        if ispcl3 :
#            if endgfx == int(startgfx / 2) : # special case for cdj1600
#                pagecount = endgfx
#            elif startgfx :
#                pagecount = startgfx
#            elif endgfx :
#                pagecount = endgfx

        # choose the final page count from the counters gathered above
        if pagecount == mediasourcecount == escstart :
            pass        # should be OK.
        elif (not startgfx) and (not endgfx) :
            pagecount = ejects or pagecount
        elif startgfx == endgfx :
            pagecount = startgfx
        elif startgfx == (endgfx - 1) :
            pagecount = startgfx
        else :
            pagecount = abs(startgfx - endgfx)

        if self.debug :
            for pnum in range(pagecount) :
                # if no number of copies defined, take the preceding one else the one set before any page else 1.
                page = pages.get(pnum, pages.get(pnum - 1, pages.get(0, { "copies" : 1, "mediasource" : "Main", "mediasize" : "Default", "mediatype" : "Plain", "orientation" : "Portrait"})))
                sys.stderr.write("%s*%s*%s*%s*%s\n" % (page["copies"], page["mediatype"], page["mediasize"], page["orientation"], page["mediasource"]))

        return pagecount

def test() :
    """Test function."""
    raise RuntimeError("Not implemented !")

if __name__ == "__main__" :
    test()
pkpgcounter/trunk/pdlanalyzer/pclxl.py
#! /usr/bin/env python
# -*- coding: ISO-8859-15 -*-
#
# pkpgcounter : a generic Page Description Language parser
# [...] (unchanged header lines are elided in the changeset view)
# $Id$
#

import sys
import os       # needed for os.fstat() in getJobSize()
import mmap
from struct import unpack

from pdlanalyzer.pdlparser import PDLParser, PDLParserError

class PCLXLParser(PDLParser) :
    """A parser for PCLXL (aka PCL6) documents."""
    mediasizes = {
                    0 : "Letter",
                    1 : "Legal",
                    2 : "A4",
                    3 : "Executive",
                    4 : "Ledger",
                    5 : "A3",
                    6 : "COM10Envelope",
                    7 : "MonarchEnvelope",
                    8 : "C5Envelope",
                    9 : "DLEnvelope",
                    10 : "JB4",
                    11 : "JB5",
                    12 : "B5Envelope",
                    14 : "JPostcard",
                    15 : "JDoublePostcard",
                    16 : "A5",
                    17 : "A6",
                    18 : "JB6",
                 }

    mediasources = {
                     0 : "Default",
                     1 : "Auto",
                     2 : "Manual",
                     3 : "MultiPurpose",
                     4 : "UpperCassette",
                     5 : "LowerCassette",
                     6 : "EnvelopeTray",
                     7 : "ThirdCassette",
                   }

    orientations = {
                     0 : "Portrait",
                     1 : "Landscape",
                     2 : "ReversePortrait",
                     3 : "ReverseLandscape",
                   }

    def beginPage(self) :
        """Indicates the beginning of a new page, and extracts media information.

           Increments self.pagecount, then scans the mapped file backwards
           from the current position to decode the media source, size,
           orientation and type attributes. The result is stored in
           self.pages under the new page number. Always returns 0, which
           the main loop uses as the number of bytes to skip.
        """
        self.pagecount += 1

        # Default values
        mediatypelabel = "Plain"
        mediasourcelabel = "Main"
        mediasizelabel = "Default"
        orientationlabel = "Portrait"

        # Now go upstream to decode media type, size, source, and orientation
        # this saves time because we don't need a complete parser !
        minfile = self.minfile
        pos = self.pos - 2
        while pos > 0 : # safety check : don't go back too far !
            val = ord(minfile[pos])
            if val in (0x44, 0x48, 0x41) : # if previous endPage or openDataSource or beginSession (first page)
                break
            if val == 0x26 :
                mediasource = ord(minfile[pos - 2])
                mediasourcelabel = self.mediasources.get(mediasource, str(mediasource))
                pos = pos - 4
            elif val == 0x25 :
                mediasize = ord(minfile[pos - 2])
                mediasizelabel = self.mediasizes.get(mediasize, str(mediasize))
                pos = pos - 4
            elif val == 0x28 :
                orientation = ord(minfile[pos - 2])
                # this used to be assigned to a misspelled name, so
                # the decoded orientation was silently dropped.
                orientationlabel = self.orientations.get(orientation, str(orientation))
                pos = pos - 4
            elif val == 0x27 :
                savepos = pos
                pos = pos - 1
                while pos > 0 : # safety check : don't go back too far !
                    val = ord(minfile[pos])
                    pos -= 1
                    if val == 0xc8 :
                        break
                mediatypelabel = minfile[pos:savepos] # TODO : INCORRECT, WE HAVE TO STRIP OUT THE UBYTE ARRAY'S LENGTH !!!
            # else : TODO : CUSTOM MEDIA SIZE AND UNIT !
            else :
                pos = pos - 2   # ignored
        self.pages[self.pagecount] = { "copies" : 1,
                                       "orientation" : orientationlabel,
                                       "mediatype" : mediatypelabel,
                                       "mediasize" : mediasizelabel,
                                       "mediasource" : mediasourcelabel,
                                     }
        return 0

    def endPage(self) :
        """Indicates the end of a page.

           If the bytes just before the operator are 0xf8 0x31, the two
           bytes preceding them are decoded as the number of copies of
           the current page. Always returns 0.
        """
        pos = self.pos
        minfile = self.minfile
        if (ord(minfile[pos-3]) == 0xf8) and (ord(minfile[pos-2]) == 0x31) :
            # The EndPage operator may be preceded by a PageCopies attribute
            # So set number of copies for current page.
            # From what I read in PCLXL documentation, the number
            # of copies is an unsigned 16 bits integer
            self.pages[self.pagecount]["copies"] = unpack(self.endianness + "H", minfile[pos-5:pos-3])[0]
        return 0

    def _arraySize(self, itemsize) :
        """Reads the length of an array and returns the array's size in bytes.

           The byte at self.pos is the datatype tag of the length, which
           is looked up in self.tags to learn how many bytes encode the
           length (1, 2 or 4). self.pos is moved just after the length,
           and the decoded number of items is multiplied by itemsize.
           Raises PDLParserError if the length is not 1, 2 or 4 bytes long.
        """
        pos = self.pos
        datatype = self.minfile[pos]
        pos += 1
        length = self.tags[ord(datatype)]
        if callable(length) :
            self.pos = pos
            length = length()
            pos = self.pos
        posl = pos + length
        self.pos = posl
        if length == 1 :
            format = "B"
        elif length == 2 :
            format = self.endianness + "H"
        elif length == 4 :
            format = self.endianness + "I"
        else :
            raise PDLParserError("Error on array size at %s" % self.pos)
        return itemsize * unpack(format, self.minfile[pos:posl])[0]

    def array_8(self) :
        """Handles arrays of 1 byte items : returns the array's size in bytes."""
        return self._arraySize(1)

    def array_16(self) :
        """Handles arrays of 2 bytes items : returns the array's size in bytes."""
        return self._arraySize(2)

    def array_32(self) :
        """Handles arrays of 4 bytes items : returns the array's size in bytes."""
        return self._arraySize(4)

    def embeddedDataSmall(self) :
        """Handle small amounts of data : the length is a single byte."""
        pos = self.pos
        length = ord(self.minfile[pos])
        self.pos = pos + 1
        return length

    def embeddedData(self) :
        """Handle normal amounts of data : the length is a 32 bits unsigned integer."""
        pos = self.pos
        pos4 = pos + 4
        self.pos = pos4
        return unpack(self.endianness + "I", self.minfile[pos:pos4])[0]

    def littleEndian(self) :
        """Toggles to little endianness."""
        self.endianness = "<" # little endian
        return 0

    def bigEndian(self) :
        """Toggles to big endianness."""
        self.endianness = ">" # big endian
        return 0

    def getJobSize(self) :
        """Counts pages in a PCLXL (PCL6) document.

           Algorithm by Jerome Alet.

           The documentation used for this was :

           HP PCL XL Feature Reference
           Protocol Class 2.0
           http://www.hpdevelopersolutions.com/downloads/64/358/xl_ref20r22.pdf

           Raises PDLParserError if no " HP-PCL XL;" header line is found,
           or if the byte preceding it is not a known endianness marker.
           Returns the number of pages, each page being counted as many
           times as its number of copies.
        """
        self.endianness = None
        found = 0
        while not found :
            line = self.infile.readline()
            if not line :
                break
            if line[1:12] == " HP-PCL XL;" :
                found = 1
                endian = ord(line[0])
                if endian == 0x29 :
                    self.littleEndian()
                elif endian == 0x28 :
                    self.bigEndian()
                # elif endian == 0x27 : # TODO : This is the ESC code : parse it for PJL statements !
                #
                else :
                    raise PDLParserError("Unknown endianness marker 0x%02x at start !" % endian)
        if not found :
            raise PDLParserError("This file doesn't seem to be PCLXL (aka PCL6)")

        # Initialize table of tags : each entry is either the number of
        # bytes to skip after the tag, or a method returning this number.
        self.tags = [ 0 ] * 256

        # GhostScript's sources tell us that HP printers
        # only accept little endianness, but we can handle both.
        self.tags[0x28] = self.bigEndian    # BigEndian
        self.tags[0x29] = self.littleEndian # LittleEndian

        self.tags[0x43] = self.beginPage    # BeginPage
        self.tags[0x44] = self.endPage      # EndPage

        self.tags[0xc0] = 1 # ubyte
        self.tags[0xc1] = 2 # uint16
        self.tags[0xc2] = 4 # uint32
        self.tags[0xc3] = 2 # sint16
        self.tags[0xc4] = 4 # sint32
        self.tags[0xc5] = 4 # real32

        self.tags[0xc8] = self.array_8  # ubyte_array
        self.tags[0xc9] = self.array_16 # uint16_array
        self.tags[0xca] = self.array_32 # uint32_array
        self.tags[0xcb] = self.array_16 # sint16_array
        self.tags[0xcc] = self.array_32 # sint32_array
        self.tags[0xcd] = self.array_32 # real32_array

        self.tags[0xd0] = 2 # ubyte_xy
        self.tags[0xd1] = 4 # uint16_xy
        self.tags[0xd2] = 8 # uint32_xy
        self.tags[0xd3] = 4 # sint16_xy
        self.tags[0xd4] = 8 # sint32_xy
        self.tags[0xd5] = 8 # real32_xy

        self.tags[0xe0] = 4  # ubyte_box
        self.tags[0xe1] = 8  # uint16_box
        self.tags[0xe2] = 16 # uint32_box
        self.tags[0xe3] = 8  # sint16_box
        self.tags[0xe4] = 16 # sint32_box
        self.tags[0xe5] = 16 # real32_box

        self.tags[0xf8] = 1 # attr_ubyte
        self.tags[0xf9] = 2 # attr_uint16

        self.tags[0xfa] = self.embeddedData      # dataLength
        self.tags[0xfb] = self.embeddedDataSmall # dataLengthByte

        infileno = self.infile.fileno()
        self.pages = {}
        self.minfile = minfile = mmap.mmap(infileno, os.fstat(infileno)[6], prot=mmap.PROT_READ, flags=mmap.MAP_SHARED)
        tags = self.tags
        self.pagecount = 0
        self.pos = pos = self.infile.tell()
        try :
            # the only way out of this loop is the IndexError raised
            # when we try to read past the end of the mapped file.
            while 1 :
                char = minfile[pos]
                pos += 1
                length = tags[ord(char)]
                if not length :
                    continue
                if callable(length) :
                    self.pos = pos
                    length = length()
                    pos = self.pos
                pos += length
        except IndexError : # EOF ?
            self.minfile.close() # reached EOF

        # now handle number of copies for each page (may differ).
        defaultpage = { "copies" : 1,
                        "orientation" : "Portrait",
                        "mediatype" : "Plain",
                        "mediasize" : "Default",
                        "mediasource" : "Main",
                      }
        for pnum in range(1, self.pagecount + 1) :
            # if no number of copies defined, take 1, as explained
            # in PCLXL documentation.
            # NB : if number of copies is 0, the page won't be output
            # but the formula below is still correct : we want
            # to decrease the total number of pages in this case.
            page = self.pages.get(pnum, defaultpage)
            copies = page["copies"]
            self.pagecount += (copies - 1)
            if self.debug :
                sys.stderr.write("%s*%s*%s*%s*%s\n" % (copies, page["mediatype"], page["mediasize"], page["orientation"], page["mediasource"]))

        return self.pagecount

def test() :
    """Test function."""
    raise RuntimeError("Not implemented !")

if __name__ == "__main__" :
    test()
pkpgcounter/trunk/pdlanalyzer/pdf.py
#! /usr/bin/env python
# -*- coding: ISO-8859-15 -*-
#
# pkpgcounter : a generic Page Description Language parser
# [...] (unchanged header lines are elided in the changeset view)
# $Id$
#

import sys
import re

from pdlanalyzer.pdlparser import PDLParser

class PDFParser(PDLParser) :
    """Page counter for PDF documents."""
    def getJobSize(self) :
        """Returns the number of pages of a PDF document.

           The input is read line by line, and each occurrence of
           "/Type/Page" or "/Type /Page" followed by a slash, a space,
           a tab, a carriage return or a line feed counts as one page.
        """
        pagemarker = re.compile(r"(/Type) ?(/Page)[/ \t\r\n]")
        total = 0
        for line in self.infile :
            matches = pagemarker.findall(line)
            total += len(matches)
        return total

def test() :
    """Placeholder test entry point : always fails."""
    raise RuntimeError("Not implemented !")

if __name__ == "__main__" :
    test()
pkpgcounter/trunk/pdlanalyzer/pdlparser.py
#

class PDLParserError(Exception):
    """An exception for PDLParser related stuff.

       The message given at creation time is kept in the message
       attribute, and is what both str() and repr() return.
    """
    def __init__(self, message = ""):
        self.message = message
        Exception.__init__(self, message)
    def __repr__(self):
        # format instead of returning the attribute as is : the message
        # may be something else than a string (e.g. another exception),
        # and __str__ fails with TypeError when it gets a non-string.
        return "%s" % (self.message,)
    __str__ = __repr__

class PDLParser :
    """Generic PDL parser."""
    # the rest of this class is not displayed in the changeset view.
pkpgcounter/trunk/pdlanalyzer/postscript.py
#! /usr/bin/env python
# -*- coding: ISO-8859-15 -*-
#
# pkpgcounter : a generic Page Description Language parser
# [...] (unchanged header lines are elided in the changeset view)
# $Id$
#

import sys
import popen2

from pdlanalyzer.pdlparser import PDLParser, PDLParserError

# Size of the chunks of data which are sent to GhostScript.
# This name was used below without being defined nor imported.
MEGABYTE = 1024 * 1024

class PostScriptParser(PDLParser) :
    """A parser for PostScript documents."""
    # NOTE(review) : self.copies is read by the methods below but is never
    # initialized in this module. Presumably the PDLParser base class sets
    # it : to be confirmed.
    def throughGhostScript(self) :
        """Get the count through GhostScript, useful for non-DSC compliant PS files.

           The whole input is piped to GhostScript's bbox device, and
           grep counts the "%%HiResBoundingBox:" lines it outputs.
           Returns this count multiplied by self.copies.
           Raises PDLParserError if feeding the child process, reading
           its answer or waiting for its termination fails.
        """
        if self.debug :
            sys.stderr.write("Internal parser sucks, using GhostScript instead...\n")
        self.infile.seek(0)
        command = 'gs -sDEVICE=bbox -dNOPAUSE -dBATCH -dQUIET - 2>&1 | grep -c "%%HiResBoundingBox:" 2>/dev/null'
        child = popen2.Popen4(command)
        try :
            data = self.infile.read(MEGABYTE)
            while data :
                child.tochild.write(data)
                data = self.infile.read(MEGABYTE)
            child.tochild.flush()
            child.tochild.close()
        except (IOError, OSError) :
            # sys.exc_info() is used to retrieve the exception because
            # this works with every version of Python.
            raise PDLParserError("Problem during analysis of Binary PostScript document : %s" % sys.exc_info()[1])

        pagecount = 0
        try :
            pagecount = int(child.fromchild.readline().strip())
        except (IOError, OSError, AttributeError, ValueError) :
            raise PDLParserError("Problem during analysis of Binary PostScript document : %s" % sys.exc_info()[1])
        child.fromchild.close()

        try :
            child.wait()
        except OSError :
            raise PDLParserError("Problem during analysis of Binary PostScript document : %s" % sys.exc_info()[1])
        return pagecount * self.copies

    def _noteCopies(self, fields, index) :
        """Records fields[index] as the number of copies if it is greater than the current one.

           A missing field or a field which is not an integer is ignored.
        """
        try :
            number = int(fields[index])
        except (IndexError, ValueError) :
            return
        if number > self.copies :
            self.copies = number

    def natively(self) :
        """Count pages in a DSC compliant PostScript document.

           Counts the lines which begin with "%%Page: ", and keeps the
           greatest number of copies found in the statements recognized
           below. Returns the number of pages multiplied by self.copies.
        """
        self.infile.seek(0)
        pagecount = 0
        for line in self.infile :
            if line.startswith("%%Page: ") :
                pagecount += 1
            elif line.startswith("%%Requirements: numcopies(") :
                self._noteCopies(line.strip().split('(')[1].split(')'), 0)
            elif line.startswith("%%BeginNonPPDFeature: NumCopies ") :
                # handle # of copies set by some Windows printer driver
                self._noteCopies(line.strip().split(), 2)
            elif line.startswith("1 dict dup /NumCopies ") :
                # handle # of copies set by mozilla/kprinter
                self._noteCopies(line.strip().split(), 4)
        return pagecount * self.copies

    def getJobSize(self) :
        """Count pages in PostScript document.

           The native parser is tried first, and GhostScript is only
           used when it finds no page.
        """
        return self.natively() or self.throughGhostScript()

def test() :
    """Test function."""
    raise RuntimeError("Not implemented !")

if __name__ == "__main__" :
    test()