Changeset 1487
- Timestamp:
- 05/21/04 22:40:08 (21 years ago)
- Location:
- pykota/trunk
- Files:
-
- 3 modified
Legend:
- Unmodified
- Added
- Removed
-
pykota/trunk/bin/pkpgcounter
r1482 r1487 24 24 # 25 25 # $Log$ 26 # Revision 1.11 2004/05/21 20:40:07 jalet 27 # All the code for pkpgcounter is now in pdlanalyzer.py 28 # 26 29 # Revision 1.10 2004/05/18 09:59:54 jalet 27 30 # pkpgcounter is now just a wrapper around the PDLAnalyzer class … … 58 61 # 59 62 60 import sys61 63 from pykota import pdlanalyzer 62 64 63 65 if __name__ == "__main__" : 64 if (len(sys.argv) < 2) or ((not sys.stdin.isatty()) and ("-" not in sys.argv[1:])) : 65 sys.argv.append("-") 66 67 totalsize = 0 68 for arg in sys.argv[1:] : 69 try : 70 parser = pdlanalyzer.PDLAnalyzer(arg) 71 totalsize += parser.getJobSize() 72 except TypeError, msg : 73 sys.stderr.write("%s\n" % msg) 74 sys.stderr.flush() 75 print "%s" % totalsize 66 pdlanalyzer.main() -
pykota/trunk/NEWS
r1485 r1487 24 24 - 1.19alpha10 : 25 25 26 - Improvements to generic PDL Analyzer. 27 26 28 - 25% Speedup in PCL parser. Getting more of the Python version 27 29 will probably be difficult, since the completely table driven -
pykota/trunk/pykota/pdlanalyzer.py
r1485 r1487 22 22 # 23 23 # $Log$ 24 # Revision 1.3 2004/05/21 20:40:08 jalet 25 # All the code for pkpgcounter is now in pdlanalyzer.py 26 # 24 27 # Revision 1.2 2004/05/19 19:09:36 jalet 25 28 # Speed improvement … … 32 35 33 36 import sys 34 import os35 37 import struct 36 38 import tempfile 39 40 KILOBYTE = 1024 41 MEGABYTE = 1024 * KILOBYTE 42 43 class PDLAnalyzerError(Exception): 44 """An exception for PDL Analyzer related stuff.""" 45 def __init__(self, message = ""): 46 self.message = message 47 Exception.__init__(self, message) 48 def __repr__(self): 49 return self.message 50 __str__ = __repr__ 37 51 38 52 class PostScriptAnalyzer : … … 44 58 """Count pages in a DSC compliant PostScript document.""" 45 59 pagecount = 0 46 pagenum = None47 60 while 1 : 48 61 line = self.infile.readline() … … 63 76 if newpos >= self.len : 64 77 oldlen = self.len 65 self.data = self.infile.read( 1024*1024)78 self.data = self.infile.read(MEGABYTE) 66 79 self.len = len(self.data) 67 80 if not self.len : … … 76 89 char = self.data[self.pos] 77 90 else : 78 self.data = self.infile.read( 1024*1024)91 self.data = self.infile.read(MEGABYTE) 79 92 self.len = len(self.data) 80 93 self.pos = 0 … … 182 195 # PCL files with this. If resets < 2, then the file is 183 196 # probably not a valid PCL file, so we return 0 184 if not pagecount : 185 return copies * (resets - 3) * (resets > 2) 186 else : 187 return copies * pagecount 197 return copies * (pagecount or ((resets - 3) * (resets > 2))) 188 198 189 199 class PCLXLAnalyzer : 190 200 def __init__(self, infile) : 191 201 """Initialize PCLXL Analyzer.""" 192 raise TypeError, "PCLXL (aka PCL6) is not supported yet."202 raise PDLAnalyzerError, "PCLXL (aka PCL6) is not supported yet." 193 203 self.infile = infile 194 204 self.islittleendian = None … … 205 215 self.bigendian() 206 216 if not found : 207 raise TypeError, "This file doesn't seem to be PCLXL (aka PCL6)"208 else : 209 self.tags = [ None] * 256217 raise PDLAnalyzerError, "This file doesn't seem to be PCLXL (aka PCL6)" 218 else : 219 self.tags = [lambda: None] * 256 210 220 self.tags[0x28] = self.bigendian # big endian 211 221 self.tags[0x29] = self.littleendian # big endian … … 213 223 self.tags[0x44] = self.endPage # EndPage 214 224 215 self.tags[0xc0] = 1 # ubyte216 self.tags[0xc1] = 2 # uint16217 self.tags[0xc2] = 4 # uint32218 self.tags[0xc3] = 2 # sint16219 self.tags[0xc4] = 4 # sint32220 self.tags[0xc5] = 4 # real32225 self.tags[0xc0] = lambda: 1 # ubyte 226 self.tags[0xc1] = lambda: 2 # uint16 227 self.tags[0xc2] = lambda: 4 # uint32 228 self.tags[0xc3] = lambda: 2 # sint16 229 self.tags[0xc4] = lambda: 4 # sint32 230 self.tags[0xc5] = lambda: 4 # real32 221 231 222 232 self.tags[0xc8] = self.array_8 # ubyte_array … … 227 237 self.tags[0xcd] = self.array_32 # real32_array 228 238 229 self.tags[0xd0] = 2 # ubyte_xy230 self.tags[0xd1] = 4 # uint16_xy231 self.tags[0xd2] = 8 # uint32_xy232 self.tags[0xd3] = 4 # sint16_xy233 self.tags[0xd4] = 8 # sint32_xy234 self.tags[0xd5] = 8 # real32_xy235 236 self.tags[0xd0] = 4 # ubyte_box237 self.tags[0xd1] = 8 # uint16_box238 self.tags[0xd2] = 16 # uint32_box239 self.tags[0xd3] = 8 # sint16_box240 self.tags[0xd4] = 16 # sint32_box241 self.tags[0xd5] = 16 # real32_box242 243 self.tags[0xf8] = 1 # attr_ubyte244 self.tags[0xf9] = 2 # attr_uint16239 self.tags[0xd0] = lambda: 2 # ubyte_xy 240 self.tags[0xd1] = lambda: 4 # uint16_xy 241 self.tags[0xd2] = lambda: 8 # uint32_xy 242 self.tags[0xd3] = lambda: 4 # sint16_xy 243 self.tags[0xd4] = lambda: 8 # sint32_xy 244 self.tags[0xd5] = lambda: 8 # real32_xy 245 246 self.tags[0xd0] = lambda: 4 # ubyte_box 247 self.tags[0xd1] = lambda: 8 # uint16_box 248 self.tags[0xd2] = lambda: 16 # uint32_box 249 self.tags[0xd3] = lambda: 8 # sint16_box 250 self.tags[0xd4] = lambda: 16 # sint32_box 251 self.tags[0xd5] = lambda: 16 # real32_box 252 253 self.tags[0xf8] = lambda: 1 # attr_ubyte 254 self.tags[0xf9] = lambda: 2 # attr_uint16 245 255 246 256 self.tags[0xfa] = self.embeddedData # dataLength … … 263 273 def handleArray(self, itemsize) : 264 274 """Handles arrays.""" 275 pos = self.infile.tell() 265 276 datatype = self.infile.read(1) 266 length = self.tags[ord(datatype)] 267 sarraysize = self.infile.read(length) 268 if self.islittleendian : 269 fmt = "<" 270 else : 271 fmt = ">" 272 if length == 1 : 273 fmt += "B" 274 elif length == 2 : 275 fmt += "H" 276 elif length == 4 : 277 fmt += "I" 278 else : 279 raise TypeError, "Error on array size at %s" % self.infile.tell() 280 arraysize = struct.unpack(fmt, sarraysize)[0] 281 return arraysize * itemsize 277 length = self.tags[ord(datatype)]() 278 if length is None : 279 self.debug("Bogus array length at %s" % pos) 280 else : 281 sarraysize = self.infile.read(length) 282 if self.islittleendian : 283 fmt = "<" 284 else : 285 fmt = ">" 286 if length == 1 : 287 fmt += "B" 288 elif length == 2 : 289 fmt += "H" 290 elif length == 4 : 291 fmt += "I" 292 else : 293 raise PDLAnalyzerError, "Error on array size at %s" % self.infile.tell() 294 arraysize = struct.unpack(fmt, sarraysize)[0] 295 self.debug("Array at %s, itemsize %s, datatype 0x%02x, size %s" % (pos, itemsize, ord(datatype), arraysize)) 296 return arraysize * itemsize 282 297 283 298 def array_8(self) : … … 295 310 def embeddedDataSmall(self) : 296 311 """Handle small amounts of data.""" 297 return ord(self.infile.read(1)) 312 pos = self.infile.tell() 313 val = ord(self.infile.read(1)) 314 self.debug("smalldatablock at %s (0x%02x)" % (pos, val)) 315 return val 298 316 299 317 def embeddedData(self) : … … 303 321 else : 304 322 fmt = ">I" 305 return struct.unpack(fmt, self.infile.read(4))[0] 323 pos = self.infile.tell() 324 val = struct.unpack(fmt, self.infile.read(4))[0] 325 self.debug("datablock at %s (0x%08x)" % (pos, val)) 326 return val 306 327 307 328 def littleendian(self) : … … 317 338 self.pagecount = 0 318 339 while 1 : 319 pos = self.infile.tell()320 340 char = self.infile.read(1) 321 341 if not char : 322 342 break 323 343 index = ord(char) 324 length = self.tags[index] 325 if length is not None : 326 if not length : 327 self.debug("Unrecognized tag 0x%02x at %s\n" % (index, self.infile.tell())) 328 elif callable(length) : 329 length = length() 330 if length : 331 self.infile.read(length) 344 length = self.tags[index]() 345 if length : 346 self.infile.read(length) 332 347 return self.pagecount 333 348 334 349 class PDLAnalyzer : 335 350 """Generic PDL Analyzer class.""" 336 351 def __init__(self, filename) : 337 """Initializes the PDL analyzer.""" 352 """Initializes the PDL analyzer. 353 354 filename is the name of the file or '-' for stdin. 355 filename can also be a file-like object which 356 supports read() and seek(). 357 """ 338 358 self.filename = filename 339 359 … … 341 361 """Returns the job's size.""" 342 362 self.openFile() 343 pdlhandler = self.detectPDLHandler() 344 if pdlhandler is not None : 363 try : 364 pdlhandler = self.detectPDLHandler() 365 except PDLAnalyzerError, msg : 366 self.closeFile() 367 raise PDLAnalyzerError, "ERROR : Unknown file format for %s (%s)" % (self.filename, msg) 368 else : 345 369 try : 346 370 size = pdlhandler(self.infile).getJobSize() … … 348 372 self.closeFile() 349 373 return size 350 else :351 self.closeFile()352 raise TypeError, "ERROR : Unknown file format for %s" % self.filename353 374 354 375 def openFile(self) : 355 376 """Opens the job's data stream for reading.""" 356 if self.filename == "-" : 377 self.mustclose = 1 378 if hasattr(self.filename, "read") and hasattr(self.filename, "seek") : 379 # filename is in fact a file-like object 380 self.infile = self.filename 381 self.mustclose = 0 # we don't want to close this file when finished 382 elif self.filename == "-" : 357 383 # we must read from stdin 358 384 # but since stdin is not seekable, we have to use a temporary … … 360 386 self.infile = tempfile.TemporaryFile() 361 387 while 1 : 362 data = sys.stdin.read( 256 * 1024)388 data = sys.stdin.read(MEGABYTE) 363 389 if not data : 364 390 break … … 371 397 372 398 def closeFile(self) : 373 """Closes the job's data stream.""" 374 self.infile.close() 399 """Closes the job's data stream if we can close it.""" 400 if self.mustclose : 401 self.infile.close() 375 402 376 403 def isPostScript(self, data) : … … 415 442 # Try to detect file type by reading first block of datas 416 443 self.infile.seek(0) 417 firstblock = self.infile.read( 1024)444 firstblock = self.infile.read(KILOBYTE) 418 445 self.infile.seek(0) 419 446 if self.isPostScript(firstblock) : … … 423 450 elif self.isPCL(firstblock) : 424 451 return PCLAnalyzer 452 else : 453 raise PDLAnalyzerError, "Analysis of first data block failed." 454 455 def main() : 456 """Entry point for PDL Analyzer.""" 457 if (len(sys.argv) < 2) or ((not sys.stdin.isatty()) and ("-" not in sys.argv[1:])) : 458 sys.argv.append("-") 459 460 totalsize = 0 461 for arg in sys.argv[1:] : 462 try : 463 parser = PDLAnalyzer(arg) 464 totalsize += parser.getJobSize() 465 except PDLAnalyzerError, msg : 466 sys.stderr.write("%s\n" % msg) 467 sys.stderr.flush() 468 print "%s" % totalsize 469 470 if __name__ == "__main__" : 471 main()