Changeset 1550 for pykota/trunk
- Timestamp: 06/18/04 19:48:04 (20 years ago)
- Location: pykota/trunk
- Files: 2 modified
Legend:
- Unmodified
- Added
- Removed
pykota/trunk/NEWS
r1549 r1550 27 27 printers. Thanks to Ryan Suarez. 28 28 29 - Smart PDL analyzer now recognizes PDF too. 29 - Smart PDL analyzer now recognizes PDF too : two 30 PDF parsing methods are used : a native one which 31 is very fast but requires Python 2.3 or above, and 32 a slower one which uses Ghostscript. PyKota detects 33 automatically which method to use depending on your 34 version of Python. 30 35 31 36 - All tracebacks now include PyKota's version number. -
pykota/trunk/pykota/pdlanalyzer.py
r1547 r1550 22 22 # 23 23 # $Log$ 24 # Revision 1.7 2004/06/18 17:48:04 jalet 25 # Added native fast PDF parsing method 26 # 24 27 # Revision 1.6 2004/06/18 14:00:16 jalet 25 28 # Added PDF support in smart PDL analyzer (through GhostScript for now) … … 81 84 """Initialize PDF Analyzer.""" 82 85 self.infile = infile 83 84 def getJobSize(self) : 85 """Counts pages in a PDF document. TODO : don't use GhostScript in the future.""" 86 try : 87 if float(sys.version[:3]) >= 2.3 : 88 self.getJobSize = self.native_getJobSize 89 else : 90 self.getJobSize = self.gs_getJobSize 91 except : 92 self.getJobSize = self.gs_getJobSize 93 94 def native_getJobSize(self) : 95 """Counts pages in a PDF document natively.""" 96 pagecount = 0 97 content = [] 98 while 1 : 99 line = self.infile.readline() 100 if not line : 101 break 102 line = line.strip() 103 content.append(line) 104 if line.endswith("endobj") : 105 pagecount += " /".join([x.strip() for x in " ".join(content).split("/")]).count(" /Type /Page ") 106 content = [] 107 return pagecount 108 109 def gs_getJobSize(self) : 110 """Counts pages in a PDF document using GhostScript to convert PDF to PS.""" 86 111 MEGABYTE = 1024*1024 87 112 child = popen2.Popen4("gs -q -dNOPAUSE -dBATCH -dSAFER -sDEVICE=pswrite -sOutputFile=- -c save pop -f - 2>/dev/null") … … 421 446 def openFile(self) : 422 447 """Opens the job's data stream for reading.""" 423 self.mustclose = 1448 self.mustclose = 0 # by default we don't want to close the file when finished 424 449 if hasattr(self.filename, "read") and hasattr(self.filename, "seek") : 425 450 # filename is in fact a file-like object 426 self.infile = self.filename 427 self.mustclose = 0 # we don't want to close this file when finished 451 infile = self.filename 428 452 elif self.filename == "-" : 429 453 # we must read from stdin 430 # but since stdin is not seekable, we have to use a temporary 431 # file instead. 
432 self.infile = tempfile.TemporaryFile() 433 while 1 : 434 data = sys.stdin.read(MEGABYTE) 435 if not data : 436 break 437 self.infile.write(data) 438 self.infile.flush() 439 self.infile.seek(0) 454 infile = sys.stdin 440 455 else : 441 456 # normal file 442 self.infile = open(self.filename, "rb") 457 self.infile = open(self.filename, "rbU") # TODO : "U" mode only works in 2.3, is ignored in 2.1 and 2.2 458 self.mustclose = 1 459 return 460 461 # Use a temporary file, always seekable contrary to standard input. 462 # This also has the benefit to let us use the "U" mode (new in Python 2.3) 463 self.infile = tempfile.TemporaryFile(mode="w+bU") # TODO : "U" mode only works in 2.3, is ignored in 2.1 and 2.2 464 while 1 : 465 data = infile.read(MEGABYTE) 466 if not data : 467 break 468 self.infile.write(data) 469 self.infile.flush() 470 self.infile.seek(0) 443 471 444 472 def closeFile(self) :