Changeset 1552

Show
Ignore:
Timestamp:
06/19/04 00:21:27 (20 years ago)
Author:
jalet
Message:

Native PDF parser greatly improved.
GhostScript based PDF parser completely removed because native code
is now portable across Python versions.

Location:
pykota/trunk
Files:
3 modified

Legend:

Unmodified
Added
Removed
  • pykota/trunk/NEWS

    r1550 r1552  
    2222PyKota NEWS : 
    2323 
     24    - 1.19alpha24 : 
     25     
     26        - Native PDF parsing code now works perfectly with 
     27          Python 2.1 and above, and is way faster than before. 
     28          GhostScript is not used anymore to convert PDF to PS, 
     29          because PDF is parsed directly. 
     30           
    2431    - 1.19alpha23 : 
    2532     
  • pykota/trunk/pykota/pdlanalyzer.py

    r1551 r1552  
    2222# 
    2323# $Log$ 
     24# Revision 1.9  2004/06/18 22:21:27  jalet 
     25# Native PDF parser greatly improved. 
     26# GhostScript based PDF parser completely removed because native code 
     27# is now portable across Python versions. 
     28# 
    2429# Revision 1.8  2004/06/18 20:49:46  jalet 
    2530# "ERROR:" prefix added 
     
    5156import sys 
    5257import os 
     58import re 
    5359import struct 
    5460import tempfile 
     
    7581        """Count pages in a DSC compliant PostScript document.""" 
    7682        pagecount = 0 
    77         while 1 : 
    78             line = self.infile.readline() 
    79             if not line : 
    80                 break 
     83        for line in self.infile.xreadlines() :  
    8184            if line.startswith("%%Page: ") : 
    8285                pagecount += 1 
     
    8790        """Initialize PDF Analyzer.""" 
    8891        self.infile = infile 
    89         try : 
    90             if float(sys.version[:3]) >= 2.3 : 
    91                 self.getJobSize = self.native_getJobSize 
    92             else :     
    93                 self.getJobSize = self.gs_getJobSize 
    94         except : 
    95             self.getJobSize = self.gs_getJobSize 
    9692                 
    97     def native_getJobSize(self) :     
    98         """Counts pages in a PDF document natively.""" 
     93    def getJobSize(self) :     
     94        """Counts pages in a PDF document.""" 
     95        regexp = re.compile(r"(/Type) ?(/Page)[/ \r\n]") 
    9996        pagecount = 0 
    100         content = [] 
    101         while 1 :      
    102             line = self.infile.readline() 
    103             if not line : 
    104                 break 
    105             line = line.strip() 
    106             content.append(line) 
    107             if line.endswith("endobj") : 
    108                 pagecount += " /".join([x.strip() for x in " ".join(content).split("/")]).count(" /Type /Page ") 
    109                 content = [] 
    110         return pagecount     
    111          
    112     def gs_getJobSize(self) :     
    113         """Counts pages in a PDF document using GhostScript to convert PDF to PS.""" 
    114         MEGABYTE = 1024*1024 
    115         child = popen2.Popen4("gs -q -dNOPAUSE -dBATCH -dSAFER -sDEVICE=pswrite -sOutputFile=- -c save pop -f - 2>/dev/null") 
    116         try : 
    117             data = self.infile.read(MEGABYTE)     
    118             while data : 
    119                 child.tochild.write(data) 
    120                 data = self.infile.read(MEGABYTE) 
    121             child.tochild.flush() 
    122             child.tochild.close()     
    123         except (IOError, OSError), msg :     
    124             raise PDLAnalyzerError, "Unable to convert PDF input to PS with GhostScript : %s" % msg 
    125          
    126         psanalyzer = PostScriptAnalyzer(child.fromchild) 
    127         pagecount = psanalyzer.getJobSize() 
    128         child.fromchild.close() 
    129         try : 
    130             retcode = child.wait() 
    131         except OSError, msg :     
    132             self.filter.logger.log_message(_("Problem while waiting for PDF to PS converter (GhostScript pid %s) to exit : %s") % (child.pid, msg)) 
    133         else :     
    134             if os.WIFEXITED(retcode) : 
    135                 status = os.WEXITSTATUS(retcode) 
    136             else :     
    137                 status = retcode 
    138             if status :     
    139                 raise PDLAnalyzerError, "PDF to PS converter (GhostScript pid %s) exit code is %s" % (child.pid, repr(status)) 
     97        for line in self.infile.xreadlines() :  
     98            pagecount += len(regexp.findall(line)) 
    14099        return pagecount     
    141100         
     
    458417        else :     
    459418            # normal file 
    460             self.infile = open(self.filename, "rbU") # TODO : "U" mode only works in 2.3, is ignored in 2.1 and 2.2 
     419            self.infile = open(self.filename, "rb") # TODO : "U" mode only works in 2.3, is ignored in 2.1 and 2.2 
    461420            self.mustclose = 1 
    462421            return 
     
    464423        # Use a temporary file, always seekable contrary to standard input. 
    465424        # This also has the benefit to let us use the "U" mode (new in Python 2.3) 
    466         self.infile = tempfile.TemporaryFile(mode="w+bU")   # TODO : "U" mode only works in 2.3, is ignored in 2.1 and 2.2 
     425        self.infile = tempfile.TemporaryFile(mode="w+b")   # TODO : "U" mode only works in 2.3, is ignored in 2.1 and 2.2 
    467426        while 1 : 
    468427            data = infile.read(MEGABYTE)  
  • pykota/trunk/pykota/version.py

    r1545 r1552  
    2222# 
    2323 
    24 __version__ = "1.19alpha23_unofficial" 
     24__version__ = "1.19alpha24_unofficial" 
    2525 
    2626__doc__ = """PyKota : a complete Printing Quota Solution for CUPS and LPRng."""