Changeset 493 for pkpgcounter/trunk

Show
Ignore:
Timestamp:
11/21/07 16:41:06 (17 years ago)
Author:
jerome
Message:

Re-optimize disk access by not reopening the input file and not re-reading
its first and last blocks more than once.

Location:
pkpgcounter/trunk/pkpgpdls
Files:
4 modified

Legend:

Unmodified
Added
Removed
  • pkpgcounter/trunk/pkpgpdls/analyzer.py

    r492 r493  
    136136        if not os.stat(self.filename).st_size : 
    137137            raise pdlparser.PDLParserError, "input file %s is empty !" % str(self.filename) 
     138         
     139        # Now read first and last block of the input file 
     140        # to be able to detect the real file format and the parser to use. 
     141        firstblock = self.workfile.read(pdlparser.FIRSTBLOCKSIZE) 
     142        try : 
     143            self.workfile.seek(-pdlparser.LASTBLOCKSIZE, 2) 
     144            lastblock = self.workfile.read(pdlparser.LASTBLOCKSIZE) 
     145        except IOError :     
     146            lastblock = "" 
     147             
    138148        # IMPORTANT : the order is important below. FIXME. 
    139149        for module in (postscript, \ 
     
    153163                       plain) :     # IMPORTANT : don't move this one up ! 
    154164            try :                
    155                 return module.Parser(self.filename, self.options.debug) 
     165                return module.Parser(self.filename, firstblock, 
     166                                                    lastblock, 
     167                                                    self.options.debug) 
    156168            except pdlparser.PDLParserError : 
    157169                pass # try next parser 
  • pkpgcounter/trunk/pkpgpdls/pdlparser.py

    r492 r493  
    4343    totiffcommands = None        # Default command to convert to TIFF 
    4444    openmode = "rb"              # Default file opening mode 
    45     def __init__(self, filename, debug=0) : 
     45    def __init__(self, filename, firstblock, lastblock, debug=0) : 
    4646        """Initialize the generic parser.""" 
    4747        self.filename = filename 
    4848        self.debug = debug 
    4949        self.infile = None 
    50         (self.firstblock, self.lastblock) = self.readBlocks() 
    51         self.infile = open(self.filename, self.openmode) 
     50        (self.firstblock, self.lastblock) = (firstblock, lastblock) 
    5251        if not self.isValid() : 
    5352            raise PDLParserError, "Invalid file format !" 
     
    6160            # parsing will greatly benefit from this. 
    6261            psyco.bind(self.getJobSize) 
     62        self.infile = open(self.filename, self.openmode) 
     63        # self.logdebug("Opened %s in '%s' mode." % (self.filename, self.openmode)) 
    6364             
    6465    def __del__(self) : 
     
    6667        if self.infile : 
    6768            self.infile.close() 
    68              
    69     def readBlocks(self) :         
    70         """Reads first and last block of the input file.""" 
    71         infile = open(self.filename, "rb") 
    72         try : 
    73             firstblock = infile.read(FIRSTBLOCKSIZE) 
    74             try : 
    75                 infile.seek(-LASTBLOCKSIZE, 2) 
    76                 lastblock = infile.read(LASTBLOCKSIZE) 
    77             except IOError :     
    78                 lastblock = "" 
    79         finally :         
    80             infile.close() 
    81         return (firstblock, lastblock)     
    8269             
    8370    def logdebug(self, message) :        
  • pkpgcounter/trunk/pkpgpdls/plain.py

    r492 r493  
    3838           read the first line, and if it doesn't end in CR or LF 
    3939           we consider it's not plain text. 
     40            
     41           TODO : use first and last block's content instead of readline(). 
    4042        """    
    4143        line = self.infile.readline() 
  • pkpgcounter/trunk/pkpgpdls/spl1.py

    r491 r493  
    100100        self.escapedStuff = {}   # For escaped datas, mostly PJL commands 
    101101        self.bigEndian() 
    102          
     102        codesop = chr(0x06) + chr(0x00) + chr(0x00) + chr(0x80) + chr(0x13) + chr(0x40) 
    103103        self.isbitmap = False 
    104104        pos = 0 
     
    114114                        offset = unpack(self.unpackLong, minfile[pos:pos+4])[0] 
    115115                        sequencenum = unpack(self.unpackShort, minfile[pos+4:pos+6])[0] 
    116                         codesop = " ".join([ "%02x" % ord(v) for v in minfile[pos+6:pos+12]]) 
    117                         if codesop != "06 00 00 80 13 40" : 
     116                        if minfile[pos+6:pos+12] != codesop : 
    118117                            raise pdlparser.PDLParserError, "Unfortunately SPL1 is incompletely recognized. Parsing aborted. Please report the problem to %s" % version.__authoremail__ 
    119118                        if not sequencenum :