Show
Ignore:
Timestamp:
11/20/07 00:04:09 (17 years ago)
Author:
jerome
Message:

Major code cleaning. Now clearer, although probably a bit slower since
a file can be opened several times.
Universal newline opening mode is now only used when needed (PS, PDF and plain
text), and binary opening mode is used for the other formats.
This means we will finally be able to remove mmap calls wherever possible.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • pkpgcounter/trunk/pkpgpdls/pdf.py

    r463 r491  
    2323"""This modules implements a page counter for PDF documents.""" 
    2424 
    25 import sys 
    2625import re 
    2726 
     
    3433        self.major = major 
    3534        self.minor = minor 
     35        self.majori = int(major) 
     36        self.minori = int(minor) 
    3637        self.description = description 
    3738        self.comments = [] 
     
    4344    """A parser for PDF documents.""" 
    4445    totiffcommands = [ 'gs -sDEVICE=tiff24nc -dPARANOIDSAFER -dNOPAUSE -dBATCH -dQUIET -r%(dpi)i -sOutputFile="%(fname)s" -' ] 
     46    openmode = "rU" 
    4547    def isValid(self) :     
    4648        """Returns True if data is PDF, else False.""" 
     
    6264        objre = re.compile(r"\s?(\d+)\s+(\d+)\s+obj[<\s/]?") 
    6365        for line in self.infile : 
    64             line = line.strip() 
     66            line = line.strip()     
    6567            if line.startswith("% ") :     
    6668                if inobject : 
     
    7274                result = objre.search(line) 
    7375                if result is not None : 
    74                     (major, minor) = [int(num) for num in line[result.start():result.end()].split()[:2]] 
     76                    (major, minor) = line[result.start():result.end()].split()[:2] 
    7577                    obj = PDFObject(major, minor, lastcomment) 
    7678                    obj.content.append(line[result.end():]) 
     
    8688                        # if we want the first one, just use > instead of >= 
    8789                        oldobject = objects.setdefault(major, obj) 
    88                         if minor >= oldobject.minor : 
     90                        if int(minor) >= oldobject.minori : 
    8991                            objects[major] = obj 
     92                            # self.logdebug("Object(%i, %i) overwritten with Object(%i, %i)" % (oldobject.majori, oldobject.minori, obj.majori, obj.minori)) 
     93                        # self.logdebug("Object(%i, %i)" % (obj.majori, obj.minori)) 
    9094                        inobject = 0         
    9195                else :     
     
    9498                         
    9599        # Now we check each PDF object we've just created. 
    96         # colorregexp = re.compile(r"(/ColorSpace) ?(/DeviceRGB|/DeviceCMYK)[/ \t\r\n]", re.I) 
    97100        newpageregexp = re.compile(r"(/Type)\s?(/Page)[/>\s]", re.I) 
    98101        pagecount = 0 
     
    103106                pagecount += count 
    104107        return pagecount     
    105          
    106 if __name__ == "__main__" :     
    107     pdlparser.test(Parser)