Changeset 522

Show
Ignore:
Timestamp:
11/28/07 11:13:33 (16 years ago)
Author:
jerome
Message:

Finally, we need to duplicate some data, since for some file formats (e.g. mstrash)
a preliminary conversion will have to be done (through wvware, for example), and we
would otherwise need to overwrite the original values, which is not desirable.

Location:
pkpgcounter/trunk/pkpgpdls
Files:
19 modified

Legend:

Unmodified
Added
Removed
  • pkpgcounter/trunk/pkpgpdls/analyzer.py

    r520 r522  
    5656        self.workfile = None  
    5757        self.mustclose = None 
    58         self.firstblock = None 
    59         self.lastblock = None 
    6058         
    6159    def getJobSize(self) :     
     
    128126            self.workfile.close()     
    129127         
    130     def readFirstAndLastBlocks(self) :     
     128    def readFirstAndLastBlocks(self, inputfile) : 
    131129        """Reads the first and last blocks of data.""" 
    132130        # Now read first and last block of the input file 
    133131        # to be able to detect the real file format and the parser to use. 
    134         self.firstblock = self.workfile.read(pdlparser.FIRSTBLOCKSIZE) 
    135         try : 
    136             self.workfile.seek(-pdlparser.LASTBLOCKSIZE, 2) 
    137             self.lastblock = self.workfile.read(pdlparser.LASTBLOCKSIZE) 
     132        firstblock = inputfile.read(pdlparser.FIRSTBLOCKSIZE) 
     133        try : 
     134            inputfile.seek(-pdlparser.LASTBLOCKSIZE, 2) 
     135            lastblock = inputfile.read(pdlparser.LASTBLOCKSIZE) 
    138136        except IOError :     
    139             self.lastblock = "" 
     137            lastblock = "" 
     138        return (firstblock, lastblock)      
    140139             
    141140    def detectPDLHandler(self) :     
     
    146145        if not os.stat(self.filename).st_size : 
    147146            raise pdlparser.PDLParserError, "input file %s is empty !" % str(self.filename) 
    148         self.readFirstAndLastBlocks() 
     147        (firstblock, lastblock) = self.readFirstAndLastBlocks(self.workfile) 
    149148             
    150149        # IMPORTANT : the order is important below. FIXME. 
     
    167166                       plain) :     # IMPORTANT : don't move this one up ! 
    168167            try :                
    169                 return module.Parser(self) 
     168                return module.Parser(self, (firstblock, lastblock)) 
    170169            except pdlparser.PDLParserError : 
    171170                pass # try next parser 
  • pkpgcounter/trunk/pkpgpdls/dvi.py

    r520 r522  
    3636        """Returns True if data is DVI, else False.""" 
    3737        try : 
    38             if (ord(self.parent.firstblock[0]) == 0xf7) \ 
    39                 and (ord(self.parent.lastblock[-1]) == 0xdf) : 
     38            if (ord(self.firstblock[0]) == 0xf7) \ 
     39                and (ord(self.lastblock[-1]) == 0xdf) : 
    4040                self.logdebug("DEBUG: Input file is in the DVI format.") 
    4141                return True 
  • pkpgcounter/trunk/pkpgpdls/escp2.py

    r520 r522  
    3131    def isValid(self) :         
    3232        """Returns True if data is ESC/P2, else False.""" 
    33         if self.parent.firstblock.startswith("\033@") or \ 
    34            self.parent.firstblock.startswith("\033*") or \ 
    35            self.parent.firstblock.startswith("\n\033@") or \ 
    36            self.parent.firstblock.startswith("\0\0\0\033\1@EJL") : # ESC/P Raster ??? Seen on Stylus Photo 1284 
     33        if self.firstblock.startswith("\033@") or \ 
     34           self.firstblock.startswith("\033*") or \ 
     35           self.firstblock.startswith("\n\033@") or \ 
     36           self.firstblock.startswith("\0\0\0\033\1@EJL") : # ESC/P Raster ??? Seen on Stylus Photo 1284 
    3737            self.logdebug("DEBUG: Input file is in the ESC/P2 format.") 
    3838            return True 
  • pkpgcounter/trunk/pkpgpdls/escpages03.py

    r520 r522  
    3535    def isValid(self) :         
    3636        """Returns True if data is TIFF, else False.""" 
    37         if self.parent.firstblock.startswith("\033\1@EJL") and \ 
    38             (self.parent.firstblock.find("=ESC/PAGES03\n") != -1) : 
     37        if self.firstblock.startswith("\033\1@EJL") and \ 
     38            (self.firstblock.find("=ESC/PAGES03\n") != -1) : 
    3939            self.logdebug("DEBUG: Input file is in the ESC/PageS03 format.") 
    4040            return True 
  • pkpgcounter/trunk/pkpgpdls/hbp.py

    r520 r522  
    3434    def isValid(self) :         
    3535        """Returns True if data is HBP, else False.""" 
    36         if self.parent.firstblock.find("@PJL ENTER LANGUAGE = HBP\n") != -1 : 
     36        if self.firstblock.find("@PJL ENTER LANGUAGE = HBP\n") != -1 : 
    3737            self.logdebug("DEBUG: Input file is in the HBP format.") 
    3838            return True 
  • pkpgcounter/trunk/pkpgpdls/lidil.py

    r520 r522  
    6262        # for simplicity's sake. 
    6363        EOFMarker = "$\x00\x10\x00\x08\x00\x00\x00\x00\x00\xff\xff\xff\xff\xff$$\x00\x10\x00\x06\x00\x00\x00\x00\x00\xff\xff\xff\xff\xff$"  
    64         if self.parent.firstblock.startswith(BOFMarker) \ 
    65            and self.parent.lastblock.endswith(EOFMarker) : 
     64        if self.firstblock.startswith(BOFMarker) \ 
     65           and self.lastblock.endswith(EOFMarker) : 
    6666            self.logdebug("DEBUG: Input file is in the Hewlett-Packard LIDIL format.") 
    6767            return True 
  • pkpgcounter/trunk/pkpgpdls/mstrash.py

    r520 r522  
    3939           IMPORTANT : seem to be specific to some particular i18n release. 
    4040        """    
    41         if self.parent.firstblock.startswith("PO^Q`") \ 
    42            or self.parent.firstblock.startswith("\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1") \ 
    43            or self.parent.firstblock.startswith("\xfe7\x00#") \ 
    44            or self.parent.firstblock.startswith("\xdb\xa5-\x00\x00\x00") \ 
    45            or self.parent.firstblock.startswith("\x31\xbe\x00\x00") \ 
    46            or self.parent.firstblock[2112:].startswith("MSWordDoc") : 
     41        if self.firstblock.startswith("PO^Q`") \ 
     42           or self.firstblock.startswith("\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1") \ 
     43           or self.firstblock.startswith("\xfe7\x00#") \ 
     44           or self.firstblock.startswith("\xdb\xa5-\x00\x00\x00") \ 
     45           or self.firstblock.startswith("\x31\xbe\x00\x00") \ 
     46           or self.firstblock[2112:].startswith("MSWordDoc") : 
    4747            self.logdebug("DEBUG: Input file seems to be in a Microsoft shitty file format.") 
    4848            return True 
  • pkpgcounter/trunk/pkpgpdls/ooo.py

    r520 r522  
    3232    def isValid(self) :         
    3333        """Returns True if data is OpenDocument, else False.""" 
    34         if self.parent.firstblock[:2] == "PK" : 
     34        if self.firstblock[:2] == "PK" : 
    3535            try : 
    3636                self.archive = zipfile.ZipFile(self.infile) 
  • pkpgcounter/trunk/pkpgpdls/pcl345.py

    r520 r522  
    9292    def isValid(self) :     
    9393        """Returns True if data is PCL3/4/5, else False.""" 
    94         if self.parent.firstblock.startswith("\033E\033") or \ 
    95            (self.parent.firstblock.startswith("\033*rbC") and (not self.parent.lastblock[-3:] == "\f\033@")) or \ 
    96            self.parent.firstblock.startswith("\033*rB\033") or \ 
    97            self.parent.firstblock.startswith("\033%8\033") or \ 
    98            (self.parent.firstblock.find("\033%-12345X") != -1) or \ 
    99            (self.parent.firstblock.find("@PJL ENTER LANGUAGE=PCL\012\015\033") != -1) or \ 
    100            (self.parent.firstblock.startswith(chr(0xcd)+chr(0xca)) and (self.parent.firstblock.find("\033E\033") != -1)) : 
     94        if self.firstblock.startswith("\033E\033") or \ 
     95           (self.firstblock.startswith("\033*rbC") and (not self.lastblock[-3:] == "\f\033@")) or \ 
     96           self.firstblock.startswith("\033*rB\033") or \ 
     97           self.firstblock.startswith("\033%8\033") or \ 
     98           (self.firstblock.find("\033%-12345X") != -1) or \ 
     99           (self.firstblock.find("@PJL ENTER LANGUAGE=PCL\012\015\033") != -1) or \ 
     100           (self.firstblock.startswith(chr(0xcd)+chr(0xca)) and (self.firstblock.find("\033E\033") != -1)) : 
    101101            self.logdebug("DEBUG: Input file is in the PCL3/4/5 format.") 
    102102            return True 
  • pkpgcounter/trunk/pkpgpdls/pclxl.py

    r520 r522  
    8383    def isValid(self) :     
    8484        """Returns True if data is PCLXL aka PCL6, else False.""" 
    85         if (((self.parent.firstblock[:128].find("\033%-12345X") != -1) and \ 
    86              (self.parent.firstblock.find(" HP-PCL XL;") != -1) and \ 
    87              ((self.parent.firstblock.find("LANGUAGE=PCLXL") != -1) or \ 
    88               (self.parent.firstblock.find("LANGUAGE = PCLXL") != -1)))) \ 
    89              or ((self.parent.firstblock.startswith(chr(0xcd)+chr(0xca)) and (self.parent.firstblock.find(" HP-PCL XL;") != -1))) : 
     85        if (((self.firstblock[:128].find("\033%-12345X") != -1) and \ 
     86             (self.firstblock.find(" HP-PCL XL;") != -1) and \ 
     87             ((self.firstblock.find("LANGUAGE=PCLXL") != -1) or \ 
     88              (self.firstblock.find("LANGUAGE = PCLXL") != -1)))) \ 
     89             or ((self.firstblock.startswith(chr(0xcd)+chr(0xca)) and (self.firstblock.find(" HP-PCL XL;") != -1))) : 
    9090            self.logdebug("DEBUG: Input file is in the PCLXL (aka PCL6) format.") 
    9191            return True 
  • pkpgcounter/trunk/pkpgpdls/pdf.py

    r520 r522  
    4747    def isValid(self) :     
    4848        """Returns True if data is PDF, else False.""" 
    49         if self.parent.firstblock.startswith("%PDF-") or \ 
    50            self.parent.firstblock.startswith("\033%-12345X%PDF-") or \ 
    51            ((self.parent.firstblock[:128].find("\033%-12345X") != -1) and (self.parent.firstblock.upper().find("LANGUAGE=PDF") != -1)) or \ 
    52            (self.parent.firstblock.find("%PDF-") != -1) : 
     49        if self.firstblock.startswith("%PDF-") or \ 
     50           self.firstblock.startswith("\033%-12345X%PDF-") or \ 
     51           ((self.firstblock[:128].find("\033%-12345X") != -1) and (self.firstblock.upper().find("LANGUAGE=PDF") != -1)) or \ 
     52           (self.firstblock.find("%PDF-") != -1) : 
    5353            self.logdebug("DEBUG: Input file is in the PDF format.") 
    5454            return True 
  • pkpgcounter/trunk/pkpgpdls/pdlparser.py

    r520 r522  
    4343    totiffcommands = None        # Default command to convert to TIFF 
    4444    openmode = "rb"              # Default file opening mode 
    45     def __init__(self, parent) : 
     45    def __init__(self, parent, (firstblock, lastblock)) : 
    4646        """Initialize the generic parser.""" 
    4747        self.parent = parent 
     48        # We need some copies for later inclusion of parsers which 
     49        # would modify the parent's values 
     50        self.filename = parent.filename[:] 
     51        self.firstblock = firstblock[:] 
     52        self.lastblock = lastblock[:] 
    4853        self.infile = None 
    4954        if not self.isValid() : 
     
    5863            # parsing will greatly benefit from this. 
    5964            psyco.bind(self.getJobSize) 
    60         self.infile = open(self.parent.filename, self.openmode) 
    61         # self.logdebug("Opened %s in '%s' mode." % (self.parent.filename, self.openmode)) 
     65        self.infile = open(self.filename, self.openmode) 
     66        # self.logdebug("Opened %s in '%s' mode." % (self.filename, self.openmode)) 
    6267             
    6368    def __del__(self) : 
     
    8489        """    
    8590        if self.totiffcommands : 
    86             infname = self.parent.filename 
     91            infname = self.filename 
    8792            for totiffcommand in self.totiffcommands : 
    8893                error = False 
  • pkpgcounter/trunk/pkpgpdls/pil.py

    r520 r522  
    3535        """Returns True if data is an image format supported by PIL, else False."""    
    3636        try : 
    37             image = Image.open(self.parent.filename) 
     37            image = Image.open(self.filename) 
    3838        except (IOError, OverflowError) :     
    3939            return False 
     
    4545        """Counts pages in an image file.""" 
    4646        index = 0 
    47         image = Image.open(self.parent.filename) 
     47        image = Image.open(self.filename) 
    4848        try : 
    4949            while True : 
  • pkpgcounter/trunk/pkpgpdls/plain.py

    r520 r522  
    3939           If it's impossible to find one we consider it's not plain text. 
    4040        """    
    41         lines = self.parent.firstblock.split("\r\n") 
     41        lines = self.firstblock.split("\r\n") 
    4242        if len(lines) == 1 : 
    4343            lines = lines[0].split("\r") 
  • pkpgcounter/trunk/pkpgpdls/postscript.py

    r521 r522  
    3737    def isValid(self) :     
    3838        """Returns True if data is PostScript, else False.""" 
    39         if self.parent.firstblock.startswith("%!") or \ 
    40            self.parent.firstblock.startswith("\004%!") or \ 
    41            self.parent.firstblock.startswith("\033%-12345X%!PS") or \ 
    42            ((self.parent.firstblock[:128].find("\033%-12345X") != -1) and \ 
    43              ((self.parent.firstblock.find("LANGUAGE=POSTSCRIPT") != -1) or \ 
    44               (self.parent.firstblock.find("LANGUAGE = POSTSCRIPT") != -1) or \ 
    45               (self.parent.firstblock.find("LANGUAGE = Postscript") != -1))) or \ 
    46               (self.parent.firstblock.find("%!PS-Adobe") != -1) : 
     39        if self.firstblock.startswith("%!") or \ 
     40           self.firstblock.startswith("\004%!") or \ 
     41           self.firstblock.startswith("\033%-12345X%!PS") or \ 
     42           ((self.firstblock[:128].find("\033%-12345X") != -1) and \ 
     43             ((self.firstblock.find("LANGUAGE=POSTSCRIPT") != -1) or \ 
     44              (self.firstblock.find("LANGUAGE = POSTSCRIPT") != -1) or \ 
     45              (self.firstblock.find("LANGUAGE = Postscript") != -1))) or \ 
     46              (self.firstblock.find("%!PS-Adobe") != -1) : 
    4747            self.logdebug("DEBUG: Input file is in the PostScript format.") 
    4848            return True 
     
    5353        """Get the count through GhostScript, useful for non-DSC compliant PS files.""" 
    5454        self.logdebug("Internal parser sucks, using GhostScript instead...") 
    55         infname = self.parent.filename 
     55        infname = self.filename 
    5656        command = 'gs -sDEVICE=bbox -dPARANOIDSAFER -dNOPAUSE -dBATCH -dQUIET "%(infname)s" 2>&1 | grep -c "%%HiResBoundingBox:" 2>/dev/null' 
    5757        pagecount = 0 
  • pkpgcounter/trunk/pkpgpdls/qpdl.py

    r520 r522  
    7373    def isValid(self) :     
    7474        """Returns True if data is QPDL aka SPL2, else False.""" 
    75         if ((self.parent.firstblock[:128].find("\033%-12345X") != -1) and \ 
    76              ((self.parent.firstblock.find("LANGUAGE=QPDL") != -1) or \ 
    77               (self.parent.firstblock.find("LANGUAGE = QPDL") != -1))) : 
     75        if ((self.firstblock[:128].find("\033%-12345X") != -1) and \ 
     76             ((self.firstblock.find("LANGUAGE=QPDL") != -1) or \ 
     77              (self.firstblock.find("LANGUAGE = QPDL") != -1))) : 
    7878            self.logdebug("DEBUG: Input file is in the QPDL (aka SPL2) format.") 
    7979            return True 
  • pkpgcounter/trunk/pkpgpdls/spl1.py

    r520 r522  
    3737    def isValid(self) :     
    3838        """Returns True if data is SPL1, else False.""" 
    39         if ((self.parent.firstblock[:128].find("\033%-12345X") != -1) and \ 
    40             (self.parent.firstblock.find("$PJL ") != -1) and \ 
    41              ((self.parent.firstblock.find("LANGUAGE=SMART") != -1) or \ 
    42               (self.parent.firstblock.find("LANGUAGE = SMART") != -1))) : 
     39        if ((self.firstblock[:128].find("\033%-12345X") != -1) and \ 
     40            (self.firstblock.find("$PJL ") != -1) and \ 
     41             ((self.firstblock.find("LANGUAGE=SMART") != -1) or \ 
     42              (self.firstblock.find("LANGUAGE = SMART") != -1))) : 
    4343            self.logdebug("DEBUG: Input file is in the SPL1 format.") 
    4444            return True 
  • pkpgcounter/trunk/pkpgpdls/tiff.py

    r520 r522  
    3737        littleendian = (chr(0x49)*2) + chr(0x2a) + chr(0) 
    3838        bigendian = (chr(0x4d)*2) + chr(0) + chr(0x2a) 
    39         if self.parent.firstblock[:4] in (littleendian, bigendian) : 
     39        if self.firstblock[:4] in (littleendian, bigendian) : 
    4040            self.logdebug("DEBUG: Input file is in the TIFF format.") 
    4141            return True 
  • pkpgcounter/trunk/pkpgpdls/zjstream.py

    r520 r522  
    3131    def isValid(self) :     
    3232        """Returns True if data is ZjStream, else False.""" 
    33         if self.parent.firstblock[:4] == "ZJZJ" : 
     33        if self.firstblock[:4] == "ZJZJ" : 
    3434            self.logdebug("DEBUG: Input file is in the Zenographics ZjStream (little endian) format.") 
    3535            return self.littleEndian() 
    36         elif self.parent.firstblock[:4] == "JZJZ" :     
     36        elif self.firstblock[:4] == "JZJZ" :     
    3737            self.logdebug("DEBUG: Input file is in the Zenographics ZjStream (big endian) format.") 
    3838            return self.bigEndian()