Changeset 519 for pkpgcounter

Show
Ignore:
Timestamp: 11/27/07 21:55:29 (15 years ago)
Author: jerome
Message:

Added a skeleton parser module (mstrash.py) for Microsoft Word (c) (tm) (r) (etc...) documents, and registered it in analyzer.py.

Location:
pkpgcounter/trunk/pkpgpdls
Files:
1 modified
1 copied

Legend:

Unmodified
Added
Removed
  • pkpgcounter/trunk/pkpgpdls/analyzer.py

    r501 r519  
    2828import tempfile 
    2929 
    30 import version, pdlparser, postscript, pdf, pcl345, pclxl, hbp, pil, \ 
     30import version, pdlparser, postscript, pdf, pcl345, pclxl, hbp, pil, mstrash, \ 
    3131       lidil, escp2, dvi, tiff, ooo, zjstream, qpdl, spl1, escpages03, plain 
    3232import inkcoverage 
     
    159159                       escpages03, \ 
    160160                       pil, \ 
     161                       mstrash, \ 
    161162                       plain) :     # IMPORTANT : don't move this one up ! 
    162163            try :                
  • pkpgcounter/trunk/pkpgpdls/mstrash.py

    r495 r519  
    2121# 
    2222 
    23 """This modules implements a page counter for plain text documents.""" 
     23"""This module implements a page counter for Microsoft Word (r) (tm) (c) (etc...) documents""" 
     24 
     25import os 
     26import urllib2 
    2427 
    2528import pdlparser 
     
    2730 
    2831class Parser(pdlparser.PDLParser) : 
    29     """A parser for plain text documents.""" 
    30     totiffcommands = [ 'enscript --quiet --portrait --no-header --columns 1 --output - "%(infname)s" | gs -sDEVICE=tiff24nc -dPARANOIDSAFER -dNOPAUSE -dBATCH -dQUIET -r"%(dpi)i" -sOutputFile="%(outfname)s" -', 
    31                        'a2ps --borders 0 --quiet --portrait --no-header --columns 1 --output - "%(infname)s" | gs -sDEVICE=tiff24nc -dPARANOIDSAFER -dNOPAUSE -dBATCH -dQUIET -r"%(dpi)i" -sOutputFile="%(outfname)s" -', 
    32                      ]   
    33     openmode = "rU"                  
     32    """A parser for that MS crap thing.""" 
     33    totiffcommands = [ ] 
    3434    def isValid(self) :     
    35         """Returns True if data is plain text, else False. 
     35        """Returns True if data is MS crap, else False. 
    3636         
    37            It's hard to detect a plain text file, so we just try to 
    38            extract lines from the first block (sufficiently large). 
    39            If it's impossible to find one we consider it's not plain text. 
     37           Identifying datas taken from the file command's magic database. 
     38           IMPORTANT : some magic values are not reused here because they 
     39           IMPORTANT : seem to be specific to some particular i18n release. 
    4040        """    
    41         lines = self.firstblock.split("\r\n") 
    42         if len(lines) == 1 : 
    43             lines = lines[0].split("\r") 
    44             if len(lines) == 1 : 
    45                 lines = lines[0].split("\n") 
    46         if len(lines) > 1 : 
    47             self.logdebug("DEBUG: Input file seems to be in the plain text format.") 
     41        if self.firstblock.startswith("PO^Q`") \ 
     42           or self.firstblock.startswith("\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1") \ 
     43           or self.firstblock.startswith("\xfe7\x00#") \ 
     44           or self.firstblock.startswith("\xdb\xa5-\x00\x00\x00") \ 
     45           or self.firstblock.startswith("\x31\xbe\x00\x00") \ 
     46           or self.firstblock[2112:].startswith("MSWordDoc") : 
     47            self.logdebug("DEBUG: Input file seems to be in a Microsoft shitty file format.") 
    4848            return True 
    4949        else :     
     
    5151             
    5252    def getJobSize(self) : 
    53         """Counts pages in a plain text document.""" 
    54         pagesize = 66   # TODO : Does this vary wrt the default page size ? 
    55                         # TODO : /etc/papersize and /etc/paper.config 
    56         pagecount = 0 
    57         linecount = 0 
    58         for line in self.infile : 
    59             if line.endswith("\n") : 
    60                 linecount += 1     
    61                 if (linecount > pagesize) : 
    62                     pagecount += 1 
    63                     linecount = 0 
    64                 else :     
    65                     cnt = line.count("\f") 
    66                     if cnt : 
    67                         pagecount += cnt 
    68                         linecount = 0 
    69             else :         
    70                 raise pdlparser.PDLParserError, "Unsupported file format. Please send the file to %s" % version.__authoremail__ 
    71         return pagecount + 1    # NB : empty files are catched in isValid() 
     53        """Counts pages in a Microsoft Word (r) (tm) (c) (etc...) document.""" 
     54        return 0