Changeset 519 for pkpgcounter/trunk
- Timestamp:
- 11/27/07 21:55:29 (17 years ago)
- Location:
- pkpgcounter/trunk/pkpgpdls
- Files:
-
- 1 modified
- 1 copied
Legend:
- Unmodified
- Added
- Removed
-
pkpgcounter/trunk/pkpgpdls/analyzer.py
r501 r519 28 28 import tempfile 29 29 30 import version, pdlparser, postscript, pdf, pcl345, pclxl, hbp, pil, \30 import version, pdlparser, postscript, pdf, pcl345, pclxl, hbp, pil, mstrash, \ 31 31 lidil, escp2, dvi, tiff, ooo, zjstream, qpdl, spl1, escpages03, plain 32 32 import inkcoverage … … 159 159 escpages03, \ 160 160 pil, \ 161 mstrash, \ 161 162 plain) : # IMPORTANT : don't move this one up ! 162 163 try : -
pkpgcounter/trunk/pkpgpdls/mstrash.py
r495 r519 21 21 # 22 22 23 """This modules implements a page counter for plain text documents.""" 23 """This module implements a page counter for Microsoft Word (r) (tm) (c) (etc...) documents""" 24 25 import os 26 import urllib2 24 27 25 28 import pdlparser … … 27 30 28 31 class Parser(pdlparser.PDLParser) : 29 """A parser for plain text documents.""" 30 totiffcommands = [ 'enscript --quiet --portrait --no-header --columns 1 --output - "%(infname)s" | gs -sDEVICE=tiff24nc -dPARANOIDSAFER -dNOPAUSE -dBATCH -dQUIET -r"%(dpi)i" -sOutputFile="%(outfname)s" -', 31 'a2ps --borders 0 --quiet --portrait --no-header --columns 1 --output - "%(infname)s" | gs -sDEVICE=tiff24nc -dPARANOIDSAFER -dNOPAUSE -dBATCH -dQUIET -r"%(dpi)i" -sOutputFile="%(outfname)s" -', 32 ] 33 openmode = "rU" 32 """A parser for that MS crap thing.""" 33 totiffcommands = [ ] 34 34 def isValid(self) : 35 """Returns True if data is plain text, else False. 35 """Returns True if data is MS crap, else False. 36 36 37 It's hard to detect a plain text file, so we just try to 38 extract lines from the first block (sufficiently large). 39 If it's impossible to find one we consider it's not plain text. 37 Identifying datas taken from the file command's magic database. 38 IMPORTANT : some magic values are not reused here because they 39 IMPORTANT : seem to be specific to some particular i18n release. 
40 40 """ 41 lines = self.firstblock.split("\r\n") 42 if len(lines) == 1 : 43 lines = lines[0].split("\r") 44 if len(lines) == 1 : 45 lines = lines[0].split("\n") 46 if len(lines) > 1: 47 self.logdebug("DEBUG: Input file seems to be in the plain text format.") 41 if self.firstblock.startswith("PO^Q`") \ 42 or self.firstblock.startswith("\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1") \ 43 or self.firstblock.startswith("\xfe7\x00#") \ 44 or self.firstblock.startswith("\xdb\xa5-\x00\x00\x00") \ 45 or self.firstblock.startswith("\x31\xbe\x00\x00") \ 46 or self.firstblock[2112:].startswith("MSWordDoc") : 47 self.logdebug("DEBUG: Input file seems to be in a Microsoft shitty file format.") 48 48 return True 49 49 else : … … 51 51 52 52 def getJobSize(self) : 53 """Counts pages in a plain text document.""" 54 pagesize = 66 # TODO : Does this vary wrt the default page size ? 55 # TODO : /etc/papersize and /etc/paper.config 56 pagecount = 0 57 linecount = 0 58 for line in self.infile : 59 if line.endswith("\n") : 60 linecount += 1 61 if (linecount > pagesize) : 62 pagecount += 1 63 linecount = 0 64 else : 65 cnt = line.count("\f") 66 if cnt : 67 pagecount += cnt 68 linecount = 0 69 else : 70 raise pdlparser.PDLParserError, "Unsupported file format. Please send the file to %s" % version.__authoremail__ 71 return pagecount + 1 # NB : empty files are catched in isValid() 53 """Counts pages in a Microsoft Word (r) (tm) (c) (etc...) document.""" 54 return 0