root / pkpgcounter / trunk / pkpgpdls / mscrap.py @ 525

Revision 525, 2.2 kB (checked in by jerome, 16 years ago)

Added support for ink accounting of MS Word documents through abiword (because, unlike
OpenOffice.org, it doesn't refuse to print to a file from the command line!)

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
Line 
1#! /usr/bin/env python
2# -*- coding: ISO-8859-15 -*-
3#
4# pkpgcounter : a generic Page Description Language parser
5#
6# (c) 2003, 2004, 2005, 2006, 2007 Jerome Alet <alet@librelogiciel.com>
7# This program is free software: you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation, either version 3 of the License, or
10# (at your option) any later version.
11#
12# This program is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with this program.  If not, see <http://www.gnu.org/licenses/>.
19#
20# $Id$
21#
22
23"""This module implements a page counter for Microsoft Word (r) (tm) (c) (etc...) documents"""
24
25import os
26import urllib2
27
28import pdlparser
29import version
30
class Parser(pdlparser.PDLParser):
    """Parser for Microsoft Word documents.

    Detection relies on magic signatures found in the first block of the
    input.  Page counting itself is not implemented: getJobSize() always
    returns 0.
    """

    # Shell command templates, filled in with %(dpi)i, %(outfname)s and
    # %(infname)s (presumably by the base class when converting the input
    # to TIFF for ink accounting -- confirm in pdlparser).
    #
    # The inner double quotes live inside the shell's double-quoted --print
    # argument, so the shell must receive them as \" : the backslashes are
    # doubled here so that Python keeps them in the resulting string.
    # Without them the quoting around the output file name is lost and a
    # path containing whitespace gets split into several words.
    totiffcommands = [
        'xvfb-run -a abiword --import-extension=.doc '
        '--print="| gs -sDEVICE=tiff24nc -dPARANOIDSAFER -dNOPAUSE -dBATCH '
        '-dQUIET -r\\"%(dpi)i\\" -sOutputFile=\\"%(outfname)s\\" -" '
        '"%(infname)s"'
    ]

    def isValid(self):
        """Returns True if the data looks like a Microsoft Word document, else False.

           Identifying data taken from the file command's magic database.
           IMPORTANT : some magic values are not reused here because they
           IMPORTANT : seem to be specific to some particular i18n release.
        """
        header = self.firstblock
        # Signatures expected at the very beginning of the data.
        leading_signatures = (
            "PO^Q`",
            "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1",
            "\xfe7\x00#",
            "\xdb\xa5-\x00\x00\x00",
            "\x31\xbe\x00\x00",
        )
        recognized = False
        for signature in leading_signatures:
            if header.startswith(signature):
                recognized = True
                break
        if not recognized:
            # This last marker is looked for at byte offset 2112 instead.
            recognized = header[2112:].startswith("MSWordDoc")
        if recognized:
            self.logdebug("DEBUG: Input file seems to be in a Microsoft shitty file format.")
        return recognized

    def getJobSize(self):
        """Counts pages in a Microsoft Word document.

           Not implemented yet: always returns 0.
        """
        return 0
Note: See TracBrowser for help on using the browser.