#! /usr/bin/env python # -*- coding: ISO-8859-15 -*- # # pkpgcounter : a generic Page Description Language parser # # (c) 2003, 2004, 2005, 2006, 2007 Jerome Alet # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # # $Id$ # """This modules implements a page counter for OpenDocument documents.""" import sys import zipfile import pdlparser class Parser(pdlparser.PDLParser) : """A parser for OpenOffice.org documents.""" def isValid(self) : """Returns True if data is OpenDocument, else False.""" if self.firstblock[:2] == "PK" : try : self.archive = zipfile.ZipFile(self.infile) self.contentxml = self.archive.read("content.xml") self.metaxml = self.archive.read("meta.xml") except : return False else : self.logdebug("DEBUG: Input file is in the OpenDocument (ISO/IEC DIS 26300) format.") return True else : return False def getJobSize(self) : """Counts pages in an OpenOffice.org document. Algorithm by Jerome Alet. """ pagecount = 0 try : # First try with Text documents index = self.metaxml.index("meta:page-count=") pagecount = int(self.metaxml[index:].split('"')[1]) except : # Now try with Impress documents pagecount = self.contentxml.count("