Context Navigation

pdf.py @ 443

Revision 428, 4.5 kB (checked in by jerome, 18 years ago)
Improved ink accounting by allowing several commands to be launched to convert to TIFF in case one of them fails.
Property svn:eol-style set to `native` Property svn:keywords set to `Auth Date Id Rev`

Line
1	#! /usr/bin/env python
2	# -- coding: ISO-8859-15 --
3	#
4	# pkpgcounter : a generic Page Description Language parser
5	#
6	# (c) 2003, 2004, 2005, 2006 Jerome Alet <alet@librelogiciel.com>
7	# This program is free software; you can redistribute it and/or modify
8	# it under the terms of the GNU General Public License as published by
9	# the Free Software Foundation; either version 2 of the License, or
10	# (at your option) any later version.
11	#
12	# This program is distributed in the hope that it will be useful,
13	# but WITHOUT ANY WARRANTY; without even the implied warranty of
14	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15	# GNU General Public License for more details.
16	#
17	# You should have received a copy of the GNU General Public License
18	# along with this program; if not, write to the Free Software
19	# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20	#
21	# $Id$
22	#
23
24	"""This module implements a page counter for PDF documents."""
25
26	import sys
27	import re
28
29	import pdlparser
30
class PDFObject:
    """Plain container for one object read from a PDF file."""

    def __init__(self, major, minor, description):
        """Record the object's identity and start with empty collections.

        major and minor are the two numbers identifying the object; the
        parser in this file passes the integers it reads from the
        "<major> <minor> obj" header line.  description is stored as
        given; the parser passes the text of the last "% " comment line
        it saw outside of any object, or None.
        """
        # Identity, exactly as supplied by the caller.
        self.major, self.minor, self.description = major, minor, description
        # Text gathered while the object is being read.  Each instance
        # gets its own fresh lists.
        self.comments, self.content = [], []
        # Tree links: initialised here, never filled in by this class.
        self.parent, self.kids = None, []
42
class Parser(pdlparser.PDLParser):
    """A parser for PDF documents."""

    # Shell command templates used to convert the document to TIFF; the
    # %(dpi)i and %(fname)s placeholders are filled in by whoever runs the
    # command.  NOTE(review): presumably consumed by the base class, which
    # may try each entry in turn until one succeeds -- confirm there.
    totiffcommands = [ 'gs -sDEVICE=tiff24nc -dPARANOIDSAFER -dNOPAUSE -dBATCH -dQUIET -r%(dpi)i -sOutputFile="%(fname)s" -' ]

    def isValid(self):
        """Returns True if data is PDF, else False.

        The data is accepted when self.firstblock contains the "%PDF-"
        marker anywhere (which also covers the case where the block
        starts with it, with or without a leading ESC%-12345X job
        header), or when an ESC%-12345X header appears in the first
        128 characters and the block mentions LANGUAGE=PDF in any case.
        """
        firstblock = self.firstblock
        if (firstblock.find("%PDF-") != -1) or \
           ((firstblock[:128].find("\033%-12345X") != -1) and
            (firstblock.upper().find("LANGUAGE=PDF") != -1)):
            self.logdebug("DEBUG: Input file is in the PDF format.")
            return True
        return False

    def getJobSize(self):
        """Counts pages in a PDF document.

        Reads self.infile line by line, collects the text found between
        each "<major> <minor> obj" header and the matching "endobj",
        keeps for every major number the object with the highest minor
        number (the latest one in the file when minors are equal), and
        returns the number of "/Type /Page" entries found in the
        objects which were kept.
        """
        # First we start with a generic PDF parser.
        lastcomment = None
        objects = {}
        obj = None  # the object currently being read, None outside of objects
        objre = re.compile(r"\s?(\d+)\s+(\d+)\s+obj[<\s/]?")
        # Iterating over the file directly replaces the deprecated
        # xreadlines() method and yields the same lines.
        for fullline in self.infile:
            # A single "\n" terminated line may still contain several
            # "\r" separated lines, so split it again.
            for line in [l.strip() for l in fullline.splitlines()]:
                if line.startswith("% "):
                    if obj is not None:
                        obj.comments.append(line)
                    else:
                        lastcomment = line[2:]
                    continue

                result = objre.search(line)
                if result is not None:
                    # New object begins here
                    obj = PDFObject(int(result.group(1)),
                                    int(result.group(2)),
                                    lastcomment)
                    obj.content.append(line[result.end():])
                elif line.startswith("endobj") \
                     or line.startswith(">> endobj") \
                     or line.startswith(">>endobj"):
                    # Handle previous object, if any
                    if obj is not None:
                        # only overwrite older versions of this object
                        # same minor seems to be possible, so the latest one
                        # found in the file will be the one we keep.
                        # if we want the first one, just use > instead of >=
                        oldobject = objects.setdefault(obj.major, obj)
                        if obj.minor >= oldobject.minor:
                            objects[obj.major] = obj
                        obj = None
                elif obj is not None:
                    obj.content.append(line)

        # Now we check each PDF object we've just created.
        # colorregexp = re.compile(r"(/ColorSpace) ?(/DeviceRGB\|/DeviceCMYK)[/ \t\r\n]", re.I)
        # ">" is accepted after /Page so that "<< /Type /Page>>" is counted;
        # "/Type /Pages" is still rejected because of the trailing "s".
        newpageregexp = re.compile(r"(/Type)\s?(/Page)[/>\s]", re.I)
        pagecount = 0
        for pdfobject in objects.values():
            # The lines were stripped, so join them with a newline (and
            # terminate the last one) : otherwise a "/Type /Page" sitting at
            # the end of a line would lose the delimiter the regexp needs.
            content = "\n".join(pdfobject.content) + "\n"
            pagecount += len(newpageregexp.findall(content))
        return pagecount
106
# When this module is run as a script rather than imported, hand the
# Parser class over to the test entry point of the pdlparser module.
if __name__ == "__main__" :
    pdlparser.test(Parser)

Note: See TracBrowser for help on using the browser.

Context Navigation

root / pkpgcounter / trunk / pkpgpdls / pdf.py @ 443

Download in other formats: