Context Navigation

pkpgcounter @ 1462

Revision 1462, 14.1 kB (checked in by jalet, 20 years ago)
Improved PCL6 support
Property svn:eol-style set to `native` Property svn:executable set to ``* Property svn:keywords set to `Author Date Id Revision`

Line
1	#! /usr/bin/env python
2	# -- coding: ISO-8859-15 --
3
4	# pkpgcounter, a smart software page counter
5	#
6	# PyKota - Print Quotas for CUPS and LPRng
7	#
8	# (c) 2003-2004 Jerome Alet <alet@librelogiciel.com>
9	# This program is free software; you can redistribute it and/or modify
10	# it under the terms of the GNU General Public License as published by
11	# the Free Software Foundation; either version 2 of the License, or
12	# (at your option) any later version.
13	#
14	# This program is distributed in the hope that it will be useful,
15	# but WITHOUT ANY WARRANTY; without even the implied warranty of
16	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17	# GNU General Public License for more details.
18	#
19	# You should have received a copy of the GNU General Public License
20	# along with this program; if not, write to the Free Software
21	# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
22	#
23	# $Id$
24	#
25	# $Log$
26	# Revision 1.8 2004/05/08 15:12:23 jalet
27	# Improved PCL6 support
28	#
29	# Revision 1.7 2004/05/07 23:08:21 jalet
30	# Skeleton for PCLXL aka PCL6
31	# Added the "potential" fix for rastertoprinter's output
32	#
33	# Revision 1.6 2004/05/06 21:19:27 jalet
34	# Doesn't exit anymore on the first nul byte
35	#
36	# Revision 1.5 2004/05/06 12:37:29 jalet
37	# pkpgcounter : comments
38	# pkprinters : when --add is used, existing printers are now skipped.
39	#
40	# Revision 1.4 2004/05/04 12:21:55 jalet
41	# Now uses mmap in PCL mode
42	#
43	# Revision 1.3 2004/05/04 04:39:26 jalet
44	# Better PCL support
45	#
46	# Revision 1.2 2004/05/04 03:14:26 jalet
47	# fixed copy&paste problem in pkpgcounter
48	#
49	# Revision 1.1 2004/04/08 17:07:42 jalet
50	# pkpgcounter added
51	#
52	#
53
54	import sys
55	import os
56	import mmap
57	import struct
58	import tempfile
59
def debug(msg):
    """Write msg followed by a newline to stderr, then flush the stream."""
    stream = sys.stderr
    stream.write("%s\n" % msg)
    stream.flush()
64
def ispostscript(data):
    """Tell whether a block of data looks like PostScript.

    data is the first block read from the job.  Returns 1 when it starts
    with a PostScript header (optionally preceded by Ctrl-D or a UEL), or
    when a UEL appears in its first 128 characters and a PJL line selects
    the PostScript language; returns 0 otherwise.
    """
    # Direct PostScript signatures at the very start of the data.
    for signature in ("%!", "\004%!", "\033%-12345X%!PS"):
        if data.startswith(signature):
            return 1

    # Otherwise a UEL must be present near the start...
    if "\033%-12345X" not in data[:128]:
        return 0

    # ...together with a PJL language switch to PostScript.
    for marker in ("LANGUAGE=POSTSCRIPT",
                   "LANGUAGE = POSTSCRIPT",
                   "LANGUAGE = Postscript"):
        if marker in data:
            return 1
    return 0
77
def ispcl(data):
    """Tell whether a block of data looks like PCL.

    Returns 1 when data starts with <ESC>E<ESC>, or when a UEL appears in
    its first 128 characters and a PJL line selects the PCL language;
    returns 0 otherwise.  The PCL markers are prefixes of the PCLXL ones,
    so PCLXL headers match here as well.
    """
    if data.startswith("\033E\033"):
        return 1

    if "\033%-12345X" in data[:128]:
        for marker in ("LANGUAGE=PCL", "LANGUAGE = PCL", "LANGUAGE = Pcl"):
            if marker in data:
                return 1
    return 0
88
def ispclxl(data):
    """Tell whether a block of data looks like PCLXL (aka PCL6).

    Returns 1 only when all three hold: a UEL appears in the first 128
    characters, the " HP-PCL XL;" stream header is present, and a PJL line
    selects the PCLXL language.  Returns 0 otherwise.
    """
    if "\033%-12345X" not in data[:128]:
        return 0
    if " HP-PCL XL;" not in data:
        return 0
    if "LANGUAGE=PCLXL" in data or "LANGUAGE = PCLXL" in data:
        return 1
    return 0
98
def postscript(infile):
    """Count pages in a DSC compliant PostScript document.

    infile is any object with a readline() method returning an empty
    value at end of file.  Every line starting with the "%%Page: " DSC
    comment counts as one page.  Returns the number of such lines.
    """
    pagecount = 0
    line = infile.readline()
    while line:
        # "%%Pages:" (the document total) does not match because of the
        # colon and space required right after "%%Page".
        if line.startswith("%%Page: "):
            pagecount += 1
        line = infile.readline()
    return pagecount
110
def _pclcharat(data, position):
    """Return the character of data at position as a str, or "" past EOF."""
    # Slicing never raises at end of data, and ord()/chr() turn both the
    # 1-char str and the 1-byte bytes slice into a plain str character.
    chunk = data[position:position + 1]
    if not chunk:
        return ""
    return chr(ord(chunk))


def _pclscan(data):
    """Scan a PCL5 buffer and return the tuple (pagecount, resets, copies)."""
    # Maps the two characters following <ESC> to the character(s) which
    # terminate the numeric argument of the tags we care about :
    #
    # <ESC>*b###W -> Start of a raster data row/block
    # <ESC>*b###V -> Start of a raster data plane
    # <ESC>*c###W -> Start of a user defined pattern
    # <ESC>*i###W -> Start of a viewing illuminant block
    # <ESC>*l###W -> Start of a color lookup table
    # <ESC>*m###W -> Start of a download dither matrix block
    # <ESC>*v###W -> Start of a configure image data block
    # <ESC>(s###W -> Start of a characters description block
    # <ESC>)s###W -> Start of a fonts description block
    # <ESC>(f###W -> Start of a symbol set block
    # <ESC>&b###W -> Start of configuration data block
    # <ESC>&l###X -> Number of copies
    # <ESC>&n###W -> Starts an alphanumeric string ID block
    # <ESC>&p###X -> Start of a non printable characters block
    #
    tagsends = {"&n": "W",
                "&b": "W",
                "*i": "W",
                "*l": "W",
                "*m": "W",
                "*v": "W",
                "*c": "W",
                "(f": "W",
                "*b": "VW",
                "(s": "W",
                ")s": "W",
                "&p": "X",
                "&l": "X"}
    copies = 1
    pagecount = resets = 0
    position = 0
    while 1:
        char = _pclcharat(data, position)
        if not char:  # EOF
            break
        position += 1
        if char == "\014":
            # Form feed outside of any binary block : one more page.
            pagecount += 1
            continue
        if char != "\033":
            continue

        tagstart = _pclcharat(data, position)
        if not tagstart:  # data ends right after <ESC>
            break
        position += 1
        if tagstart in "E9=YZ":  # one byte PCL tag
            if tagstart == "E":
                resets += 1
            continue  # skip to next tag

        second = _pclcharat(data, position)
        if not second:  # data ends in the middle of a tag
            break
        position += 1
        tag = tagstart + second
        tagend = tagsends.get(tag)
        if tagend is None:
            continue  # Unsupported PCL tag

        # Now read the numeric argument.  The character ending it is
        # consumed as well, whether it is the expected terminator or not.
        size = 0
        while 1:
            char = _pclcharat(data, position)
            position += 1
            if not ("0" <= char <= "9"):  # also true at EOF
                break
            size = (size * 10) + int(char)

        if char and char in tagend:
            if tag == "&l":
                copies = size
            else:
                if tag == "&n":
                    # we have to take care of the operation id byte
                    # which is before the string itself
                    size += 1
                # Skip the block : form feed or escape bytes inside
                # binary data must not be interpreted.
                position += size
    return (pagecount, resets, copies)


def pcl(infile):
    """Count pages in a PCL5 document.

    infile must be a real file object : its fileno() is memory-mapped
    read-only for the duration of the scan.  An empty file yields 0.

    Returns copies * pages, where pages is the number of form feeds found
    outside of skipped binary blocks, or, when no form feed was found, a
    count derived from the number of <ESC>E resets.  A file truncated in
    the middle of an escape sequence is counted up to the truncation
    point instead of raising IndexError.
    """
    #
    # Algorithm from pclcount
    # (c) 2003, by Eduardo Gielamo Oliveira & Rodolfo Broco Manin
    # published under the terms of the GNU General Public Licence v2.
    #
    # Backported from C to Python by Jerome Alet, then enhanced
    # with more PCL tags detected. The documentation used for this was :
    #
    # HP PCL/PJL Reference Set
    # PCL5 Printer Language Technical Quick Reference Guide
    # http://h20000.www2.hp.com/bc/docs/support/SupportManual/bpl13205/bpl13205.pdf
    #
    infileno = infile.fileno()
    filesize = os.fstat(infileno).st_size
    if not filesize:
        return 0  # an empty file can't be mapped, and has no page anyway
    data = mmap.mmap(infileno, filesize, access=mmap.ACCESS_READ)
    try:
        (pagecount, resets, copies) = _pclscan(data)
    finally:
        data.close()

    if pagecount:
        return copies * pagecount

    # No form feed was found, so we return a number based on the resets
    # instead.  The number of resets is always at least 2 with a valid
    # PCL file : one at the very start and one at the very end of the
    # job's data, so we should subtract 2.  Since on our test data we
    # needed to subtract 1 more, we finally subtract 3.  With 2 resets
    # or less the file is probably not a valid PCL file : we return 0.
    if resets > 2:
        return copies * (resets - 3)
    return 0
218
class PCLXLParser:
    """Page counter for PCLXL (aka PCL6) streams.

    The stream is read tag by tag.  self.tags maps each possible tag byte
    either to None (ignored tag), to the number of data bytes to skip
    after the tag, or to a method which is called when the tag is met and
    returns the number of bytes to skip (or None).
    """

    def __init__(self, infile):
        """Initialize PCLXL parser.

        Reads infile line by line up to and including the " HP-PCL XL;"
        stream header, whose first character gives the initial byte
        order : ")" for little endian, "(" for big endian.

        Raises TypeError if no such header is found before end of file.
        """
        self.infile = infile
        self.islittleendian = None
        self.pages = 0  # pages seen so far by pagecount()
        found = 0
        while not found:
            line = self.infile.readline()
            if not line:
                break
            if not isinstance(line, str):
                # binary stream yielding bytes objects : get a str so
                # that the comparisons below work character by character
                line = line.decode("latin-1")
            if line[1:12] == " HP-PCL XL;":
                found = 1
                if line[0] == ")":
                    self.littleendian()
                elif line[0] == "(":
                    self.bigendian()
        if not found:
            raise TypeError("This file doesn't seem to be PCLXL (aka PCL6)")

        self.tags = [None] * 256
        self.tags[0x28] = self.bigendian     # big endian
        self.tags[0x29] = self.littleendian  # little endian
        self.tags[0x43] = self.beginPage     # BeginPage
        self.tags[0x44] = self.endPage       # EndPage

        self.tags[0xc0] = 1  # ubyte
        self.tags[0xc1] = 2  # uint16
        self.tags[0xc2] = 4  # uint32
        self.tags[0xc3] = 2  # sint16
        self.tags[0xc4] = 4  # sint32
        self.tags[0xc5] = 4  # real32

        self.tags[0xc8] = self.array_8   # ubyte_array
        self.tags[0xc9] = self.array_16  # uint16_array
        self.tags[0xca] = self.array_32  # uint32_array
        self.tags[0xcb] = self.array_16  # sint16_array
        self.tags[0xcc] = self.array_32  # sint32_array
        self.tags[0xcd] = self.array_32  # real32_array

        self.tags[0xd0] = 2  # ubyte_xy
        self.tags[0xd1] = 4  # uint16_xy
        self.tags[0xd2] = 8  # uint32_xy
        self.tags[0xd3] = 4  # sint16_xy
        self.tags[0xd4] = 8  # sint32_xy
        self.tags[0xd5] = 8  # real32_xy

        # The box data types live at 0xe0-0xe5 : storing them at
        # 0xd0-0xd5 would overwrite the sizes of the xy data types.
        self.tags[0xe0] = 4   # ubyte_box
        self.tags[0xe1] = 8   # uint16_box
        self.tags[0xe2] = 16  # uint32_box
        self.tags[0xe3] = 8   # sint16_box
        self.tags[0xe4] = 16  # sint32_box
        self.tags[0xe5] = 16  # real32_box

        self.tags[0xf8] = 1  # attr_ubyte
        self.tags[0xf9] = 2  # attr_uint16

        self.tags[0xfa] = self.embeddedData       # dataLength
        self.tags[0xfb] = self.embeddedDataSmall  # dataLengthByte

    def _byteorder(self):
        """Returns the struct byte order prefix for the current endianness."""
        if self.islittleendian:
            return "<"
        return ">"  # big endian, also used while endianness is unknown

    def _readexactly(self, length, what):
        """Reads exactly length bytes, raises TypeError if the file is too short."""
        data = self.infile.read(length)
        if len(data) != length:
            raise TypeError("Unexpected end of file in %s at %s"
                            % (what, self.infile.tell()))
        return data

    def beginPage(self):
        """Indicates the beginning of a new page."""
        self.pages += 1
        debug("Begin page %i at %s" % (self.pages, self.infile.tell()))

    def endPage(self):
        """Indicates the end of a page."""
        debug("End page %i at %s" % (self.pages, self.infile.tell()))

    def handleArray(self, itemsize):
        """Handles arrays.

        Reads the data type tag and the value giving the number of items,
        and returns the size in bytes of the array's contents, which are
        itemsize bytes each.

        Raises TypeError if the number of items is not stored on 1, 2 or
        4 bytes, or if the file ends prematurely.
        """
        datatype = self._readexactly(1, "array")
        length = self.tags[ord(datatype)]
        # Validate before reading : the entry may also be None or a method.
        if length == 1:
            code = "B"
        elif length == 2:
            code = "H"
        elif length == 4:
            code = "I"
        else:
            raise TypeError("Error on array size at %s" % self.infile.tell())
        sarraysize = self._readexactly(length, "array size")
        arraysize = struct.unpack(self._byteorder() + code, sarraysize)[0]
        return arraysize * itemsize

    def array_8(self):
        """Handles arrays of 1 byte items."""
        return self.handleArray(1)

    def array_16(self):
        """Handles arrays of 2 bytes items."""
        return self.handleArray(2)

    def array_32(self):
        """Handles arrays of 4 bytes items."""
        return self.handleArray(4)

    def embeddedDataSmall(self):
        """Handle small amounts of data : the length is stored on 1 byte."""
        return ord(self._readexactly(1, "embedded data length"))

    def embeddedData(self):
        """Handle normal amounts of data : the length is stored on 4 bytes."""
        raw = self._readexactly(4, "embedded data length")
        return struct.unpack(self._byteorder() + "I", raw)[0]

    def littleendian(self):
        """Toggles to little endianness."""
        self.islittleendian = 1  # little endian

    def bigendian(self):
        """Toggles to big endianness."""
        self.islittleendian = 0  # big endian

    def pagecount(self):
        """Counts pages in a PCLXL (PCL6) document.

        Reads the stream from its current position up to end of file and
        returns the number of BeginPage operators met.  The counter is
        kept in self.pages so that this method stays callable afterwards.

        Raises TypeError on truncated or malformed data.
        """
        self.pages = 0
        while 1:
            char = self.infile.read(1)
            if not char:
                break
            length = self.tags[ord(char)]
            if callable(length):
                # operator or variable length data : the handler tells
                # us how many bytes to skip, if any
                length = length()
            if length:
                self.infile.read(length)  # skip the data
        return self.pages
357
def pclxl(infile):
    """Return the number of pages of the PCLXL (aka PCL6) document in infile."""
    return PCLXLParser(infile).pagecount()
362
def smartpagecounter(filename):
    """Autodetects file format and returns number of pages.

    filename is the path of the file to analyze, or "-" to read the job
    from stdin.  Returns 0, after writing an error message on stderr, if
    the format is not recognized.

    Raises TypeError for PCLXL (aka PCL6) documents, which are not
    supported yet.  The input file is closed in all cases.
    """
    fromstdin = (filename == "-")
    if fromstdin:
        # we must read from stdin
        # but since stdin is not seekable, we have to use a temporary
        # file instead.
        infile = tempfile.TemporaryFile()
    else:
        # normal file
        infile = open(filename, "rb")

    try:
        if fromstdin:
            while 1:
                data = sys.stdin.read(256 * 1024)
                if not data:
                    break
                infile.write(data)
            infile.flush()
            infile.seek(0)

        # Try to detect file type by reading first block of data
        firstblock = infile.read(1024)
        infile.seek(0)
        if ispostscript(firstblock):
            size = postscript(infile)
        elif ispclxl(firstblock):
            # Must be tested before ispcl(), which matches PCLXL headers
            # too.  The PCLXL counter is deliberately not called yet.
            raise TypeError("PCLXL (aka PCL6) is not supported yet.")
        elif ispcl(firstblock):
            size = pcl(infile)
        else:
            sys.stderr.write("ERROR : Unknown file format for %s\n" % filename)
            size = 0
    finally:
        # also reached when a page counter raises : don't leak the file
        infile.close()
    return size
396
if __name__ == "__main__":
    # Without any file name on the command line, read the job from stdin.
    if not sys.argv[1:]:
        sys.argv.append("-")

    totalsize = 0
    for arg in sys.argv[1:]:
        try:
            totalsize += smartpagecounter(arg)
        except TypeError:
            # Unsupported or invalid document : report it on stderr and
            # go on with the next file.
            debug(sys.exc_info()[1])
    sys.stdout.write("%s\n" % totalsize)

Note: See TracBrowser for help on using the browser.

Context Navigation

root / pykota / trunk / bin / pkpgcounter @ 1462

Download in other formats: