Context Navigation

pkpgcounter @ 1471

Revision 1463, 14.2 kB (checked in by jalet, 21 years ago)
pykotme now uses pkpgcounter to compute the job's size.
Property svn:eol-style set to `native`. Property svn:executable set to `*`. Property svn:keywords set to `Author Date Id Revision`.

Line
1	#! /usr/bin/env python
2	# -*- coding: ISO-8859-15 -*-
3
4	# pkpgcounter, a smart software page counter
5	#
6	# PyKota - Print Quotas for CUPS and LPRng
7	#
8	# (c) 2003-2004 Jerome Alet <alet@librelogiciel.com>
9	# This program is free software; you can redistribute it and/or modify
10	# it under the terms of the GNU General Public License as published by
11	# the Free Software Foundation; either version 2 of the License, or
12	# (at your option) any later version.
13	#
14	# This program is distributed in the hope that it will be useful,
15	# but WITHOUT ANY WARRANTY; without even the implied warranty of
16	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17	# GNU General Public License for more details.
18	#
19	# You should have received a copy of the GNU General Public License
20	# along with this program; if not, write to the Free Software
21	# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
22	#
23	# $Id$
24	#
25	# $Log$
26	# Revision 1.9 2004/05/10 07:23:21 jalet
27	# pykotme now uses pkpgcounter to compute the job's size.
28	#
29	# Revision 1.8 2004/05/08 15:12:23 jalet
30	# Improved PCL6 support
31	#
32	# Revision 1.7 2004/05/07 23:08:21 jalet
33	# Skeleton for PCLXL aka PCL6
34	# Added the "potential" fix for rastertoprinter's output
35	#
36	# Revision 1.6 2004/05/06 21:19:27 jalet
37	# Doesn't exit anymore on the first nul byte
38	#
39	# Revision 1.5 2004/05/06 12:37:29 jalet
40	# pkpgcounter : comments
41	# pkprinters : when --add is used, existing printers are now skipped.
42	#
43	# Revision 1.4 2004/05/04 12:21:55 jalet
44	# Now uses mmap in PCL mode
45	#
46	# Revision 1.3 2004/05/04 04:39:26 jalet
47	# Better PCL support
48	#
49	# Revision 1.2 2004/05/04 03:14:26 jalet
50	# fixed copy&paste problem in pkpgcounter
51	#
52	# Revision 1.1 2004/04/08 17:07:42 jalet
53	# pkpgcounter added
54	#
55	#
56
57	import sys
58	import os
59	import mmap
60	import struct
61	import tempfile
62
def debug(msg):
    """Output a debug message on stderr, followed by a newline.

    msg may be any object; it is rendered with "%s".  It is wrapped in a
    one-element tuple before formatting so that a tuple argument is
    printed as a tuple instead of being unpacked as format arguments.
    The stream is flushed after each message.
    """
    sys.stderr.write("%s\n" % (msg,))
    sys.stderr.flush()
67
def ispostscript(data):
    """Return 1 if data looks like PostScript, else 0.

    data is the first block of the job, as bytes (a plain str on
    Python 2).  It is recognized as PostScript when either :

      - it starts with "%!", optionally preceded by a ^D byte or by
        the ESC%-12345X sequence (then followed by "%!PS") ;
      - the ESC%-12345X sequence appears within its first 128 bytes
        and one of the known "LANGUAGE=POSTSCRIPT" spellings appears
        anywhere in data.
    """
    if data.startswith((b"%!", b"\004%!", b"\033%-12345X%!PS")):
        return 1
    if data[:128].find(b"\033%-12345X") != -1:
        for marker in (b"LANGUAGE=POSTSCRIPT",
                       b"LANGUAGE = POSTSCRIPT",
                       b"LANGUAGE = Postscript"):
            if data.find(marker) != -1:
                return 1
    return 0
80
def ispcl(data):
    """Return 1 if data looks like PCL, else 0.

    data is the first block of the job, as bytes (a plain str on
    Python 2).  It is recognized as PCL when either :

      - it starts with ESC E ESC ;
      - the ESC%-12345X sequence appears within its first 128 bytes
        and one of the known "LANGUAGE=PCL" spellings appears
        anywhere in data.

    Since "LANGUAGE=PCL" is a prefix of "LANGUAGE=PCLXL", a PCLXL
    header also satisfies this test.
    """
    if data.startswith(b"\033E\033"):
        return 1
    if data[:128].find(b"\033%-12345X") != -1:
        for marker in (b"LANGUAGE=PCL",
                       b"LANGUAGE = PCL",
                       b"LANGUAGE = Pcl"):
            if data.find(marker) != -1:
                return 1
    return 0
91
def ispclxl(data):
    """Return 1 if data looks like PCLXL aka PCL6, else 0.

    data is the first block of the job, as bytes (a plain str on
    Python 2).  All three conditions below must hold :

      - the ESC%-12345X sequence appears within the first 128 bytes ;
      - the " HP-PCL XL;" stream header marker appears in data ;
      - "LANGUAGE=PCLXL" or "LANGUAGE = PCLXL" appears in data.
    """
    if data[:128].find(b"\033%-12345X") == -1:
        return 0
    if data.find(b" HP-PCL XL;") == -1:
        return 0
    if (data.find(b"LANGUAGE=PCLXL") != -1) or \
       (data.find(b"LANGUAGE = PCLXL") != -1):
        return 1
    return 0
101
def postscript(infile):
    """Count pages in a DSC compliant PostScript document.

    infile is a file object opened in binary mode; it is read line by
    line from its current position up to end of file.

    Returns the number of lines which start with "%%Page: ".
    """
    pagecount = 0
    while 1:
        line = infile.readline()
        if not line:
            break # EOF
        if line.startswith(b"%%Page: "):
            pagecount += 1
    return pagecount
113
def pcl(infile):
    """Count pages in a PCL5 document.

    infile must be a file object backed by a real file descriptor :
    its whole content is accessed through a read-only memory map,
    whatever the current file position is.

    Returns the number of copies multiplied by the number of form feed
    characters found outside of the binary blocks which are skipped.
    When no form feed is found, an estimate based on the number of
    printer resets is returned instead.  An empty file gives 0.

    A file which ends in the middle of an escape sequence is counted
    up to the point where it is truncated.
    """
    #
    # Algorithm from pclcount
    # (c) 2003, by Eduardo Gielamo Oliveira & Rodolfo Broco Manin
    # published under the terms of the GNU General Public Licence v2.
    #
    # Backported from C to Python by Jerome Alet, then enhanced
    # with more PCL tags detected. I think all the necessary PCL tags
    # are recognized to correctly handle PCL5 files wrt their number
    # of pages. The documentation used for this was :
    #
    # HP PCL/PJL Reference Set
    # PCL5 Printer Language Technical Quick Reference Guide
    # http://h20000.www2.hp.com/bc/docs/support/SupportManual/bpl13205/bpl13205.pdf
    #
    # Maps each supported two bytes tag to the characters which may
    # terminate its numeric argument :
    #
    # <ESC>*b###W -> Start of a raster data row/block
    # <ESC>*b###V -> Start of a raster data plane
    # <ESC>*c###W -> Start of a user defined pattern
    # <ESC>*i###W -> Start of a viewing illuminant block
    # <ESC>*l###W -> Start of a color lookup table
    # <ESC>*m###W -> Start of a download dither matrix block
    # <ESC>*v###W -> Start of a configure image data block
    # <ESC>(s###W -> Start of a characters description block
    # <ESC>)s###W -> Start of a fonts description block
    # <ESC>(f###W -> Start of a symbol set block
    # <ESC>&b###W -> Start of configuration data block
    # <ESC>&l###X -> Number of copies
    # <ESC>&n###W -> Starts an alphanumeric string ID block
    # <ESC>&p###X -> Start of a non printable characters block
    #
    tagsends = {b"&n": b"W",
                b"&b": b"W",
                b"*i": b"W",
                b"*l": b"W",
                b"*m": b"W",
                b"*v": b"W",
                b"*c": b"W",
                b"(f": b"W",
                b"*b": b"VW",
                b"(s": b"W",
                b")s": b"W",
                b"&p": b"X",
                b"&l": b"X"}

    infileno = infile.fileno()
    filesize = os.fstat(infileno).st_size
    if not filesize:
        return 0 # an empty file can't be memory mapped, and has no page

    copies = 1
    pagecount = resets = 0
    position = 0
    data = mmap.mmap(infileno, filesize, access=mmap.ACCESS_READ)
    try:
        # One byte slices are used instead of plain indexing : they
        # give an empty string past the end of the data instead of
        # raising, and have the same type on Python 2 and Python 3.
        while position < filesize:
            char = data[position:position + 1]
            position += 1
            if char == b"\014":
                pagecount += 1
            elif char == b"\033":
                tagstart = data[position:position + 1]
                position += 1
                if not tagstart:
                    break # the file ends right after <ESC>
                if tagstart in b"E9=YZ": # one byte PCL tag
                    if tagstart == b"E":
                        resets += 1
                    continue # skip to next tag
                tag = tagstart + data[position:position + 1]
                position += 1
                tagend = tagsends.get(tag)
                if tagend is None:
                    continue # Unsupported (or truncated) PCL tag
                # Now read the numeric argument
                size = 0
                while 1:
                    char = data[position:position + 1]
                    position += 1
                    if not char.isdigit():
                        break # terminator, or EOF if char is empty
                    size = (size * 10) + int(char)
                if char and (char in tagend):
                    if tag == b"&l":
                        copies = size
                    else:
                        # the argument is the size of a block of
                        # binary data : we jump over it so that its
                        # content is not parsed.
                        if tag == b"&n":
                            # we have to take care of the operation id
                            # byte which is before the string itself
                            size += 1
                        position += size
    finally:
        data.close()

    if pagecount:
        return copies * pagecount

    # if pagecount is still 0, we return an estimate based on the
    # number of resets instead of the number of form feed characters.
    # The number of resets is always at least 2 with a valid PCL
    # file : one at the very start and one at the very end of the
    # job's data. So we subtract 2 from the number of resets. And
    # since on our test data we needed to subtract 1 more, we finally
    # subtract 3. With 3 resets or less the estimate is 0.
    if resets > 2:
        return copies * (resets - 3)
    return 0
221
class PCLXLParser:
    """Page counter for PCLXL (aka PCL6) streams.

    The parser walks the binary stream one tag at a time.  self.tags
    is a 256 entries table indexed by tag value; each entry is either :

      - None : the tag is ignored ;
      - an integer : the number of data bytes to skip after the tag ;
      - a method : called without argument, it returns the number of
        bytes to skip after it returns (or None for no byte).
    """

    def __init__(self, infile):
        """Initialize PCLXL parser.

        infile is a file object opened in binary mode.  Lines are read
        from it until the " HP-PCL XL;" stream header is found; the
        character just before it selects the endianness : ")" for
        little endian, "(" for big endian.

        Raises TypeError if no stream header is found.
        """
        self.infile = infile
        self.islittleendian = None
        self._pages = 0
        found = 0
        while not found:
            line = self.infile.readline()
            if not line:
                break
            if line[1:12] == b" HP-PCL XL;":
                found = 1
                # a slice gives a one byte string on Python 2 and 3
                binding = line[0:1]
                if binding == b")":
                    self.littleendian()
                elif binding == b"(":
                    self.bigendian()
        if not found:
            raise TypeError("This file doesn't seem to be PCLXL (aka PCL6)")

        self.tags = [None] * 256
        self.tags[0x28] = self.bigendian # big endian
        self.tags[0x29] = self.littleendian # little endian
        self.tags[0x43] = self.beginPage # BeginPage
        self.tags[0x44] = self.endPage # EndPage

        self.tags[0xc0] = 1 # ubyte
        self.tags[0xc1] = 2 # uint16
        self.tags[0xc2] = 4 # uint32
        self.tags[0xc3] = 2 # sint16
        self.tags[0xc4] = 4 # sint32
        self.tags[0xc5] = 4 # real32

        self.tags[0xc8] = self.array_8 # ubyte_array
        self.tags[0xc9] = self.array_16 # uint16_array
        self.tags[0xca] = self.array_32 # uint32_array
        self.tags[0xcb] = self.array_16 # sint16_array
        self.tags[0xcc] = self.array_32 # sint32_array
        self.tags[0xcd] = self.array_32 # real32_array

        self.tags[0xd0] = 2 # ubyte_xy
        self.tags[0xd1] = 4 # uint16_xy
        self.tags[0xd2] = 8 # uint32_xy
        self.tags[0xd3] = 4 # sint16_xy
        self.tags[0xd4] = 8 # sint32_xy
        self.tags[0xd5] = 8 # real32_xy

        # box types have their own 0xe0-0xe5 range : storing them at
        # 0xd0-0xd5 would overwrite the sizes of the xy types above.
        self.tags[0xe0] = 4 # ubyte_box
        self.tags[0xe1] = 8 # uint16_box
        self.tags[0xe2] = 16 # uint32_box
        self.tags[0xe3] = 8 # sint16_box
        self.tags[0xe4] = 16 # sint32_box
        self.tags[0xe5] = 16 # real32_box

        self.tags[0xf8] = 1 # attr_ubyte
        self.tags[0xf9] = 2 # attr_uint16

        self.tags[0xfa] = self.embeddedData # dataLength
        self.tags[0xfb] = self.embeddedDataSmall # dataLengthByte

    def beginPage(self):
        """Indicates the beginning of a new page."""
        self._pages += 1
        debug("Begin page %i at %s" % (self._pages, self.infile.tell()))

    def endPage(self):
        """Indicates the end of a page."""
        debug("End page %i at %s" % (self._pages, self.infile.tell()))

    def handleArray(self, itemsize):
        """Handles arrays of items of itemsize bytes each.

        Reads the tag which gives the type of the array's length, then
        the length itself, and returns the size in bytes of the
        array's content, which the caller has to skip.

        Raises TypeError if the length's type is not 1, 2 or 4 bytes
        wide, or if the file ends before the length is complete.
        """
        datatype = self.infile.read(1)
        if not datatype:
            raise TypeError("Unexpected end of file in array at %s" % self.infile.tell())
        length = self.tags[ord(datatype)]
        # validated before reading : the entry may be None or a method
        code = {1: "B", 2: "H", 4: "I"}.get(length)
        if code is None:
            raise TypeError("Error on array size at %s" % self.infile.tell())
        sarraysize = self.infile.read(length)
        if len(sarraysize) != length:
            raise TypeError("Unexpected end of file in array at %s" % self.infile.tell())
        if self.islittleendian:
            fmt = "<" + code
        else:
            fmt = ">" + code
        arraysize = struct.unpack(fmt, sarraysize)[0]
        return arraysize * itemsize

    def array_8(self):
        """Handles arrays of 1 byte items."""
        return self.handleArray(1)

    def array_16(self):
        """Handles arrays of 2 bytes items."""
        return self.handleArray(2)

    def array_32(self):
        """Handles arrays of 4 bytes items."""
        return self.handleArray(4)

    def embeddedDataSmall(self):
        """Handle small amounts of data.

        Returns the length of the data, read from a single byte.
        Raises TypeError if the file ends before this byte.
        """
        lengthbyte = self.infile.read(1)
        if not lengthbyte:
            raise TypeError("Unexpected end of file in embedded data at %s" % self.infile.tell())
        return ord(lengthbyte)

    def embeddedData(self):
        """Handle normal amounts of data.

        Returns the length of the data, read from a 4 bytes unsigned
        integer in the current endianness.  Raises TypeError if the
        file ends before the 4 bytes are read.
        """
        if self.islittleendian:
            fmt = "<I"
        else:
            fmt = ">I"
        slength = self.infile.read(4)
        if len(slength) != 4:
            raise TypeError("Unexpected end of file in embedded data at %s" % self.infile.tell())
        return struct.unpack(fmt, slength)[0]

    def littleendian(self):
        """Toggles to little endianness."""
        self.islittleendian = 1 # little endian

    def bigendian(self):
        """Toggles to big endianness."""
        self.islittleendian = 0 # big endian

    def pagecount(self):
        """Counts pages in a PCLXL (PCL6) document.

        Parses the stream from the current file position up to end of
        file, and returns the number of BeginPage tags encountered.
        The counter lives in its own attribute, so that this method
        is not hidden by it and can be called again.
        """
        self._pages = 0
        while 1:
            char = self.infile.read(1)
            if not char:
                break # EOF
            index = ord(char)
            length = self.tags[index]
            if length is None:
                continue # this tag is ignored
            if not length:
                debug("Unrecognized tag 0x%02x at %s\n" % (index, self.infile.tell()))
            elif callable(length):
                length = length()
            if length:
                # skip the tag's data so that it is not parsed as tags
                self.infile.read(length)
        return self._pages
360
def pclxl(infile):
    """Count pages in a PCL6 aka PCLXL document.

    infile is a file object opened in binary mode.  Raises TypeError
    if PCLXLParser doesn't find a PCLXL stream header in it.
    """
    parser = PCLXLParser(infile)
    return parser.pagecount()
365
def smartpagecounter(filename):
    """Autodetects file format and returns number of pages.

    filename is the name of the file to parse, or "-" to parse the
    data available on stdin.

    Returns the number of pages, or 0 after an error message on stderr
    if the file format is unknown.  Raises TypeError for PCLXL
    documents, which are detected but not supported yet.  The file is
    closed in all cases, including when an exception is raised.
    """
    if filename == "-":
        # we must read from stdin
        # but since stdin is not seekable, we have to use a temporary
        # file instead.
        infile = tempfile.TemporaryFile()
    else:
        # normal file
        infile = open(filename, "rb")

    try:
        if filename == "-":
            # On Python 3 the raw bytes are available from
            # sys.stdin.buffer; on Python 2 sys.stdin already
            # returns byte strings.
            source = getattr(sys.stdin, "buffer", sys.stdin)
            while 1:
                data = source.read(256 * 1024)
                if not data:
                    break
                infile.write(data)
            infile.flush()
            infile.seek(0)

        # Try to detect file type by reading first block of datas
        firstblock = infile.read(1024)
        infile.seek(0)
        if ispostscript(firstblock):
            size = postscript(infile)
        elif ispclxl(firstblock):
            # TODO : call pclxl(infile) here once PCLXL parsing is
            # considered reliable.
            raise TypeError("PCLXL (aka PCL6) is not supported yet.")
        elif ispcl(firstblock):
            size = pcl(infile)
        else:
            sys.stderr.write("ERROR : Unknown file format for %s\n" % filename)
            size = 0
    finally:
        infile.close()
    return size
399
if __name__ == "__main__":
    # Read from stdin when no argument is given, or when stdin is not
    # a terminal and "-" was not passed explicitly.
    if (len(sys.argv) < 2) or ((not sys.stdin.isatty()) and ("-" not in sys.argv[1:])):
        sys.argv.append("-")

    # Sum the sizes of all the files; a file which raises TypeError
    # is reported on stderr and doesn't count in the total.
    totalsize = 0
    for arg in sys.argv[1:]:
        try:
            totalsize += smartpagecounter(arg)
        except TypeError as msg:
            debug(msg)
    print("%s" % totalsize)

Note: See TracBrowser for help on using the browser.

Context Navigation

root / pykota / trunk / bin / pkpgcounter @ 1471

Download in other formats: