Context Navigation

pdlanalyzer.py @ 1485

Revision 1485, 15.5 kB (checked in by jalet, 20 years ago)
Speed improvement
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`

Line
1	# PyKota
2	# -*- coding: ISO-8859-15 -*-
3	#
4	# PyKota - Print Quotas for CUPS and LPRng
5	#
6	# (c) 2003-2004 Jerome Alet <alet@librelogiciel.com>
7	# This program is free software; you can redistribute it and/or modify
8	# it under the terms of the GNU General Public License as published by
9	# the Free Software Foundation; either version 2 of the License, or
10	# (at your option) any later version.
11	#
12	# This program is distributed in the hope that it will be useful,
13	# but WITHOUT ANY WARRANTY; without even the implied warranty of
14	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15	# GNU General Public License for more details.
16	#
17	# You should have received a copy of the GNU General Public License
18	# along with this program; if not, write to the Free Software
19	# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
20	#
21	# $Id$
22	#
23	# $Log$
24	# Revision 1.2 2004/05/19 19:09:36 jalet
25	# Speed improvement
26	#
27	# Revision 1.1 2004/05/18 09:59:54 jalet
28	# pkpgcounter is now just a wrapper around the PDLAnalyzer class
29	#
30	#
31	#
32
33	import sys
34	import os
35	import struct
36	import tempfile
37
class PostScriptAnalyzer:
    """Page counter for DSC compliant PostScript documents."""

    def __init__(self, infile):
        """Remember the stream to analyze.

        infile -- file-like object providing readline().
        """
        self.infile = infile

    def getJobSize(self):
        """Return the number of lines of the stream starting with "%%Page: ".

        The stream is consumed line by line until readline() returns an
        empty value; each matching DSC comment counts for one page.
        """
        total = 0
        current = self.infile.readline()
        while current:
            if current.startswith("%%Page: "):
                total = total + 1
            current = self.infile.readline()
        return total
54
class PCLAnalyzer:
    """Page counter for PCL5 documents.

    The input stream is consumed sequentially through an internal
    buffer (self.data / self.pos / self.len) refilled BLOCKSIZE
    characters at a time, so the stream doesn't need to be seekable.
    """

    # Number of characters requested from the input stream at each refill.
    BLOCKSIZE = 1024 * 1024

    # Maps the two characters following <ESC> to the character(s) which
    # may terminate the numeric argument of the tags we care about :
    #
    # <ESC>*b###W -> Start of a raster data row/block
    # <ESC>*b###V -> Start of a raster data plane
    # <ESC>*c###W -> Start of a user defined pattern
    # <ESC>*i###W -> Start of a viewing illuminant block
    # <ESC>*l###W -> Start of a color lookup table
    # <ESC>*m###W -> Start of a download dither matrix block
    # <ESC>*v###W -> Start of a configure image data block
    # <ESC>(s###W -> Start of a characters description block
    # <ESC>)s###W -> Start of a fonts description block
    # <ESC>(f###W -> Start of a symbol set block
    # <ESC>&b###W -> Start of configuration data block
    # <ESC>&l###X -> Number of copies
    # <ESC>&n###W -> Starts an alphanumeric string ID block
    # <ESC>&p###X -> Start of a non printable characters block
    TAGSENDS = {
        "&n": "W",
        "&b": "W",
        "*i": "W",
        "*l": "W",
        "*m": "W",
        "*v": "W",
        "*c": "W",
        "(f": "W",
        "*b": "VW",
        "(s": "W",
        ")s": "W",
        "&p": "X",
        "&l": "X",
    }

    def __init__(self, infile):
        """Initialize PCL Analyzer.

        infile -- file-like object providing read(size).
        """
        self.infile = infile
        # Empty buffer : the first readone() will trigger a refill.
        self.data = ""
        self.pos = self.len = 0

    def skip(self, nb):
        """Skips the next nb characters of the stream.

        Buffers are refilled as many times as needed, so a skip may
        span several buffers. Hitting the end of the stream simply
        leaves an empty buffer, and the next readone() reports EOF.
        """
        newpos = self.pos + nb
        while newpos >= self.len:
            # What remains to skip lies beyond the current buffer.
            newpos -= self.len
            self.data = self.infile.read(self.BLOCKSIZE)
            self.len = len(self.data)
            if not self.len:
                self.pos = 0
                return
        self.pos = newpos

    def readone(self):
        """Reads a new character.

        Returns the next character of the stream, or None once the
        end of the stream is reached.
        """
        if self.pos >= self.len:
            self.data = self.infile.read(self.BLOCKSIZE)
            self.len = len(self.data)
            self.pos = 0
            if not self.len:
                return None
        char = self.data[self.pos]
        self.pos += 1
        return char

    def getJobSize(self):
        """Count pages in a PCL5 document.

        Returns the number of copies multiplied by the number of form
        feed characters found outside of the skipped data blocks. When
        no form feed was found the number of printer resets is used
        instead (see the comment at the end of this method).

        A stream which ends in the middle of an escape sequence is
        handled like a normal end of stream.
        """
        #
        # Algorithm from pclcount
        # (c) 2003, by Eduardo Gielamo Oliveira & Rodolfo Broco Manin
        # published under the terms of the GNU General Public Licence v2.
        #
        # Backported from C to Python by Jerome Alet, then enhanced
        # with more PCL tags detected. I think all the necessary PCL tags
        # are recognized to correctly handle PCL5 files wrt their number
        # of pages. The documentation used for this was :
        #
        # HP PCL/PJL Reference Set
        # PCL5 Printer Language Technical Quick Reference Guide
        # http://h20000.www2.hp.com/bc/docs/support/SupportManual/bpl13205/bpl13205.pdf
        #
        self.data = ""
        self.pos = self.len = 0
        copies = 1
        pagecount = resets = 0
        while True:
            char = self.readone()
            if not char:  # EOF ?
                break
            if char == "\014":
                pagecount += 1
            elif char == "\033":
                tagstart = self.readone()
                if not tagstart:  # EOF right after <ESC>
                    break
                if tagstart in "E9=YZ":  # one byte PCL tag
                    if tagstart == "E":
                        resets += 1
                    continue  # skip to next tag
                tagnext = self.readone()
                if not tagnext:  # EOF in the middle of the tag
                    break
                tag = tagstart + tagnext
                tagend = self.TAGSENDS.get(tag)
                if tagend is None:
                    continue  # Unsupported PCL tag
                # Now read the numeric argument
                size = 0
                char = self.readone()
                while char and char.isdigit():
                    size = (size * 10) + int(char)
                    char = self.readone()
                if not char:  # EOF in the middle of the argument
                    break
                if char in tagend:
                    if tag == "&l":
                        copies = size
                    else:
                        # we just ignore the block's contents : they
                        # are consumed by reading and not by seeking,
                        # so unseekable streams work too.
                        if tag == "&n":
                            # we have to take care of the operation id
                            # byte which is before the string itself
                            size += 1
                        self.skip(size)

        # if pagecount is still 0, we will return the number
        # of resets instead of the number of form feed characters.
        # but the number of resets is always at least 2 with a valid
        # pcl file : one at the very start and one at the very end
        # of the job's data. So we subtract 2 from the number of
        # resets. And since on our test data we needed to subtract
        # 1 more, we finally subtract 3, and will test several
        # PCL files with this. If resets <= 2, then the file is
        # probably not a valid PCL file, so we return 0
        if pagecount:
            return copies * pagecount
        if resets > 2:
            return copies * (resets - 3)
        return 0
188
class PCLXLAnalyzer:
    """Experimental page counter for PCLXL (aka PCL6) documents.

    The class is currently disabled : its constructor always raises
    TypeError. The remaining code is kept ready for when the guard
    at the top of __init__ is removed.
    """

    # struct format characters for the sizes allowed for an array length.
    ARRAYSIZEFORMATS = {1: "B", 2: "H", 4: "I"}

    def __init__(self, infile):
        """Initialize PCLXL Analyzer.

        infile -- file-like object providing readline(), read() and tell().

        Raises TypeError : always for now, since PCLXL is not supported
        yet. Without the guard, TypeError would also be raised when no
        " HP-PCL XL;" stream header line is found.
        """
        raise TypeError("PCLXL (aka PCL6) is not supported yet.")
        # NOTE : everything below is deliberately unreachable until
        # the guard above is removed.
        self.infile = infile
        self.islittleendian = None
        found = 0
        while not found:
            line = self.infile.readline()
            if not line:
                break
            if line[1:12] == " HP-PCL XL;":
                found = 1
                # The character before the header gives the endianness.
                if line[0] == ")":
                    self.littleendian()
                elif line[0] == "(":
                    self.bigendian()
        if not found:
            raise TypeError("This file doesn't seem to be PCLXL (aka PCL6)")
        self._buildTags()

    def _buildTags(self):
        """Builds the table which maps each tag byte to its handling.

        An entry is either None (tag ignored), an integer (number of
        data bytes to skip after the tag) or a method : the method is
        called and may return a number of bytes to skip.
        """
        self.tags = [None] * 256
        self.tags[0x28] = self.bigendian    # big endian
        self.tags[0x29] = self.littleendian # little endian
        self.tags[0x43] = self.beginPage    # BeginPage
        self.tags[0x44] = self.endPage      # EndPage

        self.tags[0xc0] = 1 # ubyte
        self.tags[0xc1] = 2 # uint16
        self.tags[0xc2] = 4 # uint32
        self.tags[0xc3] = 2 # sint16
        self.tags[0xc4] = 4 # sint32
        self.tags[0xc5] = 4 # real32

        self.tags[0xc8] = self.array_8  # ubyte_array
        self.tags[0xc9] = self.array_16 # uint16_array
        self.tags[0xca] = self.array_32 # uint32_array
        self.tags[0xcb] = self.array_16 # sint16_array
        self.tags[0xcc] = self.array_32 # sint32_array
        self.tags[0xcd] = self.array_32 # real32_array

        self.tags[0xd0] = 2 # ubyte_xy
        self.tags[0xd1] = 4 # uint16_xy
        self.tags[0xd2] = 8 # uint32_xy
        self.tags[0xd3] = 4 # sint16_xy
        self.tags[0xd4] = 8 # sint32_xy
        self.tags[0xd5] = 8 # real32_xy

        # The box data types use tags 0xe0 to 0xe5 : registering them
        # on 0xd0 to 0xd5 would overwrite the xy sizes set just above.
        self.tags[0xe0] = 4  # ubyte_box
        self.tags[0xe1] = 8  # uint16_box
        self.tags[0xe2] = 16 # uint32_box
        self.tags[0xe3] = 8  # sint16_box
        self.tags[0xe4] = 16 # sint32_box
        self.tags[0xe5] = 16 # real32_box

        self.tags[0xf8] = 1 # attr_ubyte
        self.tags[0xf9] = 2 # attr_uint16

        self.tags[0xfa] = self.embeddedData      # dataLength
        self.tags[0xfb] = self.embeddedDataSmall # dataLengthByte

    def debug(self, msg):
        """Outputs a debug message on stderr."""
        sys.stderr.write("%s\n" % msg)
        sys.stderr.flush()

    def beginPage(self):
        """Indicates the beginning of a new page."""
        self.pagecount += 1
        self.debug("Begin page %i at %s" % (self.pagecount, self.infile.tell()))

    def endPage(self):
        """Indicates the end of a page."""
        self.debug("End page %i at %s" % (self.pagecount, self.infile.tell()))

    def handleArray(self, itemsize):
        """Handles arrays.

        Reads the tag giving the type of the array's length, then the
        length itself, and returns the size in bytes of the array's
        contents, i.e. length * itemsize.

        Raises TypeError if the length's type is not a 1, 2 or 4 bytes
        wide value, or if the stream ends before it.
        """
        datatype = self.infile.read(1)
        if not datatype:
            raise TypeError("Error on array size at %s" % self.infile.tell())
        length = self.tags[ord(datatype)]
        # Validate before reading : the table entry may as well be
        # None or a method if the stream is not what we expect.
        sizeformat = self.ARRAYSIZEFORMATS.get(length)
        if sizeformat is None:
            raise TypeError("Error on array size at %s" % self.infile.tell())
        sarraysize = self.infile.read(length)
        if self.islittleendian:
            fmt = "<" + sizeformat
        else:
            fmt = ">" + sizeformat
        arraysize = struct.unpack(fmt, sarraysize)[0]
        return arraysize * itemsize

    def array_8(self):
        """Handles arrays of 1 byte wide items."""
        return self.handleArray(1)

    def array_16(self):
        """Handles arrays of 2 bytes wide items."""
        return self.handleArray(2)

    def array_32(self):
        """Handles arrays of 4 bytes wide items."""
        return self.handleArray(4)

    def embeddedDataSmall(self):
        """Handle small amounts of data.

        Returns the length of the data, read from a single byte.
        """
        return ord(self.infile.read(1))

    def embeddedData(self):
        """Handle normal amounts of data.

        Returns the length of the data, read from a 4 bytes unsigned
        integer in the current endianness.
        """
        if self.islittleendian:
            fmt = "<I"
        else:
            fmt = ">I"
        return struct.unpack(fmt, self.infile.read(4))[0]

    def littleendian(self):
        """Toggles to little endianness."""
        self.islittleendian = 1 # little endian

    def bigendian(self):
        """Toggles to big endianness."""
        self.islittleendian = 0 # big endian

    def getJobSize(self):
        """Counts pages in a PCLXL (PCL6) document.

        Reads the stream one tag at a time, skips the data attached
        to the known tags, and returns the number of BeginPage tags
        encountered.
        """
        self.pagecount = 0
        while True:
            char = self.infile.read(1)
            if not char:
                break
            handler = self.tags[ord(char)]
            if handler is None:
                continue # tag without any handling : ignored
            if callable(handler):
                # may return None, meaning there's nothing to skip
                length = handler()
            else:
                length = handler
            if length:
                self.infile.read(length)
        return self.pagecount
333
class PDLAnalyzer:
    """Generic PDL Analyzer class.

    Detects the Page Description Language of a print job from its
    first block of data and delegates the page counting to the
    matching analyzer class.
    """

    # Universal Exit Language sequence, expected in the first 128
    # characters of jobs which begin with PJL commands.
    UEL = "\033%-12345X"

    def __init__(self, filename):
        """Initializes the PDL analyzer.

        filename -- name of the file to analyze, or "-" for stdin.
        """
        self.filename = filename

    def getJobSize(self):
        """Returns the job's size.

        The size is whatever the getJobSize() method of the detected
        handler returns. The data stream is closed in all cases, even
        when the detection or the analysis fails.

        Raises TypeError if the file format is unknown.
        """
        self.openFile()
        try:
            pdlhandler = self.detectPDLHandler()
            if pdlhandler is None:
                raise TypeError("ERROR : Unknown file format for %s" % self.filename)
            return pdlhandler(self.infile).getJobSize()
        finally:
            self.closeFile()

    def openFile(self):
        """Opens the job's data stream for reading."""
        if self.filename == "-":
            # we must read from stdin
            # but since stdin is not seekable, we have to use a temporary
            # file instead.
            self.infile = tempfile.TemporaryFile()
            while True:
                data = sys.stdin.read(256 * 1024)
                if not data:
                    break
                self.infile.write(data)
            self.infile.flush()
            self.infile.seek(0)
        else:
            # normal file
            self.infile = open(self.filename, "rb")

    def closeFile(self):
        """Closes the job's data stream."""
        self.infile.close()

    def _hasPJLHeader(self, data):
        """Returns 1 if the UEL sequence is in the first 128 characters of data, else 0."""
        if data[:128].find(self.UEL) != -1:
            return 1
        return 0

    def _containsOneOf(self, data, markers):
        """Returns 1 if data contains at least one of the markers, else 0."""
        for marker in markers:
            if data.find(marker) != -1:
                return 1
        return 0

    def isPostScript(self, data):
        """Returns 1 if data is PostScript, else 0."""
        if data.startswith("%!") or \
           data.startswith("\004%!") or \
           data.startswith("\033%-12345X%!PS"):
            return 1
        if self._hasPJLHeader(data) and \
           self._containsOneOf(data, ("LANGUAGE=POSTSCRIPT",
                                      "LANGUAGE = POSTSCRIPT",
                                      "LANGUAGE = Postscript")):
            return 1
        return 0

    def isPCL(self, data):
        """Returns 1 if data is PCL, else 0."""
        if data.startswith("\033E\033"):
            return 1
        if self._hasPJLHeader(data) and \
           self._containsOneOf(data, ("LANGUAGE=PCL",
                                      "LANGUAGE = PCL",
                                      "LANGUAGE = Pcl")):
            return 1
        return 0

    def isPCLXL(self, data):
        """Returns 1 if data is PCLXL aka PCL6, else 0."""
        if self._hasPJLHeader(data) and \
           (data.find(" HP-PCL XL;") != -1) and \
           self._containsOneOf(data, ("LANGUAGE=PCLXL",
                                      "LANGUAGE = PCLXL")):
            return 1
        return 0

    def detectPDLHandler(self):
        """Tries to autodetect the document format.

        Returns the correct PDL handler class or None if format is unknown
        """
        # Try to detect file type by reading first block of datas
        self.infile.seek(0)
        firstblock = self.infile.read(1024)
        self.infile.seek(0)
        # PCLXL must be tested before PCL, because "LANGUAGE=PCL"
        # is also found in "LANGUAGE=PCLXL".
        if self.isPostScript(firstblock):
            return PostScriptAnalyzer
        elif self.isPCLXL(firstblock):
            return PCLXLAnalyzer
        elif self.isPCL(firstblock):
            return PCLAnalyzer
        return None

Note: See TracBrowser for help on using the browser.

Context Navigation

root / pykota / trunk / pykota / pdlanalyzer.py @ 1485

Download in other formats: