Context Navigation

← Previous Changeset
Next Changeset →

Changeset 1482

Timestamp:

05/18/04 11:59:54 (21 years ago)

Author:

jalet

Message:

pkpgcounter is now just a wrapper around the PDLAnalyzer class

Location:

pykota/trunk

Files:

: 1 added
: 3 modified

bin/pkpgcounter (modified) (3 diffs)
NEWS (modified) (1 diff)
pykota/Makefile.am (modified) (1 diff)
pykota/pdlanalyzer.py (added)

Legend:

: Unmodified
: Added
: Removed

pykota/trunk/bin/pkpgcounter

r1463	r1482
24	24	#
25	25	# $Log$
	26	# Revision 1.10 2004/05/18 09:59:54 jalet
	27	# pkpgcounter is now just a wrapper around the PDLAnalyzer class
	28	#
26	29	# Revision 1.9 2004/05/10 07:23:21 jalet
27	30	# pykotme now uses pkpgcounter to compute the job's size.
…	…
56	59
57	60	import sys
58		import os
59		import mmap
60		import struct
61		import tempfile
62
def debug(msg) :
    """Outputs a debug message on stderr.

    msg can be any object : it is formatted with %s and followed by
    a newline, then stderr is flushed.
    """
    # msg is wrapped in a 1-tuple so that a tuple argument is printed
    # as is, instead of being unpacked as several format arguments.
    sys.stderr.write("%s\n" % (msg,))
    sys.stderr.flush()
67
def ispostscript(data) :
    """Returns 1 if data is PostScript, else 0.

    data is the beginning of the job, as a byte string (a plain str
    with Python 2).

    The job is seen as PostScript if it begins with the "%!" magic,
    optionally preceded by a Ctrl-D or by the PJL Universal Exit
    Language (UEL) sequence, or if an UEL sequence is present in the
    first 128 bytes and a PJL LANGUAGE=POSTSCRIPT statement is present
    anywhere in data.
    """
    uel = b"\033%-12345X"
    if data.startswith(b"%!") \
       or data.startswith(b"\004%!") \
       or data.startswith(uel + b"%!PS") :
        return 1
    if uel in data[:128] :
        # Drivers don't agree on the case of the language name
        # (POSTSCRIPT, Postscript, PostScript...), so we compare in
        # upper case instead of listing some known spellings.
        pjl = data.upper()
        if (b"LANGUAGE=POSTSCRIPT" in pjl) \
           or (b"LANGUAGE = POSTSCRIPT" in pjl) :
            return 1
    return 0
80
def ispcl(data) :
    """Returns 1 if data is PCL, else 0.

    data is the beginning of the job, as a byte string (a plain str
    with Python 2).

    The job is seen as PCL if it begins with <ESC>E<ESC>, or if an
    UEL sequence is present in the first 128 bytes and a PJL
    LANGUAGE=PCL statement is present anywhere in data.

    Since "LANGUAGE=PCL" is the beginning of "LANGUAGE=PCLXL", this
    function also returns 1 for such PCLXL jobs : callers which need
    to tell them apart must test for PCLXL first.
    """
    if data.startswith(b"\033E\033") :
        return 1
    if b"\033%-12345X" in data[:128] :
        # Drivers don't agree on the case of the language name
        # (PCL, Pcl...), so we compare in upper case instead of
        # listing some known spellings.
        pjl = data.upper()
        if (b"LANGUAGE=PCL" in pjl) or (b"LANGUAGE = PCL" in pjl) :
            return 1
    return 0
91
def ispclxl(data) :
    """Returns 1 if data is PCLXL aka PCL6, else 0.

    data is the beginning of the job, as a byte string (a plain str
    with Python 2).

    All three conditions below must be met :

      - an UEL sequence is present in the first 128 bytes ;
      - the " HP-PCL XL;" stream header is present in data ;
      - a PJL LANGUAGE=PCLXL statement is present in data.
    """
    if b"\033%-12345X" not in data[:128] :
        return 0
    # The stream header is not PJL : it is matched exactly as is.
    if b" HP-PCL XL;" not in data :
        return 0
    # Drivers don't agree on the case of the language name, so we
    # compare in upper case instead of listing some known spellings.
    pjl = data.upper()
    if (b"LANGUAGE=PCLXL" in pjl) or (b"LANGUAGE = PCLXL" in pjl) :
        return 1
    return 0
101
def postscript(infile) :
    """Count pages in a DSC compliant PostScript document.

    infile is an iterable of lines, as byte strings : typically a
    file opened in binary mode. It is read up to its end.

    Returns the number of lines which begin with "%%Page: ".
    Documents which don't contain such DSC comments give 0.
    """
    pagecount = 0
    for line in infile :
        if line.startswith(b"%%Page: ") :
            pagecount += 1
    return pagecount
113
def pcl(infile) :
    """Count pages in a PCL5 document.

    infile is an opened file : only its fileno() method is used, to
    map the file's contents in memory. The file itself is left opened.

    Returns the number of copies multiplied by the number of form feed
    characters found outside of the data blocks listed below. If no
    form feed was found, the number of printer resets is used instead
    (see the comment at the end of this function). An empty or
    truncated file doesn't raise any exception.
    """
    #
    # Algorithm from pclcount
    # (c) 2003, by Eduardo Gielamo Oliveira & Rodolfo Broco Manin
    # published under the terms of the GNU General Public Licence v2.
    #
    # Backported from C to Python by Jerome Alet, then enhanced
    # with more PCL tags detected. I think all the necessary PCL tags
    # are recognized to correctly handle PCL5 files wrt their number
    # of pages. The documentation used for this was :
    #
    # HP PCL/PJL Reference Set
    # PCL5 Printer Language Technical Quick Reference Guide
    # http://h20000.www2.hp.com/bc/docs/support/SupportManual/bpl13205/bpl13205.pdf
    #
    infileno = infile.fileno()
    filesize = os.fstat(infileno).st_size
    if not filesize :
        # mmap refuses to map an empty file, and there's no page in it.
        return 0

    # For each supported two bytes tag, the characters which may end
    # its numeric argument.
    #
    # <ESC>*b###W -> Start of a raster data row/block
    # <ESC>*b###V -> Start of a raster data plane
    # <ESC>*c###W -> Start of a user defined pattern
    # <ESC>*i###W -> Start of a viewing illuminant block
    # <ESC>*l###W -> Start of a color lookup table
    # <ESC>*m###W -> Start of a download dither matrix block
    # <ESC>*v###W -> Start of a configure image data block
    # <ESC>(s###W -> Start of a characters description block
    # <ESC>)s###W -> Start of a fonts description block
    # <ESC>(f###W -> Start of a symbol set block
    # <ESC>&b###W -> Start of configuration data block
    # <ESC>&l###X -> Number of copies
    # <ESC>&n###W -> Starts an alphanumeric string ID block
    # <ESC>&p###X -> Start of a non printable characters block
    #
    tagsends = { b"&n" : b"W",
                 b"&b" : b"W",
                 b"*i" : b"W",
                 b"*l" : b"W",
                 b"*m" : b"W",
                 b"*v" : b"W",
                 b"*c" : b"W",
                 b"(f" : b"W",
                 b"*b" : b"VW",
                 b"(s" : b"W",
                 b")s" : b"W",
                 b"&p" : b"X",
                 b"&l" : b"X" }
    copies = 1
    pagecount = resets = 0
    position = 0
    data = mmap.mmap(infileno, filesize, access=mmap.ACCESS_READ)
    try :
        # One byte slices are used everywhere instead of indexing :
        # they give an empty string at EOF instead of raising an
        # exception, and a byte string with both Python 2 and 3.
        while position < filesize :
            char = data[position:position+1]
            position += 1
            if char == b"\014" :
                pagecount += 1
            elif char == b"\033" :
                tagstart = data[position:position+1]
                position += 1
                if not tagstart :
                    break           # file truncated just after <ESC>
                if tagstart in b"E9=YZ" :   # one byte PCL tag
                    if tagstart == b"E" :
                        resets += 1
                    continue        # skip to next tag
                tag = tagstart + data[position:position+1]
                position += 1
                tagend = tagsends.get(tag)
                if tagend is None :
                    continue        # Unsupported PCL tag
                # Now read the numeric argument
                size = 0
                while 1 :
                    char = data[position:position+1]
                    position += 1
                    if not char.isdigit() :
                        break       # terminator, or EOF
                    size = (size * 10) + int(char)
                if char and (char in tagend) :
                    if tag == b"&l" :
                        copies = size
                    else :
                        # we just ignore the block's contents.
                        if tag == b"&n" :
                            # we have to take care of the operation id
                            # byte which is before the string itself
                            size += 1
                        position += size
    finally :
        data.close()

    # if pagecount is still 0, we will return the number
    # of resets instead of the number of form feed characters.
    # but the number of resets is always at least 2 with a valid
    # pcl file : one at the very start and one at the very end
    # of the job's data. So we subtract 2 from the number of
    # resets. And since on our test data we needed to subtract
    # 1 more, we finally subtract 3, and will test several
    # PCL files with this. If there are no more than 2 resets, then
    # the file is probably not a valid PCL file, so we return 0
    if pagecount :
        return copies * pagecount
    if resets > 2 :
        return copies * (resets - 3)
    return 0
221
class PCLXLParser :
    """Counts pages in a PCLXL (aka PCL6) binary stream.

    The stream is read tag by tag : data tags are skipped according
    to their size, and each BeginPage operator increases the number
    of pages.
    """
    def __init__(self, infile) :
        """Initialize PCLXL parser.

        infile is a file-like object opened in binary mode, which
        must have the readline(), read() and tell() methods. Lines
        are skipped up to and including the " HP-PCL XL;" stream
        header, whose first character gives the initial endianness.

        Raises TypeError if no stream header is found.
        """
        self.infile = infile
        self.islittleendian = None
        # The counter doesn't have the same name as the pagecount()
        # method, otherwise it would hide this method in the instance.
        self._pagecount = 0
        found = 0
        while not found :
            line = self.infile.readline()
            if not line :
                break
            if line[1:12] == b" HP-PCL XL;" :
                found = 1
                # one byte slice : a byte string with Python 2 and 3
                binding = line[:1]
                if binding == b")" :
                    self.littleendian()
                elif binding == b"(" :
                    self.bigendian()
        if not found :
            raise TypeError("This file doesn't seem to be PCLXL (aka PCL6)")

        # For each possible tag value, either :
        #   None       -> nothing to do for this tag
        #   an integer -> number of data bytes to skip after the tag
        #   a method   -> called without argument, returns the number
        #                 of bytes to skip, or None
        self.tags = [None] * 256
        self.tags[0x28] = self.bigendian        # big endian
        self.tags[0x29] = self.littleendian     # little endian
        self.tags[0x43] = self.beginPage        # BeginPage
        self.tags[0x44] = self.endPage          # EndPage

        self.tags[0xc0] = 1     # ubyte
        self.tags[0xc1] = 2     # uint16
        self.tags[0xc2] = 4     # uint32
        self.tags[0xc3] = 2     # sint16
        self.tags[0xc4] = 4     # sint32
        self.tags[0xc5] = 4     # real32

        self.tags[0xc8] = self.array_8      # ubyte_array
        self.tags[0xc9] = self.array_16     # uint16_array
        self.tags[0xca] = self.array_32     # uint32_array
        self.tags[0xcb] = self.array_16     # sint16_array
        self.tags[0xcc] = self.array_32     # sint32_array
        self.tags[0xcd] = self.array_32     # real32_array

        self.tags[0xd0] = 2     # ubyte_xy
        self.tags[0xd1] = 4     # uint16_xy
        self.tags[0xd2] = 8     # uint32_xy
        self.tags[0xd3] = 4     # sint16_xy
        self.tags[0xd4] = 8     # sint32_xy
        self.tags[0xd5] = 8     # real32_xy

        # The box tags are 0xe0 to 0xe5 : they must not replace
        # the xy tags above.
        self.tags[0xe0] = 4     # ubyte_box
        self.tags[0xe1] = 8     # uint16_box
        self.tags[0xe2] = 16    # uint32_box
        self.tags[0xe3] = 8     # sint16_box
        self.tags[0xe4] = 16    # sint32_box
        self.tags[0xe5] = 16    # real32_box

        self.tags[0xf8] = 1     # attr_ubyte
        self.tags[0xf9] = 2     # attr_uint16

        self.tags[0xfa] = self.embeddedData         # dataLength
        self.tags[0xfb] = self.embeddedDataSmall    # dataLengthByte

    def _debug(self, msg) :
        """Outputs a debug message on stderr."""
        # kept in the class so that the parser can be used on its own.
        sys.stderr.write("%s\n" % (msg,))
        sys.stderr.flush()

    def beginPage(self) :
        """Indicates the beginning of a new page."""
        self._pagecount += 1
        self._debug("Begin page %i at %s" % (self._pagecount, self.infile.tell()))

    def endPage(self) :
        """Indicates the end of a page."""
        self._debug("End page %i at %s" % (self._pagecount, self.infile.tell()))

    def handleArray(self, itemsize) :
        """Handles arrays.

        Reads the data type tag and the value which give the number
        of items, and returns the size in bytes of the array's
        contents, with items of itemsize bytes each.

        Raises TypeError if the number of items is neither 1, 2 nor
        4 bytes long, or if the stream is truncated.
        """
        datatype = self.infile.read(1)
        length = None
        if datatype :
            length = self.tags[ord(datatype)]
        # The length is checked before being used to read from the file.
        if length == 1 :
            code = "B"
        elif length == 2 :
            code = "H"
        elif length == 4 :
            code = "I"
        else :
            raise TypeError("Error on array size at %s" % self.infile.tell())
        sarraysize = self.infile.read(length)
        if len(sarraysize) != length :
            raise TypeError("Error on array size at %s" % self.infile.tell())
        if self.islittleendian :
            fmt = "<" + code
        else :
            fmt = ">" + code
        arraysize = struct.unpack(fmt, sarraysize)[0]
        return arraysize * itemsize

    def array_8(self) :
        """Handles arrays of one byte items."""
        return self.handleArray(1)

    def array_16(self) :
        """Handles arrays of two bytes items."""
        return self.handleArray(2)

    def array_32(self) :
        """Handles arrays of four bytes items."""
        return self.handleArray(4)

    def embeddedDataSmall(self) :
        """Handle small amounts of data.

        Returns the size of the data, read from a single byte.
        Raises TypeError if the stream is truncated.
        """
        raw = self.infile.read(1)
        if not raw :
            raise TypeError("Error on embedded data size at %s" % self.infile.tell())
        return ord(raw)

    def embeddedData(self) :
        """Handle normal amounts of data.

        Returns the size of the data, read from four bytes in the
        current endianness. Raises TypeError if the stream is truncated.
        """
        raw = self.infile.read(4)
        if len(raw) != 4 :
            raise TypeError("Error on embedded data size at %s" % self.infile.tell())
        if self.islittleendian :
            fmt = "<I"
        else :
            fmt = ">I"
        return struct.unpack(fmt, raw)[0]

    def littleendian(self) :
        """Toggles to little endianness."""
        self.islittleendian = 1 # little endian

    def bigendian(self) :
        """Toggles to big endianness."""
        self.islittleendian = 0 # big endian

    def pagecount(self) :
        """Counts pages in a PCLXL (PCL6) document.

        Reads the stream from the current position up to its end,
        and returns the number of BeginPage operators encountered since
        the parser's creation : calling this method again returns
        the same value.
        """
        while 1 :
            char = self.infile.read(1)
            if not char :
                break
            action = self.tags[ord(char)]
            if action is None :
                continue    # nothing to do for this tag
            if callable(action) :
                length = action()
            else :
                length = action
            if length :
                # we just ignore the tag's data
                self.infile.read(length)
        return self._pagecount
360
def pclxl(infile) :
    """Returns the number of pages of the PCL6 (aka PCLXL) document
    read from infile, as computed by PCLXLParser.
    """
    return PCLXLParser(infile).pagecount()
365
def smartpagecounter(filename) :
    """Autodetects file format and returns number of pages.

    filename is the name of the file to analyze, or "-" to read
    the job's data from stdin.

    Returns the job's size in pages, or 0 after an error message
    on stderr if the file format is unknown.

    Raises TypeError for PCLXL (aka PCL6) data, which is not
    supported yet. The input file is always closed, even when
    an exception is raised.
    """
    fromstdin = (filename == "-")
    if fromstdin :
        # we must read from stdin
        # but since stdin is not seekable, we have to use a temporary
        # file instead.
        infile = tempfile.TemporaryFile()
    else :
        # normal file
        infile = open(filename, "rb")

    try :
        if fromstdin :
            while 1 :
                data = sys.stdin.read(256 * 1024)
                if not data :
                    break
                infile.write(data)
            infile.flush()
            infile.seek(0)

        # Try to detect file type by reading first block of data
        firstblock = infile.read(1024)
        infile.seek(0)
        if ispostscript(firstblock) :
            size = postscript(infile)
        elif ispclxl(firstblock) :
            # pclxl(infile) is not called for now : the PCLXL parser
            # is not considered as ready yet.
            raise TypeError("PCLXL (aka PCL6) is not supported yet.")
        elif ispcl(firstblock) :
            size = pcl(infile)
        else :
            sys.stderr.write("ERROR : Unknown file format for %s\n" % filename)
            size = 0
    finally :
        infile.close()
    return size
	61	from pykota import pdlanalyzer
399	62
400	63	if __name__ == "__main__" :
…	…
405	68	for arg in sys.argv[1:] :
406	69	try :
407		totalsize += smartpagecounter(arg)
	70	parser = pdlanalyzer.PDLAnalyzer(arg)
	71	totalsize += parser.getJobSize()
408	72	except TypeError, msg :
409		debug(msg)
	73	sys.stderr.write("%s\n" % msg)
	74	sys.stderr.flush()
410	75	print "%s" % totalsize

pykota/trunk/NEWS

r1480	r1482
24	24	- 1.19alpha9 :
25	25
	26	- pkpgcounter is now just a wrapper around the PDLAnalyzer class
	27
26	28	- pykotaOptions LDAP attribute now accepts non-ascii characters
27	29	too.

pykota/trunk/pykota/Makefile.am

r1417	r1482
10	10	storage.py \
11	11	tool.py \
	12	pdlanalyzer.py \
12	13	version.py
13	14

Context Navigation

Changeset 1482

Legend:

pykota/trunk/bin/pkpgcounter

pykota/trunk/NEWS

pykota/trunk/pykota/Makefile.am

Download in other formats: