Context Navigation

pdlanalyzer.py @ 1485

Revision 1485, 15.5 kB (checked in by jalet, 20 years ago)
Speed improvement
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`

Rev	Line
[1482]	1	# PyKota
	2	# -*- coding: ISO-8859-15 -*-
	3	#
	4	# PyKota - Print Quotas for CUPS and LPRng
	5	#
	6	# (c) 2003-2004 Jerome Alet <alet@librelogiciel.com>
	7	# This program is free software; you can redistribute it and/or modify
	8	# it under the terms of the GNU General Public License as published by
	9	# the Free Software Foundation; either version 2 of the License, or
	10	# (at your option) any later version.
	11	#
	12	# This program is distributed in the hope that it will be useful,
	13	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	14	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	15	# GNU General Public License for more details.
	16	#
	17	# You should have received a copy of the GNU General Public License
	18	# along with this program; if not, write to the Free Software
	19	# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
	20	#
	21	# $Id$
	22	#
	23	# $Log$
[1485]	24	# Revision 1.2 2004/05/19 19:09:36 jalet
	25	# Speed improvement
	26	#
[1482]	27	# Revision 1.1 2004/05/18 09:59:54 jalet
	28	# pkpgcounter is now just a wrapper around the PDLAnalyzer class
	29	#
	30	#
	31	#
	32
	33	import sys
	34	import os
	35	import struct
	36	import tempfile
	37
	38	class PostScriptAnalyzer :
	39	def __init__(self, infile) :
	40	"""Initialize PostScript Analyzer."""
	41	self.infile = infile
	42
	43	def getJobSize(self) :
	44	"""Count pages in a DSC compliant PostScript document."""
	45	pagecount = 0
	46	pagenum = None
	47	while 1 :
	48	line = self.infile.readline()
	49	if not line :
	50	break
	51	if line.startswith("%%Page: ") :
	52	pagecount += 1
	53	return pagecount
	54
	55	class PCLAnalyzer :
	56	def __init__(self, infile) :
	57	"""Initialize PCL Analyzer."""
	58	self.infile = infile
	59
[1485]	60	def skip(self, nb) :
	61	"""Reads a new datablock."""
	62	newpos = self.pos + nb
	63	if newpos >= self.len :
	64	oldlen = self.len
	65	self.data = self.infile.read(1024*1024)
	66	self.len = len(self.data)
	67	if not self.len :
	68	return
	69	self.pos = newpos - oldlen
	70	else :
	71	self.pos = newpos
	72
	73	def readone(self) :
	74	"""Reads a new byte."""
	75	if self.pos < self.len :
	76	char = self.data[self.pos]
	77	else :
	78	self.data = self.infile.read(1024*1024)
	79	self.len = len(self.data)
	80	self.pos = 0
	81	if not self.len :
	82	return
	83	char = self.data[0]
	84	self.pos += 1
	85	return char
	86
[1482]	87	def getJobSize(self) :
	88	"""Count pages in a PCL5 document."""
	89	#
	90	# Algorithm from pclcount
	91	# (c) 2003, by Eduardo Gielamo Oliveira & Rodolfo Broco Manin
	92	# published under the terms of the GNU General Public Licence v2.
	93	#
	94	# Backported from C to Python by Jerome Alet, then enhanced
	95	# with more PCL tags detected. I think all the necessary PCL tags
	96	# are recognized to correctly handle PCL5 files wrt their number
	97	# of pages. The documentation used for this was :
	98	#
	99	# HP PCL/PJL Reference Set
	100	# PCL5 Printer Language Technical Quick Reference Guide
	101	# http://h20000.www2.hp.com/bc/docs/support/SupportManual/bpl13205/bpl13205.pdf
	102	#
	103	tagsends = { "&n" : "W",
	104	"&b" : "W",
	105	"*i" : "W",
	106	"*l" : "W",
	107	"*m" : "W",
	108	"*v" : "W",
	109	"*c" : "W",
	110	"(f" : "W",
	111	"*b" : "VW",
	112	"(s" : "W",
	113	")s" : "W",
	114	"&p" : "X",
	115	"&l" : "X" }
[1485]	116	self.data = []
	117	self.pos = self.len = 0
[1482]	118	copies = 1
	119	pagecount = resets = 0
	120	tag = None
	121	while 1 :
[1485]	122	char = self.readone()
[1482]	123	if not char : # EOF ?
	124	break
	125	if char == "\014" :
	126	pagecount += 1
	127	elif char == "\033" :
	128	#
	129	# <ESC>*b###W -> Start of a raster data row/block
	130	# <ESC>*b###V -> Start of a raster data plane
	131	# <ESC>*c###W -> Start of a user defined pattern
	132	# <ESC>*i###W -> Start of a viewing illuminant block
	133	# <ESC>*l###W -> Start of a color lookup table
	134	# <ESC>*m###W -> Start of a download dither matrix block
	135	# <ESC>*v###W -> Start of a configure image data block
	136	# <ESC>(s###W -> Start of a characters description block
	137	# <ESC>)s###W -> Start of a fonts description block
	138	# <ESC>(f###W -> Start of a symbol set block
	139	# <ESC>&b###W -> Start of configuration data block
	140	# <ESC>&l###X -> Number of copies
	141	# <ESC>&n###W -> Starts an alphanumeric string ID block
	142	# <ESC>&p###X -> Start of a non printable characters block
	143	#
[1485]	144	tagstart = self.readone()
[1482]	145	if tagstart in "E9=YZ" : # one byte PCL tag
	146	if tagstart == "E" :
	147	resets += 1
	148	continue # skip to next tag
[1485]	149	tag = tagstart + self.readone()
[1482]	150	try :
	151	tagend = tagsends[tag]
	152	except KeyError :
	153	pass # Unsupported PCL tag
	154	else :
	155	# Now read the numeric argument
	156	size = 0
	157	while 1 :
[1485]	158	char = self.readone()
[1482]	159	if not char.isdigit() :
	160	break
	161	size = (size * 10) + int(char)
	162	if char in tagend :
	163	if tag == "&l" :
	164	copies = size
	165	else :
	166	# doing a read will prevent the seek
	167	# for unseekable streams.
	168	# we just ignore the block anyway.
	169	if tag == "&n" :
	170	# we have to take care of the operation id byte
	171	# which is before the string itself
	172	size += 1
[1485]	173	self.skip(size)
[1482]	174
	175	# if pagecount is still 0, we will return the number
	176	# of resets instead of the number of form feed characters.
	177	# but the number of resets is always at least 2 with a valid
	178	# pcl file : one at the very start and one at the very end
	179	# of the job's data. So we substract 2 from the number of
	180	# resets. And since on our test data we needed to substract
	181	# 1 more, we finally substract 3, and will test several
	182	# PCL files with this. If resets < 2, then the file is
	183	# probably not a valid PCL file, so we return 0
	184	if not pagecount :
	185	return copies * (resets - 3) * (resets > 2)
	186	else :
	187	return copies * pagecount
	188
	189	class PCLXLAnalyzer :
	190	def __init__(self, infile) :
	191	"""Initialize PCLXL Analyzer."""
	192	raise TypeError, "PCLXL (aka PCL6) is not supported yet."
	193	self.infile = infile
	194	self.islittleendian = None
	195	found = 0
	196	while not found :
	197	line = self.infile.readline()
	198	if not line :
	199	break
	200	if line[1:12] == " HP-PCL XL;" :
	201	found = 1
	202	if line[0] == ")" :
	203	self.littleendian()
	204	elif line[0] == "(" :
	205	self.bigendian()
	206	if not found :
	207	raise TypeError, "This file doesn't seem to be PCLXL (aka PCL6)"
	208	else :
	209	self.tags = [None] * 256
	210	self.tags[0x28] = self.bigendian # big endian
	211	self.tags[0x29] = self.littleendian # big endian
	212	self.tags[0x43] = self.beginPage # BeginPage
	213	self.tags[0x44] = self.endPage # EndPage
	214
	215	self.tags[0xc0] = 1 # ubyte
	216	self.tags[0xc1] = 2 # uint16
	217	self.tags[0xc2] = 4 # uint32
	218	self.tags[0xc3] = 2 # sint16
	219	self.tags[0xc4] = 4 # sint32
	220	self.tags[0xc5] = 4 # real32
	221
	222	self.tags[0xc8] = self.array_8 # ubyte_array
	223	self.tags[0xc9] = self.array_16 # uint16_array
	224	self.tags[0xca] = self.array_32 # uint32_array
	225	self.tags[0xcb] = self.array_16 # sint16_array
	226	self.tags[0xcc] = self.array_32 # sint32_array
	227	self.tags[0xcd] = self.array_32 # real32_array
	228
	229	self.tags[0xd0] = 2 # ubyte_xy
	230	self.tags[0xd1] = 4 # uint16_xy
	231	self.tags[0xd2] = 8 # uint32_xy
	232	self.tags[0xd3] = 4 # sint16_xy
	233	self.tags[0xd4] = 8 # sint32_xy
	234	self.tags[0xd5] = 8 # real32_xy
	235
	236	self.tags[0xd0] = 4 # ubyte_box
	237	self.tags[0xd1] = 8 # uint16_box
	238	self.tags[0xd2] = 16 # uint32_box
	239	self.tags[0xd3] = 8 # sint16_box
	240	self.tags[0xd4] = 16 # sint32_box
	241	self.tags[0xd5] = 16 # real32_box
	242
	243	self.tags[0xf8] = 1 # attr_ubyte
	244	self.tags[0xf9] = 2 # attr_uint16
	245
	246	self.tags[0xfa] = self.embeddedData # dataLength
	247	self.tags[0xfb] = self.embeddedDataSmall # dataLengthByte
	248
	249	def debug(self, msg) :
	250	"""Outputs a debug message on stderr."""
	251	sys.stderr.write("%s\n" % msg)
	252	sys.stderr.flush()
	253
	254	def beginPage(self) :
	255	"""Indicates the beginning of a new page."""
	256	self.pagecount += 1
	257	self.debug("Begin page %i at %s" % (self.pagecount, self.infile.tell()))
	258
	259	def endPage(self) :
	260	"""Indicates the end of a page."""
	261	self.debug("End page %i at %s" % (self.pagecount, self.infile.tell()))
	262
	263	def handleArray(self, itemsize) :
	264	"""Handles arrays."""
	265	datatype = self.infile.read(1)
	266	length = self.tags[ord(datatype)]
	267	sarraysize = self.infile.read(length)
	268	if self.islittleendian :
	269	fmt = "<"
	270	else :
	271	fmt = ">"
	272	if length == 1 :
	273	fmt += "B"
	274	elif length == 2 :
	275	fmt += "H"
	276	elif length == 4 :
	277	fmt += "I"
	278	else :
	279	raise TypeError, "Error on array size at %s" % self.infile.tell()
	280	arraysize = struct.unpack(fmt, sarraysize)[0]
	281	return arraysize * itemsize
	282
	283	def array_8(self) :
	284	"""Handles byte arrays."""
	285	return self.handleArray(1)
	286
	287	def array_16(self) :
	288	"""Handles byte arrays."""
	289	return self.handleArray(2)
	290
	291	def array_32(self) :
	292	"""Handles byte arrays."""
	293	return self.handleArray(4)
	294
	295	def embeddedDataSmall(self) :
	296	"""Handle small amounts of data."""
	297	return ord(self.infile.read(1))
	298
	299	def embeddedData(self) :
	300	"""Handle normal amounts of data."""
	301	if self.islittleendian :
	302	fmt = "<I"
	303	else :
	304	fmt = ">I"
	305	return struct.unpack(fmt, self.infile.read(4))[0]
	306
	307	def littleendian(self) :
	308	"""Toggles to little endianness."""
	309	self.islittleendian = 1 # little endian
	310
	311	def bigendian(self) :
	312	"""Toggles to big endianness."""
	313	self.islittleendian = 0 # big endian
	314
	315	def getJobSize(self) :
	316	"""Counts pages in a PCLXL (PCL6) document."""
	317	self.pagecount = 0
	318	while 1 :
	319	pos = self.infile.tell()
	320	char = self.infile.read(1)
	321	if not char :
	322	break
	323	index = ord(char)
	324	length = self.tags[index]
	325	if length is not None :
	326	if not length :
	327	self.debug("Unrecognized tag 0x%02x at %s\n" % (index, self.infile.tell()))
	328	elif callable(length) :
	329	length = length()
	330	if length :
	331	self.infile.read(length)
	332	return self.pagecount
	333
	334	class PDLAnalyzer :
	335	"""Generic PDL Analyzer class."""
	336	def __init__(self, filename) :
	337	"""Initializes the PDL analyzer."""
	338	self.filename = filename
	339
	340	def getJobSize(self) :
	341	"""Returns the job's size."""
	342	self.openFile()
	343	pdlhandler = self.detectPDLHandler()
	344	if pdlhandler is not None :
	345	try :
	346	size = pdlhandler(self.infile).getJobSize()
	347	finally :
	348	self.closeFile()
	349	return size
	350	else :
	351	self.closeFile()
	352	raise TypeError, "ERROR : Unknown file format for %s" % self.filename
	353
	354	def openFile(self) :
	355	"""Opens the job's data stream for reading."""
	356	if self.filename == "-" :
	357	# we must read from stdin
	358	# but since stdin is not seekable, we have to use a temporary
	359	# file instead.
	360	self.infile = tempfile.TemporaryFile()
	361	while 1 :
	362	data = sys.stdin.read(256 * 1024)
	363	if not data :
	364	break
	365	self.infile.write(data)
	366	self.infile.flush()
	367	self.infile.seek(0)
	368	else :
	369	# normal file
	370	self.infile = open(self.filename, "rb")
	371
	372	def closeFile(self) :
	373	"""Closes the job's data stream."""
	374	self.infile.close()
	375
	376	def isPostScript(self, data) :
	377	"""Returns 1 if data is PostScript, else 0."""
	378	if data.startswith("%!") or \
	379	data.startswith("\004%!") or \
	380	data.startswith("\033%-12345X%!PS") or \
	381	((data[:128].find("\033%-12345X") != -1) and \
	382	((data.find("LANGUAGE=POSTSCRIPT") != -1) or \
	383	(data.find("LANGUAGE = POSTSCRIPT") != -1) or \
	384	(data.find("LANGUAGE = Postscript") != -1))) :
	385	return 1
	386	else :
	387	return 0
	388
	389	def isPCL(self, data) :
	390	"""Returns 1 if data is PCL, else 0."""
	391	if data.startswith("\033E\033") or \
	392	((data[:128].find("\033%-12345X") != -1) and \
	393	((data.find("LANGUAGE=PCL") != -1) or \
	394	(data.find("LANGUAGE = PCL") != -1) or \
	395	(data.find("LANGUAGE = Pcl") != -1))) :
	396	return 1
	397	else :
	398	return 0
	399
	400	def isPCLXL(self, data) :
	401	"""Returns 1 if data is PCLXL aka PCL6, else 0."""
	402	if ((data[:128].find("\033%-12345X") != -1) and \
	403	(data.find(" HP-PCL XL;") != -1) and \
	404	((data.find("LANGUAGE=PCLXL") != -1) or \
	405	(data.find("LANGUAGE = PCLXL") != -1))) :
	406	return 1
	407	else :
	408	return 0
	409
	410	def detectPDLHandler(self) :
	411	"""Tries to autodetect the document format.
	412
	413	Returns the correct PDL handler class or None if format is unknown
	414	"""
	415	# Try to detect file type by reading first block of datas
	416	self.infile.seek(0)
	417	firstblock = self.infile.read(1024)
	418	self.infile.seek(0)
	419	if self.isPostScript(firstblock) :
	420	return PostScriptAnalyzer
	421	elif self.isPCLXL(firstblock) :
	422	return PCLXLAnalyzer
	423	elif self.isPCL(firstblock) :
	424	return PCLAnalyzer

Note: See TracBrowser for help on using the browser.

Context Navigation

root / pykota / trunk / pykota / pdlanalyzer.py @ 1485

Download in other formats: