Context Navigation

← Previous Change
Next Change →

Changeset 193 for pkpgcounter/trunk

Timestamp:

04/04/05 00:28:37 (19 years ago)

Author:

jerome

Message:

Moved the code into the submodules

Location:

pkpgcounter/trunk/pdlanalyzer

Files:

: 6 modified

escp2.py (modified) (2 diffs)
pcl345.py (modified) (2 diffs)
pclxl.py (modified) (2 diffs)
pdf.py (modified) (2 diffs)
pdlparser.py (modified) (1 diff)
postscript.py (modified) (2 diffs)

Legend:

: Unmodified
: Added
: Removed

pkpgcounter/trunk/pdlanalyzer/escp2.py

r191	r193
	1	#! /usr/bin/env python
	2	# -- coding: ISO-8859-15 --
1	3	#
2	4	# pkpgcounter : a generic Page Description Language parser
…	…
19	21	# $Id$
20	22	#
	23
	24	import sys
	25
	26	from pdlanalyzer.pdlparser import PDLParser
	27
	28	class ESCP2Parser(PDLParser) :
	29	"""A parser for ESC/P2 documents."""
	30	def getJobSize(self) :
	31	"""Counts pages in an ESC/P2 document."""
	32	# with Gimpprint, at least, for each page there
	33	# are two Reset Printer sequences (ESC + @)
	34	marker1 = "\033@"
	35
	36	# with other software or printer driver, we
	37	# may prefer to search for "\r\n\fESCAPE"
	38	# or "\r\fESCAPE"
	39	marker2r = "\r\f\033"
	40	marker2rn = "\r\n\f\033"
	41
	42	# and ghostscript's stcolor for example seems to
	43	# output ESC + @ + \f for each page plus one
	44	marker3 = "\033@\f"
	45
	46	# while ghostscript's escp driver outputs instead
	47	# \f + ESC + @
	48	marker4 = "\f\033@"
	49
	50	data = self.infile.read()
	51	pagecount1 = data.count(marker1)
	52	pagecount2 = max(data.count(marker2r), data.count(marker2rn))
	53	pagecount3 = data.count(marker3)
	54	pagecount4 = data.count(marker4)
	55
	56	if pagecount2 :
	57	return pagecount2
	58	elif pagecount3 > 1 :
	59	return pagecount3 - 1
	60	elif pagecount4 :
	61	return pagecount4
	62	else :
	63	return int(pagecount1 / 2)
	64
	65	def test() :
	66	"""Test function."""
	67	raise RuntimeError, "Not implemented !"
	68
	69	if __name__ == "__main__" :
	70	test()

pkpgcounter/trunk/pdlanalyzer/pcl345.py

r191	r193
	1	#! /usr/bin/env python
	2	# -- coding: ISO-8859-15 --
1	3	#
2	4	# pkpgcounter : a generic Page Description Language parser
…	…
19	21	# $Id$
20	22	#
	23
	24	import sys
	25	import mmap
	26
	27	from pdlanalyzer.pdlparser import PDLParser
	28
	29	class PCLParser(PDLParser) :
	30	"""A parser for PCL3, PCL4, PCL5 documents."""
	31	mediasizes = { # ESC&l####A
	32	0 : "Default",
	33	1 : "Executive",
	34	2 : "Letter",
	35	3 : "Legal",
	36	6 : "Ledger",
	37	25 : "A5",
	38	26 : "A4",
	39	27 : "A3",
	40	45 : "JB5",
	41	46 : "JB4",
	42	71 : "HagakiPostcard",
	43	72 : "OufukuHagakiPostcard",
	44	80 : "MonarchEnvelope",
	45	81 : "COM10Envelope",
	46	90 : "DLEnvelope",
	47	91 : "C5Envelope",
	48	100 : "B5Envelope",
	49	101 : "Custom",
	50	}
	51
	52	mediasources = { # ESC&l####H
	53	0 : "Default",
	54	1 : "Main",
	55	2 : "Manual",
	56	3 : "ManualEnvelope",
	57	4 : "Alternate",
	58	5 : "OptionalLarge",
	59	6 : "EnvelopeFeeder",
	60	7 : "Auto",
	61	8 : "Tray1",
	62	}
	63
	64	orientations = { # ESC&l####O
	65	0 : "Portrait",
	66	1 : "Landscape",
	67	2 : "ReversePortrait",
	68	3 : "ReverseLandscape",
	69	}
	70
	71	mediatypes = { # ESC&l####M
	72	0 : "Plain",
	73	1 : "Bond",
	74	2 : "Special",
	75	3 : "Glossy",
	76	4 : "Transparent",
	77	}
	78
	79	def setPageDict(self, pages, number, attribute, value) :
	80	"""Initializes a page dictionnary."""
	81	dict = pages.setdefault(number, { "copies" : 1, "mediasource" : "Main", "mediasize" : "Default", "mediatype" : "Plain", "orientation" : "Portrait"})
	82	dict[attribute] = value
	83
	84	def getJobSize(self) :
	85	"""Count pages in a PCL5 document.
	86
	87	Should also work for PCL3 and PCL4 documents.
	88
	89	Algorithm from pclcount
	90	(c) 2003, by Eduardo Gielamo Oliveira & Rodolfo Broco Manin
	91	published under the terms of the GNU General Public Licence v2.
	92
	93	Backported from C to Python by Jerome Alet, then enhanced
	94	with more PCL tags detected. I think all the necessary PCL tags
	95	are recognized to correctly handle PCL5 files wrt their number
	96	of pages. The documentation used for this was :
	97
	98	HP PCL/PJL Reference Set
	99	PCL5 Printer Language Technical Quick Reference Guide
	100	http://h20000.www2.hp.com/bc/docs/support/SupportManual/bpl13205/bpl13205.pdf
	101	"""
	102	infileno = self.infile.fileno()
	103	minfile = mmap.mmap(infileno, os.fstat(infileno)[6], prot=mmap.PROT_READ, flags=mmap.MAP_SHARED)
	104	tagsends = { "&n" : "W",
	105	"&b" : "W",
	106	"*i" : "W",
	107	"*l" : "W",
	108	"*m" : "W",
	109	"*v" : "W",
	110	"*c" : "W",
	111	"(f" : "W",
	112	"(s" : "W",
	113	")s" : "W",
	114	"&p" : "X",
	115	# "&l" : "XHAOM", # treated specially
	116	"&a" : "G", # TODO : 0 means next side, 1 front side, 2 back side
	117	"*g" : "W",
	118	"*r" : "sbABC",
	119	"*t" : "R",
	120	# "*b" : "VW", # treated specially because it occurs very often
	121	}
	122	pagecount = resets = ejects = backsides = startgfx = endgfx = 0
	123	starb = ampl = ispcl3 = escstart = 0
	124	mediasourcecount = mediasizecount = orientationcount = mediatypecount = 0
	125	tag = None
	126	pages = {}
	127	pos = 0
	128	try :
	129	while 1 :
	130	char = minfile[pos] ; pos += 1
	131	if char == "\014" :
	132	pagecount += 1
	133	elif char == "\033" :
	134	starb = ampl = 0
	135	#
	136	# <ESC>*b###y#m###v###w... -> PCL3 raster graphics
	137	# <ESC>*b###W -> Start of a raster data row/block
	138	# <ESC>*b###V -> Start of a raster data plane
	139	# <ESC>*c###W -> Start of a user defined pattern
	140	# <ESC>*i###W -> Start of a viewing illuminant block
	141	# <ESC>*l###W -> Start of a color lookup table
	142	# <ESC>*m###W -> Start of a download dither matrix block
	143	# <ESC>*v###W -> Start of a configure image data block
	144	# <ESC>*r1A -> Start Gfx
	145	# <ESC>(s###W -> Start of a characters description block
	146	# <ESC>)s###W -> Start of a fonts description block
	147	# <ESC>(f###W -> Start of a symbol set block
	148	# <ESC>&b###W -> Start of configuration data block
	149	# <ESC>&l###X -> Number of copies for current page
	150	# <ESC>&n###W -> Starts an alphanumeric string ID block
	151	# <ESC>&p###X -> Start of a non printable characters block
	152	# <ESC>&a2G -> Back side when duplex mode as generated by rastertohp
	153	# <ESC>*g###W -> Needed for planes in PCL3 output
	154	# <ESC>&l###H (or only 0 ?) -> Eject if NumPlanes > 1, as generated by rastertohp. Also defines mediasource
	155	# <ESC>&l###A -> mediasize
	156	# <ESC>&l###O -> orientation
	157	# <ESC>&l###M -> mediatype
	158	# <ESC>*t###R -> gfx resolution
	159	#
	160	tagstart = minfile[pos] ; pos += 1
	161	if tagstart in "E9=YZ" : # one byte PCL tag
	162	if tagstart == "E" :
	163	resets += 1
	164	continue # skip to next tag
	165	tag = tagstart + minfile[pos] ; pos += 1
	166	if tag == "*b" :
	167	starb = 1
	168	tagend = "VW"
	169	elif tag == "&l" :
	170	ampl = 1
	171	tagend = "XHAOM"
	172	else :
	173	try :
	174	tagend = tagsends[tag]
	175	except KeyError :
	176	continue # Unsupported PCL tag
	177	# Now read the numeric argument
	178	size = 0
	179	while 1 :
	180	char = minfile[pos] ; pos += 1
	181	if not char.isdigit() :
	182	break
	183	size = (size * 10) + int(char)
	184	if char in tagend :
	185	if tag == "&l" :
	186	if char == "X" :
	187	self.setPageDict(pages, pagecount, "copies", size)
	188	elif char == "H" :
	189	self.setPageDict(pages, pagecount, "mediasource", self.mediasources.get(size, str(size)))
	190	mediasourcecount += 1
	191	ejects += 1
	192	elif char == "A" :
	193	self.setPageDict(pages, pagecount, "mediasize", self.mediasizes.get(size, str(size)))
	194	mediasizecount += 1
	195	elif char == "O" :
	196	self.setPageDict(pages, pagecount, "orientation", self.orientations.get(size, str(size)))
	197	orientationcount += 1
	198	elif char == "M" :
	199	self.setPageDict(pages, pagecount, "mediatype", self.mediatypes.get(size, str(size)))
	200	mediatypecount += 1
	201	elif tag == "*r" :
	202	# Special tests for PCL3
	203	if (char == "s") and size :
	204	while 1 :
	205	char = minfile[pos] ; pos += 1
	206	if char == "A" :
	207	break
	208	elif (char == "b") and (minfile[pos] == "C") and not size :
	209	ispcl3 = 1 # Certainely a PCL3 file
	210	startgfx += (char == "A") and (minfile[pos - 2] in ("0", "1", "2", "3")) # Start Gfx
	211	endgfx += (not size) and (char in ("C", "B")) # End Gfx
	212	elif tag == "*t" :
	213	escstart += 1
	214	elif (tag == "&a") and (size == 2) :
	215	backsides += 1 # Back side in duplex mode
	216	else :
	217	# we just ignore the block.
	218	if tag == "&n" :
	219	# we have to take care of the operation id byte
	220	# which is before the string itself
	221	size += 1
	222	pos += size
	223	else :
	224	if starb :
	225	# special handling of PCL3 in which
	226	# *b introduces combined ESCape sequences
	227	size = 0
	228	while 1 :
	229	char = minfile[pos] ; pos += 1
	230	if not char.isdigit() :
	231	break
	232	size = (size * 10) + int(char)
	233	if char in ("w", "v") :
	234	ispcl3 = 1 # certainely a PCL3 document
	235	pos += size - 1
	236	elif char in ("y", "m") :
	237	ispcl3 = 1 # certainely a PCL3 document
	238	pos -= 1 # fix position : we were ahead
	239	elif ampl :
	240	# special handling of PCL3 in which
	241	# &l introduces combined ESCape sequences
	242	size = 0
	243	while 1 :
	244	char = minfile[pos] ; pos += 1
	245	if not char.isdigit() :
	246	break
	247	size = (size * 10) + int(char)
	248	if char in ("a", "o", "h", "m") :
	249	ispcl3 = 1 # certainely a PCL3 document
	250	pos -= 1 # fix position : we were ahead
	251	if char == "h" :
	252	self.setPageDict(pages, pagecount, "mediasource", self.mediasources.get(size, str(size)))
	253	mediasourcecount += 1
	254	elif char == "a" :
	255	self.setPageDict(pages, pagecount, "mediasize", self.mediasizes.get(size, str(size)))
	256	mediasizecount += 1
	257	elif char == "o" :
	258	self.setPageDict(pages, pagecount, "orientation", self.orientations.get(size, str(size)))
	259	orientationcount += 1
	260	elif char == "m" :
	261	self.setPageDict(pages, pagecount, "mediatype", self.mediatypes.get(size, str(size)))
	262	mediatypecount += 1
	263	except IndexError : # EOF ?
	264	minfile.close() # reached EOF
	265
	266	# if pagecount is still 0, we will use the number
	267	# of resets instead of the number of form feed characters.
	268	# but the number of resets is always at least 2 with a valid
	269	# pcl file : one at the very start and one at the very end
	270	# of the job's data. So we substract 2 from the number of
	271	# resets. And since on our test data we needed to substract
	272	# 1 more, we finally substract 3, and will test several
	273	# PCL files with this. If resets < 2, then the file is
	274	# probably not a valid PCL file, so we use 0
	275
	276	if self.debug :
	277	sys.stderr.write("pagecount : %s\n" % pagecount)
	278	sys.stderr.write("resets : %s\n" % resets)
	279	sys.stderr.write("ejects : %s\n" % ejects)
	280	sys.stderr.write("backsides : %s\n" % backsides)
	281	sys.stderr.write("startgfx : %s\n" % startgfx)
	282	sys.stderr.write("endgfx : %s\n" % endgfx)
	283	sys.stderr.write("mediasourcecount : %s\n" % mediasourcecount)
	284	sys.stderr.write("mediasizecount : %s\n" % mediasizecount)
	285	sys.stderr.write("orientationcount : %s\n" % orientationcount)
	286	sys.stderr.write("mediatypecount : %s\n" % mediatypecount)
	287	sys.stderr.write("escstart : %s\n" % escstart)
	288
	289	# if not pagecount :
	290	# pagecount = (pagecount or ((resets - 3) * (resets > 2)))
	291	# else :
	292	# # here we add counters for other ways new pages may have
	293	# # been printed and ejected by the printer
	294	# pagecount += ejects + backsides
	295	#
	296	# # now handle number of copies for each page (may differ).
	297	# # in duplex mode, number of copies may be sent only once.
	298	# for pnum in range(pagecount) :
	299	# # if no number of copies defined, take the preceding one else the one set before any page else 1.
	300	# page = pages.get(pnum, pages.get(pnum - 1, pages.get(0, { "copies" : 1 })))
	301	# pagecount += (page["copies"] - 1)
	302	#
	303	# # in PCL3 files, there's one Start Gfx tag per page
	304	# if ispcl3 :
	305	# if endgfx == int(startgfx / 2) : # special case for cdj1600
	306	# pagecount = endgfx
	307	# elif startgfx :
	308	# pagecount = startgfx
	309	# elif endgfx :
	310	# pagecount = endgfx
	311
	312
	313	if pagecount == mediasourcecount == escstart :
	314	pass # should be OK.
	315	elif (not startgfx) and (not endgfx) :
	316	pagecount = ejects or pagecount
	317	elif startgfx == endgfx :
	318	pagecount = startgfx
	319	elif startgfx == (endgfx - 1) :
	320	pagecount = startgfx
	321	else :
	322	pagecount = abs(startgfx - endgfx)
	323
	324	if self.debug :
	325	for pnum in range(pagecount) :
	326	# if no number of copies defined, take the preceding one else the one set before any page else 1.
	327	page = pages.get(pnum, pages.get(pnum - 1, pages.get(0, { "copies" : 1, "mediasource" : "Main", "mediasize" : "Default", "mediatype" : "Plain", "orientation" : "Portrait"})))
	328	sys.stderr.write("%s%s%s%s%s\n" % (page["copies"], page["mediatype"], page["mediasize"], page["orientation"], page["mediasource"]))
	329
	330	return pagecount
	331
	332	def test() :
	333	"""Test function."""
	334	raise RuntimeError, "Not implemented !"
	335
	336	if __name__ == "__main__" :
	337	test()

pkpgcounter/trunk/pdlanalyzer/pclxl.py

r191	r193
	1	#! /usr/bin/env python
	2	# -- coding: ISO-8859-15 --
1	3	#
2	4	# pkpgcounter : a generic Page Description Language parser
…	…
19	21	# $Id$
20	22	#
	23
	24	import sys
	25	import mmap
	26	from struct import unpack
	27
	28	from pdlanalyzer.pdlparser import PDLParser, PDLParserError
	29
	30	class PCLXLParser(PDLParser) :
	31	"""A parser for PCLXL (aka PCL6) documents."""
	32	mediasizes = {
	33	0 : "Letter",
	34	1 : "Legal",
	35	2 : "A4",
	36	3 : "Executive",
	37	4 : "Ledger",
	38	5 : "A3",
	39	6 : "COM10Envelope",
	40	7 : "MonarchEnvelope",
	41	8 : "C5Envelope",
	42	9 : "DLEnvelope",
	43	10 : "JB4",
	44	11 : "JB5",
	45	12 : "B5Envelope",
	46	14 : "JPostcard",
	47	15 : "JDoublePostcard",
	48	16 : "A5",
	49	17 : "A6",
	50	18 : "JB6",
	51	}
	52
	53	mediasources = {
	54	0 : "Default",
	55	1 : "Auto",
	56	2 : "Manual",
	57	3 : "MultiPurpose",
	58	4 : "UpperCassette",
	59	5 : "LowerCassette",
	60	6 : "EnvelopeTray",
	61	7 : "ThirdCassette",
	62	}
	63
	64	orientations = {
	65	0 : "Portrait",
	66	1 : "Landscape",
	67	2 : "ReversePortrait",
	68	3 : "ReverseLandscape",
	69	}
	70
	71	def beginPage(self) :
	72	"""Indicates the beginning of a new page, and extracts media information."""
	73	self.pagecount += 1
	74
	75	# Default values
	76	mediatypelabel = "Plain"
	77	mediasourcelabel = "Main"
	78	mediasizelabel = "Default"
	79	orientationlabel = "Portrait"
	80
	81	# Now go upstream to decode media type, size, source, and orientation
	82	# this saves time because we don't need a complete parser !
	83	minfile = self.minfile
	84	pos = self.pos - 2
	85	while pos > 0 : # safety check : don't go back to far !
	86	val = ord(minfile[pos])
	87	if val in (0x44, 0x48, 0x41) : # if previous endPage or openDataSource or beginSession (first page)
	88	break
	89	if val == 0x26 :
	90	mediasource = ord(minfile[pos - 2])
	91	mediasourcelabel = self.mediasources.get(mediasource, str(mediasource))
	92	pos = pos - 4
	93	elif val == 0x25 :
	94	mediasize = ord(minfile[pos - 2])
	95	mediasizelabel = self.mediasizes.get(mediasize, str(mediasize))
	96	pos = pos - 4
	97	elif val == 0x28 :
	98	orientation = ord(minfile[pos - 2])
	99	orienationlabel = self.orientations.get(orientation, str(orientation))
	100	pos = pos - 4
	101	elif val == 0x27 :
	102	savepos = pos
	103	pos = pos - 1
	104	while pos > 0 : # safety check : don't go back to far !
	105	val = ord(minfile[pos])
	106	pos -= 1
	107	if val == 0xc8 :
	108	break
	109	mediatypelabel = minfile[pos:savepos] # TODO : INCORRECT, WE HAVE TO STRIP OUT THE UBYTE ARRAY'S LENGTH !!!
	110	# else : TODO : CUSTOM MEDIA SIZE AND UNIT !
	111	else :
	112	pos = pos - 2 # ignored
	113	self.pages[self.pagecount] = { "copies" : 1,
	114	"orientation" : orientationlabel,
	115	"mediatype" : mediatypelabel,
	116	"mediasize" : mediasizelabel,
	117	"mediasource" : mediasourcelabel,
	118	}
	119	return 0
	120
	121	def endPage(self) :
	122	"""Indicates the end of a page."""
	123	pos = self.pos
	124	minfile = self.minfile
	125	if (ord(minfile[pos-3]) == 0xf8) and (ord(minfile[pos-2]) == 0x31) :
	126	# The EndPage operator may be preceded by a PageCopies attribute
	127	# So set number of copies for current page.
	128	# From what I read in PCLXL documentation, the number
	129	# of copies is an unsigned 16 bits integer
	130	self.pages[self.pagecount]["copies"] = unpack(self.endianness + "H", minfile[pos-5:pos-3])[0]
	131	return 0
	132
	133	def array_8(self) :
	134	"""Handles byte arrays."""
	135	pos = self.pos
	136	datatype = self.minfile[pos]
	137	pos += 1
	138	length = self.tags[ord(datatype)]
	139	if callable(length) :
	140	self.pos = pos
	141	length = length()
	142	pos = self.pos
	143	posl = pos + length
	144	self.pos = posl
	145	if length == 1 :
	146	return unpack("B", self.minfile[pos:posl])[0]
	147	elif length == 2 :
	148	return unpack(self.endianness + "H", self.minfile[pos:posl])[0]
	149	elif length == 4 :
	150	return unpack(self.endianness + "I", self.minfile[pos:posl])[0]
	151	else :
	152	raise PDLParserError, "Error on array size at %s" % self.pos
	153
	154	def array_16(self) :
	155	"""Handles byte arrays."""
	156	pos = self.pos
	157	datatype = self.minfile[pos]
	158	pos += 1
	159	length = self.tags[ord(datatype)]
	160	if callable(length) :
	161	self.pos = pos
	162	length = length()
	163	pos = self.pos
	164	posl = pos + length
	165	self.pos = posl
	166	if length == 1 :
	167	return 2 * unpack("B", self.minfile[pos:posl])[0]
	168	elif length == 2 :
	169	return 2 * unpack(self.endianness + "H", self.minfile[pos:posl])[0]
	170	elif length == 4 :
	171	return 2 * unpack(self.endianness + "I", self.minfile[pos:posl])[0]
	172	else :
	173	raise PDLParserError, "Error on array size at %s" % self.pos
	174
	175	def array_32(self) :
	176	"""Handles byte arrays."""
	177	pos = self.pos
	178	datatype = self.minfile[pos]
	179	pos += 1
	180	length = self.tags[ord(datatype)]
	181	if callable(length) :
	182	self.pos = pos
	183	length = length()
	184	pos = self.pos
	185	posl = pos + length
	186	self.pos = posl
	187	if length == 1 :
	188	return 4 * unpack("B", self.minfile[pos:posl])[0]
	189	elif length == 2 :
	190	return 4 * unpack(self.endianness + "H", self.minfile[pos:posl])[0]
	191	elif length == 4 :
	192	return 4 * unpack(self.endianness + "I", self.minfile[pos:posl])[0]
	193	else :
	194	raise PDLParserError, "Error on array size at %s" % self.pos
	195
	196	def embeddedDataSmall(self) :
	197	"""Handle small amounts of data."""
	198	pos = self.pos
	199	length = ord(self.minfile[pos])
	200	self.pos = pos + 1
	201	return length
	202
	203	def embeddedData(self) :
	204	"""Handle normal amounts of data."""
	205	pos = self.pos
	206	pos4 = pos + 4
	207	self.pos = pos4
	208	return unpack(self.endianness + "I", self.minfile[pos:pos4])[0]
	209
	210	def littleEndian(self) :
	211	"""Toggles to little endianness."""
	212	self.endianness = "<" # little endian
	213	return 0
	214
	215	def bigEndian(self) :
	216	"""Toggles to big endianness."""
	217	self.endianness = ">" # big endian
	218	return 0
	219
	220	def getJobSize(self) :
	221	"""Counts pages in a PCLXL (PCL6) document.
	222
	223	Algorithm by Jerome Alet.
	224
	225	The documentation used for this was :
	226
	227	HP PCL XL Feature Reference
	228	Protocol Class 2.0
	229	http://www.hpdevelopersolutions.com/downloads/64/358/xl_ref20r22.pdf
	230	"""
	231	self.endianness = None
	232	found = 0
	233	while not found :
	234	line = self.infile.readline()
	235	if not line :
	236	break
	237	if line[1:12] == " HP-PCL XL;" :
	238	found = 1
	239	endian = ord(line[0])
	240	if endian == 0x29 :
	241	self.littleEndian()
	242	elif endian == 0x28 :
	243	self.bigEndian()
	244	# elif endian == 0x27 : # TODO : This is the ESC code : parse it for PJL statements !
	245	#
	246	else :
	247	raise PDLParserError, "Unknown endianness marker 0x%02x at start !" % endian
	248	if not found :
	249	raise PDLParserError, "This file doesn't seem to be PCLXL (aka PCL6)"
	250
	251	# Initialize table of tags
	252	self.tags = [ 0 ] * 256
	253
	254	# GhostScript's sources tell us that HP printers
	255	# only accept little endianness, but we can handle both.
	256	self.tags[0x28] = self.bigEndian # BigEndian
	257	self.tags[0x29] = self.littleEndian # LittleEndian
	258
	259	self.tags[0x43] = self.beginPage # BeginPage
	260	self.tags[0x44] = self.endPage # EndPage
	261
	262	self.tags[0xc0] = 1 # ubyte
	263	self.tags[0xc1] = 2 # uint16
	264	self.tags[0xc2] = 4 # uint32
	265	self.tags[0xc3] = 2 # sint16
	266	self.tags[0xc4] = 4 # sint32
	267	self.tags[0xc5] = 4 # real32
	268
	269	self.tags[0xc8] = self.array_8 # ubyte_array
	270	self.tags[0xc9] = self.array_16 # uint16_array
	271	self.tags[0xca] = self.array_32 # uint32_array
	272	self.tags[0xcb] = self.array_16 # sint16_array
	273	self.tags[0xcc] = self.array_32 # sint32_array
	274	self.tags[0xcd] = self.array_32 # real32_array
	275
	276	self.tags[0xd0] = 2 # ubyte_xy
	277	self.tags[0xd1] = 4 # uint16_xy
	278	self.tags[0xd2] = 8 # uint32_xy
	279	self.tags[0xd3] = 4 # sint16_xy
	280	self.tags[0xd4] = 8 # sint32_xy
	281	self.tags[0xd5] = 8 # real32_xy
	282
	283	self.tags[0xe0] = 4 # ubyte_box
	284	self.tags[0xe1] = 8 # uint16_box
	285	self.tags[0xe2] = 16 # uint32_box
	286	self.tags[0xe3] = 8 # sint16_box
	287	self.tags[0xe4] = 16 # sint32_box
	288	self.tags[0xe5] = 16 # real32_box
	289
	290	self.tags[0xf8] = 1 # attr_ubyte
	291	self.tags[0xf9] = 2 # attr_uint16
	292
	293	self.tags[0xfa] = self.embeddedData # dataLength
	294	self.tags[0xfb] = self.embeddedDataSmall # dataLengthByte
	295
	296	infileno = self.infile.fileno()
	297	self.pages = {}
	298	self.minfile = minfile = mmap.mmap(infileno, os.fstat(infileno)[6], prot=mmap.PROT_READ, flags=mmap.MAP_SHARED)
	299	tags = self.tags
	300	self.pagecount = 0
	301	self.pos = pos = self.infile.tell()
	302	try :
	303	while 1 :
	304	char = minfile[pos]
	305	pos += 1
	306	length = tags[ord(char)]
	307	if not length :
	308	continue
	309	if callable(length) :
	310	self.pos = pos
	311	length = length()
	312	pos = self.pos
	313	pos += length
	314	except IndexError : # EOF ?
	315	self.minfile.close() # reached EOF
	316
	317	# now handle number of copies for each page (may differ).
	318	for pnum in range(1, self.pagecount + 1) :
	319	# if no number of copies defined, take 1, as explained
	320	# in PCLXL documentation.
	321	# NB : is number of copies is 0, the page won't be output
	322	# but the formula below is still correct : we want
	323	# to decrease the total number of pages in this case.
	324	page = self.pages.get(pnum, 1)
	325	copies = page["copies"]
	326	self.pagecount += (copies - 1)
	327	if self.debug :
	328	sys.stderr.write("%s%s%s%s%s\n" % (copies, page["mediatype"], page["mediasize"], page["orientation"], page["mediasource"]))
	329
	330	return self.pagecount
	331
	332	def test() :
	333	"""Test function."""
	334	raise RuntimeError, "Not implemented !"
	335
	336	if __name__ == "__main__" :
	337	test()

pkpgcounter/trunk/pdlanalyzer/pdf.py

r191	r193
	1	#! /usr/bin/env python
	2	# -- coding: ISO-8859-15 --
1	3	#
2	4	# pkpgcounter : a generic Page Description Language parser
…	…
19	21	# $Id$
20	22	#
	23
	24	import sys
	25	import re
	26
	27	from pdlanalyzer.pdlparser import PDLParser
	28
	29	class PDFParser(PDLParser) :
	30	"""A parser for PDF documents."""
	31	def getJobSize(self) :
	32	"""Counts pages in a PDF document."""
	33	regexp = re.compile(r"(/Type) ?(/Page)[/ \t\r\n]")
	34	pagecount = 0
	35	for line in self.infile.xreadlines() :
	36	pagecount += len(regexp.findall(line))
	37	return pagecount
	38
	39	def test() :
	40	"""Test function."""
	41	raise RuntimeError, "Not implemented !"
	42
	43	if __name__ == "__main__" :
	44	test()

pkpgcounter/trunk/pdlanalyzer/pdlparser.py

r192	r193
20	20	#
21	21
	22	class PDLParserError(Exception):
	23	"""An exception for PDLParser related stuff."""
	24	def __init__(self, message = ""):
	25	self.message = message
	26	Exception.__init__(self, message)
	27	def __repr__(self):
	28	return self.message
	29	__str__ = __repr__
	30
22	31	class PDLParser :
23	32	"""Generic PDL parser."""

pkpgcounter/trunk/pdlanalyzer/postscript.py

r191	r193
	1	#! /usr/bin/env python
	2	# -- coding: ISO-8859-15 --
1	3	#
2	4	# pkpgcounter : a generic Page Description Language parser
…	…
19	21	# $Id$
20	22	#
	23
	24	import sys
	25	import popen2
	26
	27	from pdlanalyzer.pdlparser import PDLParser, PDLParserError
	28
	29	class PostScriptParser(PDLParser) :
	30	"""A parser for PostScript documents."""
	31	def throughGhostScript(self) :
	32	"""Get the count through GhostScript, useful for non-DSC compliant PS files."""
	33	if self.debug :
	34	sys.stderr.write("Internal parser sucks, using GhostScript instead...\n")
	35	self.infile.seek(0)
	36	command = 'gs -sDEVICE=bbox -dNOPAUSE -dBATCH -dQUIET - 2>&1 \| grep -c "%%HiResBoundingBox:" 2>/dev/null'
	37	child = popen2.Popen4(command)
	38	try :
	39	data = self.infile.read(MEGABYTE)
	40	while data :
	41	child.tochild.write(data)
	42	data = self.infile.read(MEGABYTE)
	43	child.tochild.flush()
	44	child.tochild.close()
	45	except (IOError, OSError), msg :
	46	raise PDLParserError, "Problem during analysis of Binary PostScript document : %s" % msg
	47
	48	pagecount = 0
	49	try :
	50	pagecount = int(child.fromchild.readline().strip())
	51	except (IOError, OSError, AttributeError, ValueError), msg :
	52	raise PDLParserError, "Problem during analysis of Binary PostScript document : %s" % msg
	53	child.fromchild.close()
	54
	55	try :
	56	child.wait()
	57	except OSError, msg :
	58	raise PDLParserError, "Problem during analysis of Binary PostScript document : %s" % msg
	59	return pagecount * self.copies
	60
	61	def natively(self) :
	62	"""Count pages in a DSC compliant PostScript document."""
	63	self.infile.seek(0)
	64	pagecount = 0
	65	for line in self.infile.xreadlines() :
	66	if line.startswith("%%Page: ") :
	67	pagecount += 1
	68	elif line.startswith("%%Requirements: numcopies(") :
	69	try :
	70	number = int(line.strip().split('(')[1].split(')')[0])
	71	except :
	72	pass
	73	else :
	74	if number > self.copies :
	75	self.copies = number
	76	elif line.startswith("%%BeginNonPPDFeature: NumCopies ") :
	77	# handle # of copies set by some Windows printer driver
	78	try :
	79	number = int(line.strip().split()[2])
	80	except :
	81	pass
	82	else :
	83	if number > self.copies :
	84	self.copies = number
	85	elif line.startswith("1 dict dup /NumCopies ") :
	86	# handle # of copies set by mozilla/kprinter
	87	try :
	88	number = int(line.strip().split()[4])
	89	except :
	90	pass
	91	else :
	92	if number > self.copies :
	93	self.copies = number
	94	return pagecount * self.copies
	95
	96	def getJobSize(self) :
	97	"""Count pages in PostScript document."""
	98	return self.natively() or self.throughGhostScript()
	99
	100	def test() :
	101	"""Test function."""
	102	raise RuntimeError, "Not implemented !"
	103
	104	if __name__ == "__main__" :
	105	test()

Context Navigation

Changeset 193 for pkpgcounter/trunk

Legend:

pkpgcounter/trunk/pdlanalyzer/escp2.py

pkpgcounter/trunk/pdlanalyzer/pcl345.py

pkpgcounter/trunk/pdlanalyzer/pclxl.py

pkpgcounter/trunk/pdlanalyzer/pdf.py

pkpgcounter/trunk/pdlanalyzer/pdlparser.py

pkpgcounter/trunk/pdlanalyzer/postscript.py

Download in other formats: