Context Navigation

← Previous Change
Next Change →

Changeset 220 for pkpgcounter

Timestamp:

07/02/05 15:41:30 (19 years ago)

Author:

jerome

Message:

Big improvements on readability + maintainability

Location:

pkpgcounter/trunk

Files:

12 modified

NEWS (modified) (1 diff)
pdlanalyzer/analyzer.py (modified) (6 diffs)
pdlanalyzer/dvi.py (modified) (2 diffs)
pdlanalyzer/escp2.py (modified) (2 diffs)
pdlanalyzer/pcl345.py (modified) (3 diffs)
pdlanalyzer/pclxl.py (modified) (3 diffs)
pdlanalyzer/pdf.py (modified) (2 diffs)
pdlanalyzer/pdlparser.py (modified) (2 diffs)
pdlanalyzer/postscript.py (modified) (2 diffs)
pdlanalyzer/tiff.py (modified) (2 diffs)
pdlanalyzer/version.py (modified) (1 diff)
tests/runtest.sh (modified) (1 diff)

Legend:

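Unmodified: line carries both an old and a new line number
Added: line carries only a new line number
Removed: line carries only an old line number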

pkpgcounter/trunk/NEWS

r217	r220
22	22	pkpgcounter News :
23	23
	24	* 1.53 :
	25
	26	- Moved code around to improve maintainability.
	27
24	28	* 1.52 :
25	29
26		- ~~Basic~~ support for the TIFF format.
	30	- Added support for the TIFF format.
27	31
28	32	* 1.51 :
29	33
30		- ~~Basic~~ support for the DVI format.
	34	- Added support for the DVI format.
31	35
32	36	* 1.50 :

pkpgcounter/trunk/pdlanalyzer/analyzer.py

r217	r220
25	25	from pdlanalyzer import version, pdlparser, postscript, pdf, pcl345, pclxl, escp2, dvi, tiff
26	26
27		~~KILOBYTE = 1024~~
28		~~MEGABYTE = 1024 * KILOBYTE~~
29		~~LASTBLOCKSIZE = int(KILOBYTE / 4)~~
30
31	27	class PDLAnalyzer :
32	28	"""Class for PDL autodetection."""
…	…
40	36	self.debug = debug
41	37	self.filename = filename
42		~~try :~~
43		~~import psyco~~
44		~~except ImportError :~~
45		~~sys.stderr.write("pkpgcounter : you should install psyco if possible, this would greatly speedup parsing.\n")~~
46		~~pass # Psyco is not installed~~
47		~~else :~~
48		~~# Psyco is installed, tell it to compile~~
49		~~# the CPU intensive methods : PCL and PCLXL~~
50		~~# parsing will greatly benefit from this,~~
51		~~# for PostScript and PDF the difference is~~
52		~~# barely noticeable since they are already~~
53		~~# almost optimal, and much more speedy anyway.~~
54		~~psyco.bind(postscript.PostScriptParser.getJobSize)~~
55		~~psyco.bind(pdf.PDFParser.getJobSize)~~
56		~~psyco.bind(escp2.ESCP2Parser.getJobSize)~~
57		~~psyco.bind(pcl345.PCL345Parser.getJobSize)~~
58		~~psyco.bind(pclxl.PCLXLParser.getJobSize)~~
59		~~psyco.bind(dvi.DVIParser.getJobSize)~~
60		~~psyco.bind(tiff.TIFFParser.getJobSize)~~
61	38
62	39	def getJobSize(self) :
…	…
67	44	except pdlparser.PDLParserError, msg :
68	45	self.closeFile()
69		raise pdlparser.PDLParserError, "~~ERROR :~~ Unknown file format for %s (%s)" % (self.filename, msg)
	46	raise pdlparser.PDLParserError, "Unknown file format for %s (%s)" % (self.filename, msg)
70	47	else :
71	48	try :
72		size = pdlhandler~~(self.infile, self.debug)~~.getJobSize()
	49	size = pdlhandler.getJobSize()
73	50	finally :
74	51	self.closeFile()
…	…
93	70	self.infile = tempfile.TemporaryFile(mode="w+b")
94	71	while 1 :
95		data = infile.read(MEGABYTE)
	72	data = infile.read(pdlparser.MEGABYTE)
96	73	if not data :
97	74	break
…	…
114	91	pass # probably stdin, which is not seekable
115	92
116		~~def isPostScript(self, sdata, edata) :~~
117		~~"""Returns 1 if data is PostScript, else 0."""~~
118		~~if sdata.startswith("%!") or \~~
119		~~sdata.startswith("\004%!") or \~~
120		~~sdata.startswith("\033%-12345X%!PS") or \~~
121		~~((sdata[:128].find("\033%-12345X") != -1) and \~~
122		~~((sdata.find("LANGUAGE=POSTSCRIPT") != -1) or \~~
123		~~(sdata.find("LANGUAGE = POSTSCRIPT") != -1) or \~~
124		~~(sdata.find("LANGUAGE = Postscript") != -1))) or \~~
125		~~(sdata.find("%!PS-Adobe") != -1) :~~
126		~~if self.debug :~~
127		~~sys.stderr.write("%s is a PostScript file\n" % str(self.filename))~~
128		~~return 1~~
129		~~else :~~
130		~~return 0~~
131
132		~~def isPDF(self, sdata, edata) :~~
133		~~"""Returns 1 if data is PDF, else 0."""~~
134		~~if sdata.startswith("%PDF-") or \~~
135		~~sdata.startswith("\033%-12345X%PDF-") or \~~
136		~~((sdata[:128].find("\033%-12345X") != -1) and (sdata.upper().find("LANGUAGE=PDF") != -1)) or \~~
137		~~(sdata.find("%PDF-") != -1) :~~
138		~~if self.debug :~~
139		~~sys.stderr.write("%s is a PDF file\n" % str(self.filename))~~
140		~~return 1~~
141		~~else :~~
142		~~return 0~~
143
144		~~def isPCL(self, sdata, edata) :~~
145		~~"""Returns 1 if data is PCL, else 0."""~~
146		~~if sdata.startswith("\033E\033") or \~~
147		~~(sdata.startswith("\033*rbC") and (not edata[-3:] == "\f\033@")) or \~~
148		~~sdata.startswith("\033%8\033") or \~~
149		~~(sdata.find("\033%-12345X") != -1) :~~
150		~~if self.debug :~~
151		~~sys.stderr.write("%s is a PCL3/4/5 file\n" % str(self.filename))~~
152		~~return 1~~
153		~~else :~~
154		~~return 0~~
155
156		~~def isPCLXL(self, sdata, edata) :~~
157		~~"""Returns 1 if data is PCLXL aka PCL6, else 0."""~~
158		~~if ((sdata[:128].find("\033%-12345X") != -1) and \~~
159		~~(sdata.find(" HP-PCL XL;") != -1) and \~~
160		~~((sdata.find("LANGUAGE=PCLXL") != -1) or \~~
161		~~(sdata.find("LANGUAGE = PCLXL") != -1))) :~~
162		~~if self.debug :~~
163		~~sys.stderr.write("%s is a PCLXL (aka PCL6) file\n" % str(self.filename))~~
164		~~return 1~~
165		~~else :~~
166		~~return 0~~
167
168		~~def isESCP2(self, sdata, edata) :~~
169		~~"""Returns 1 if data is ESC/P2, else 0."""~~
170		~~if sdata.startswith("\033@") or \~~
171		~~sdata.startswith("\033*") or \~~
172		~~sdata.startswith("\n\033@") or \~~
173		~~sdata.startswith("\0\0\0\033\1@EJL") : # ESC/P Raster ??? Seen on Stylus Photo 1284~~
174		~~if self.debug :~~
175		~~sys.stderr.write("%s is an ESC/P2 file\n" % str(self.filename))~~
176		~~return 1~~
177		~~else :~~
178		~~return 0~~
179
180		~~def isDVI(self, sdata, edata) :~~
181		~~"""Returns 1 if data is DVI, else 0."""~~
182		~~if (ord(sdata[0]) == 0xf7) and (ord(edata[-1]) == 0xdf) :~~
183		~~if self.debug :~~
184		~~sys.stderr.write("%s is a DVI file\n" % str(self.filename))~~
185		~~return 1~~
186		~~else :~~
187		~~return 0~~
188
189		~~def isTIFF(self, sdata, edata) :~~
190		~~"""Returns 1 if data is TIFF, else 0."""~~
191		~~littleendian = (chr(0x49)*2) + chr(0x2a) + chr(0)~~
192		~~bigendian = (chr(0x4d)*2) + chr(0) + chr(0x2a)~~
193		~~if sdata[:4] in (littleendian, bigendian) :~~
194		~~if self.debug :~~
195		~~sys.stderr.write("%s is a TIFF file\n" % str(self.filename))~~
196		~~return 1~~
197		~~else :~~
198		~~return 0~~
199
200	93	def detectPDLHandler(self) :
201	94	"""Tries to autodetect the document format.
…	…
203	96	Returns the correct PDL handler class or None if format is unknown
204	97	"""
205		# Try to detect file type by reading first block of datas
	98	# Try to detect file type by reading first and last blocks of datas
	99	# Each parser can read them automatically, but here we do this only once.
206	100	self.infile.seek(0)
207		firstblock = self.infile.read(~~16 * KILOBYT~~E)
	101	firstblock = self.infile.read(pdlparser.FIRSTBLOCKSIZE)
208	102	try :
209		self.infile.seek(-LASTBLOCKSIZE, 2)
210		lastblock = self.infile.read(LASTBLOCKSIZE)
	103	self.infile.seek(-pdlparser.LASTBLOCKSIZE, 2)
	104	lastblock = self.infile.read(pdlparser.LASTBLOCKSIZE)
211	105	except IOError :
212	106	lastblock = ""
213	107	self.infile.seek(0)
214	108	if not firstblock :
215		~~sys.stderr.write("ERROR: input file %s is empty !\n" % str(self.filename)~~)
	109	raise pdlparser.PDLParserError, "input file %s is empty !" % str(self.filename)
216	110	else :
217		if self.isPostScript(firstblock, lastblock) :
218		return postscript.PostScriptParser
219		elif self.isPCLXL(firstblock, lastblock) :
220		return pclxl.PCLXLParser
221		elif self.isPDF(firstblock, lastblock) :
222		return pdf.PDFParser
223		elif self.isPCL(firstblock, lastblock) :
224		return pcl345.PCL345Parser
225		elif self.isESCP2(firstblock, lastblock) :
226		return escp2.ESCP2Parser
227		elif self.isDVI(firstblock, lastblock) :
228		return dvi.DVIParser
229		elif self.isTIFF(firstblock, lastblock) :
230		return tiff.TIFFParser
	111	for module in (postscript, \
	112	pclxl, \
	113	pdf, \
	114	pcl345, \
	115	escp2, \
	116	dvi, \
	117	tiff) :
	118	try :
	119	return getattr(module, "Parser")(self.infile, self.debug, firstblock, lastblock)
	120	except pdlparser.PDLParserError :
	121	pass # try next parser
231	122	raise pdlparser.PDLParserError, "Analysis of first data block failed."
232	123

pkpgcounter/trunk/pdlanalyzer/dvi.py

r218	r220
29	29	from pdlanalyzer import pdlparser
30	30
31		class ~~DVI~~Parser(pdlparser.PDLParser) :
	31	class Parser(pdlparser.PDLParser) :
32	32	"""A parser for DVI documents."""
	33	def isValid(self) :
	34	"""Returns 1 if data is DVI, else 0."""
	35	try :
	36	if (ord(self.firstblock[0]) == 0xf7) and (ord(self.lastblock[-1]) == 0xdf) :
	37	if self.debug :
	38	sys.stderr.write("DEBUG: Input file is in the DVI format.\n")
	39	return 1
	40	else :
	41	return 0
	42	except IndexError :
	43	return 0
	44
33	45	def getJobSize(self) :
34	46	"""Counts pages in a DVI document.
…	…
74	86	mustclose = 1
75	87	try :
76		parser = ~~DVI~~Parser(infile, debug=1)
	88	parser = Parser(infile, debug=1)
77	89	totalsize += parser.getJobSize()
78	90	except pdlparser.PDLParserError, msg :

pkpgcounter/trunk/pdlanalyzer/escp2.py

r211	r220
26	26	from pdlanalyzer import pdlparser
27	27
28		class ~~ESCP2~~Parser(pdlparser.PDLParser) :
	28	class Parser(pdlparser.PDLParser) :
29	29	"""A parser for ESC/P2 documents."""
	30	def isValid(self) :
	31	"""Returns 1 if data is ESC/P2, else 0."""
	32	if self.firstblock.startswith("\033@") or \
	33	self.firstblock.startswith("\033*") or \
	34	self.firstblock.startswith("\n\033@") or \
	35	self.firstblock.startswith("\0\0\0\033\1@EJL") : # ESC/P Raster ??? Seen on Stylus Photo 1284
	36	if self.debug :
	37	sys.stderr.write("DEBUG: Input file is in the ESC/P2 format.\n")
	38	return 1
	39	else :
	40	return 0
	41
30	42	def getJobSize(self) :
31	43	"""Counts pages in an ESC/P2 document."""
…	…
76	88	mustclose = 1
77	89	try :
78		parser = ~~ESCP2~~Parser(infile, debug=1)
	90	parser = Parser(infile, debug=1)
79	91	totalsize += parser.getJobSize()
80	92	except pdlparser.PDLParserError, msg :

pkpgcounter/trunk/pdlanalyzer/pcl345.py

r211	r220
28	28	from pdlanalyzer import pdlparser
29	29
30		class P~~CL345P~~arser(pdlparser.PDLParser) :
	30	class Parser(pdlparser.PDLParser) :
31	31	"""A parser for PCL3, PCL4, PCL5 documents."""
32	32	mediasizes = { # ESC&l####A
…	…
77	77	4 : "Transparent",
78	78	}
	79
	80	def isValid(self) :
	81	"""Returns 1 if data is PCL, else 0."""
	82	if self.firstblock.startswith("\033E\033") or \
	83	(self.firstblock.startswith("\033*rbC") and (not self.lastblock[-3:] == "\f\033@")) or \
	84	self.firstblock.startswith("\033%8\033") or \
	85	(self.firstblock.find("\033%-12345X") != -1) :
	86	if self.debug :
	87	sys.stderr.write("DEBUG: Input file is in the PCL3/4/5 format.\n")
	88	return 1
	89	else :
	90	return 0
79	91
80	92	def setPageDict(self, pages, number, attribute, value) :
…	…
346	358	mustclose = 1
347	359	try :
348		parser = P~~CL345P~~arser(infile, debug=1)
	360	parser = Parser(infile, debug=1)
349	361	totalsize += parser.getJobSize()
350	362	except pdlparser.PDLParserError, msg :

pkpgcounter/trunk/pdlanalyzer/pclxl.py

r211	r220
29	29	from pdlanalyzer import pdlparser
30	30
31		class P~~CLXLP~~arser(pdlparser.PDLParser) :
	31	class Parser(pdlparser.PDLParser) :
32	32	"""A parser for PCLXL (aka PCL6) documents."""
33	33	mediasizes = {
…	…
70	70	}
71	71
	72	def isValid(self) :
	73	"""Returns 1 if data is PCLXL aka PCL6, else 0."""
	74	if ((self.firstblock[:128].find("\033%-12345X") != -1) and \
	75	(self.firstblock.find(" HP-PCL XL;") != -1) and \
	76	((self.firstblock.find("LANGUAGE=PCLXL") != -1) or \
	77	(self.firstblock.find("LANGUAGE = PCLXL") != -1))) :
	78	if self.debug :
	79	sys.stderr.write("DEBUG: Input file is in the PCLXL (aka PCL6) format.\n")
	80	return 1
	81	else :
	82	return 0
	83
72	84	def beginPage(self) :
73	85	"""Indicates the beginning of a new page, and extracts media information."""
…	…
371	383	mustclose = 1
372	384	try :
373		parser = P~~CLXLP~~arser(infile, debug=1)
	385	parser = Parser(infile, debug=1)
374	386	totalsize += parser.getJobSize()
375	387	except pdlparser.PDLParserError, msg :

pkpgcounter/trunk/pdlanalyzer/pdf.py

r211	r220
27	27	from pdlanalyzer import pdlparser
28	28
29		class P~~DFP~~arser(pdlparser.PDLParser) :
	29	class Parser(pdlparser.PDLParser) :
30	30	"""A parser for PDF documents."""
	31	def isValid(self) :
	32	"""Returns 1 if data is PDF, else 0."""
	33	if self.firstblock.startswith("%PDF-") or \
	34	self.firstblock.startswith("\033%-12345X%PDF-") or \
	35	((self.firstblock[:128].find("\033%-12345X") != -1) and (self.firstblock.upper().find("LANGUAGE=PDF") != -1)) or \
	36	(self.firstblock.find("%PDF-") != -1) :
	37	if self.debug :
	38	sys.stderr.write("DEBUG: Input file is in the PDF format.\n")
	39	return 1
	40	else :
	41	return 0
	42
31	43	def getJobSize(self) :
32	44	"""Counts pages in a PDF document."""
…	…
56	68	mustclose = 1
57	69	try :
58		parser = P~~DFP~~arser(infile, debug=1)
	70	parser = Parser(infile, debug=1)
59	71	totalsize += parser.getJobSize()
60	72	except pdlparser.PDLParserError, msg :

pkpgcounter/trunk/pdlanalyzer/pdlparser.py

r211	r220
21	21
22	22	import sys
	23	import psyco
	24
	25	KILOBYTE = 1024
	26	MEGABYTE = 1024 * KILOBYTE
	27	FIRSTBLOCKSIZE = 16 * KILOBYTE
	28	LASTBLOCKSIZE = int(KILOBYTE / 4)
23	29
24	30	class PDLParserError(Exception):
…	…
33	39	class PDLParser :
34	40	"""Generic PDL parser."""
35		def __init__(self, infile, debug=0) :
	41	def __init__(self, infile, debug=0, firstblock=None, lastblock=None) :
36	42	"""Initialize the generic parser."""
	43	self.infile = infile
37	44	self.debug = debug
38		self.infile = infile
39
40		def getJobSize(self) :
41		"""Counts pages in the document."""
	45	if firstblock is None :
	46	self.infile.seek(0)
	47	firstblock = self.infile.read(FIRSTBLOCKSIZE)
	48	try :
	49	self.infile.seek(-LASTBLOCKSIZE, 2)
	50	lastblock = self.infile.read(LASTBLOCKSIZE)
	51	except IOError :
	52	lastblock = ""
	53	self.infile.seek(0)
	54	self.firstblock = firstblock
	55	self.lastblock = lastblock
	56	if not self.isValid() :
	57	raise PDLParserError, "Invalid file format !"
	58	try :
	59	import psyco
	60	except ImportError :
	61	sys.stderr.write("WARN: you should install psyco if possible, this would greatly speedup parsing.\n")
	62	pass # Psyco is not installed
	63	else :
	64	# Psyco is installed, tell it to compile
	65	# the CPU intensive methods : PCL and PCLXL
	66	# parsing will greatly benefit from this,
	67	# for PostScript and PDF the difference is
	68	# barely noticeable since they are already
	69	# almost optimal, and much more speedy anyway.
	70	psyco.bind(self.getJobSize)
	71
	72	def isValid(self) :
	73	"""Returns 1 if data is in the expected format, else 0."""
42	74	raise RuntimeError, "Not implemented !"
	75
	76	def getJobSize(self) :
	77	"""Counts pages in a document."""
	78	raise RuntimeError, "Not implemented !"

pkpgcounter/trunk/pdlanalyzer/postscript.py

r211	r220
27	27	from pdlanalyzer import pdlparser
28	28
29		class P~~ostScriptP~~arser(pdlparser.PDLParser) :
	29	class Parser(pdlparser.PDLParser) :
30	30	"""A parser for PostScript documents."""
	31	def isValid(self) :
	32	"""Returns 1 if data is PostScript, else 0."""
	33	if self.firstblock.startswith("%!") or \
	34	self.firstblock.startswith("\004%!") or \
	35	self.firstblock.startswith("\033%-12345X%!PS") or \
	36	((self.firstblock[:128].find("\033%-12345X") != -1) and \
	37	((self.firstblock.find("LANGUAGE=POSTSCRIPT") != -1) or \
	38	(self.firstblock.find("LANGUAGE = POSTSCRIPT") != -1) or \
	39	(self.firstblock.find("LANGUAGE = Postscript") != -1))) or \
	40	(self.firstblock.find("%!PS-Adobe") != -1) :
	41	if self.debug :
	42	sys.stderr.write("DEBUG: Input file is in the PostScript format.\n")
	43	return 1
	44	else :
	45	return 0
	46
31	47	def throughGhostScript(self) :
32	48	"""Get the count through GhostScript, useful for non-DSC compliant PS files."""
…	…
112	128	mustclose = 1
113	129	try :
114		parser = P~~ostScriptP~~arser(infile, debug=1)
	130	parser = Parser(infile, debug=1)
115	131	totalsize += parser.getJobSize()
116	132	except pdlparser.PDLParserError, msg :

pkpgcounter/trunk/pdlanalyzer/tiff.py

r219	r220
29	29	from pdlanalyzer import pdlparser
30	30
31		class ~~TIFF~~Parser(pdlparser.PDLParser) :
	31	class Parser(pdlparser.PDLParser) :
32	32	"""A parser for TIFF documents."""
	33	def isValid(self) :
	34	"""Returns 1 if data is TIFF, else 0."""
	35	littleendian = (chr(0x49)*2) + chr(0x2a) + chr(0)
	36	bigendian = (chr(0x4d)*2) + chr(0) + chr(0x2a)
	37	if self.firstblock[:4] in (littleendian, bigendian) :
	38	if self.debug :
	39	sys.stderr.write("DEBUG: Input file is in the TIFF format.\n")
	40	return 1
	41	else :
	42	return 0
	43
33	44	def getJobSize(self) :
34	45	"""Counts pages in a TIFF document.
…	…
77	88	mustclose = 1
78	89	try :
79		parser = ~~TIFF~~Parser(infile, debug=1)
	90	parser = Parser(infile, debug=1)
80	91	totalsize += parser.getJobSize()
81	92	except pdlparser.PDLParserError, msg :

pkpgcounter/trunk/pdlanalyzer/version.py

r217	r220
20	20	#
21	21
22		__version__ = "1.52"
	22	__version__ = "1.53"
23	23
24	24	__doc__ = """pkpgcounter : a generic Page Description Languages parser."""

pkpgcounter/trunk/tests/runtest.sh

r201	r220
3	3	echo -n "Generating testsuite..."
4	4	gunzip <master.ps.gz >master2.ps
5		for device in lj250 lj4dithp ljet2p ljet4pjl ljetplus laserjet ljet3 ljet4 lj5gray lj5mono pxlmono pxlcolor pdfwrite pswrite psgray psmono psrgb epson epsonc eps9mid eps9high stcolor st800 escp escpc pcl3 cdeskjet cdj1600 cdj500 cdj550 cdj670 cdj850 cdj880 cdj890 cdj970 cdjcolor cdjmono dj505j djet500 djet500c hpdj1120c hpdj310 hpdj320 hpdj340 hpdj400 hpdj500 hpdj500c hpdj510 hpdj520 hpdj540 hpdj550c hpdj560c hpdj600 hpdj660c hpdj670c hpdj680c hpdj690c hpdj850c hpdj855c hpdj870c hpdj890c hpdjplus hpdjportable ; do
	5	for device in lj250 lj4dithp ljet2p ljet4pjl ljetplus laserjet ljet3 ljet4 lj5gray lj5mono pxlmono pxlcolor pdfwrite pswrite psgray psmono psrgb epson epsonc eps9mid eps9high stcolor st800 escp escpc pcl3 cdeskjet cdj1600 cdj500 cdj550 cdj670 cdj850 cdj880 cdj890 cdj970 cdjcolor cdjmono dj505j djet500 djet500c hpdj1120c hpdj310 hpdj320 hpdj340 hpdj400 hpdj500 hpdj500c hpdj510 hpdj520 hpdj540 hpdj550c hpdj560c hpdj600 hpdj660c hpdj670c hpdj680c hpdj690c hpdj850c hpdj855c hpdj870c hpdj890c hpdjplus hpdjportable tiff12nc tiff24nc tiffcrle tiffg3 tiffg32d tiffg4 tifflzw tiffpack ; do
6	6	if ! [ -f "testsuite.$device" ] ; then
7	7	gs -dQUIET -dBATCH -dNOPAUSE -sOutputFile="testsuite.$device" -sDEVICE="$device" master2.ps ;

Download in other formats: