Context Navigation

← Previous Changeset
Next Changeset →

Changeset 199

Timestamp:

04/04/05 13:59:39 (20 years ago)

Author:

jerome

Message:

More work done on moving stuff around

Location:

pkpgcounter/trunk

Files:

3 modified

bin/pkpgcounter (modified) (1 diff)
pdlanalyzer/pdlparser.py (modified) (2 diffs)
setup.py (modified) (1 diff)

Legend:

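Unmodified: line numbers shown in both the old and new revision columns
Added: line number shown only in the new revision column
Removed: line number shown only in the old revision column (removed text may be struck through)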

pkpgcounter/trunk/bin/pkpgcounter

r195	r199
23	23	#
24	24
25		import sys
26		import tempfile
27
28		from pdlanalyzer import version
	25	from pdlanalyzer import pdlparser
29	26
30		KILOBYTE = 1024
31		MEGABYTE = 1024 * KILOBYTE
32		LASTBLOCKSIZE = int(KILOBYTE / 4)
	27	if __name__ == "__main__" :
	28	pdlparser.main()
33	29
34		~~class PDLAnalyzer :~~
35		~~"""Generic PDL Analyzer class."""~~
36		~~def __init__(self, filename, debug=0) :~~
37		~~"""Initializes the PDL analyzer.~~
38
39		~~filename is the name of the file or '-' for stdin.~~
40		~~filename can also be a file-like object which~~
41		~~supports read() and seek().~~
42		~~"""~~
43		~~self.debug = debug~~
44		~~self.filename = filename~~
45		~~try :~~
46		~~import psyco~~
47		~~except ImportError :~~
48		~~sys.stderr.write("pkpgcounter : you should install psyco if possible, this would greatly speedup parsing.\n")~~
49		~~pass # Psyco is not installed~~
50		~~else :~~
51		~~# Psyco is installed, tell it to compile~~
52		~~# the CPU intensive methods : PCL and PCLXL~~
53		~~# parsing will greatly benefit from this,~~
54		~~# for PostScript and PDF the difference is~~
55		~~# barely noticeable since they are already~~
56		~~# almost optimal, and much more speedy anyway.~~
57		~~psyco.bind(PostScriptAnalyzer.getJobSize)~~
58		~~psyco.bind(PDFAnalyzer.getJobSize)~~
59		~~psyco.bind(ESCP2Analyzer.getJobSize)~~
60		~~psyco.bind(PCLAnalyzer.getJobSize)~~
61		~~psyco.bind(PCLXLAnalyzer.getJobSize)~~
62
63		~~def getJobSize(self) :~~
64		~~"""Returns the job's size."""~~
65		~~self.openFile()~~
66		~~try :~~
67		~~pdlhandler = self.detectPDLHandler()~~
68		~~except PDLAnalyzerError, msg :~~
69		~~self.closeFile()~~
70		~~raise PDLAnalyzerError, "ERROR : Unknown file format for %s (%s)" % (self.filename, msg)~~
71		~~else :~~
72		~~try :~~
73		~~size = pdlhandler(self.infile, self.debug).getJobSize()~~
74		~~finally :~~
75		~~self.closeFile()~~
76		~~return size~~
77
78		~~def openFile(self) :~~
79		~~"""Opens the job's data stream for reading."""~~
80		~~self.mustclose = 0 # by default we don't want to close the file when finished~~
81		~~if hasattr(self.filename, "read") and hasattr(self.filename, "seek") :~~
82		~~# filename is in fact a file-like object~~
83		~~infile = self.filename~~
84		~~elif self.filename == "-" :~~
85		~~# we must read from stdin~~
86		~~infile = sys.stdin~~
87		~~else :~~
88		~~# normal file~~
89		~~self.infile = open(self.filename, "rb")~~
90		~~self.mustclose = 1~~
91		~~return~~
92
93		~~# Use a temporary file, always seekable contrary to standard input.~~
94		~~self.infile = tempfile.TemporaryFile(mode="w+b")~~
95		~~while 1 :~~
96		~~data = infile.read(MEGABYTE)~~
97		~~if not data :~~
98		~~break~~
99		~~self.infile.write(data)~~
100		~~self.infile.flush()~~
101		~~self.infile.seek(0)~~
102
103		~~def closeFile(self) :~~
104		~~"""Closes the job's data stream if we can close it."""~~
105		~~if self.mustclose :~~
106		~~self.infile.close()~~
107		~~else :~~
108		~~# if we don't have to close the file, then~~
109		~~# ensure the file pointer is reset to the~~
110		~~# start of the file in case the process wants~~
111		~~# to read the file again.~~
112		~~try :~~
113		~~self.infile.seek(0)~~
114		~~except :~~
115		~~pass # probably stdin, which is not seekable~~
116
117		~~def isPostScript(self, sdata, edata) :~~
118		~~"""Returns 1 if data is PostScript, else 0."""~~
119		~~if sdata.startswith("%!") or \~~
120		~~sdata.startswith("\004%!") or \~~
121		~~sdata.startswith("\033%-12345X%!PS") or \~~
122		~~((sdata[:128].find("\033%-12345X") != -1) and \~~
123		~~((sdata.find("LANGUAGE=POSTSCRIPT") != -1) or \~~
124		~~(sdata.find("LANGUAGE = POSTSCRIPT") != -1) or \~~
125		~~(sdata.find("LANGUAGE = Postscript") != -1))) or \~~
126		~~(sdata.find("%!PS-Adobe") != -1) :~~
127		~~if self.debug :~~
128		~~sys.stderr.write("%s is a PostScript file\n" % str(self.filename))~~
129		~~return 1~~
130		~~else :~~
131		~~return 0~~
132
133		~~def isPDF(self, sdata, edata) :~~
134		~~"""Returns 1 if data is PDF, else 0."""~~
135		~~if sdata.startswith("%PDF-") or \~~
136		~~sdata.startswith("\033%-12345X%PDF-") or \~~
137		~~((sdata[:128].find("\033%-12345X") != -1) and (sdata.upper().find("LANGUAGE=PDF") != -1)) or \~~
138		~~(sdata.find("%PDF-") != -1) :~~
139		~~if self.debug :~~
140		~~sys.stderr.write("%s is a PDF file\n" % str(self.filename))~~
141		~~return 1~~
142		~~else :~~
143		~~return 0~~
144
145		~~def isPCL(self, sdata, edata) :~~
146		~~"""Returns 1 if data is PCL, else 0."""~~
147		~~if sdata.startswith("\033E\033") or \~~
148		~~(sdata.startswith("\033*rbC") and (not edata[-3:] == "\f\033@")) or \~~
149		~~sdata.startswith("\033%8\033") or \~~
150		~~(sdata.find("\033%-12345X") != -1) :~~
151		~~if self.debug :~~
152		~~sys.stderr.write("%s is a PCL3/4/5 file\n" % str(self.filename))~~
153		~~return 1~~
154		~~else :~~
155		~~return 0~~
156
157		~~def isPCLXL(self, sdata, edata) :~~
158		~~"""Returns 1 if data is PCLXL aka PCL6, else 0."""~~
159		~~if ((sdata[:128].find("\033%-12345X") != -1) and \~~
160		~~(sdata.find(" HP-PCL XL;") != -1) and \~~
161		~~((sdata.find("LANGUAGE=PCLXL") != -1) or \~~
162		~~(sdata.find("LANGUAGE = PCLXL") != -1))) :~~
163		~~if self.debug :~~
164		~~sys.stderr.write("%s is a PCLXL (aka PCL6) file\n" % str(self.filename))~~
165		~~return 1~~
166		~~else :~~
167		~~return 0~~
168
169		~~def isESCP2(self, sdata, edata) :~~
170		~~"""Returns 1 if data is ESC/P2, else 0."""~~
171		~~if sdata.startswith("\033@") or \~~
172		~~sdata.startswith("\033*") or \~~
173		~~sdata.startswith("\n\033@") or \~~
174		~~sdata.startswith("\0\0\0\033\1@EJL") : # ESC/P Raster ??? Seen on Stylus Photo 1284~~
175		~~if self.debug :~~
176		~~sys.stderr.write("%s is an ESC/P2 file\n" % str(self.filename))~~
177		~~return 1~~
178		~~else :~~
179		~~return 0~~
180
181		~~def detectPDLHandler(self) :~~
182		~~"""Tries to autodetect the document format.~~
183
184		~~Returns the correct PDL handler class or None if format is unknown~~
185		~~"""~~
186		~~# Try to detect file type by reading first block of datas~~
187		~~self.infile.seek(0)~~
188		~~firstblock = self.infile.read(4 * KILOBYTE)~~
189		~~try :~~
190		~~self.infile.seek(-LASTBLOCKSIZE, 2)~~
191		~~lastblock = self.infile.read(LASTBLOCKSIZE)~~
192		~~except IOError :~~
193		~~lastblock = ""~~
194
195		~~self.infile.seek(0)~~
196		~~if self.isPostScript(firstblock, lastblock) :~~
197		~~return PostScriptAnalyzer~~
198		~~elif self.isPCLXL(firstblock, lastblock) :~~
199		~~return PCLXLAnalyzer~~
200		~~elif self.isPDF(firstblock, lastblock) :~~
201		~~return PDFAnalyzer~~
202		~~elif self.isPCL(firstblock, lastblock) :~~
203		~~return PCLAnalyzer~~
204		~~elif self.isESCP2(firstblock, lastblock) :~~
205		~~return ESCP2Analyzer~~
206		~~else :~~
207		~~raise PDLAnalyzerError, "Analysis of first data block failed."~~
208
209		~~def main() :~~
210		~~"""Entry point for PDL Analyzer."""~~
211
212		~~sys.stderr.write("Big changes are currently occuring in the developpment version, please use the stable version for now !\n")~~
213		~~sys.exit(-1)~~
214
215		~~if (len(sys.argv) < 2) or ((not sys.stdin.isatty()) and ("-" not in sys.argv[1:])) :~~
216		~~sys.argv.append("-")~~
217
218		~~if ("-h" in sys.argv[1:]) or ("--help" in sys.argv[1:]) :~~
219		~~print "usage : pkpgcounter file1 file2 ... fileN"~~
220		~~elif ("-v" in sys.argv[1:]) or ("--version" in sys.argv[1:]) :~~
221		~~print "%s" % version.__version__~~
222		~~else :~~
223		~~totalsize = 0~~
224		~~debug = 0~~
225		~~minindex = 1~~
226		~~if sys.argv[1] == "--debug" :~~
227		~~minindex = 2~~
228		~~debug = 1~~
229		~~for arg in sys.argv[minindex:] :~~
230		~~try :~~
231		~~parser = PDLAnalyzer(arg, debug)~~
232		~~totalsize += parser.getJobSize()~~
233		~~except PDLAnalyzerError, msg :~~
234		~~sys.stderr.write("ERROR: %s\n" % msg)~~
235		~~sys.stderr.flush()~~
236		~~print "%s" % totalsize~~
237
238		~~if __name__ == "__main__" :~~
239		~~main()~~

pkpgcounter/trunk/pdlanalyzer/pdlparser.py

r193	r199
19	19	# $Id$
20	20	#
	21
	22	import sys
	23	import tempfile
	24
	25	from pdlanalyzer.postscript import PostScriptParser
	26	from pdlanalyzer.pdf import PDFParser
	27	from pdlanalyzer.pcl345 import PCL345Parser
	28	from pdlanalyzer.pclxl import PCLXLParser
	29	from pdlanalyzer.escp2 import ESCP2Parser
	30
	31	KILOBYTE = 1024
	32	MEGABYTE = 1024 * KILOBYTE
	33	LASTBLOCKSIZE = int(KILOBYTE / 4)
21	34
22	35	class PDLParserError(Exception):
…	…
39	52	"""Counts pages in the document."""
40	53	raise RuntimeError, "Not implemented !"
	54
	55	class PDLAnalyzer :
	56	"""Class for PDL autodetection."""
	57	def __init__(self, filename, debug=0) :
	58	"""Initializes the PDL analyzer.
	59
	60	filename is the name of the file or '-' for stdin.
	61	filename can also be a file-like object which
	62	supports read() and seek().
	63	"""
	64	self.debug = debug
	65	self.filename = filename
	66	try :
	67	import psyco
	68	except ImportError :
	69	sys.stderr.write("pkpgcounter : you should install psyco if possible, this would greatly speedup parsing.\n")
	70	pass # Psyco is not installed
	71	else :
	72	# Psyco is installed, tell it to compile
	73	# the CPU intensive methods : PCL and PCLXL
	74	# parsing will greatly benefit from this,
	75	# for PostScript and PDF the difference is
	76	# barely noticeable since they are already
	77	# almost optimal, and much more speedy anyway.
	78	psyco.bind(PostScriptParser.getJobSize)
	79	psyco.bind(PDFParser.getJobSize)
	80	psyco.bind(ESCP2Parser.getJobSize)
	81	psyco.bind(PCLParser.getJobSize)
	82	psyco.bind(PCLXLParser.getJobSize)
	83
	84	def getJobSize(self) :
	85	"""Returns the job's size."""
	86	self.openFile()
	87	try :
	88	pdlhandler = self.detectPDLHandler()
	89	except PDLParserError, msg :
	90	self.closeFile()
	91	raise PDLParserError, "ERROR : Unknown file format for %s (%s)" % (self.filename, msg)
	92	else :
	93	try :
	94	size = pdlhandler(self.infile, self.debug).getJobSize()
	95	finally :
	96	self.closeFile()
	97	return size
	98
	99	def openFile(self) :
	100	"""Opens the job's data stream for reading."""
	101	self.mustclose = 0 # by default we don't want to close the file when finished
	102	if hasattr(self.filename, "read") and hasattr(self.filename, "seek") :
	103	# filename is in fact a file-like object
	104	infile = self.filename
	105	elif self.filename == "-" :
	106	# we must read from stdin
	107	infile = sys.stdin
	108	else :
	109	# normal file
	110	self.infile = open(self.filename, "rb")
	111	self.mustclose = 1
	112	return
	113
	114	# Use a temporary file, always seekable contrary to standard input.
	115	self.infile = tempfile.TemporaryFile(mode="w+b")
	116	while 1 :
	117	data = infile.read(MEGABYTE)
	118	if not data :
	119	break
	120	self.infile.write(data)
	121	self.infile.flush()
	122	self.infile.seek(0)
	123
	124	def closeFile(self) :
	125	"""Closes the job's data stream if we can close it."""
	126	if self.mustclose :
	127	self.infile.close()
	128	else :
	129	# if we don't have to close the file, then
	130	# ensure the file pointer is reset to the
	131	# start of the file in case the process wants
	132	# to read the file again.
	133	try :
	134	self.infile.seek(0)
	135	except :
	136	pass # probably stdin, which is not seekable
	137
	138	def isPostScript(self, sdata, edata) :
	139	"""Returns 1 if data is PostScript, else 0."""
	140	if sdata.startswith("%!") or \
	141	sdata.startswith("\004%!") or \
	142	sdata.startswith("\033%-12345X%!PS") or \
	143	((sdata[:128].find("\033%-12345X") != -1) and \
	144	((sdata.find("LANGUAGE=POSTSCRIPT") != -1) or \
	145	(sdata.find("LANGUAGE = POSTSCRIPT") != -1) or \
	146	(sdata.find("LANGUAGE = Postscript") != -1))) or \
	147	(sdata.find("%!PS-Adobe") != -1) :
	148	if self.debug :
	149	sys.stderr.write("%s is a PostScript file\n" % str(self.filename))
	150	return 1
	151	else :
	152	return 0
	153
	154	def isPDF(self, sdata, edata) :
	155	"""Returns 1 if data is PDF, else 0."""
	156	if sdata.startswith("%PDF-") or \
	157	sdata.startswith("\033%-12345X%PDF-") or \
	158	((sdata[:128].find("\033%-12345X") != -1) and (sdata.upper().find("LANGUAGE=PDF") != -1)) or \
	159	(sdata.find("%PDF-") != -1) :
	160	if self.debug :
	161	sys.stderr.write("%s is a PDF file\n" % str(self.filename))
	162	return 1
	163	else :
	164	return 0
	165
	166	def isPCL(self, sdata, edata) :
	167	"""Returns 1 if data is PCL, else 0."""
	168	if sdata.startswith("\033E\033") or \
	169	(sdata.startswith("\033*rbC") and (not edata[-3:] == "\f\033@")) or \
	170	sdata.startswith("\033%8\033") or \
	171	(sdata.find("\033%-12345X") != -1) :
	172	if self.debug :
	173	sys.stderr.write("%s is a PCL3/4/5 file\n" % str(self.filename))
	174	return 1
	175	else :
	176	return 0
	177
	178	def isPCLXL(self, sdata, edata) :
	179	"""Returns 1 if data is PCLXL aka PCL6, else 0."""
	180	if ((sdata[:128].find("\033%-12345X") != -1) and \
	181	(sdata.find(" HP-PCL XL;") != -1) and \
	182	((sdata.find("LANGUAGE=PCLXL") != -1) or \
	183	(sdata.find("LANGUAGE = PCLXL") != -1))) :
	184	if self.debug :
	185	sys.stderr.write("%s is a PCLXL (aka PCL6) file\n" % str(self.filename))
	186	return 1
	187	else :
	188	return 0
	189
	190	def isESCP2(self, sdata, edata) :
	191	"""Returns 1 if data is ESC/P2, else 0."""
	192	if sdata.startswith("\033@") or \
	193	sdata.startswith("\033*") or \
	194	sdata.startswith("\n\033@") or \
	195	sdata.startswith("\0\0\0\033\1@EJL") : # ESC/P Raster ??? Seen on Stylus Photo 1284
	196	if self.debug :
	197	sys.stderr.write("%s is an ESC/P2 file\n" % str(self.filename))
	198	return 1
	199	else :
	200	return 0
	201
	202	def detectPDLHandler(self) :
	203	"""Tries to autodetect the document format.
	204
	205	Returns the correct PDL handler class or None if format is unknown
	206	"""
	207	# Try to detect file type by reading first block of datas
	208	self.infile.seek(0)
	209	firstblock = self.infile.read(4 * KILOBYTE)
	210	try :
	211	self.infile.seek(-LASTBLOCKSIZE, 2)
	212	lastblock = self.infile.read(LASTBLOCKSIZE)
	213	except IOError :
	214	lastblock = ""
	215	self.infile.seek(0)
	216	if self.isPostScript(firstblock, lastblock) :
	217	return PostScriptParser
	218	elif self.isPCLXL(firstblock, lastblock) :
	219	return PCLXLParser
	220	elif self.isPDF(firstblock, lastblock) :
	221	return PDFParser
	222	elif self.isPCL(firstblock, lastblock) :
	223	return PCLParser
	224	elif self.isESCP2(firstblock, lastblock) :
	225	return ESCP2Parser
	226	else :
	227	raise PDLParserError, "Analysis of first data block failed."
	228
	229	def main() :
	230	"""Entry point for PDL Analyzer."""
	231	if (len(sys.argv) < 2) or ((not sys.stdin.isatty()) and ("-" not in sys.argv[1:])) :
	232	sys.argv.append("-")
	233
	234	if ("-h" in sys.argv[1:]) or ("--help" in sys.argv[1:]) :
	235	print "usage : pkpgcounter file1 file2 ... fileN"
	236	elif ("-v" in sys.argv[1:]) or ("--version" in sys.argv[1:]) :
	237	print "%s" % version.__version__
	238	else :
	239	totalsize = 0
	240	debug = 0
	241	minindex = 1
	242	if sys.argv[1] == "--debug" :
	243	minindex = 2
	244	debug = 1
	245	for arg in sys.argv[minindex:] :
	246	try :
	247	parser = PDLAnalyzer(arg, debug)
	248	totalsize += parser.getJobSize()
	249	except PDLParserError, msg :
	250	sys.stderr.write("ERROR: %s\n" % msg)
	251	sys.stderr.flush()
	252	print "%s" % totalsize
	253
	254	if __name__ == "__main__" :
	255	main()

pkpgcounter/trunk/setup.py

r198	r199
45	45
46	46	docdir = "share/doc/pkpgcounter"
47		docfiles = ["README", "~~FAQ", "SECURITY", "COPYING", "LICENSE", "CREDITS", "TODO~~", "NEWS"]
	47	docfiles = ["README", "COPYING", "BUGS", "CREDITS", "NEWS"]
48	48	data_files.append((docdir, docfiles))
49	49

Context Navigation

Changeset 199

Legend:

pkpgcounter/trunk/bin/pkpgcounter

pkpgcounter/trunk/pdlanalyzer/pdlparser.py

pkpgcounter/trunk/setup.py

Download in other formats: