Context Navigation

pdlanalyzer.py @ 1622

Revision 1622, 26.1 kB (checked in by jalet, 20 years ago)
Added support for binary PostScript through GhostScript if the native DSC-compliant PostScript analyzer doesn't find any page. This is much slower though, so the native analyzer is tried first.
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`

Line
1	# PyKota
2	# -- coding: ISO-8859-15 --
3	#
4	# PyKota - Print Quotas for CUPS and LPRng
5	#
6	# (c) 2003-2004 Jerome Alet <alet@librelogiciel.com>
7	# This program is free software; you can redistribute it and/or modify
8	# it under the terms of the GNU General Public License as published by
9	# the Free Software Foundation; either version 2 of the License, or
10	# (at your option) any later version.
11	#
12	# This program is distributed in the hope that it will be useful,
13	# but WITHOUT ANY WARRANTY; without even the implied warranty of
14	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15	# GNU General Public License for more details.
16	#
17	# You should have received a copy of the GNU General Public License
18	# along with this program; if not, write to the Free Software
19	# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
20	#
21	# $Id$
22	#
23	# $Log$
24	# Revision 1.26 2004/07/22 13:49:51 jalet
25	# Added support for binary PostScript through GhostScript if native DSC
26	# compliant PostScript analyzer doesn't find any page. This is much
27	# slower though, so native analyzer is tried first.
28	#
29	# Revision 1.25 2004/07/10 14:06:36 jalet
30	# Fix for Python2.1 incompatibilities
31	#
32	# Revision 1.24 2004/07/05 21:00:39 jalet
33	# Fix for number of copies for each page in PCLXL parser
34	#
35	# Revision 1.23 2004/07/03 08:21:59 jalet
36	# Testsuite for PDL Analyzer added
37	#
38	# Revision 1.22 2004/06/29 14:21:41 jalet
39	# Smallish optimization
40	#
41	# Revision 1.21 2004/06/28 23:11:26 jalet
42	# Code de-factorization in PCLXL parser
43	#
44	# Revision 1.20 2004/06/28 22:38:41 jalet
45	# Increased speed by a factor of 2 in PCLXL parser
46	#
47	# Revision 1.19 2004/06/28 21:20:30 jalet
48	# PCLXL support now works !
49	#
50	# Revision 1.18 2004/06/27 22:59:37 jalet
51	# More work on PCLXL parser
52	#
53	# Revision 1.17 2004/06/26 23:20:01 jalet
54	# Additional speedup for GhostScript generated PCL5 files
55	#
56	# Revision 1.16 2004/06/26 15:31:00 jalet
57	# mmap reintroduced in PCL5 parser
58	#
59	# Revision 1.15 2004/06/26 14:14:31 jalet
60	# Now uses Psyco if it is available
61	#
62	# Revision 1.14 2004/06/25 09:50:28 jalet
63	# More debug info in PCLXL parser
64	#
65	# Revision 1.13 2004/06/25 08:10:08 jalet
66	# Another fix for PCL5 parser
67	#
68	# Revision 1.12 2004/06/24 23:09:53 jalet
69	# Fix for number of copies in PCL5 parser
70	#
71	# Revision 1.11 2004/06/23 22:07:50 jalet
72	# Fixed PCL5 parser according to the sources of rastertohp
73	#
74	# Revision 1.10 2004/06/18 22:24:03 jalet
75	# Removed old comments
76	#
77	# Revision 1.9 2004/06/18 22:21:27 jalet
78	# Native PDF parser greatly improved.
79	# GhostScript based PDF parser completely removed because native code
80	# is now portable across Python versions.
81	#
82	# Revision 1.8 2004/06/18 20:49:46 jalet
83	# "ERROR:" prefix added
84	#
85	# Revision 1.7 2004/06/18 17:48:04 jalet
86	# Added native fast PDF parsing method
87	#
88	# Revision 1.6 2004/06/18 14:00:16 jalet
89	# Added PDF support in smart PDL analyzer (through GhostScript for now)
90	#
91	# Revision 1.5 2004/06/18 10:09:05 jalet
92	# Resets file pointer to start of file in all cases
93	#
94	# Revision 1.4 2004/06/18 06:16:14 jalet
95	# Fixes PostScript detection code for incorrect drivers
96	#
97	# Revision 1.3 2004/05/21 20:40:08 jalet
98	# All the code for pkpgcounter is now in pdlanalyzer.py
99	#
100	# Revision 1.2 2004/05/19 19:09:36 jalet
101	# Speed improvement
102	#
103	# Revision 1.1 2004/05/18 09:59:54 jalet
104	# pkpgcounter is now just a wrapper around the PDLAnalyzer class
105	#
106	#
107	#
108
109	import sys
110	import os
111	import re
112	from struct import unpack
113	import tempfile
114	import mmap
115	import popen2
116
117	KILOBYTE = 1024 # bytes; size of the first block read to detect the document format
118	MEGABYTE = 1024 * KILOBYTE # bytes; chunk size used when copying job data around
119
class PDLAnalyzerError(Exception):
    """Error raised when a print job's data can't be analyzed.

    The text passed at construction time is stored in the `message`
    attribute; it is what both str() and repr() return for the
    exception.
    """

    def __init__(self, message=""):
        self.message = message
        Exception.__init__(self, message)

    def __str__(self):
        return self.message

    # repr() deliberately gives the bare message too.
    __repr__ = __str__
128
class PostScriptAnalyzer:
    """Counts the pages of a PostScript document.

    DSC compliant documents are counted natively by looking for
    "%%Page: " comments; when none is found the document is fed to
    GhostScript instead, which is much slower.
    """

    # Shell pipeline fed with the document on its standard input:
    # GhostScript's bbox device output (stdout and stderr merged) is
    # filtered by grep, and the number of "%%HiResBoundingBox:" lines is
    # what gets used as the page count.
    # The pipe character must NOT be backslash-escaped: an escaped pipe
    # is handed to gs as a literal "|" argument and grep never runs.
    GS_COMMAND = 'gs -sDEVICE=bbox -dNOPAUSE -dBATCH -dQUIET - 2>&1 | grep -c "%%HiResBoundingBox:" 2>/dev/null'

    def __init__(self, infile):
        """Initialize PostScript Analyzer.

        infile is the job's data stream, it must support seek(),
        read() and xreadlines().
        """
        self.infile = infile

    def throughGhostScript(self):
        """Get the count through GhostScript, useful for non-DSC compliant PS files.

        Returns the page count as an integer.

        Raises PDLAnalyzerError if the document can't be sent to the
        child process, if the answer can't be read back or isn't a
        number, or if waiting for the child process fails.
        """
        errormessage = "Problem during analysis of Binary PostScript document."
        self.infile.seek(0)
        child = popen2.Popen4(self.GS_COMMAND)
        succeeded = 0
        try:
            try:
                data = self.infile.read(MEGABYTE)
                while data:
                    child.tochild.write(data)
                    data = self.infile.read(MEGABYTE)
                child.tochild.flush()
                # closing its input tells the child the document is complete
                child.tochild.close()
            except (IOError, OSError):
                raise PDLAnalyzerError(errormessage)

            try:
                pagecount = int(child.fromchild.readline().strip())
            except (IOError, OSError, AttributeError, ValueError):
                raise PDLAnalyzerError(errormessage)
            succeeded = 1
        finally:
            # Whatever happened, release both pipes and reap the child
            # so that no file descriptor nor zombie process is left
            # behind. Closing an already closed pipe is harmless.
            for pipe in (child.tochild, child.fromchild):
                try:
                    pipe.close()
                except (IOError, OSError):
                    pass
            try:
                child.wait()
            except OSError:
                # don't mask the error which is already propagating
                if succeeded:
                    raise PDLAnalyzerError(errormessage)
        return pagecount

    def natively(self):
        """Count pages in a DSC compliant PostScript document.

        Returns the number of lines which begin with "%%Page: ".
        """
        self.infile.seek(0)
        pagecount = 0
        for line in self.infile.xreadlines():
            if line.startswith("%%Page: "):
                pagecount += 1
        return pagecount

    def getJobSize(self):
        """Count pages in PostScript document.

        The fast native count is used when it is not zero, otherwise
        the result comes from GhostScript.
        """
        return self.natively() or self.throughGhostScript()
174
class PDFAnalyzer:
    """Counts the pages of a PDF document."""

    def __init__(self, infile):
        """Initialize PDF Analyzer.

        infile is the job's data stream, it must support xreadlines().
        """
        self.infile = infile

    def getJobSize(self):
        """Counts pages in a PDF document.

        Every "/Type /Page" entry (the space is optional) which is
        followed by a slash, space, tab, carriage return or newline
        counts for one page. Lines are scanned one at a time, starting
        from the stream's current position.
        """
        findpages = re.compile(r"(/Type) ?(/Page)[/ \t\r\n]").findall
        total = 0
        for chunk in self.infile.xreadlines():
            total = total + len(findpages(chunk))
        return total
187
class PCLAnalyzer:
    """Counts the pages of a PCL5 document."""

    def __init__(self, infile):
        """Initialize PCL Analyzer.

        infile is the job's data stream. It must be backed by a real
        file descriptor because getJobSize() memory maps it.
        """
        self.infile = infile

    def getJobSize(self):
        """Count pages in a PCL5 document.

        Should also work for PCL3 and PCL4 documents.

        Algorithm from pclcount
        (c) 2003, by Eduardo Gielamo Oliveira & Rodolfo Broco Manin
        published under the terms of the GNU General Public Licence v2.

        Backported from C to Python by Jerome Alet, then enhanced
        with more PCL tags detected. I think all the necessary PCL tags
        are recognized to correctly handle PCL5 files wrt their number
        of pages. The documentation used for this was :

        HP PCL/PJL Reference Set
        PCL5 Printer Language Technical Quick Reference Guide
        http://h20000.www2.hp.com/bc/docs/support/SupportManual/bpl13205/bpl13205.pdf

        Returns the number of pages, copies included.
        """
        infileno = self.infile.fileno()
        minfile = mmap.mmap(infileno, os.fstat(infileno)[6], prot=mmap.PROT_READ, flags=mmap.MAP_SHARED)

        # Maps each supported two characters tag to the characters which
        # may terminate its numeric argument.
        #
        # <ESC>*b###W -> Start of a raster data row/block
        # <ESC>*b###V -> Start of a raster data plane
        # <ESC>*c###W -> Start of a user defined pattern
        # <ESC>*i###W -> Start of a viewing illuminant block
        # <ESC>*l###W -> Start of a color lookup table
        # <ESC>*m###W -> Start of a download dither matrix block
        # <ESC>*v###W -> Start of a configure image data block
        # <ESC>(s###W -> Start of a characters description block
        # <ESC>)s###W -> Start of a fonts description block
        # <ESC>(f###W -> Start of a symbol set block
        # <ESC>&b###W -> Start of configuration data block
        # <ESC>&l###X -> Number of copies for current page
        # <ESC>&n###W -> Starts an alphanumeric string ID block
        # <ESC>&p###X -> Start of a non printable characters block
        # <ESC>&a2G   -> Back side when duplex mode as generated by rastertohp
        # <ESC>&l0H   -> Eject if NumPlanes > 1, as generated by rastertohp
        tagsends = { "&n" : "W",
                     "&b" : "W",
                     "*i" : "W",
                     "*l" : "W",
                     "*m" : "W",
                     "*v" : "W",
                     "*c" : "W",
                     "(f" : "W",
                     "(s" : "W",
                     ")s" : "W",
                     "&p" : "X",
                     "&l" : "XH",
                     "&a" : "G",
                     # "*b" : "VW", # treated specially because it occurs very often
                   }
        pagecount = resets = ejects = backsides = 0
        copies = {}     # page index (form feeds seen so far) -> number of copies
        pos = 0
        try:
            # The only way out of this loop is the IndexError raised
            # when reading past the end of the memory mapped file.
            while 1:
                char = minfile[pos]
                pos += 1
                if char == "\014":
                    pagecount += 1
                elif char == "\033":
                    tagstart = minfile[pos]
                    pos += 1
                    if tagstart in "E9=YZ":     # one byte PCL tag
                        if tagstart == "E":
                            resets += 1
                        continue                # skip to next tag
                    tag = tagstart + minfile[pos]
                    pos += 1
                    if tag == "*b":
                        tagend = "VW"
                    else:
                        try:
                            tagend = tagsends[tag]
                        except KeyError:
                            continue            # Unsupported PCL tag

                    # Now read the numeric argument, char ends up being
                    # the first non digit character after it.
                    size = 0
                    while 1:
                        char = minfile[pos]
                        pos += 1
                        if not char.isdigit():
                            break
                        size = (size * 10) + int(char)
                    if char not in tagend:
                        continue

                    if tag == "&l":
                        # Neither <ESC>&l###X nor <ESC>&l###H is followed
                        # by a data block, so nothing must be skipped
                        # here, whatever the value is.
                        if char == "X":         # copies for current page
                            copies[pagecount] = size
                        elif size == 0:         # <ESC>&l0H : Eject
                            ejects += 1
                    elif tag == "&a":
                        # <ESC>&a###G isn't followed by a data block either.
                        if size == 2:           # Back side in duplex mode
                            backsides += 1
                    else:
                        # we just ignore the block.
                        if tag == "&n":
                            # we have to take care of the operation id byte
                            # which is before the string itself
                            size += 1
                        pos += size
        except IndexError:      # EOF ?
            minfile.close()     # reached EOF

        # if pagecount is still 0, we will use the number
        # of resets instead of the number of form feed characters.
        # but the number of resets is always at least 2 with a valid
        # pcl file : one at the very start and one at the very end
        # of the job's data. So we subtract 2 from the number of
        # resets. And since on our test data we needed to subtract
        # 1 more, we finally subtract 3, and will test several
        # PCL files with this. If resets <= 2, then the file is
        # probably not a valid PCL file, so we use 0
        if not pagecount:
            if resets > 2:
                pagecount = resets - 3
        else:
            # here we add counters for other ways new pages may have
            # been printed and ejected by the printer
            pagecount += ejects + backsides

        # now handle number of copies for each page (may differ).
        # in duplex mode, number of copies may be sent only once.
        total = pagecount
        for pnum in range(pagecount):
            # if no number of copies defined, take the preceding one else 1.
            nb = copies.get(pnum, copies.get(pnum - 1, 1))
            total += (nb - 1)
        return total
316
class PCLXLAnalyzer:
    """Counts the pages of a PCLXL (aka PCL6) document."""

    def __init__(self, infile):
        """Initialize PCLXL Analyzer.

        infile is read line by line until the " HP-PCL XL;" stream
        header is found, which leaves the file pointer just after this
        header line. The character which precedes the header gives the
        endianness of the binary stream.

        Raises PDLAnalyzerError if the header can't be found or if the
        endianness marker is unknown.
        """
        self.infile = infile
        self.endianness = None
        found = 0
        while not found:
            line = self.infile.readline()
            if not line:
                break
            if line[1:12] == " HP-PCL XL;":
                found = 1
                endian = ord(line[0])
                if endian == 0x29:
                    self.littleEndian()
                elif endian == 0x28:
                    self.bigEndian()
                # elif endian == 0x27 : TODO : What can we do here ?
                else:
                    raise PDLAnalyzerError("Unknown endianness marker 0x%02x at start !" % endian)
        if not found:
            raise PDLAnalyzerError("This file doesn't seem to be PCLXL (aka PCL6)")

        # Initialize table of tags : for each possible tag byte the
        # entry is either 0 (nothing to skip), the number of bytes of
        # data to skip after the tag, or a method which handles the tag
        # and returns the number of bytes to skip.
        self.tags = [ 0 ] * 256

        # GhostScript's sources tell us that HP printers
        # only accept little endianness, but we can handle both.
        self.tags[0x28] = self.bigEndian    # BigEndian
        self.tags[0x29] = self.littleEndian # LittleEndian

        self.tags[0x43] = self.beginPage    # BeginPage
        self.tags[0x44] = self.endPage      # EndPage

        self.tags[0xc0] = 1 # ubyte
        self.tags[0xc1] = 2 # uint16
        self.tags[0xc2] = 4 # uint32
        self.tags[0xc3] = 2 # sint16
        self.tags[0xc4] = 4 # sint32
        self.tags[0xc5] = 4 # real32

        self.tags[0xc8] = self.array_8  # ubyte_array
        self.tags[0xc9] = self.array_16 # uint16_array
        self.tags[0xca] = self.array_32 # uint32_array
        self.tags[0xcb] = self.array_16 # sint16_array
        self.tags[0xcc] = self.array_32 # sint32_array
        self.tags[0xcd] = self.array_32 # real32_array

        self.tags[0xd0] = 2 # ubyte_xy
        self.tags[0xd1] = 4 # uint16_xy
        self.tags[0xd2] = 8 # uint32_xy
        self.tags[0xd3] = 4 # sint16_xy
        self.tags[0xd4] = 8 # sint32_xy
        self.tags[0xd5] = 8 # real32_xy

        self.tags[0xe0] = 4  # ubyte_box
        self.tags[0xe1] = 8  # uint16_box
        self.tags[0xe2] = 16 # uint32_box
        self.tags[0xe3] = 8  # sint16_box
        self.tags[0xe4] = 16 # sint32_box
        self.tags[0xe5] = 16 # real32_box

        self.tags[0xf8] = 1 # attr_ubyte
        self.tags[0xf9] = 2 # attr_uint16

        self.tags[0xfa] = self.embeddedData      # dataLength
        self.tags[0xfb] = self.embeddedDataSmall # dataLengthByte

    def beginPage(self):
        """Indicates the beginning of a new page.

        Increments the page counter, returns 0 since no data follows.
        """
        self.pagecount += 1
        return 0

    def endPage(self):
        """Indicates the end of a page.

        Records the number of copies of the current page, under its
        1-based page number, when the EndPage operator is preceded by
        a PageCopies attribute. Returns 0 since no data follows.
        """
        pos = self.pos      # position just after the EndPage tag
        minfile = self.minfile
        if (ord(minfile[pos-3]) == 0xf8) and (ord(minfile[pos-2]) == 0x31):
            # The EndPage operator is preceded by a PageCopies attribute
            # So set number of copies for current page.
            # From what I read in PCLXL documentation, the number
            # of copies is an unsigned 16 bits integer
            self.copies[self.pagecount] = unpack(self.endianness + "H", minfile[pos-5:pos-3])[0]
        return 0

    def array_8(self):
        """Handles arrays of one byte long elements.

        Reads the array's number of elements, moves self.pos just
        after it, and returns the size in bytes of the array's data.
        """
        pos = self.pos
        datatype = self.minfile[pos]
        pos += 1
        # size in bytes of the number of elements which follows
        length = self.tags[ord(datatype)]
        if callable(length):
            self.pos = pos
            length = length()
            pos = self.pos
        posl = pos + length
        self.pos = posl
        if length == 1:
            return unpack("B", self.minfile[pos:posl])[0]
        elif length == 2:
            return unpack(self.endianness + "H", self.minfile[pos:posl])[0]
        elif length == 4:
            return unpack(self.endianness + "I", self.minfile[pos:posl])[0]
        else:
            raise PDLAnalyzerError("Error on array size at %s" % self.pos)

    def array_16(self):
        """Handles arrays of two bytes long elements.

        Reads the array's number of elements, moves self.pos just
        after it, and returns the size in bytes of the array's data.
        """
        pos = self.pos
        datatype = self.minfile[pos]
        pos += 1
        # size in bytes of the number of elements which follows
        length = self.tags[ord(datatype)]
        if callable(length):
            self.pos = pos
            length = length()
            pos = self.pos
        posl = pos + length
        self.pos = posl
        if length == 1:
            return 2 * unpack("B", self.minfile[pos:posl])[0]
        elif length == 2:
            return 2 * unpack(self.endianness + "H", self.minfile[pos:posl])[0]
        elif length == 4:
            return 2 * unpack(self.endianness + "I", self.minfile[pos:posl])[0]
        else:
            raise PDLAnalyzerError("Error on array size at %s" % self.pos)

    def array_32(self):
        """Handles arrays of four bytes long elements.

        Reads the array's number of elements, moves self.pos just
        after it, and returns the size in bytes of the array's data.
        """
        pos = self.pos
        datatype = self.minfile[pos]
        pos += 1
        # size in bytes of the number of elements which follows
        length = self.tags[ord(datatype)]
        if callable(length):
            self.pos = pos
            length = length()
            pos = self.pos
        posl = pos + length
        self.pos = posl
        if length == 1:
            return 4 * unpack("B", self.minfile[pos:posl])[0]
        elif length == 2:
            return 4 * unpack(self.endianness + "H", self.minfile[pos:posl])[0]
        elif length == 4:
            return 4 * unpack(self.endianness + "I", self.minfile[pos:posl])[0]
        else:
            raise PDLAnalyzerError("Error on array size at %s" % self.pos)

    def embeddedDataSmall(self):
        """Handle small amounts of data.

        The length of the data is the single byte found at self.pos :
        moves self.pos just after it and returns this length.
        """
        pos = self.pos
        length = ord(self.minfile[pos])
        self.pos = pos + 1
        return length

    def embeddedData(self):
        """Handle normal amounts of data.

        The length of the data is the unsigned 32 bits integer found at
        self.pos : moves self.pos just after it and returns this length.
        """
        pos = self.pos
        pos4 = pos + 4
        self.pos = pos4
        return unpack(self.endianness + "I", self.minfile[pos:pos4])[0]

    def littleEndian(self):
        """Toggles to little endianness."""
        self.endianness = "<" # little endian
        return 0

    def bigEndian(self):
        """Toggles to big endianness."""
        self.endianness = ">" # big endian
        return 0

    def _addCopies(self):
        """Adds the extra copies of each page to self.pagecount and returns it.

        endPage() records the copies under 1-based page numbers, because
        beginPage() has already incremented the page counter when
        endPage() is called : pages have to be looked up from 1 to
        self.pagecount inclusive, otherwise the copies of the last page
        are lost.
        """
        for pnum in range(1, self.pagecount + 1):
            # if no number of copies defined, take 1, as explained
            # in PCLXL documentation.
            # NB : if number of copies is 0, the page won't be output
            # but the formula below is still correct : we want
            # to decrease the total number of pages in this case.
            self.pagecount += (self.copies.get(pnum, 1) - 1)
        return self.pagecount

    def getJobSize(self):
        """Counts pages in a PCLXL (PCL6) document.

        Algorithm by Jerome Alet.

        The documentation used for this was :

        HP PCL XL Feature Reference
        Protocol Class 2.0
        http://www.hpdevelopersolutions.com/downloads/64/358/xl_ref20r22.pdf

        Returns the number of pages, copies included.
        """
        infileno = self.infile.fileno()
        self.copies = {}
        self.minfile = minfile = mmap.mmap(infileno, os.fstat(infileno)[6], prot=mmap.PROT_READ, flags=mmap.MAP_SHARED)
        tags = self.tags
        self.pagecount = 0
        # parsing starts where the constructor stopped reading
        self.pos = pos = self.infile.tell()
        try:
            # The only way out of this loop is the IndexError raised
            # when reading past the end of the memory mapped file.
            while 1:
                char = minfile[pos]
                pos += 1
                length = tags[ord(char)]
                if not length:
                    continue
                if callable(length):
                    # handlers read and update the position through self.pos
                    self.pos = pos
                    length = length()
                    pos = self.pos
                pos += length
        except IndexError:          # EOF ?
            self.minfile.close()    # reached EOF

        # now handle number of copies for each page (may differ).
        return self._addCopies()
532
533	class PDLAnalyzer :
534	"""Generic PDL Analyzer class."""
535	def __init__(self, filename) :
536	"""Initializes the PDL analyzer.
537
538	filename is the name of the file or '-' for stdin.
539	filename can also be a file-like object which
540	supports read() and seek().
541	"""
542	self.filename = filename
543	try :
544	import psyco
545	except ImportError :
546	pass # Psyco is not installed
547	else :
548	# Psyco is installed, tell it to compile
549	# the CPU intensive methods : PCL and PCLXL
550	# parsing will greatly benefit from this,
551	# for PostScript and PDF the difference is
552	# barely noticeable since they are already
553	# almost optimal, and much more speedy anyway.
554	psyco.bind(PostScriptAnalyzer.getJobSize)
555	psyco.bind(PDFAnalyzer.getJobSize)
556	psyco.bind(PCLAnalyzer.getJobSize)
557	psyco.bind(PCLXLAnalyzer.getJobSize)
558
559	def getJobSize(self) :
560	"""Returns the job's size."""
561	self.openFile()
562	try :
563	pdlhandler = self.detectPDLHandler()
564	except PDLAnalyzerError, msg :
565	self.closeFile()
566	raise PDLAnalyzerError, "ERROR : Unknown file format for %s (%s)" % (self.filename, msg)
567	else :
568	try :
569	size = pdlhandler(self.infile).getJobSize()
570	finally :
571	self.closeFile()
572	return size
573
574	def openFile(self) :
575	"""Opens the job's data stream for reading."""
576	self.mustclose = 0 # by default we don't want to close the file when finished
577	if hasattr(self.filename, "read") and hasattr(self.filename, "seek") :
578	# filename is in fact a file-like object
579	infile = self.filename
580	elif self.filename == "-" :
581	# we must read from stdin
582	infile = sys.stdin
583	else :
584	# normal file
585	self.infile = open(self.filename, "rb")
586	self.mustclose = 1
587	return
588
589	# Use a temporary file, always seekable contrary to standard input.
590	self.infile = tempfile.TemporaryFile(mode="w+b")
591	while 1 :
592	data = infile.read(MEGABYTE)
593	if not data :
594	break
595	self.infile.write(data)
596	self.infile.flush()
597	self.infile.seek(0)
598
599	def closeFile(self) :
600	"""Closes the job's data stream if we can close it."""
601	if self.mustclose :
602	self.infile.close()
603	else :
604	# if we don't have to close the file, then
605	# ensure the file pointer is reset to the
606	# start of the file in case the process wants
607	# to read the file again.
608	try :
609	self.infile.seek(0)
610	except :
611	pass # probably stdin, which is not seekable
612
613	def isPostScript(self, data) :
614	"""Returns 1 if data is PostScript, else 0."""
615	if data.startswith("%!") or \
616	data.startswith("\004%!") or \
617	data.startswith("\033%-12345X%!PS") or \
618	((data[:128].find("\033%-12345X") != -1) and \
619	((data.find("LANGUAGE=POSTSCRIPT") != -1) or \
620	(data.find("LANGUAGE = POSTSCRIPT") != -1) or \
621	(data.find("LANGUAGE = Postscript") != -1))) or \
622	(data.find("%!PS-Adobe") != -1) :
623	return 1
624	else :
625	return 0
626
627	def isPDF(self, data) :
628	"""Returns 1 if data is PDF, else 0."""
629	if data.startswith("%PDF-") or \
630	data.startswith("\033%-12345X%PDF-") or \
631	((data[:128].find("\033%-12345X") != -1) and (data.upper().find("LANGUAGE=PDF") != -1)) or \
632	(data.find("%PDF-") != -1) :
633	return 1
634	else :
635	return 0
636
637	def isPCL(self, data) :
638	"""Returns 1 if data is PCL, else 0."""
639	if data.startswith("\033E\033") or \
640	((data[:128].find("\033%-12345X") != -1) and \
641	((data.find("LANGUAGE=PCL") != -1) or \
642	(data.find("LANGUAGE = PCL") != -1) or \
643	(data.find("LANGUAGE = Pcl") != -1))) :
644	return 1
645	else :
646	return 0
647
648	def isPCLXL(self, data) :
649	"""Returns 1 if data is PCLXL aka PCL6, else 0."""
650	if ((data[:128].find("\033%-12345X") != -1) and \
651	(data.find(" HP-PCL XL;") != -1) and \
652	((data.find("LANGUAGE=PCLXL") != -1) or \
653	(data.find("LANGUAGE = PCLXL") != -1))) :
654	return 1
655	else :
656	return 0
657
658	def detectPDLHandler(self) :
659	"""Tries to autodetect the document format.
660
661	Returns the correct PDL handler class or None if format is unknown
662	"""
663	# Try to detect file type by reading first block of datas
664	self.infile.seek(0)
665	firstblock = self.infile.read(KILOBYTE)
666	self.infile.seek(0)
667	if self.isPostScript(firstblock) :
668	return PostScriptAnalyzer
669	elif self.isPCLXL(firstblock) :
670	return PCLXLAnalyzer
671	elif self.isPCL(firstblock) :
672	return PCLAnalyzer
673	elif self.isPDF(firstblock) :
674	return PDFAnalyzer
675	else :
676	raise PDLAnalyzerError, "Analysis of first data block failed."
677
def main():
    """Entry point for PDL Analyzer.

    Each command line argument is a file to analyze, '-' meaning
    standard input. Standard input is added to the list when no
    argument was given, or when it is not a terminal and '-' was not
    given explicitly. Files which can't be analyzed are reported on
    standard error, then the total size is printed on standard output.
    """
    if len(sys.argv) < 2 or not (sys.stdin.isatty() or "-" in sys.argv[1:]):
        sys.argv.append("-")

    total = 0
    for name in sys.argv[1:]:
        try:
            total += PDLAnalyzer(name).getJobSize()
        except PDLAnalyzerError:
            sys.stderr.write("ERROR: %s\n" % sys.exc_info()[1])
            sys.stderr.flush()
    sys.stdout.write("%s\n" % total)

if __name__ == "__main__":
    main()

Note: See TracBrowser for help on using the browser.

Context Navigation

root / pykota / trunk / pykota / pdlanalyzer.py @ 1622

Download in other formats: