Context Navigation

← Previous Changeset
Next Changeset →

Changeset 1552

Timestamp:

06/19/04 00:21:27 (20 years ago)

Author:

jalet

Message:

Native PDF parser greatly improved.
GhostScript based PDF parser completely removed because native code
is now portable across Python versions.

Location:

pykota/trunk

Files:

: 3 modified

NEWS (modified) (1 diff)
pykota/pdlanalyzer.py (modified) (6 diffs)
pykota/version.py (modified) (1 diff)

Legend:

: Unmodified
: Added
: Removed

pykota/trunk/NEWS

r1550	r1552
22	22	PyKota NEWS :
23	23
	24	- 1.19alpha24 :
	25
	26	- Native PDF parsing code now works perfectly with
	27	Python 2.1 and above, and is way faster than before.
	28	GhostScript is not used anymore to convert PDF to PS,
	29	because PDF is parsed directly.
	30
24	31	- 1.19alpha23 :
25	32

pykota/trunk/pykota/pdlanalyzer.py

r1551	r1552
22	22	#
23	23	# $Log$
	24	# Revision 1.9 2004/06/18 22:21:27 jalet
	25	# Native PDF parser greatly improved.
	26	# GhostScript based PDF parser completely removed because native code
	27	# is now portable across Python versions.
	28	#
24	29	# Revision 1.8 2004/06/18 20:49:46 jalet
25	30	# "ERROR:" prefix added
…	…
51	56	import sys
52	57	import os
	58	import re
53	59	import struct
54	60	import tempfile
…	…
75	81	"""Count pages in a DSC compliant PostScript document."""
76	82	pagecount = 0
77		while 1 :
78		line = self.infile.readline()
79		if not line :
80		break
	83	for line in self.infile.xreadlines() :
81	84	if line.startswith("%%Page: ") :
82	85	pagecount += 1
…	…
87	90	"""Initialize PDF Analyzer."""
88	91	self.infile = infile
89		try :
90		if float(sys.version[:3]) >= 2.3 :
91		self.getJobSize = self.native_getJobSize
92		else :
93		self.getJobSize = self.gs_getJobSize
94		except :
95		self.getJobSize = self.gs_getJobSize
96	92
97		def native_getJobSize(self) :
98		"""Counts pages in a PDF document natively."""
	93	def getJobSize(self) :
	94	"""Counts pages in a PDF document."""
	95	regexp = re.compile(r"(/Type) ?(/Page)[/ \r\n]")
99	96	pagecount = 0
100		content = []
101		while 1 :
102		line = self.infile.readline()
103		if not line :
104		break
105		line = line.strip()
106		content.append(line)
107		if line.endswith("endobj") :
108		pagecount += " /".join([x.strip() for x in " ".join(content).split("/")]).count(" /Type /Page ")
109		content = []
110		return pagecount
111
112		def gs_getJobSize(self) :
113		"""Counts pages in a PDF document using GhostScript to convert PDF to PS."""
114		MEGABYTE = 1024*1024
115		child = popen2.Popen4("gs -q -dNOPAUSE -dBATCH -dSAFER -sDEVICE=pswrite -sOutputFile=- -c save pop -f - 2>/dev/null")
116		try :
117		data = self.infile.read(MEGABYTE)
118		while data :
119		child.tochild.write(data)
120		data = self.infile.read(MEGABYTE)
121		child.tochild.flush()
122		child.tochild.close()
123		except (IOError, OSError), msg :
124		raise PDLAnalyzerError, "Unable to convert PDF input to PS with GhostScript : %s" % msg
125
126		psanalyzer = PostScriptAnalyzer(child.fromchild)
127		pagecount = psanalyzer.getJobSize()
128		child.fromchild.close()
129		try :
130		retcode = child.wait()
131		except OSError, msg :
132		self.filter.logger.log_message(_("Problem while waiting for PDF to PS converter (GhostScript pid %s) to exit : %s") % (child.pid, msg))
133		else :
134		if os.WIFEXITED(retcode) :
135		status = os.WEXITSTATUS(retcode)
136		else :
137		status = retcode
138		if status :
139		raise PDLAnalyzerError, "PDF to PS converter (GhostScript pid %s) exit code is %s" % (child.pid, repr(status))
	97	for line in self.infile.xreadlines() :
	98	pagecount += len(regexp.findall(line))
140	99	return pagecount
141	100
…	…
458	417	else :
459	418	# normal file
460		self.infile = open(self.filename, "rbU") # TODO : "U" mode only works in 2.3, is ignored in 2.1 and 2.2
	419	self.infile = open(self.filename, "rb") # TODO : "U" mode only works in 2.3, is ignored in 2.1 and 2.2
461	420	self.mustclose = 1
462	421	return
…	…
464	423	# Use a temporary file, always seekable contrary to standard input.
465	424	# This also has the benefit to let us use the "U" mode (new in Python 2.3)
466		self.infile = tempfile.TemporaryFile(mode="w+bU") # TODO : "U" mode only works in 2.3, is ignored in 2.1 and 2.2
	425	self.infile = tempfile.TemporaryFile(mode="w+b") # TODO : "U" mode only works in 2.3, is ignored in 2.1 and 2.2
467	426	while 1 :
468	427	data = infile.read(MEGABYTE)

pykota/trunk/pykota/version.py

r1545	r1552
22	22	#
23	23
24		__version__ = "1.19alpha23_unofficial"
	24	__version__ = "1.19alpha24_unofficial"
25	25
26	26	__doc__ = """PyKota : a complete Printing Quota Solution for CUPS and LPRng."""