Context Navigation

← Previous Change
Next Change →

pdlanalyzer.py

Timestamp:

06/19/04 00:21:27 (20 years ago)

Author:

jalet

Message:

Native PDF parser greatly improved.
GhostScript based PDF parser completely removed because native code
is now portable across Python versions.

Files:

1 modified

pykota/trunk/pykota/pdlanalyzer.py (modified) (6 diffs)

Legend:

: Unmodified
: Added
: Removed

pykota/trunk/pykota/pdlanalyzer.py

r1551	r1552
22	22	#
23	23	# $Log$
	24	# Revision 1.9 2004/06/18 22:21:27 jalet
	25	# Native PDF parser greatly improved.
	26	# GhostScript based PDF parser completely removed because native code
	27	# is now portable across Python versions.
	28	#
24	29	# Revision 1.8 2004/06/18 20:49:46 jalet
25	30	# "ERROR:" prefix added
…	…
51	56	import sys
52	57	import os
	58	import re
53	59	import struct
54	60	import tempfile
…	…
75	81	"""Count pages in a DSC compliant PostScript document."""
76	82	pagecount = 0
77		while 1 :
78		line = self.infile.readline()
79		if not line :
80		break
	83	for line in self.infile.xreadlines() :
81	84	if line.startswith("%%Page: ") :
82	85	pagecount += 1
…	…
87	90	"""Initialize PDF Analyzer."""
88	91	self.infile = infile
89		~~try :~~
90		~~if float(sys.version[:3]) >= 2.3 :~~
91		~~self.getJobSize = self.native_getJobSize~~
92		~~else :~~
93		~~self.getJobSize = self.gs_getJobSize~~
94		~~except :~~
95		~~self.getJobSize = self.gs_getJobSize~~
96	92
97		def native_getJobSize(self) :
98		"""Counts pages in a PDF document natively."""
	93	def getJobSize(self) :
	94	"""Counts pages in a PDF document."""
	95	regexp = re.compile(r"(/Type) ?(/Page)[/ \r\n]")
99	96	pagecount = 0
100		content = []
101		while 1 :
102		line = self.infile.readline()
103		if not line :
104		break
105		line = line.strip()
106		content.append(line)
107		if line.endswith("endobj") :
108		pagecount += " /".join([x.strip() for x in " ".join(content).split("/")]).count(" /Type /Page ")
109		content = []
110		return pagecount
111
112		def gs_getJobSize(self) :
113		"""Counts pages in a PDF document using GhostScript to convert PDF to PS."""
114		MEGABYTE = 1024*1024
115		child = popen2.Popen4("gs -q -dNOPAUSE -dBATCH -dSAFER -sDEVICE=pswrite -sOutputFile=- -c save pop -f - 2>/dev/null")
116		try :
117		data = self.infile.read(MEGABYTE)
118		while data :
119		child.tochild.write(data)
120		data = self.infile.read(MEGABYTE)
121		child.tochild.flush()
122		child.tochild.close()
123		except (IOError, OSError), msg :
124		raise PDLAnalyzerError, "Unable to convert PDF input to PS with GhostScript : %s" % msg
125
126		psanalyzer = PostScriptAnalyzer(child.fromchild)
127		pagecount = psanalyzer.getJobSize()
128		child.fromchild.close()
129		try :
130		retcode = child.wait()
131		except OSError, msg :
132		self.filter.logger.log_message(_("Problem while waiting for PDF to PS converter (GhostScript pid %s) to exit : %s") % (child.pid, msg))
133		else :
134		if os.WIFEXITED(retcode) :
135		status = os.WEXITSTATUS(retcode)
136		else :
137		status = retcode
138		if status :
139		raise PDLAnalyzerError, "PDF to PS converter (GhostScript pid %s) exit code is %s" % (child.pid, repr(status))
	97	for line in self.infile.xreadlines() :
	98	pagecount += len(regexp.findall(line))
140	99	return pagecount
141	100
…	…
458	417	else :
459	418	# normal file
460		self.infile = open(self.filename, "rbU") # TODO : "U" mode only works in 2.3, is ignored in 2.1 and 2.2
	419	self.infile = open(self.filename, "rb") # TODO : "U" mode only works in 2.3, is ignored in 2.1 and 2.2
461	420	self.mustclose = 1
462	421	return
…	…
464	423	# Use a temporary file, always seekable contrary to standard input.
465	424	# This also has the benefit to let us use the "U" mode (new in Python 2.3)
466		self.infile = tempfile.TemporaryFile(mode="w+bU") # TODO : "U" mode only works in 2.3, is ignored in 2.1 and 2.2
	425	self.infile = tempfile.TemporaryFile(mode="w+b") # TODO : "U" mode only works in 2.3, is ignored in 2.1 and 2.2
467	426	while 1 :
468	427	data = infile.read(MEGABYTE)

Context Navigation

Changeset 1552 for pykota/trunk/pykota/pdlanalyzer.py

Legend:

pykota/trunk/pykota/pdlanalyzer.py

Download in other formats: