Context Navigation

← Previous Changeset
Next Changeset →

Changeset 243

Timestamp:

07/21/05 00:03:12 (19 years ago)

Author:

jerome

Message:

Fixed the different PDF problems reported.

Location:

pkpgcounter/trunk

Files:

3 modified

NEWS (modified) (1 diff)
pkpgpdls/pdf.py (modified) (2 diffs)
pkpgpdls/version.py (modified) (1 diff)

Legend:

: Unmodified
: Added
: Removed

pkpgcounter/trunk/NEWS

r240	r243
22	22	pkpgcounter News :
23	23
	24	* 1.59 :
	25
	26	- Major rewrite of the PDF parser to correctly handle all line endings.
	27
24	28	* 1.58 :
25	29

pkpgcounter/trunk/pkpgpdls/pdf.py

r241	r243
59	59	objects = {}
60	60	inobject = 0
	61	# objre = re.compile(r"\s(\d+)\s+(\d+)\s+obj[<\s/]")
	62	objre = re.compile(r"\s?(\d+)\s+(\d+)\s+obj[<\s/]?")
61	63	for fullline in self.infile.xreadlines() :
62	64	parts = [ l.strip() for l in fullline.splitlines() ]
…	…
67	69	else :
68	70	lastcomment = line[2:]
69		elif line.endswith(" obj") :
	71	else :
70	72	# New object begins here
71		(n0, n1, dummy) = line.split()
72		(major, minor) = map(int, (n0, n1))
73		obj = PDFObject(major, minor, lastcomment)
74		inobject = 1
75		elif line.startswith("endobj") :
76		try :
77		# try to find a different version of this object
78		oldobject = objects[major]
79		except KeyError :
80		# not found, so we add it
81		objects[major] = obj
	73	result = objre.search(line)
	74	if result is not None :
	75	(major, minor) = map(int, line[result.start():result.end()].split()[:2])
	76	obj = PDFObject(major, minor, lastcomment)
	77	obj.content.append(line[result.end():])
	78	inobject = 1
	79	elif line.startswith("endobj") \
	80	or line.startswith(">> endobj") \
	81	or line.startswith(">>endobj") :
	82	# Handle previous object, if any
	83	if inobject :
	84	# only overwrite older versions of this object
	85	# same minor seems to be possible, so the latest one
	86	# found in the file will be the one we keep.
	87	# if we want the first one, just use > instead of >=
	88	oldobject = objects.setdefault(major, obj)
	89	if minor >= oldobject.minor :
	90	objects[major] = obj
	91	inobject = 0
82	92	else :
83		# only overwrite older versions of this object
84		# same minor seems to be possible, so the latest one
85		# found in the file will be the one we keep.
86		# if we want the first one, just use > instead of >=
87		if minor >= oldobject.minor :
88		objects[major] = obj
89		inobject = 0
90		else :
91		if inobject :
92		obj.content.append(line)
	93	if inobject :
	94	obj.content.append(line)
93	95
94	96	# Now we check each PDF object we've just created.
95	97	self.iscolor = None
96		newpageregexp = re.compile(r"(/Type) ?(/Page)[/ \t\r\n]", re.I)
	98	newpageregexp = re.compile(r"(/Type)\s?(/Page)[/\s]", re.I)
97	99	colorregexp = re.compile(r"(/ColorSpace) ?(/DeviceRGB|/DeviceCMYK)[/ \t\r\n]", re.I)
98	100	pagecount = 0
99	101	for object in objects.values() :
100	102	content = "".join(object.content)
101		pagecount += len(newpageregexp.findall(content))
	103	count = len(newpageregexp.findall(content))
	104	pagecount += count
102	105	if colorregexp.match(content) :
103	106	self.iscolor = 1

pkpgcounter/trunk/pkpgpdls/version.py

r240	r243
20	20	#
21	21
22		__version__ = "1.58"
	22	__version__ = "1.59"
23	23
24	24	__doc__ = """pkpgcounter : a generic Page Description Languages parser."""

Download in other formats: