Context Navigation

← Previous Change
Next Change →

pdf.py

Timestamp:

02/08/07 22:23:59 (18 years ago)

Author:

jerome

Message:

Now uses Python's universal newline detection to read input files,
and also uses file objects directly instead of calling their xreadlines()
method.
Fixed an accounting problem in the PDF parser for some types of files.

Files:

: 1 modified

pkpgcounter/trunk/pkpgpdls/pdf.py (modified) (1 diff)

Legend:

: Unmodified
: Added
: Removed

pkpgcounter/trunk/pkpgpdls/pdf.py

r428	r450
62	62	inobject = 0
63	63	objre = re.compile(r"\s?(\d+)\s+(\d+)\s+obj[<\s/]?")
64		for fullline in self.infile.xreadlines() :
65		parts = [ l.strip() for l in fullline.splitlines() ]
66		for line in parts :
67		if line.startswith("% ") :
	64	for line in self.infile :
	65	line = line.strip()
	66	if line.startswith("% ") :
	67	if inobject :
	68	obj.comments.append(line)
	69	else :
	70	lastcomment = line[2:]
	71	else :
	72	# New object begins here
	73	result = objre.search(line)
	74	if result is not None :
	75	(major, minor) = [int(num) for num in line[result.start():result.end()].split()[:2]]
	76	obj = PDFObject(major, minor, lastcomment)
	77	obj.content.append(line[result.end():])
	78	inobject = 1
	79	elif line.startswith("endobj") \
	80	or line.startswith(">> endobj") \
	81	or line.startswith(">>endobj") :
	82	# Handle previous object, if any
68	83	if inobject :
69		obj.comments.append(line)
70		else :
71		lastcomment = line[2:]
72		else :
73		# New object begins here
74		result = objre.search(line)
75		if result is not None :
76		(major, minor) = [int(num) for num in line[result.start():result.end()].split()[:2]]
77		obj = PDFObject(major, minor, lastcomment)
78		obj.content.append(line[result.end():])
79		inobject = 1
80		elif line.startswith("endobj") \
81		or line.startswith(">> endobj") \
82		or line.startswith(">>endobj") :
83		# Handle previous object, if any
84		if inobject :
85		# only overwrite older versions of this object
86		# same minor seems to be possible, so the latest one
87		# found in the file will be the one we keep.
88		# if we want the first one, just use > instead of >=
89		oldobject = objects.setdefault(major, obj)
90		if minor >= oldobject.minor :
91		objects[major] = obj
92		inobject = 0
93		else :
94		if inobject :
95		obj.content.append(line)
	84	# only overwrite older versions of this object
	85	# same minor seems to be possible, so the latest one
	86	# found in the file will be the one we keep.
	87	# if we want the first one, just use > instead of >=
	88	oldobject = objects.setdefault(major, obj)
	89	if minor >= oldobject.minor :
	90	objects[major] = obj
	91	inobject = 0
	92	else :
	93	if inobject :
	94	obj.content.append(line)
96	95
97	96	# Now we check each PDF object we've just created.
98	97	# colorregexp = re.compile(r"(/ColorSpace) ?(/DeviceRGB|/DeviceCMYK)[/ \t\r\n]", re.I)
99		newpageregexp = re.compile(r"(/Type)\s?(/Page)[/\s]", re.I)
	98	newpageregexp = re.compile(r"(/Type)\s?(/Page)[/>\s]", re.I)
100	99	pagecount = 0
101	100	for obj in objects.values() :
102	101	content = "".join(obj.content)
103	102	count = len(newpageregexp.findall(content))
104		pagecount += count
	103	if count and (content != r"<</Type /Page>>") : # Empty pages which are not rendered ?
	104	pagecount += count
105	105	return pagecount
106	106

Context Navigation

Changeset 450 for pkpgcounter/trunk/pkpgpdls/pdf.py

Legend:

pkpgcounter/trunk/pkpgpdls/pdf.py

Download in other formats: