Context Navigation

← Previous Change
Next Change →

pkpgpdls

Timestamp:

02/08/07 22:23:59 (18 years ago)

Author:

jerome

Message:

Now uses Python's universal newline detection to read input files,
and also uses file objects directly instead of calling their xreadlines()
method.
Fixed an accounting problem in the PDF parser for some types of files.

Location:

pkpgcounter/trunk/pkpgpdls

Files:

5 modified

analyzer.py (modified) (1 diff)
pdf.py (modified) (1 diff)
pdlparser.py (modified) (1 diff)
postscript.py (modified) (1 diff)
version.py (modified) (1 diff)

Legend:

: Unmodified
: Added
: Removed

pkpgcounter/trunk/pkpgpdls/analyzer.py

r443	r450
110	110	else :
111	111	# normal file
112		self.infile = open(self.filename, "rb")
	112	self.infile = open(self.filename, "rbU")
113	113	self.mustclose = 1
114	114	return
115	115
116	116	# Use a temporary file, always seekable contrary to standard input.
117		self.infile = tempfile.TemporaryFile(mode="w+b")
	117	self.infile = tempfile.TemporaryFile(mode="w+bU")
118	118	while 1 :
119	119	data = infile.read(pdlparser.MEGABYTE)

pkpgcounter/trunk/pkpgpdls/pdf.py

r428	r450
62	62	inobject = 0
63	63	objre = re.compile(r"\s?(\d+)\s+(\d+)\s+obj[<\s/]?")
64		for fullline in self.infile.xreadlines() :
65		parts = [ l.strip() for l in fullline.splitlines() ]
66		for line in parts :
67		if line.startswith("% ") :
	64	for line in self.infile :
	65	line = line.strip()
	66	if line.startswith("% ") :
	67	if inobject :
	68	obj.comments.append(line)
	69	else :
	70	lastcomment = line[2:]
	71	else :
	72	# New object begins here
	73	result = objre.search(line)
	74	if result is not None :
	75	(major, minor) = [int(num) for num in line[result.start():result.end()].split()[:2]]
	76	obj = PDFObject(major, minor, lastcomment)
	77	obj.content.append(line[result.end():])
	78	inobject = 1
	79	elif line.startswith("endobj") \
	80	or line.startswith(">> endobj") \
	81	or line.startswith(">>endobj") :
	82	# Handle previous object, if any
68	83	if inobject :
69		obj.comments.append(line)
70		else :
71		lastcomment = line[2:]
72		else :
73		# New object begins here
74		result = objre.search(line)
75		if result is not None :
76		(major, minor) = [int(num) for num in line[result.start():result.end()].split()[:2]]
77		obj = PDFObject(major, minor, lastcomment)
78		obj.content.append(line[result.end():])
79		inobject = 1
80		elif line.startswith("endobj") \
81		or line.startswith(">> endobj") \
82		or line.startswith(">>endobj") :
83		# Handle previous object, if any
84		if inobject :
85		# only overwrite older versions of this object
86		# same minor seems to be possible, so the latest one
87		# found in the file will be the one we keep.
88		# if we want the first one, just use > instead of >=
89		oldobject = objects.setdefault(major, obj)
90		if minor >= oldobject.minor :
91		objects[major] = obj
92		inobject = 0
93		else :
94		if inobject :
95		obj.content.append(line)
	84	# only overwrite older versions of this object
	85	# same minor seems to be possible, so the latest one
	86	# found in the file will be the one we keep.
	87	# if we want the first one, just use > instead of >=
	88	oldobject = objects.setdefault(major, obj)
	89	if minor >= oldobject.minor :
	90	objects[major] = obj
	91	inobject = 0
	92	else :
	93	if inobject :
	94	obj.content.append(line)
96	95
97	96	# Now we check each PDF object we've just created.
98	97	# colorregexp = re.compile(r"(/ColorSpace) ?(/DeviceRGB\|/DeviceCMYK)[/ \t\r\n]", re.I)
99		newpageregexp = re.compile(r"(/Type)\s?(/Page)[/\s]", re.I)
	98	newpageregexp = re.compile(r"(/Type)\s?(/Page)[/>\s]", re.I)
100	99	pagecount = 0
101	100	for obj in objects.values() :
102	101	content = "".join(obj.content)
103	102	count = len(newpageregexp.findall(content))
104		pagecount += count
	103	if count and (content != r"<</Type /Page>>") : # Empty pages which are not rendered ?
	104	pagecount += count
105	105	return pagecount
106	106

pkpgcounter/trunk/pkpgpdls/pdlparser.py

r443	r450
140	140	mustclose = 0
141	141	else :
142		infile = open(arg, "rb")
	142	infile = open(arg, "rbU")
143	143	mustclose = 1
144	144	try :

pkpgcounter/trunk/pkpgpdls/postscript.py

r448	r450
91	91	acrobatmarker = 0
92	92	pagescomment = None
93		for line in self.infile~~.xreadlines()~~ :
	93	for line in self.infile :
94	94	if (not prescribe) and line.startswith(r"%%BeginResource: procset pdf") \
95	95	and not acrobatmarker :

pkpgcounter/trunk/pkpgpdls/version.py

r447	r450
23	23
24	24
25		__version__ = "2.15~~alpha~~"
	25	__version__ = "2.15"
26	26
27	27	__doc__ = """pkpgcounter : a generic Page Description Languages parser."""