Changeset 243 for pkpgcounter
- Timestamp:
- 07/21/05 00:03:12 (19 years ago)
- Location:
- pkpgcounter/trunk
- Files:
-
- 3 modified
Legend:
- Unmodified
- Added
- Removed
-
pkpgcounter/trunk/NEWS
r240 r243 22 22 pkpgcounter News : 23 23 24 * 1.59 : 25 26 - Major rewrite of the PDF parser to correctly handle all line endings. 27 24 28 * 1.58 : 25 29 -
pkpgcounter/trunk/pkpgpdls/pdf.py
r241 r243 59 59 objects = {} 60 60 inobject = 0 61 # objre = re.compile(r"\s*(\d+)\s+(\d+)\s+obj[<\s/]*") 62 objre = re.compile(r"\s?(\d+)\s+(\d+)\s+obj[<\s/]?") 61 63 for fullline in self.infile.xreadlines() : 62 64 parts = [ l.strip() for l in fullline.splitlines() ] … … 67 69 else : 68 70 lastcomment = line[2:] 69 elif line.endswith(" obj") : 71 else : 70 72 # New object begins here 71 (n0, n1, dummy) = line.split() 72 (major, minor) = map(int, (n0, n1)) 73 obj = PDFObject(major, minor, lastcomment) 74 inobject = 1 75 elif line.startswith("endobj") : 76 try : 77 # try to find a different version of this object 78 oldobject = objects[major] 79 except KeyError : 80 # not found, so we add it 81 objects[major] = obj 73 result = objre.search(line) 74 if result is not None : 75 (major, minor) = map(int, line[result.start():result.end()].split()[:2]) 76 obj = PDFObject(major, minor, lastcomment) 77 obj.content.append(line[result.end():]) 78 inobject = 1 79 elif line.startswith("endobj") \ 80 or line.startswith(">> endobj") \ 81 or line.startswith(">>endobj") : 82 # Handle previous object, if any 83 if inobject : 84 # only overwrite older versions of this object 85 # same minor seems to be possible, so the latest one 86 # found in the file will be the one we keep. 87 # if we want the first one, just use > instead of >= 88 oldobject = objects.setdefault(major, obj) 89 if minor >= oldobject.minor : 90 objects[major] = obj 91 inobject = 0 82 92 else : 83 # only overwrite older versions of this object 84 # same minor seems to be possible, so the latest one 85 # found in the file will be the one we keep. 86 # if we want the first one, just use > instead of >= 87 if minor >= oldobject.minor : 88 objects[major] = obj 89 inobject = 0 90 else : 91 if inobject : 92 obj.content.append(line) 93 if inobject : 94 obj.content.append(line) 93 95 94 96 # Now we check each PDF object we've just created. 
95 97 self.iscolor = None 96 newpageregexp = re.compile(r"(/Type) ?(/Page)[/ \t\r\n]", re.I) 98 newpageregexp = re.compile(r"(/Type)\s?(/Page)[/\s]", re.I) 97 99 colorregexp = re.compile(r"(/ColorSpace) ?(/DeviceRGB|/DeviceCMYK)[/ \t\r\n]", re.I) 98 100 pagecount = 0 99 101 for object in objects.values() : 100 102 content = "".join(object.content) 101 pagecount += len(newpageregexp.findall(content)) 103 count = len(newpageregexp.findall(content)) 104 pagecount += count 102 105 if colorregexp.match(content) : 103 106 self.iscolor = 1 -
pkpgcounter/trunk/pkpgpdls/version.py
r240 r243 20 20 # 21 21 22 __version__ = "1.58" 22 __version__ = "1.59" 23 23 24 24 __doc__ = """pkpgcounter : a generic Page Description Languages parser."""