| 126 | |
def veryFastAndNotAlwaysCorrectgetJobSize(self) :
    """Quick page count for a PDF document.

    Reads everything from self.infile and returns how many times a
    "/Type /Page" marker (followed by "/", ">" or whitespace) occurs
    in it.  The document's object structure is not examined at all,
    every textual occurrence of the marker is counted.
    """
    document = self.infile.read()
    markers = re.finditer(r"/Type\s*/Page[/>\s]", document)
    return sum(1 for _ in markers)
| 131 | |
def thisOneIsSlowButCorrectgetJobSize(self) :
    """Counts pages in a PDF document, taking object revisions into account.

    Reads everything from self.infile and extracts every
    "<number> <generation> obj ... endobj" section.  For each object
    number only the section with the highest generation number is
    retained (on equal generations the one seen last wins).  Page
    markers are then counted in the retained sections only.

    Returns the number of "/Type /Page" markers found in the retained
    sections, minus the occurrences of two literal bare page object
    layouts which the code treats as empty.

    Each retained section which contains a page marker and none of
    these bare layouts is reported through self.logdebug().
    """
    oregexp = re.compile(r"\s+(\d+)\s+(\d+)\s+(obj\s*.+?\s*?endobj)",
                         re.DOTALL)
    objtokeep = {}
    for (smajor, sminor, content) in oregexp.findall(self.infile.read()) :
        major = int(smajor)
        minor = int(sminor)
        prevmin = objtokeep.get(major, (None, None))[0]
        # The None test must be explicit : comparing an integer with
        # None only works in Python 2, Python 3 raises TypeError.
        if (prevmin is None) or (minor >= prevmin) :
            objtokeep[major] = (minor, content)
    npregexp = re.compile(r"/Type\s*/Page[/>\s]")
    pagecount = 0
    for (major, (minor, content)) in objtokeep.items() :
        count = len(npregexp.findall(content))
        if count :
            emptycount = content.count("obj\n<< \n/Type /Page \n>> \nendobj") + content.count("obj\n<< \n/Type /Page \n\n>> \nendobj") # TODO : make this clean
            if not emptycount :
                self.logdebug("%i.%i : %s\n" % (major, minor, repr(content)))
            pagecount += count - emptycount
    return pagecount