Changeset 450 for pkpgcounter
- Timestamp: 02/08/07 22:23:59 (18 years ago)
- Location: pkpgcounter/trunk
- Files: 7 modified
Legend:
- Unmodified
- Added
- Removed
pkpgcounter/trunk/man/pkpgcounter.1
r445 r450 1 1 .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.36. 2 .TH PKPGCOUNTER "1" " January 2007" "C@LL - Conseil Internet & Logiciels Libres" "User Commands"2 .TH PKPGCOUNTER "1" "February 2007" "C@LL - Conseil Internet & Logiciels Libres" "User Commands" 3 3 .SH NAME 4 pkpgcounter \- manual page for pkpgcounter 2.1 44 pkpgcounter \- manual page for pkpgcounter 2.15 5 5 .SH DESCRIPTION 6 pkpgcounter v2.1 4(c) 2003, 2004, 2005, 2006, 2007 Jerome Alet6 pkpgcounter v2.15 (c) 2003, 2004, 2005, 2006, 2007 Jerome Alet 7 7 .PP 8 8 pkpgcounter is a generic Page Description Language parser. -
pkpgcounter/trunk/NEWS
r444 r450 22 22 pkpgcounter News : 23 23 24 * 2.15 : 25 26 - Fixed a regexp in the PDF parser which caused some files to be 27 incorrectly accounted for. 28 29 - Improved the detection of the number of copies in PostScript documents. 30 24 31 * 2.14 : 25 32 -
pkpgcounter/trunk/pkpgpdls/analyzer.py
r443 r450 110 110 else : 111 111 # normal file 112 self.infile = open(self.filename, "rb ")112 self.infile = open(self.filename, "rbU") 113 113 self.mustclose = 1 114 114 return 115 115 116 116 # Use a temporary file, always seekable contrary to standard input. 117 self.infile = tempfile.TemporaryFile(mode="w+b ")117 self.infile = tempfile.TemporaryFile(mode="w+bU") 118 118 while 1 : 119 119 data = infile.read(pdlparser.MEGABYTE) -
pkpgcounter/trunk/pkpgpdls/pdf.py
r428 r450 62 62 inobject = 0 63 63 objre = re.compile(r"\s?(\d+)\s+(\d+)\s+obj[<\s/]?") 64 for fullline in self.infile.xreadlines() : 65 parts = [ l.strip() for l in fullline.splitlines() ] 66 for line in parts : 67 if line.startswith("% ") : 64 for line in self.infile : 65 line = line.strip() 66 if line.startswith("% ") : 67 if inobject : 68 obj.comments.append(line) 69 else : 70 lastcomment = line[2:] 71 else : 72 # New object begins here 73 result = objre.search(line) 74 if result is not None : 75 (major, minor) = [int(num) for num in line[result.start():result.end()].split()[:2]] 76 obj = PDFObject(major, minor, lastcomment) 77 obj.content.append(line[result.end():]) 78 inobject = 1 79 elif line.startswith("endobj") \ 80 or line.startswith(">> endobj") \ 81 or line.startswith(">>endobj") : 82 # Handle previous object, if any 68 83 if inobject : 69 obj.comments.append(line) 70 else : 71 lastcomment = line[2:] 72 else : 73 # New object begins here 74 result = objre.search(line) 75 if result is not None : 76 (major, minor) = [int(num) for num in line[result.start():result.end()].split()[:2]] 77 obj = PDFObject(major, minor, lastcomment) 78 obj.content.append(line[result.end():]) 79 inobject = 1 80 elif line.startswith("endobj") \ 81 or line.startswith(">> endobj") \ 82 or line.startswith(">>endobj") : 83 # Handle previous object, if any 84 if inobject : 85 # only overwrite older versions of this object 86 # same minor seems to be possible, so the latest one 87 # found in the file will be the one we keep. 88 # if we want the first one, just use > instead of >= 89 oldobject = objects.setdefault(major, obj) 90 if minor >= oldobject.minor : 91 objects[major] = obj 92 inobject = 0 93 else : 94 if inobject : 95 obj.content.append(line) 84 # only overwrite older versions of this object 85 # same minor seems to be possible, so the latest one 86 # found in the file will be the one we keep. 
87 # if we want the first one, just use > instead of >= 88 oldobject = objects.setdefault(major, obj) 89 if minor >= oldobject.minor : 90 objects[major] = obj 91 inobject = 0 92 else : 93 if inobject : 94 obj.content.append(line) 96 95 97 96 # Now we check each PDF object we've just created. 98 97 # colorregexp = re.compile(r"(/ColorSpace) ?(/DeviceRGB|/DeviceCMYK)[/ \t\r\n]", re.I) 99 newpageregexp = re.compile(r"(/Type)\s?(/Page)[/ \s]", re.I)98 newpageregexp = re.compile(r"(/Type)\s?(/Page)[/>\s]", re.I) 100 99 pagecount = 0 101 100 for obj in objects.values() : 102 101 content = "".join(obj.content) 103 102 count = len(newpageregexp.findall(content)) 104 pagecount += count 103 if count and (content != r"<</Type /Page>>") : # Empty pages which are not rendered ? 104 pagecount += count 105 105 return pagecount 106 106 -
pkpgcounter/trunk/pkpgpdls/pdlparser.py
r443 r450 140 140 mustclose = 0 141 141 else : 142 infile = open(arg, "rb ")142 infile = open(arg, "rbU") 143 143 mustclose = 1 144 144 try : -
pkpgcounter/trunk/pkpgpdls/postscript.py
r448 r450 91 91 acrobatmarker = 0 92 92 pagescomment = None 93 for line in self.infile .xreadlines():93 for line in self.infile : 94 94 if (not prescribe) and line.startswith(r"%%BeginResource: procset pdf") \ 95 95 and not acrobatmarker : -
pkpgcounter/trunk/pkpgpdls/version.py
r447 r450 23 23 24 24 25 __version__ = "2.15 alpha"25 __version__ = "2.15" 26 26 27 27 __doc__ = """pkpgcounter : a generic Page Description Languages parser."""