Context Navigation

← Previous Changeset
Next Changeset →

Changeset 519

Timestamp:

11/27/07 21:55:29 (17 years ago)

Author:

jerome

Message:

Added a skeleton for Microsoft Word (c) (tm) (r) (etc...) documents.

Location:

pkpgcounter/trunk/pkpgpdls

Files:

1 modified
1 copied

analyzer.py (modified) (2 diffs)
mstrash.py (copied) (copied from pkpgcounter/trunk/pkpgpdls/plain.py) (3 diffs)

Legend:

Unmodified
Added
Removed

pkpgcounter/trunk/pkpgpdls/analyzer.py

r501	r519
28	28	import tempfile
29	29
30		import version, pdlparser, postscript, pdf, pcl345, pclxl, hbp, pil, \
	30	import version, pdlparser, postscript, pdf, pcl345, pclxl, hbp, pil, mstrash, \
31	31	lidil, escp2, dvi, tiff, ooo, zjstream, qpdl, spl1, escpages03, plain
32	32	import inkcoverage
…	…
159	159	escpages03, \
160	160	pil, \
	161	mstrash, \
161	162	plain) : # IMPORTANT : don't move this one up !
162	163	try :

pkpgcounter/trunk/pkpgpdls/mstrash.py

r495	r519
21	21	#
22	22
23		"""This modules implements a page counter for plain text documents."""
	23	"""This module implements a page counter for Microsoft Word (r) (tm) (c) (etc...) documents"""
	24
	25	import os
	26	import urllib2
24	27
25	28	import pdlparser
…	…
27	30
28	31	class Parser(pdlparser.PDLParser) :
29		"""A parser for plain text documents."""
30		totiffcommands = [ 'enscript --quiet --portrait --no-header --columns 1 --output - "%(infname)s" \| gs -sDEVICE=tiff24nc -dPARANOIDSAFER -dNOPAUSE -dBATCH -dQUIET -r"%(dpi)i" -sOutputFile="%(outfname)s" -',
31		'a2ps --borders 0 --quiet --portrait --no-header --columns 1 --output - "%(infname)s" \| gs -sDEVICE=tiff24nc -dPARANOIDSAFER -dNOPAUSE -dBATCH -dQUIET -r"%(dpi)i" -sOutputFile="%(outfname)s" -',
32		]
33		openmode = "rU"
	32	"""A parser for that MS crap thing."""
	33	totiffcommands = [ ]
34	34	def isValid(self) :
35		"""Returns True if data is ~~plain text~~, else False.
	35	"""Returns True if data is MS crap, else False.
36	36
37		I~~t's hard to detect a plain text file, so we just try to~~
38		extract lines from the first block (sufficiently large).
39		~~If it's impossible to find one we consider it's not plain text~~.
	37	Identifying datas taken from the file command's magic database.
	38	IMPORTANT : some magic values are not reused here because they
	39	IMPORTANT : seem to be specific to some particular i18n release.
40	40	"""
41		~~lines = self.firstblock.split("\r\n")~~
42		~~if len(lines) == 1 :~~
43		lines = lines[0].split("\r")
44		if len(lines) == 1 :
45		lines = lines[0].split("\n")
46		~~if len(lines) > 1~~ :
47		self.logdebug("DEBUG: Input file seems to be in ~~the plain text~~ format.")
	41	if self.firstblock.startswith("PO^Q`") \
	42	or self.firstblock.startswith("\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1") \
	43	or self.firstblock.startswith("\xfe7\x00#") \
	44	or self.firstblock.startswith("\xdb\xa5-\x00\x00\x00") \
	45	or self.firstblock.startswith("\x31\xbe\x00\x00") \
	46	or self.firstblock[2112:].startswith("MSWordDoc") :
	47	self.logdebug("DEBUG: Input file seems to be in a Microsoft shitty file format.")
48	48	return True
49	49	else :
…	…
51	51
52	52	def getJobSize(self) :
53		"""Counts pages in a plain text document."""
54		pagesize = 66 # TODO : Does this vary wrt the default page size ?
55		# TODO : /etc/papersize and /etc/paper.config
56		pagecount = 0
57		linecount = 0
58		for line in self.infile :
59		if line.endswith("\n") :
60		linecount += 1
61		if (linecount > pagesize) :
62		pagecount += 1
63		linecount = 0
64		else :
65		cnt = line.count("\f")
66		if cnt :
67		pagecount += cnt
68		linecount = 0
69		else :
70		raise pdlparser.PDLParserError, "Unsupported file format. Please send the file to %s" % version.__authoremail__
71		return pagecount + 1 # NB : empty files are catched in isValid()
	53	"""Counts pages in a Microsoft Word (r) (tm) (c) (etc...) document."""
	54	return 0

Context Navigation

Changeset 519

Legend:

pkpgcounter/trunk/pkpgpdls/analyzer.py

pkpgcounter/trunk/pkpgpdls/mstrash.py

Download in other formats: