Changeset 3006

Show
Ignore:
Timestamp:
09/03/06 17:45:15 (18 years ago)
Author:
jerome
Message:

Now uses an automatic charset detection module if installed,
or falls back to the ISO-8859-15 charset, whenever the character
set announced by CUPS (probably always UTF-8) and the character
set in which the title and filenames are actually encoded don't
match. This should "fix" the annoying problem some people reported.

Location:
pykota/trunk
Files:
3 modified

Legend:

Unmodified
Added
Removed
  • pykota/trunk/checkdeps.py

    r2892 r3006  
    8282                       ("Python-PAM", "PAM", "Python-PAM is recommended if you plan to use pknotify+PyKotIcon.\nGrab it from http://www.pangalactic.org/PyPAM/"), 
    8383                       ("Python-pkipplib", "pkipplib", "Python-pkipplib is now mandatory.\nGrab it from http://www.pykota.com/software/pkipplib/"), 
     84                       ("Python-chardet", "chardet", "Python-chardet is recommended.\nGrab it from http://chardet.feedparser.org/"), 
    8485                     ] 
    8586    commandstocheck = [ ("GhostScript", "gs", "Depending on your configuration, GhostScript may be needed in different parts of PyKota."), 
  • pykota/trunk/pykota/tool.py

    r2992 r3006  
    3737 
    3838from mx import DateTime 
     39 
     40try : 
     41    import chardet 
     42except ImportError :     
     43    def detectCharset(text) : 
     44        """Fakes a charset detection if the chardet module is not installed.""" 
     45        return "ISO-8859-15" 
     46else :     
     47    def detectCharset(text) : 
     48        """Uses the chardet module to workaround CUPS lying to us.""" 
     49        return chardet.detect(text)["encoding"] 
    3950 
    4051from pykota import config, storage, logger 
     
    133144        # Else we use the current locale's one. 
    134145        # If nothing is set, we use ISO-8859-15 widely used in western Europe. 
    135         localecharset = None 
     146        self.localecharset = None 
    136147        try : 
    137148            try : 
    138                 localecharset = locale.nl_langinfo(locale.CODESET) 
     149                self.localecharset = locale.nl_langinfo(locale.CODESET) 
    139150            except AttributeError :     
    140151                try : 
    141                     localecharset = locale.getpreferredencoding() 
     152                    self.localecharset = locale.getpreferredencoding() 
    142153                except AttributeError :     
    143154                    try : 
    144                         localecharset = locale.getlocale()[1] 
    145                         localecharset = localecharset or locale.getdefaultlocale()[1] 
     155                        self.localecharset = locale.getlocale()[1] 
     156                        self.localecharset = self.localecharset or locale.getdefaultlocale()[1] 
    146157                    except ValueError :     
    147158                        pass        # Unknown locale, strange... 
    148159        except locale.Error :             
    149160            pass 
    150         self.charset = charset or os.environ.get("CHARSET") or localecharset or "ISO-8859-15" 
     161        self.charset = charset or os.environ.get("CHARSET") or self.localecharset or "ISO-8859-15" 
    151162     
    152163        # pykota specific stuff 
     
    184195            self.printInfo("The 'pykota' system account is missing. Configuration files were searched in /etc/pykota instead.", "warn") 
    185196         
     197        self.logdebug("Charset detected from locale settings : %s" % self.localecharset) 
    186198        self.logdebug("Charset in use : %s" % self.charset) 
    187199        arguments = " ".join(['"%s"' % arg for arg in sys.argv]) 
     
    230242    def UTF8ToUserCharset(self, text) : 
    231243        """Converts from UTF-8 to user's charset.""" 
    232         if text is not None : 
     244        if text is None : 
     245            return None 
     246        try : 
     247            return text.decode("UTF-8").encode(self.charset, "replace")  
     248        except (UnicodeError, AttributeError) :     
    233249            try : 
    234                 return text.decode("UTF-8").encode(self.charset, "replace")  
     250                # Maybe already in Unicode ? 
     251                return text.encode(self.charset, "replace")  
     252            except (UnicodeError, AttributeError) : 
     253                # Try to autodetect the charset 
     254                return text.decode(detectCharset(text), "replace").encode(self.charset, "replace") 
     255         
     256    def userCharsetToUTF8(self, text) : 
     257        """Converts from user's charset to UTF-8.""" 
     258        if text is None : 
     259            return None 
     260        try : 
     261            # We don't necessarily trust the default charset, because 
     262            # xprint sends us titles in UTF-8 but CUPS gives us an ISO-8859-1 charset ! 
     263            # So we first try to see if the text is already in UTF-8 or not, and 
     264            # if it is, we delete characters which can't be converted to the user's charset, 
     265            # then convert back to UTF-8. PostgreSQL 7.3.x used to reject some unicode characters, 
     266            # this is fixed by the ugly line below : 
     267            return text.decode("UTF-8").encode(self.charset, "replace").decode(self.charset).encode("UTF-8", "replace") 
     268        except (UnicodeError, AttributeError) : 
     269            try : 
     270                return text.decode(self.charset).encode("UTF-8", "replace")  
    235271            except (UnicodeError, AttributeError) :     
    236272                try : 
    237                     # Maybe already in Unicode 
    238                     return text.encode(self.charset, "replace")  
     273                    # Maybe already in Unicode ? 
     274                    return text.encode("UTF-8", "replace")  
    239275                except (UnicodeError, AttributeError) : 
    240                     pass # Don't know what to do 
    241         return text 
    242          
    243     def userCharsetToUTF8(self, text) : 
    244         """Converts from user's charset to UTF-8.""" 
    245         if text is not None : 
    246             try : 
    247                 # We don't necessarily trust the default charset, because 
    248                 # xprint sends us titles in UTF-8 but CUPS gives us an ISO-8859-1 charset ! 
    249                 # So we first try to see if the text is already in UTF-8 or not, and 
    250                 # if it is, we delete characters which can't be converted to the user's charset, 
    251                 # then convert back to UTF-8. PostgreSQL 7.3.x used to reject some unicode characters, 
    252                 # this is fixed by the ugly line below : 
    253                 return text.decode("UTF-8").encode(self.charset, "replace").decode(self.charset).encode("UTF-8", "replace") 
    254             except (UnicodeError, AttributeError) : 
    255                 try : 
    256                     return text.decode(self.charset).encode("UTF-8", "replace")  
    257                 except (UnicodeError, AttributeError) :     
    258                     try : 
    259                         # Maybe already in Unicode 
    260                         return text.encode("UTF-8", "replace")  
    261                     except (UnicodeError, AttributeError) : 
    262                         pass # Don't know what to do 
    263         return text 
     276                    # Try to autodetect the charset 
     277                    return text.decode(detectCharset(text), "replace").encode("UTF-8", "replace") 
     278        return newtext 
    264279         
    265280    def display(self, message) : 
  • pykota/trunk/README

    r2943 r3006  
    373373      (http://www.librelogiciel.com/software/) 
    374374    - The Python-PAM module if you need the pknotify command to be able   
    375       to check usernames and passwords. 
     375      to check usernames and passwords. (http://www.pangalactic.org/PyPAM) 
    376376    - The Python-SNMP module to query printers for their page counter. 
    377       (http://pysnmp.sourceforge.net).  
     377      (http://pysnmp.sourceforge.net)  
    378378      IMPORTANT : version 3.4.2 or higher is REQUIRED. 
    379379      Versions 2.x won't work. Versions 4.x now work (tested with v4.1.5a). 
    380380    - The Python-OSD module to use the graphical print quota reminder. 
    381381      (http://repose.cx/pyosd/) 
     382    - The Python-chardet module to autodetect user's character set when   
     383      printing. (http://chardet.feedparser.org) 
    382384    - SNMP tools (specifically the snmpget command) if you prefer to 
    383385      use your own script to request query printers.