Show
Ignore:
Timestamp:
04/14/06 00:13:03 (18 years ago)
Author:
jerome
Message:

Improved character encoding routines to handle incorrect input data, such as
UTF-8 encoded strings received with an ISO-8859-1 charset, thanks to
CUPS and Xprint!

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • pykota/trunk/pykota/tool.py

    r2860 r2867  
    231231        if text is not None : 
    232232            try : 
    233                 return unicode(text, "UTF-8").encode(self.charset)  
    234             except (UnicodeError, TypeError) :     
     233                return text.decode("UTF-8").encode(self.charset, "replace")  
     234            except (UnicodeError, AttributeError) :     
    235235                try : 
    236                     # Incorrect locale settings ? 
    237                     return unicode(text, "UTF-8").encode("ISO-8859-15")  
    238                 except (UnicodeError, TypeError) :     
    239                     try : 
    240                         return text.encode(self.charset)  
    241                     except (UnicodeError, TypeError, AttributeError) : 
    242                         pass 
     236                    # Maybe already in Unicode 
     237                    return text.encode(self.charset, "replace")  
     238                except (UnicodeError, AttributeError) : 
     239                    pass # Don't know what to do 
    243240        return text 
    244241         
     
    247244        if text is not None : 
    248245            try : 
    249                 return unicode(text, self.charset).encode("UTF-8")  
    250             except (UnicodeError, TypeError) :     
     246                # We don't necessarily trust the default charset, because 
     247                # xprint sends us titles in UTF-8 but CUPS gives us an ISO-8859-1 charset ! 
     248                # So we first try to see if the text is already in UTF-8 or not, and 
     249                # if it is, we delete characters which can't be converted to the user's charset, 
     250                # then convert back to UTF-8. PostgreSQL 7.3.x used to reject some unicode characters, 
     251                # this is fixed by the ugly line below : 
     252                return text.decode("UTF-8").encode(self.charset, "replace").decode(self.charset).encode("UTF-8", "replace") 
     253            except (UnicodeError, AttributeError) : 
    251254                try : 
    252                     # Incorrect locale settings ? 
    253                     return unicode(text, "ISO-8859-15").encode("UTF-8")  
    254                 except (UnicodeError, TypeError) :     
     255                    return text.decode(self.charset).encode("UTF-8", "replace")  
     256                except (UnicodeError, AttributeError) :     
    255257                    try : 
    256                         return text.encode("UTF-8")  
    257                     except (UnicodeError, TypeError, AttributeError) : 
    258                         pass 
     258                        # Maybe already in Unicode 
     259                        return text.encode("UTF-8", "replace")  
     260                    except (UnicodeError, AttributeError) : 
     261                        pass # Don't know what to do 
    259262        return text 
    260263