From f2594a032503c86eeb4c8512dd4c53f5b71d711e Mon Sep 17 00:00:00 2001 From: Dustyn Gibson <miigotu@gmail.com> Date: Tue, 14 Jul 2015 03:42:09 -0700 Subject: [PATCH] Try SYS_ENCODING first, then utf-8, then latin-1, then chardet. Chardet can be wrong --- sickbeard/db.py | 19 ++++++++++--------- sickbeard/encodingKludge.py | 25 ++++++++++++++----------- sickbeard/webserve.py | 29 +++++++++++++++-------------- 3 files changed, 39 insertions(+), 34 deletions(-) diff --git a/sickbeard/db.py b/sickbeard/db.py index 346b37259..e1045711e 100644 --- a/sickbeard/db.py +++ b/sickbeard/db.py @@ -230,19 +230,20 @@ class DBConnection(object): def _unicode_text_factory(self, x): try: x = unicode(x) - except UnicodeDecodeError: + except Exception: try: - x = unicode(x, chardet.detect(x).get('encoding')) - except UnicodeDecodeError: + x = unicode(x, sickbeard.SYS_ENCODING) + except Exception: try: - x = unicode(x, sickbeard.SYS_ENCODING) - except UnicodeDecodeError: + x = unicode(x, 'utf-8') + except Exception: try: - x = unicode(x, 'utf-8') - except UnicodeDecodeError: + x = unicode(x, 'latin-1') + except Exception: try: - x = unicode(x, 'latin-1') - except UnicodeDecodeError: + # Chardet can be wrong, so try it before ignoring + x = unicode(x, chardet.detect(x).get('encoding')) + except Exception: x = unicode(x, sickbeard.SYS_ENCODING, errors="ignore") return x diff --git a/sickbeard/encodingKludge.py b/sickbeard/encodingKludge.py index 4cb8ac983..04a2ec27d 100644 --- a/sickbeard/encodingKludge.py +++ b/sickbeard/encodingKludge.py @@ -24,14 +24,18 @@ def _toUnicode(x): if isinstance(x, str): try: x = unicode(x) - except UnicodeDecodeError: + except Exception: try: - x = unicode(x, chardet.detect(x).get('encoding')) - except UnicodeDecodeError: + x = unicode(x, sickbeard.SYS_ENCODING) + except Exception: try: - x = unicode(x, sickbeard.SYS_ENCODING) - except UnicodeDecodeError: - pass + x = unicode(x, 'utf-8') + except Exception: + try: + x = unicode(x, 'latin-1') + except Exception: + # Chardet can be wrong, so try it last + x = unicode(x, chardet.detect(x).get('encoding')) return x def ss(x): @@ -39,16 +43,15 @@ def ss(x): try: x = x.encode(sickbeard.SYS_ENCODING) - except UnicodeDecodeError, UnicodeEncodeError: + except Exception: try: x = x.encode('utf-8') - except UnicodeDecodeError, UnicodeEncodeError: + except Exception: try: x = x.encode(sickbeard.SYS_ENCODING, 'replace') - except UnicodeDecodeError, UnicodeEncodeError: + except Exception: x = x.encode('utf-8', 'ignore') - finally: - return x + return x def fixListEncodings(x): if not isinstance(x, (list, tuple)): diff --git a/sickbeard/webserve.py b/sickbeard/webserve.py index eaf03ff7d..078b862cd 100644 --- a/sickbeard/webserve.py +++ b/sickbeard/webserve.py @@ -91,23 +91,24 @@ class html_entities(CheetahFilter): filtered = '' elif isinstance(val, str): try: - filtered = unicode(val).encode('ascii', 'xmlcharrefreplace') - except UnicodeDecodeError, UnicodeEncodeError: + filtered = unicode(val) + except Exception: try: - filtered = unicode(val, chardet.detect(val).get('encoding')).encode('ascii', 'xmlcharrefreplace') - except (UnicodeDecodeError, UnicodeEncodeError) as e: + filtered = unicode(val, sickbeard.SYS_ENCODING) + except Exception: try: - filtered = unicode(val, sickbeard.SYS_ENCODING).encode('ascii', 'xmlcharrefreplace') - except (UnicodeDecodeError, UnicodeEncodeError) as e: - logger.log(u'Unable to decode using {0}, trying utf-8. Error is: {1}'.format(sickbeard.SYS_ENCODING, ex(e)), logger.DEBUG) + filtered = unicode(val, 'utf-8') + except Exception: try: - filtered = unicode(val, 'utf-8').encode('ascii', 'xmlcharrefreplace') - except (UnicodeDecodeError, UnicodeEncodeError) as e: - try: - logger.log(u'Unable to decode using utf-8, trying latin-1. Error is: {1}'.format(ex(e)), logger.DEBUG) - filtered = unicode(val, 'latin-1').encode('ascii', 'xmlcharrefreplace') - except UnicodeDecodeError, UnicodeEncodeError: - logger.log(u'Unable to decode using latin-1, Error is {0}.'.format(ex(e)),logger.ERROR) + filtered = unicode(val, 'latin-1') + except Exception: + logger.log(u'Unable to decode using %s, utf-8, or latin-1. Falling back to chardet!' % + sickbeard.SYS_ENCODING, logger.ERROR) + filtered = unicode(val, chardet.detect(val).get('encoding')) + try: + filtered = filtered.encode('ascii', 'xmlcharrefreplace') + except Exception: + logger.log(u'Unable to encode to ascii using xmlcharrefreplace.', logger.ERROR) else: filtered = self.filter(str(val)) -- GitLab