From f2594a032503c86eeb4c8512dd4c53f5b71d711e Mon Sep 17 00:00:00 2001
From: Dustyn Gibson <miigotu@gmail.com>
Date: Tue, 14 Jul 2015 03:42:09 -0700
Subject: [PATCH] Try SYS_ENCODING first, then utf-8, then latin-1, then
 chardet. Chardet can be wrong

---
 sickbeard/db.py             | 19 ++++++++++---------
 sickbeard/encodingKludge.py | 25 ++++++++++++++-----------
 sickbeard/webserve.py       | 29 +++++++++++++++--------------
 3 files changed, 39 insertions(+), 34 deletions(-)

diff --git a/sickbeard/db.py b/sickbeard/db.py
index 346b37259..e1045711e 100644
--- a/sickbeard/db.py
+++ b/sickbeard/db.py
@@ -230,19 +230,20 @@ class DBConnection(object):
     def _unicode_text_factory(self, x):
         try:
             x = unicode(x)
-        except UnicodeDecodeError:
+        except Exception:
             try:
-                x = unicode(x, chardet.detect(x).get('encoding'))
-            except UnicodeDecodeError:
+                x = unicode(x, sickbeard.SYS_ENCODING)
+            except Exception:
                 try:
-                    x = unicode(x, sickbeard.SYS_ENCODING)
-                except UnicodeDecodeError:
+                    x = unicode(x, 'utf-8')
+                except Exception:
                     try:
-                        x = unicode(x, 'utf-8')
-                    except UnicodeDecodeError:
+                        x = unicode(x, 'latin-1')
+                    except Exception:
                         try:
-                            x = unicode(x, 'latin-1')
-                        except UnicodeDecodeError:
+                            # Chardet can be wrong, so try it last, before ignoring errors
+                            x = unicode(x, chardet.detect(x).get('encoding'))
+                        except Exception:
                             x = unicode(x, sickbeard.SYS_ENCODING, errors="ignore")
         return x
 
diff --git a/sickbeard/encodingKludge.py b/sickbeard/encodingKludge.py
index 4cb8ac983..04a2ec27d 100644
--- a/sickbeard/encodingKludge.py
+++ b/sickbeard/encodingKludge.py
@@ -24,14 +24,18 @@ def _toUnicode(x):
     if isinstance(x, str):
         try:
             x = unicode(x)
-        except UnicodeDecodeError:
+        except Exception:
             try:
-                x = unicode(x, chardet.detect(x).get('encoding'))
-            except UnicodeDecodeError:
+                x = unicode(x, sickbeard.SYS_ENCODING)
+            except Exception:
                 try:
-                    x = unicode(x, sickbeard.SYS_ENCODING)
-                except UnicodeDecodeError:
-                    pass
+                   x = unicode(x, 'utf-8')
+                except Exception:
+                    try:
+                        x = unicode(x, 'latin-1')
+                    except Exception:
+                        # Chardet can be wrong, so try it last
+                        x = unicode(x, chardet.detect(x).get('encoding'))
     return x
 
 def ss(x):
@@ -39,16 +43,15 @@ def ss(x):
 
     try:
         x = x.encode(sickbeard.SYS_ENCODING)
-    except UnicodeDecodeError, UnicodeEncodeError:
+    except Exception:
         try:
             x = x.encode('utf-8')
-        except UnicodeDecodeError, UnicodeEncodeError:
+        except Exception:
             try:
                 x = x.encode(sickbeard.SYS_ENCODING, 'replace')
-            except UnicodeDecodeError, UnicodeEncodeError:
+            except Exception:
                 x = x.encode('utf-8', 'ignore')
-    finally:
-        return x
+    return x
 
 def fixListEncodings(x):
     if not isinstance(x, (list, tuple)):
diff --git a/sickbeard/webserve.py b/sickbeard/webserve.py
index eaf03ff7d..078b862cd 100644
--- a/sickbeard/webserve.py
+++ b/sickbeard/webserve.py
@@ -91,23 +91,24 @@ class html_entities(CheetahFilter):
             filtered = ''
         elif isinstance(val, str):
             try:
-                filtered = unicode(val).encode('ascii', 'xmlcharrefreplace')
-            except UnicodeDecodeError, UnicodeEncodeError:
+                filtered = unicode(val)
+            except Exception:
                 try:
-                    filtered = unicode(val, chardet.detect(val).get('encoding')).encode('ascii', 'xmlcharrefreplace')
-                except (UnicodeDecodeError, UnicodeEncodeError) as e:
+                    filtered = unicode(val, sickbeard.SYS_ENCODING)
+                except Exception:
                     try:
-                        filtered = unicode(val, sickbeard.SYS_ENCODING).encode('ascii', 'xmlcharrefreplace')
-                    except (UnicodeDecodeError, UnicodeEncodeError) as e:
-                        logger.log(u'Unable to decode using {0}, trying utf-8. Error is: {1}'.format(sickbeard.SYS_ENCODING, ex(e)), logger.DEBUG)
+                        filtered = unicode(val, 'utf-8')
+                    except Exception:
                         try:
-                            filtered = unicode(val, 'utf-8').encode('ascii', 'xmlcharrefreplace')
-                        except (UnicodeDecodeError, UnicodeEncodeError) as e:
-                            try:
-                                logger.log(u'Unable to decode using utf-8, trying latin-1. Error is: {1}'.format(ex(e)), logger.DEBUG)
-                                filtered = unicode(val, 'latin-1').encode('ascii', 'xmlcharrefreplace')
-                            except UnicodeDecodeError, UnicodeEncodeError:
-                                logger.log(u'Unable to decode using latin-1, Error is {0}.'.format(ex(e)),logger.ERROR)
+                            filtered = unicode(val, 'latin-1')
+                        except Exception:
+                            logger.log(u'Unable to decode using %s, utf-8, or latin-1. Falling back to chardet!' %
+                                    sickbeard.SYS_ENCODING, logger.ERROR)
+                            filtered = unicode(val, chardet.detect(val).get('encoding'))
+            try:
+                filtered = filtered.encode('ascii', 'xmlcharrefreplace')
+            except Exception:
+                logger.log(u'Unable to encode to ascii using xmlcharrefreplace.', logger.ERROR)
         else:
             filtered = self.filter(str(val))
 
-- 
GitLab