diff --git a/SickBeard.py b/SickBeard.py index 75c5277eff0a734e2f8c398d76f97e257f5c1d26..bd2279eb6f09528f0afb6c764bb683aaa3213e99 100755 --- a/SickBeard.py +++ b/SickBeard.py @@ -359,22 +359,17 @@ class SickRage(object): } # start web server - try: - self.webserver = SRWebServer(self.web_options) - self.webserver.start() - except IOError: - logger.log(u"Unable to start web server, is something else running on port %d?" % self.startPort, - logger.ERROR) - if sickbeard.LAUNCH_BROWSER and not self.runAsDaemon: - logger.log(u"Launching browser and exiting", logger.ERROR) - sickbeard.launchBrowser('https' if sickbeard.ENABLE_HTTPS else 'http', self.startPort, sickbeard.WEB_ROOT) - os._exit(1) + self.webserver = SRWebServer(self.web_options) + self.webserver.start() if self.consoleLogging: print "Starting up SickRage " + sickbeard.BRANCH + " from " + sickbeard.CONFIG_FILE # Fire up all our threads sickbeard.start() + + # Build internal name cache + name_cache.buildNameCache() # Prepopulate network timezones, it isn't thread safe network_timezones.update_network_dict() diff --git a/gui/slick/images/providers/nzbs2go.png b/gui/slick/images/providers/nzbs2go.png new file mode 100644 index 0000000000000000000000000000000000000000..e3817063c06b99f760bc5bbdb72a3d54d8700d25 Binary files /dev/null and b/gui/slick/images/providers/nzbs2go.png differ diff --git a/gui/slick/images/providers/nzbs2go_com.png b/gui/slick/images/providers/nzbs2go_com.png new file mode 100644 index 0000000000000000000000000000000000000000..e3817063c06b99f760bc5bbdb72a3d54d8700d25 Binary files /dev/null and b/gui/slick/images/providers/nzbs2go_com.png differ diff --git a/gui/slick/images/providers/nzbtor.png b/gui/slick/images/providers/nzbtor.png new file mode 100644 index 0000000000000000000000000000000000000000..7a44db5260315e9d4d58e9ead9e221814e79fead Binary files /dev/null and b/gui/slick/images/providers/nzbtor.png differ diff --git a/gui/slick/images/providers/nzbtor_cr.png b/gui/slick/images/providers/nzbtor_cr.png new file mode 100644 index 0000000000000000000000000000000000000000..7a44db5260315e9d4d58e9ead9e221814e79fead Binary files /dev/null and b/gui/slick/images/providers/nzbtor_cr.png differ diff --git a/gui/slick/images/providers/6box.png b/gui/slick/images/providers/sixbox.png similarity index 100% rename from gui/slick/images/providers/6box.png rename to gui/slick/images/providers/sixbox.png diff --git a/gui/slick/images/providers/6box_me.png b/gui/slick/images/providers/sixbox_me.png similarity index 100% rename from gui/slick/images/providers/6box_me.png rename to gui/slick/images/providers/sixbox_me.png diff --git a/gui/slick/images/flags/afr.png b/gui/slick/images/subtitles/flags/afr.png similarity index 100% rename from gui/slick/images/flags/afr.png rename to gui/slick/images/subtitles/flags/afr.png diff --git a/gui/slick/images/flags/amh.png b/gui/slick/images/subtitles/flags/amh.png similarity index 100% rename from gui/slick/images/flags/amh.png rename to gui/slick/images/subtitles/flags/amh.png diff --git a/gui/slick/images/flags/ara.png b/gui/slick/images/subtitles/flags/ara.png similarity index 100% rename from gui/slick/images/flags/ara.png rename to gui/slick/images/subtitles/flags/ara.png diff --git a/gui/slick/images/flags/arg.png b/gui/slick/images/subtitles/flags/arg.png similarity index 100% rename from gui/slick/images/flags/arg.png rename to gui/slick/images/subtitles/flags/arg.png diff --git a/gui/slick/images/flags/asm.png b/gui/slick/images/subtitles/flags/asm.png similarity index 100% rename from gui/slick/images/flags/asm.png rename to gui/slick/images/subtitles/flags/asm.png diff --git a/gui/slick/images/flags/ave.png b/gui/slick/images/subtitles/flags/ave.png similarity index 100% rename from gui/slick/images/flags/ave.png rename to gui/slick/images/subtitles/flags/ave.png diff --git a/gui/slick/images/flags/aym.png b/gui/slick/images/subtitles/flags/aym.png similarity index 100% rename from gui/slick/images/flags/aym.png rename to gui/slick/images/subtitles/flags/aym.png diff --git a/gui/slick/images/flags/aze.png b/gui/slick/images/subtitles/flags/aze.png similarity index 100% rename from gui/slick/images/flags/aze.png rename to gui/slick/images/subtitles/flags/aze.png diff --git a/gui/slick/images/flags/bak.png b/gui/slick/images/subtitles/flags/bak.png similarity index 100% rename from gui/slick/images/flags/bak.png rename to gui/slick/images/subtitles/flags/bak.png diff --git a/gui/slick/images/flags/bam.png b/gui/slick/images/subtitles/flags/bam.png similarity index 100% rename from gui/slick/images/flags/bam.png rename to gui/slick/images/subtitles/flags/bam.png diff --git a/gui/slick/images/flags/bel.png b/gui/slick/images/subtitles/flags/bel.png similarity index 100% rename from gui/slick/images/flags/bel.png rename to gui/slick/images/subtitles/flags/bel.png diff --git a/gui/slick/images/flags/ben.png b/gui/slick/images/subtitles/flags/ben.png similarity index 100% rename from gui/slick/images/flags/ben.png rename to gui/slick/images/subtitles/flags/ben.png diff --git a/gui/slick/images/flags/bis.png b/gui/slick/images/subtitles/flags/bis.png similarity index 100% rename from gui/slick/images/flags/bis.png rename to gui/slick/images/subtitles/flags/bis.png diff --git a/gui/slick/images/flags/bod.png b/gui/slick/images/subtitles/flags/bod.png similarity index 100% rename from gui/slick/images/flags/bod.png rename to gui/slick/images/subtitles/flags/bod.png diff --git a/gui/slick/images/flags/bos.png b/gui/slick/images/subtitles/flags/bos.png similarity index 100% rename from gui/slick/images/flags/bos.png rename to gui/slick/images/subtitles/flags/bos.png diff --git a/gui/slick/images/flags/bre.png b/gui/slick/images/subtitles/flags/bre.png similarity index 100% rename from gui/slick/images/flags/bre.png rename to gui/slick/images/subtitles/flags/bre.png diff --git a/gui/slick/images/flags/bul.png b/gui/slick/images/subtitles/flags/bul.png similarity index 100% rename from gui/slick/images/flags/bul.png rename to gui/slick/images/subtitles/flags/bul.png diff --git a/gui/slick/images/flags/cat.png b/gui/slick/images/subtitles/flags/cat.png similarity index 100% rename from gui/slick/images/flags/cat.png rename to gui/slick/images/subtitles/flags/cat.png diff --git a/gui/slick/images/flags/ces.png b/gui/slick/images/subtitles/flags/ces.png similarity index 100% rename from gui/slick/images/flags/ces.png rename to gui/slick/images/subtitles/flags/ces.png diff --git a/gui/slick/images/flags/cha.png b/gui/slick/images/subtitles/flags/cha.png similarity index 100% rename from gui/slick/images/flags/cha.png rename to gui/slick/images/subtitles/flags/cha.png diff --git a/gui/slick/images/flags/chu.png b/gui/slick/images/subtitles/flags/chu.png similarity index 100% rename from gui/slick/images/flags/chu.png rename to gui/slick/images/subtitles/flags/chu.png diff --git a/gui/slick/images/flags/chv.png b/gui/slick/images/subtitles/flags/chv.png similarity index 100% rename from gui/slick/images/flags/chv.png rename to gui/slick/images/subtitles/flags/chv.png diff --git a/gui/slick/images/flags/cor.png b/gui/slick/images/subtitles/flags/cor.png similarity index 100% rename from gui/slick/images/flags/cor.png rename to gui/slick/images/subtitles/flags/cor.png diff --git a/gui/slick/images/flags/cos.png b/gui/slick/images/subtitles/flags/cos.png similarity index 100% rename from gui/slick/images/flags/cos.png rename to gui/slick/images/subtitles/flags/cos.png diff --git a/gui/slick/images/flags/cre.png b/gui/slick/images/subtitles/flags/cre.png similarity index 100% rename from gui/slick/images/flags/cre.png rename to gui/slick/images/subtitles/flags/cre.png diff --git a/gui/slick/images/flags/cym.png b/gui/slick/images/subtitles/flags/cym.png similarity index 100% rename from gui/slick/images/flags/cym.png rename to gui/slick/images/subtitles/flags/cym.png diff --git a/gui/slick/images/flags/dan.png b/gui/slick/images/subtitles/flags/dan.png similarity index 100% rename from gui/slick/images/flags/dan.png rename to gui/slick/images/subtitles/flags/dan.png diff --git a/gui/slick/images/flags/deu.png b/gui/slick/images/subtitles/flags/deu.png similarity index 100% rename from gui/slick/images/flags/deu.png rename to gui/slick/images/subtitles/flags/deu.png diff --git a/gui/slick/images/flags/dzo.png b/gui/slick/images/subtitles/flags/dzo.png similarity index 100% rename from gui/slick/images/flags/dzo.png rename to gui/slick/images/subtitles/flags/dzo.png diff --git a/gui/slick/images/flags/ell.png b/gui/slick/images/subtitles/flags/ell.png similarity index 100% rename from gui/slick/images/flags/ell.png rename to gui/slick/images/subtitles/flags/ell.png diff --git a/gui/slick/images/flags/eng.png b/gui/slick/images/subtitles/flags/eng.png similarity index 100% rename from gui/slick/images/flags/eng.png rename to gui/slick/images/subtitles/flags/eng.png diff --git a/gui/slick/images/flags/epo.png b/gui/slick/images/subtitles/flags/epo.png similarity index 100% rename from gui/slick/images/flags/epo.png rename to gui/slick/images/subtitles/flags/epo.png diff --git a/gui/slick/images/flags/est.png b/gui/slick/images/subtitles/flags/est.png similarity index 100% rename from gui/slick/images/flags/est.png rename to gui/slick/images/subtitles/flags/est.png diff --git a/gui/slick/images/flags/ewe.png b/gui/slick/images/subtitles/flags/ewe.png similarity index 100% rename from gui/slick/images/flags/ewe.png rename to gui/slick/images/subtitles/flags/ewe.png diff --git a/gui/slick/images/flags/fam.png b/gui/slick/images/subtitles/flags/fam.png similarity index 100% rename from gui/slick/images/flags/fam.png rename to gui/slick/images/subtitles/flags/fam.png diff --git a/gui/slick/images/flags/fao.png b/gui/slick/images/subtitles/flags/fao.png similarity index 100% rename from gui/slick/images/flags/fao.png rename to gui/slick/images/subtitles/flags/fao.png diff --git a/gui/slick/images/flags/fas.png b/gui/slick/images/subtitles/flags/fas.png similarity index 100% rename from gui/slick/images/flags/fas.png rename to gui/slick/images/subtitles/flags/fas.png diff --git a/gui/slick/images/flags/fij.png b/gui/slick/images/subtitles/flags/fij.png similarity index 100% rename from gui/slick/images/flags/fij.png rename to gui/slick/images/subtitles/flags/fij.png diff --git a/gui/slick/images/flags/fin.png b/gui/slick/images/subtitles/flags/fin.png similarity index 100% rename from gui/slick/images/flags/fin.png rename to gui/slick/images/subtitles/flags/fin.png diff --git a/gui/slick/images/flags/fra.png b/gui/slick/images/subtitles/flags/fra.png similarity index 100% rename from gui/slick/images/flags/fra.png rename to gui/slick/images/subtitles/flags/fra.png diff --git a/gui/slick/images/flags/gla.png b/gui/slick/images/subtitles/flags/gla.png similarity index 100% rename from gui/slick/images/flags/gla.png rename to gui/slick/images/subtitles/flags/gla.png diff --git a/gui/slick/images/flags/gle.png b/gui/slick/images/subtitles/flags/gle.png similarity index 100% rename from gui/slick/images/flags/gle.png rename to gui/slick/images/subtitles/flags/gle.png diff --git a/gui/slick/images/flags/glg.png b/gui/slick/images/subtitles/flags/glg.png similarity index 100% rename from gui/slick/images/flags/glg.png rename to gui/slick/images/subtitles/flags/glg.png diff --git a/gui/slick/images/flags/grn.png b/gui/slick/images/subtitles/flags/grn.png similarity index 100% rename from gui/slick/images/flags/grn.png rename to gui/slick/images/subtitles/flags/grn.png diff --git a/gui/slick/images/flags/guj.png b/gui/slick/images/subtitles/flags/guj.png similarity index 100% rename from gui/slick/images/flags/guj.png rename to gui/slick/images/subtitles/flags/guj.png diff --git a/gui/slick/images/flags/hat.png b/gui/slick/images/subtitles/flags/hat.png similarity index 100% rename from gui/slick/images/flags/hat.png rename to gui/slick/images/subtitles/flags/hat.png diff --git a/gui/slick/images/flags/heb.png b/gui/slick/images/subtitles/flags/heb.png similarity index 100% rename from gui/slick/images/flags/heb.png rename to gui/slick/images/subtitles/flags/heb.png diff --git a/gui/slick/images/flags/hin.png b/gui/slick/images/subtitles/flags/hin.png similarity index 100% rename from gui/slick/images/flags/hin.png rename to gui/slick/images/subtitles/flags/hin.png diff --git a/gui/slick/images/flags/hrv.png b/gui/slick/images/subtitles/flags/hrv.png similarity index 100% rename from gui/slick/images/flags/hrv.png rename to gui/slick/images/subtitles/flags/hrv.png diff --git a/gui/slick/images/flags/hun.png b/gui/slick/images/subtitles/flags/hun.png similarity index 100% rename from gui/slick/images/flags/hun.png rename to gui/slick/images/subtitles/flags/hun.png diff --git a/gui/slick/images/flags/hye.png b/gui/slick/images/subtitles/flags/hye.png similarity index 100% rename from gui/slick/images/flags/hye.png rename to gui/slick/images/subtitles/flags/hye.png diff --git a/gui/slick/images/flags/ido.png b/gui/slick/images/subtitles/flags/ido.png similarity index 100% rename from gui/slick/images/flags/ido.png rename to gui/slick/images/subtitles/flags/ido.png diff --git a/gui/slick/images/flags/ile.png b/gui/slick/images/subtitles/flags/ile.png similarity index 100% rename from gui/slick/images/flags/ile.png rename to gui/slick/images/subtitles/flags/ile.png diff --git a/gui/slick/images/flags/ind.png b/gui/slick/images/subtitles/flags/ind.png similarity index 100% rename from gui/slick/images/flags/ind.png rename to gui/slick/images/subtitles/flags/ind.png diff --git a/gui/slick/images/flags/isl.png b/gui/slick/images/subtitles/flags/isl.png similarity index 100% rename from gui/slick/images/flags/isl.png rename to gui/slick/images/subtitles/flags/isl.png diff --git a/gui/slick/images/flags/ita.png b/gui/slick/images/subtitles/flags/ita.png similarity index 100% rename from gui/slick/images/flags/ita.png rename to gui/slick/images/subtitles/flags/ita.png diff --git a/gui/slick/images/flags/jpn.png b/gui/slick/images/subtitles/flags/jpn.png similarity index 100% rename from gui/slick/images/flags/jpn.png rename to gui/slick/images/subtitles/flags/jpn.png diff --git a/gui/slick/images/flags/kan.png b/gui/slick/images/subtitles/flags/kan.png similarity index 100% rename from gui/slick/images/flags/kan.png rename to gui/slick/images/subtitles/flags/kan.png diff --git a/gui/slick/images/flags/kat.png b/gui/slick/images/subtitles/flags/kat.png similarity index 100% rename from gui/slick/images/flags/kat.png rename to gui/slick/images/subtitles/flags/kat.png diff --git a/gui/slick/images/flags/kau.png b/gui/slick/images/subtitles/flags/kau.png similarity index 100% rename from gui/slick/images/flags/kau.png rename to gui/slick/images/subtitles/flags/kau.png diff --git a/gui/slick/images/flags/kaz.png b/gui/slick/images/subtitles/flags/kaz.png similarity index 100% rename from gui/slick/images/flags/kaz.png rename to gui/slick/images/subtitles/flags/kaz.png diff --git a/gui/slick/images/flags/khm.png b/gui/slick/images/subtitles/flags/khm.png similarity index 100% rename from gui/slick/images/flags/khm.png rename to gui/slick/images/subtitles/flags/khm.png diff --git a/gui/slick/images/flags/kik.png b/gui/slick/images/subtitles/flags/kik.png similarity index 100% rename from gui/slick/images/flags/kik.png rename to gui/slick/images/subtitles/flags/kik.png diff --git a/gui/slick/images/flags/kin.png b/gui/slick/images/subtitles/flags/kin.png similarity index 100% rename from gui/slick/images/flags/kin.png rename to gui/slick/images/subtitles/flags/kin.png diff --git a/gui/slick/images/flags/kir.png b/gui/slick/images/subtitles/flags/kir.png similarity index 100% rename from gui/slick/images/flags/kir.png rename to gui/slick/images/subtitles/flags/kir.png diff --git a/gui/slick/images/flags/kon.png b/gui/slick/images/subtitles/flags/kon.png similarity index 100% rename from gui/slick/images/flags/kon.png rename to gui/slick/images/subtitles/flags/kon.png diff --git a/gui/slick/images/flags/kor.png b/gui/slick/images/subtitles/flags/kor.png similarity index 100% rename from gui/slick/images/flags/kor.png rename to gui/slick/images/subtitles/flags/kor.png diff --git a/gui/slick/images/flags/lat.png b/gui/slick/images/subtitles/flags/lat.png similarity index 100% rename from gui/slick/images/flags/lat.png rename to gui/slick/images/subtitles/flags/lat.png diff --git a/gui/slick/images/flags/lav.png b/gui/slick/images/subtitles/flags/lav.png similarity index 100% rename from gui/slick/images/flags/lav.png rename to gui/slick/images/subtitles/flags/lav.png diff --git a/gui/slick/images/flags/lim.png b/gui/slick/images/subtitles/flags/lim.png similarity index 100% rename from gui/slick/images/flags/lim.png rename to gui/slick/images/subtitles/flags/lim.png diff --git a/gui/slick/images/flags/lit.png b/gui/slick/images/subtitles/flags/lit.png similarity index 100% rename from gui/slick/images/flags/lit.png rename to gui/slick/images/subtitles/flags/lit.png diff --git a/gui/slick/images/flags/ltz.png b/gui/slick/images/subtitles/flags/ltz.png similarity index 100% rename from gui/slick/images/flags/ltz.png rename to gui/slick/images/subtitles/flags/ltz.png diff --git a/gui/slick/images/flags/lub.png b/gui/slick/images/subtitles/flags/lub.png similarity index 100% rename from gui/slick/images/flags/lub.png rename to gui/slick/images/subtitles/flags/lub.png diff --git a/gui/slick/images/flags/mah.png b/gui/slick/images/subtitles/flags/mah.png similarity index 100% rename from gui/slick/images/flags/mah.png rename to gui/slick/images/subtitles/flags/mah.png diff --git a/gui/slick/images/flags/mal.png b/gui/slick/images/subtitles/flags/mal.png similarity index 100% rename from gui/slick/images/flags/mal.png rename to gui/slick/images/subtitles/flags/mal.png diff --git a/gui/slick/images/flags/mar.png b/gui/slick/images/subtitles/flags/mar.png similarity index 100% rename from gui/slick/images/flags/mar.png rename to gui/slick/images/subtitles/flags/mar.png diff --git a/gui/slick/images/flags/mkd.png b/gui/slick/images/subtitles/flags/mkd.png similarity index 100% rename from gui/slick/images/flags/mkd.png rename to gui/slick/images/subtitles/flags/mkd.png diff --git a/gui/slick/images/flags/mlg.png b/gui/slick/images/subtitles/flags/mlg.png similarity index 100% rename from gui/slick/images/flags/mlg.png rename to gui/slick/images/subtitles/flags/mlg.png diff --git a/gui/slick/images/flags/mlt.png b/gui/slick/images/subtitles/flags/mlt.png similarity index 100% rename from gui/slick/images/flags/mlt.png rename to gui/slick/images/subtitles/flags/mlt.png diff --git a/gui/slick/images/flags/mon.png b/gui/slick/images/subtitles/flags/mon.png similarity index 100% rename from gui/slick/images/flags/mon.png rename to gui/slick/images/subtitles/flags/mon.png diff --git a/gui/slick/images/flags/msa.png b/gui/slick/images/subtitles/flags/msa.png similarity index 100% rename from gui/slick/images/flags/msa.png rename to gui/slick/images/subtitles/flags/msa.png diff --git a/gui/slick/images/flags/mya.png b/gui/slick/images/subtitles/flags/mya.png similarity index 100% rename from gui/slick/images/flags/mya.png rename to gui/slick/images/subtitles/flags/mya.png diff --git a/gui/slick/images/flags/nau.png b/gui/slick/images/subtitles/flags/nau.png similarity index 100% rename from gui/slick/images/flags/nau.png rename to gui/slick/images/subtitles/flags/nau.png diff --git a/gui/slick/images/flags/nbl.png b/gui/slick/images/subtitles/flags/nbl.png similarity index 100% rename from gui/slick/images/flags/nbl.png rename to gui/slick/images/subtitles/flags/nbl.png diff --git a/gui/slick/images/flags/ndo.png b/gui/slick/images/subtitles/flags/ndo.png similarity index 100% rename from gui/slick/images/flags/ndo.png rename to gui/slick/images/subtitles/flags/ndo.png diff --git a/gui/slick/images/flags/nep.png b/gui/slick/images/subtitles/flags/nep.png similarity index 100% rename from gui/slick/images/flags/nep.png rename to gui/slick/images/subtitles/flags/nep.png diff --git a/gui/slick/images/flags/nld.png b/gui/slick/images/subtitles/flags/nld.png similarity index 100% rename from gui/slick/images/flags/nld.png rename to gui/slick/images/subtitles/flags/nld.png diff --git a/gui/slick/images/flags/nor.png b/gui/slick/images/subtitles/flags/nor.png similarity index 100% rename from gui/slick/images/flags/nor.png rename to gui/slick/images/subtitles/flags/nor.png diff --git a/gui/slick/images/flags/oci.png b/gui/slick/images/subtitles/flags/oci.png similarity index 100% rename from gui/slick/images/flags/oci.png rename to gui/slick/images/subtitles/flags/oci.png diff --git a/gui/slick/images/flags/orm.png b/gui/slick/images/subtitles/flags/orm.png similarity index 100% rename from gui/slick/images/flags/orm.png rename to gui/slick/images/subtitles/flags/orm.png diff --git a/gui/slick/images/flags/pan.png b/gui/slick/images/subtitles/flags/pan.png similarity index 100% rename from gui/slick/images/flags/pan.png rename to gui/slick/images/subtitles/flags/pan.png diff --git a/gui/slick/images/flags/pol.png b/gui/slick/images/subtitles/flags/pol.png similarity index 100% rename from gui/slick/images/flags/pol.png rename to gui/slick/images/subtitles/flags/pol.png diff --git a/gui/slick/images/flags/por.png b/gui/slick/images/subtitles/flags/por.png similarity index 100% rename from gui/slick/images/flags/por.png rename to gui/slick/images/subtitles/flags/por.png diff --git a/gui/slick/images/flags/pus.png b/gui/slick/images/subtitles/flags/pus.png similarity index 100% rename from gui/slick/images/flags/pus.png rename to gui/slick/images/subtitles/flags/pus.png diff --git a/gui/slick/images/flags/ron.png b/gui/slick/images/subtitles/flags/ron.png similarity index 100% rename from gui/slick/images/flags/ron.png rename to gui/slick/images/subtitles/flags/ron.png diff --git a/gui/slick/images/flags/rus.png b/gui/slick/images/subtitles/flags/rus.png similarity index 100% rename from gui/slick/images/flags/rus.png rename to gui/slick/images/subtitles/flags/rus.png diff --git a/gui/slick/images/flags/sag.png b/gui/slick/images/subtitles/flags/sag.png similarity index 100% rename from gui/slick/images/flags/sag.png rename to gui/slick/images/subtitles/flags/sag.png diff --git a/gui/slick/images/flags/san.png b/gui/slick/images/subtitles/flags/san.png similarity index 100% rename from gui/slick/images/flags/san.png rename to gui/slick/images/subtitles/flags/san.png diff --git a/gui/slick/images/flags/sin.png b/gui/slick/images/subtitles/flags/sin.png similarity index 100% rename from gui/slick/images/flags/sin.png rename to gui/slick/images/subtitles/flags/sin.png diff --git a/gui/slick/images/flags/slk.png b/gui/slick/images/subtitles/flags/slk.png similarity index 100% rename from gui/slick/images/flags/slk.png rename to gui/slick/images/subtitles/flags/slk.png diff --git a/gui/slick/images/flags/slv.png b/gui/slick/images/subtitles/flags/slv.png similarity index 100% rename from gui/slick/images/flags/slv.png rename to gui/slick/images/subtitles/flags/slv.png diff --git a/gui/slick/images/flags/sme.png b/gui/slick/images/subtitles/flags/sme.png similarity index 100% rename from gui/slick/images/flags/sme.png rename to gui/slick/images/subtitles/flags/sme.png diff --git a/gui/slick/images/flags/smo.png b/gui/slick/images/subtitles/flags/smo.png similarity index 100% rename from gui/slick/images/flags/smo.png rename to gui/slick/images/subtitles/flags/smo.png diff --git a/gui/slick/images/flags/sna.png b/gui/slick/images/subtitles/flags/sna.png similarity index 100% rename from gui/slick/images/flags/sna.png rename to gui/slick/images/subtitles/flags/sna.png diff --git a/gui/slick/images/flags/snd.png b/gui/slick/images/subtitles/flags/snd.png similarity index 100% rename from gui/slick/images/flags/snd.png rename to gui/slick/images/subtitles/flags/snd.png diff --git a/gui/slick/images/flags/som.png b/gui/slick/images/subtitles/flags/som.png similarity index 100% rename from gui/slick/images/flags/som.png rename to gui/slick/images/subtitles/flags/som.png diff --git a/gui/slick/images/flags/sot.png b/gui/slick/images/subtitles/flags/sot.png similarity index 100% rename from gui/slick/images/flags/sot.png rename to gui/slick/images/subtitles/flags/sot.png diff --git a/gui/slick/images/flags/spa.png b/gui/slick/images/subtitles/flags/spa.png similarity index 100% rename from gui/slick/images/flags/spa.png rename to gui/slick/images/subtitles/flags/spa.png diff --git a/gui/slick/images/flags/sqi.png b/gui/slick/images/subtitles/flags/sqi.png similarity index 100% rename from gui/slick/images/flags/sqi.png rename to gui/slick/images/subtitles/flags/sqi.png diff --git a/gui/slick/images/flags/srd.png b/gui/slick/images/subtitles/flags/srd.png similarity index 100% rename from gui/slick/images/flags/srd.png rename to gui/slick/images/subtitles/flags/srd.png diff --git a/gui/slick/images/flags/srp.png b/gui/slick/images/subtitles/flags/srp.png similarity index 100% rename from gui/slick/images/flags/srp.png rename to gui/slick/images/subtitles/flags/srp.png diff --git a/gui/slick/images/flags/swe.png b/gui/slick/images/subtitles/flags/swe.png similarity index 100% rename from gui/slick/images/flags/swe.png rename to gui/slick/images/subtitles/flags/swe.png diff --git a/gui/slick/images/flags/tat.png b/gui/slick/images/subtitles/flags/tat.png similarity index 100% rename from gui/slick/images/flags/tat.png rename to gui/slick/images/subtitles/flags/tat.png diff --git a/gui/slick/images/flags/tgk.png b/gui/slick/images/subtitles/flags/tgk.png similarity index 100% rename from gui/slick/images/flags/tgk.png rename to gui/slick/images/subtitles/flags/tgk.png diff --git a/gui/slick/images/flags/tgl.png b/gui/slick/images/subtitles/flags/tgl.png similarity index 100% rename from gui/slick/images/flags/tgl.png rename to gui/slick/images/subtitles/flags/tgl.png diff --git a/gui/slick/images/flags/tha.png b/gui/slick/images/subtitles/flags/tha.png similarity index 100% rename from gui/slick/images/flags/tha.png rename to gui/slick/images/subtitles/flags/tha.png diff --git a/gui/slick/images/flags/ton.png b/gui/slick/images/subtitles/flags/ton.png similarity index 100% rename from gui/slick/images/flags/ton.png rename to gui/slick/images/subtitles/flags/ton.png diff --git a/gui/slick/images/flags/tsn.png b/gui/slick/images/subtitles/flags/tsn.png similarity index 100% rename from gui/slick/images/flags/tsn.png rename to gui/slick/images/subtitles/flags/tsn.png diff --git a/gui/slick/images/flags/tuk.png b/gui/slick/images/subtitles/flags/tuk.png similarity index 100% rename from gui/slick/images/flags/tuk.png rename to gui/slick/images/subtitles/flags/tuk.png diff --git a/gui/slick/images/flags/tur.png b/gui/slick/images/subtitles/flags/tur.png similarity index 100% rename from gui/slick/images/flags/tur.png rename to gui/slick/images/subtitles/flags/tur.png diff --git a/gui/slick/images/flags/twi.png b/gui/slick/images/subtitles/flags/twi.png similarity index 100% rename from gui/slick/images/flags/twi.png rename to gui/slick/images/subtitles/flags/twi.png diff --git a/gui/slick/images/flags/uig.png b/gui/slick/images/subtitles/flags/uig.png similarity index 100% rename from gui/slick/images/flags/uig.png rename to gui/slick/images/subtitles/flags/uig.png diff --git a/gui/slick/images/flags/ukr.png b/gui/slick/images/subtitles/flags/ukr.png similarity index 100% rename from gui/slick/images/flags/ukr.png rename to gui/slick/images/subtitles/flags/ukr.png diff --git a/gui/slick/images/flags/und.png b/gui/slick/images/subtitles/flags/und.png similarity index 100% rename from gui/slick/images/flags/und.png rename to gui/slick/images/subtitles/flags/und.png diff --git a/gui/slick/images/subtitles/flags/unknown.png b/gui/slick/images/subtitles/flags/unknown.png new file mode 100644 index 0000000000000000000000000000000000000000..af9249bc317b724a8923e1447c60977dc9a91827 Binary files /dev/null and b/gui/slick/images/subtitles/flags/unknown.png differ diff --git a/gui/slick/images/flags/uzb.png b/gui/slick/images/subtitles/flags/uzb.png similarity index 100% rename from gui/slick/images/flags/uzb.png rename to gui/slick/images/subtitles/flags/uzb.png diff --git a/gui/slick/images/flags/ven.png b/gui/slick/images/subtitles/flags/ven.png similarity index 100% rename from gui/slick/images/flags/ven.png rename to gui/slick/images/subtitles/flags/ven.png diff --git a/gui/slick/images/flags/vie.png b/gui/slick/images/subtitles/flags/vie.png similarity index 100% rename from gui/slick/images/flags/vie.png rename to gui/slick/images/subtitles/flags/vie.png diff --git a/gui/slick/images/flags/zha.png b/gui/slick/images/subtitles/flags/zha.png similarity index 100% rename from gui/slick/images/flags/zha.png rename to gui/slick/images/subtitles/flags/zha.png diff --git a/gui/slick/images/flags/zho.png b/gui/slick/images/subtitles/flags/zho.png similarity index 100% rename from gui/slick/images/flags/zho.png rename to gui/slick/images/subtitles/flags/zho.png diff --git a/gui/slick/interfaces/default/apiBuilder.tmpl b/gui/slick/interfaces/default/apiBuilder.tmpl index 79eefee603375306575bdc1df1c2835d66893a29..3e4e5fd0410d3b87d16f1e386a87fee520776985 100644 --- a/gui/slick/interfaces/default/apiBuilder.tmpl +++ b/gui/slick/interfaces/default/apiBuilder.tmpl @@ -61,6 +61,7 @@ addList("Command", "Show.AddNew", "?cmd=show.addnew", "show.addnew", "", "", "ac addList("Command", "Show.Cache", "?cmd=show.cache", "indexerid", "", "", "action"); addList("Command", "Show.Delete", "?cmd=show.delete", "show.delete", "", "", "action"); addList("Command", "Show.GetBanner", "?cmd=show.getbanner", "indexerid", "", "", "action"); +addList("Command", "Show.GetNetworkLogo", "?cmd=show.getnetworklogo", "indexerid", "", "", "action"); addList("Command", "Show.GetPoster", "?cmd=show.getposter", "indexerid", "", "", "action"); addList("Command", "Show.GetQuality", "?cmd=show.getquality", "indexerid", "", "", "action"); addList("Command", "Show.Pause", "?cmd=show.pause", "show.pause", "", "", "action"); diff --git a/gui/slick/interfaces/default/config_general.tmpl b/gui/slick/interfaces/default/config_general.tmpl index ada2e4513eaa3e3d9a012c51d14f9960dd593b70..792c9856be01cd3bffb291728fce373d38830997 100644 --- a/gui/slick/interfaces/default/config_general.tmpl +++ b/gui/slick/interfaces/default/config_general.tmpl @@ -566,7 +566,15 @@ </span> </label> </div> - + <div class="field-pair"> + <label for="ssl_verify"> + <span class="component-title">Verify SSL Certs</span> + <span class="component-desc"> + <input type="checkbox" name="ssl_verify" id="ssl_verify" #if $sickbeard.SSL_VERIFY then 'checked="checked"' else ''#/> + <p>Verify SSL Certificates (Disable this for broken SSL installs (Like QNAP)<p> + </span> + </label> + </div> <div class="field-pair"> <label for="no_restart"> <span class="component-title">No Restart</span> diff --git a/gui/slick/interfaces/default/config_notifications.tmpl b/gui/slick/interfaces/default/config_notifications.tmpl index 0a1816b6f6fcebd6de716863fd0feb05c25e5f12..0686a4386a2010aa0f21fbfe25bce7ff92056fb4 100644 --- a/gui/slick/interfaces/default/config_notifications.tmpl +++ b/gui/slick/interfaces/default/config_notifications.tmpl @@ -1469,16 +1469,7 @@ <span class="component-desc">PIN code to authorize SickRage to access Trakt on your behalf.</span> </p> </div> - <input type="button" class="btn hide" value="Authorize SickRage" id="authTrakt" /> - <div class="field-pair"> - <label for="trakt_disable_ssl_verify"> - <span class="component-title">Disable SSL Verification</span> - <span class="component-desc"> - <input type="checkbox" class="enabler" name="trakt_disable_ssl_verify" id="trakt_disable_ssl_verify" #if $sickbeard.TRAKT_DISABLE_SSL_VERIFY then "checked=\"checked\"" else ""# /> - <p>Disable SSL certificate verification for broken SSL installs (like QNAP NAS)</p> - </span> - </label> - </div> + <input type="button" class="btn hide" value="Authorize SickRage" id="authTrakt" /> <div class="field-pair"> <label for="trakt_timeout"> <span class="component-title">API Timeout</span> diff --git a/gui/slick/interfaces/default/config_subtitles.tmpl b/gui/slick/interfaces/default/config_subtitles.tmpl index 145c847c2f837c177e0d5806d9c6162c609ca84e..75e4434aeedb6fb6faa813af3b5e3ceab0be80d1 100644 --- a/gui/slick/interfaces/default/config_subtitles.tmpl +++ b/gui/slick/interfaces/default/config_subtitles.tmpl @@ -18,7 +18,7 @@ \$(document).ready(function() { \$("#subtitles_languages").tokenInput( [ - <%=",\r\n".join("{id: \"" + lang.alpha3 + "\", name: \"" + lang.name + "\"}" for lang in subtitles.subtitleLanguageFilter())%> + <%=",\r\n".join("{id: \"" + lang.opensubtitles + "\", name: \"" + lang.name + "\"}" for lang in subtitles.subtitleLanguageFilter())%> ], { method: "POST", @@ -28,7 +28,7 @@ [ <%= - ",\r\n".join("{id: \"" + subtitles.fromietf(lang).alpha3 + "\", name: \"" + subtitles.fromietf(lang).name + "\"}" for lang in subtitles.wantedLanguages()) if subtitles.wantedLanguages() else '' + ",\r\n".join("{id: \"" + subtitles.fromietf(lang).opensubtitles + "\", name: \"" + subtitles.fromietf(lang).name + "\"}" for lang in subtitles.wantedLanguages()) if subtitles.wantedLanguages() else '' %> ] } diff --git a/gui/slick/interfaces/default/displayShow.tmpl b/gui/slick/interfaces/default/displayShow.tmpl index 29e75c6dd76cde56c556bf3cec43fa2489de1796..27ae2c46ba94897b80771e4a040a3405af23a46c 100644 --- a/gui/slick/interfaces/default/displayShow.tmpl +++ b/gui/slick/interfaces/default/displayShow.tmpl @@ -130,7 +130,7 @@ #end if #if not $sickbeard.DISPLAY_SHOW_SPECIALS and $season_special: - #$seasonResults.pop(-1) + #set lastSeason = $seasonResults.pop(-1); del lastSeason #end if <span class="h2footer displayspecials pull-right"> @@ -284,7 +284,7 @@ </table> <table style="width:180px; float: right; vertical-align: middle; height: 100%;"> - #set $info_flag = $subtitles.fromietf($show.lang).alpha3 if $subtitles.isValidLanguage($show.lang) else 'unknown' + #set $info_flag = $subtitles.fromietf($show.lang).alpha3 <tr><td class="showLegend">Info Language:</td><td><img src="$sbRoot/images/flags/${info_flag}.png" width="16" height="11" alt="$show.lang" title="$show.lang" onError="this.onerror=null;this.src='$sbRoot/images/flags/unknown.png';"/></td></tr> #if $sickbeard.USE_SUBTITLES <tr><td class="showLegend">Subtitles: </td><td><img src="$sbRoot/images/#if $show.subtitles then "yes16.png\" alt=\"Y" else "no16.png\" alt=\"N"#" width="16" height="16" /></td></tr> @@ -579,8 +579,8 @@ </td> <td class="col-subtitles" align="center"> #for $sub_lang in [$subtitles.fromietf(x) for x in $epResult["subtitles"].split(',') if $epResult["subtitles"]]: - #set $flag = $sub_lang.alpha3 if $hasattr($sub_lang, 'alpha3') and $sub_lang.alpha3 else $sub_lang.alpha2 if $hasattr($sub_lang, 'alpha2') and $sub_lang.alpha2 else 'unknown' - <img src="$sbRoot/images/flags/${flag}.png" width="16" height="11" alt="${sub_lang.name}" onError="this.onerror=null;this.src='$sbRoot/images/flags/unknown.png';" /> + #set $flag = $sub_lang.opensubtitles + <img src="$sbRoot/images/subtitles/flags/${flag}.png" width="16" height="11" alt="${sub_lang.name}" onError="this.onerror=null;this.src='$sbRoot/images/flags/unknown.png';" /> #end for </td> #set $curStatus, $curQuality = $Quality.splitCompositeStatus(int($epResult["status"])) diff --git a/gui/slick/interfaces/default/home.tmpl b/gui/slick/interfaces/default/home.tmpl index 840b37cb23c48e4a1882d7bd5671df8ba907d32c..1f1262d21a949fbcd40400e0f59a06a32cf4f145 100644 --- a/gui/slick/interfaces/default/home.tmpl +++ b/gui/slick/interfaces/default/home.tmpl @@ -603,7 +603,7 @@ $myShowList.sort(lambda x, y: cmp(x.name, y.name)) <td class="show-table"> #if $layout != 'simple': #if $curShow.network: - <span title="$curShow.network"><img class="show-network-image" src="$sbRoot/images/network/${curShow.network.replace(u"\u00C9",'e').replace(u"\u00E9",'e').lower()}.png" alt="$curShow.network" title="$curShow.network" /></span> + <span title="$curShow.network"><img class="show-network-image" src="$sbRoot/showNetworkLogo/?show=$curShow.indexerid" alt="$curShow.network" title="$curShow.network" /></span> #else: <span title="No Network"><img class="show-network-image" src="$sbRoot/images/network/nonetwork.png" alt="No Network" title="No Network" /></span> #end if @@ -801,7 +801,7 @@ $myShowList.sort(lambda x, y: cmp(x.name, y.name)) #if $layout != 'simple': <td align="center"> #if $curShow.network: - <span title="$curShow.network"><img id="network" width="54" height="27" src="$sbRoot/images/network/${curShow.network.replace(u"\u00C9",'e').replace(u"\u00E9",'e').lower()}.png" alt="$curShow.network" title="$curShow.network" /></span> + <span title="$curShow.network"><img id="network" width="54" height="27" src="$sbRoot/showNetworkLogo/?show=$curShow.indexerid" alt="$curShow.network" title="$curShow.network" /></span> #else: <span title="No Network"><img id="network" width="54" height="27" src="$sbRoot/images/network/nonetwork.png" alt="No Network" title="No Network" /></span> #end if diff --git a/gui/slick/interfaces/default/inc_bottom.tmpl b/gui/slick/interfaces/default/inc_bottom.tmpl index edc6064962f2e94aee098542bcd601086b59037c..e300853518a30c0144fba91d0411aa431dda0fe6 100644 --- a/gui/slick/interfaces/default/inc_bottom.tmpl +++ b/gui/slick/interfaces/default/inc_bottom.tmpl @@ -65,7 +65,7 @@ %> / <span class="footerhighlight">$ep_total</span> Episodes Downloaded $ep_percentage | Daily Search: <span class="footerhighlight"><%=str(sickbeard.dailySearchScheduler.timeLeft()).split('.')[0]%></span> - | Backlog Search: <span class="footerhighlight">$sbdatetime.sbdatetime.sbfdate($sickbeard.backlogSearchScheduler.nextRun())</span> + | Backlog Search: <span class="footerhighlight"><%=str(sickbeard.backlogSearchScheduler.timeLeft()).split('.')[0]%></span> </div> <!-- diff --git a/gui/slick/interfaces/default/inc_top.tmpl b/gui/slick/interfaces/default/inc_top.tmpl index ee77dc1be48e70749561163065728d97d65f69a1..a0b2f31b1bdcd4c669ded35f4e8cdb7c5dd4abaa 100644 --- a/gui/slick/interfaces/default/inc_top.tmpl +++ b/gui/slick/interfaces/default/inc_top.tmpl @@ -6,7 +6,14 @@ <head> <meta charset="utf-8"> <meta http-equiv="X-UA-Compatible" content="IE=edge"> - <meta name="viewport" content="width=device-width, initial-scale=1"> + <meta name="viewport" content="width=device-width"> + + <!-- These values come from css/dark.css and css/light.css --> + #if $sickbeard.THEME_NAME == "dark": + <meta name="theme-color" content="#15528F"> + #elif $sickbeard.THEME_NAME == "light": + <meta name="theme-color" content="#333333"> + #end if <title>SickRage - BRANCH:[$sickbeard.BRANCH] - $title</title> diff --git a/gui/slick/interfaces/default/manage_subtitleMissed.tmpl b/gui/slick/interfaces/default/manage_subtitleMissed.tmpl index 38d061d6096697317125a4e6c39218d86e8d21a4..f5962e1db98d2a576aaa77fa37cc6eba1707eb09 100644 --- a/gui/slick/interfaces/default/manage_subtitleMissed.tmpl +++ b/gui/slick/interfaces/default/manage_subtitleMissed.tmpl @@ -30,7 +30,7 @@ Manage episodes without <select name="whichSubs" class="form-control form-control-inline input-sm"> <option value="all">All</option> #for $sub_lang in [$subtitles.fromietf(x) for x in $subtitles.wantedLanguages]: -<option value="$sub_lang.alpha3">$sub_lang.name</option> +<option value="$sub_lang.opensubtitles">$sub_lang.name</option> #end for </select> subtitles diff --git a/gui/slick/interfaces/default/status.tmpl b/gui/slick/interfaces/default/status.tmpl index 675c5886a47e55957a3d682d0d8678129544c4fe..f36bb178632539f571974021867c85d7cae596c1 100644 --- a/gui/slick/interfaces/default/status.tmpl +++ b/gui/slick/interfaces/default/status.tmpl @@ -62,11 +62,11 @@ #for $schedulerName, $scheduler in $schedulerList.iteritems() #set service = getattr($sickbeard, $scheduler) <tr> - <td>#echo $schedulerName #</td> + <td>$schedulerName</td> #if $service.isAlive() - <td style="background-color:green">#echo $service.isAlive() #</td> + <td style="background-color:green">$service.isAlive()</td> #else - <td style="background-color:red">#echo $service.isAlive() #</td> + <td style="background-color:red">$service.isAlive()</td> #end if #if $scheduler == 'backlogSearchScheduler' #set searchQueue = getattr($sickbeard, 'searchQueueScheduler') @@ -75,10 +75,10 @@ #if $BLSpaused <td>Paused</td> #else - <td>#echo $service.enable #</td> + <td>$service.enable</td> #end if #else - <td>#echo $service.enable #</td> + <td>$service.enable</td> #end if #if $scheduler == 'backlogSearchScheduler' #set searchQueue = getattr($sickbeard, 'searchQueueScheduler') @@ -89,7 +89,7 @@ #else #try #set amActive = $service.action.amActive - <td>#echo $amActive #</td> + <td>$amActive</td> #except Exception <td>N/A</td> #end try @@ -97,22 +97,22 @@ #else #try #set amActive = $service.action.amActive - <td>#echo $amActive #</td> + <td>$amActive</td> #except Exception <td>N/A</td> #end try #end if - <td>#echo $service.start_time #</td> + <td align="right">$service.start_time</td> #set $cycleTime = ($service.cycleTime.microseconds + ($service.cycleTime.seconds + $service.cycleTime.days * 24 * 3600) * 10**6) / 10**6 - <td>#echo $helpers.pretty_time_delta($cycleTime) #</td> + <td align="right">$helpers.pretty_time_delta($cycleTime)</td> #if $service.enable #set $timeLeft = ($service.timeLeft().microseconds + ($service.timeLeft().seconds + $service.timeLeft().days * 24 * 3600) * 10**6) / 10**6 - <td>#echo $helpers.pretty_time_delta($timeLeft) #</td> + <td align="right">$helpers.pretty_time_delta($timeLeft)</td> #else <td></td> #end if - <td>#echo $service.lastRun.strftime("%Y-%m-%d %H:%M:%S") #</td> - <td>#echo $service.silent #</td> + <td>$service.lastRun.strftime("%Y-%m-%d %H:%M:%S")</td> + <td>$service.silent</td> </tr> #del service #end for diff --git a/gui/slick/js/ajaxEpSubtitles.js b/gui/slick/js/ajaxEpSubtitles.js index c9e769b86564e574263aa8e60d139edcc68cd7a4..3514e7af2ca95bca629012dd5166a93be07e2594 100644 --- a/gui/slick/js/ajaxEpSubtitles.js +++ b/gui/slick/js/ajaxEpSubtitles.js @@ -14,9 +14,9 @@ $.each(subtitles,function(index, language){ if (language != "" && language != "und") { if (index != subtitles.length - 1) { - subtitles_td.append($("<img/>").attr({"src": sbRoot+"/images/flags/"+language+".png", "alt": language, "width": 16, "height": 11})); + subtitles_td.append($("<img/>").attr({"src": sbRoot+"/images/subtitles/flags/"+language+".png", "alt": language, "width": 16, "height": 11})); } else { - subtitles_td.append($("<img/>").attr({"src": sbRoot+"/images/flags/"+language+".png", "alt": language, "width": 16, "height": 11})); + subtitles_td.append($("<img/>").attr({"src": sbRoot+"/images/subtitles/flags/"+language+".png", "alt": language, "width": 16, "height": 11})); } } }); diff --git a/gui/slick/js/lib/jquery.tokeninput.js b/gui/slick/js/lib/jquery.tokeninput.js index 2afcc0f2c4ad5ded5747b8e86353e47904b456d0..bf309d79fd17d3638703f596c3cdcceb25189213 100644 --- a/gui/slick/js/lib/jquery.tokeninput.js +++ b/gui/slick/js/lib/jquery.tokeninput.js @@ -43,8 +43,8 @@ var DEFAULT_SETTINGS = { idPrefix: "token-input-", // Formatters - resultsFormatter: function(item){ return "<li><img src='"+sbRoot+"/images/flags/"+item["id"]+".png' /> " + item[this.propertyToSearch]+ "</li>" }, - tokenFormatter: function(item) { return "<li><img src='"+sbRoot+"/images/flags/"+item["id"]+".png' /> <p>" + item[this.propertyToSearch] + "</p></li>" }, + resultsFormatter: function(item){ return "<li><img src='"+sbRoot+"/images/subtitles/flags/"+item["id"]+".png' /> " + item[this.propertyToSearch]+ "</li>" }, + tokenFormatter: function(item) { return "<li><img src='"+sbRoot+"/images/subtitles/flags/"+item["id"]+".png' /> <p>" + item[this.propertyToSearch] + "</p></li>" }, flag: "flag", // Callbacks diff --git a/gui/slick/js/manageSubtitleMissed.js b/gui/slick/js/manageSubtitleMissed.js index 18d0844824c3cc8417d6f12b9f2897ac8e6f6c10..262ecbb282e38fbba962c0813ac97ca310d6ae03 100644 --- a/gui/slick/js/manageSubtitleMissed.js +++ b/gui/slick/js/manageSubtitleMissed.js @@ -15,7 +15,7 @@ $(document).ready(function() { subtitles = subtitles.split(',') for (i in subtitles) { - row += ' <img src="/images/flags/'+subtitles[i]+'.png" width="16" height="11" alt="'+subtitles[i]+'" /> '; + row += ' <img src="/images/subtitles/flags/'+subtitles[i]+'.png" width="16" height="11" alt="'+subtitles[i]+'" /> '; } row += ' </td>'; row += ' </tr>' @@ -69,4 +69,4 @@ $(document).ready(function() { }); }); -}); \ No newline at end of file +}); diff --git a/lib/chardet/__init__.py b/lib/chardet/__init__.py index e4f0799d621d277d529d057ce680abfc61beb123..c943fd876dd39c9b4a7b99c349d3c068c2eef971 100644 --- a/lib/chardet/__init__.py +++ b/lib/chardet/__init__.py @@ -15,18 +15,18 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -__version__ = "2.2.1" -from sys import version_info +from .compat import PY2, PY3, bin_type as _bin_type +from .universaldetector import UniversalDetector +from .version import __version__, VERSION -def detect(aBuf): - if ((version_info < (3, 0) and isinstance(aBuf, unicode)) or - (version_info >= (3, 0) and not isinstance(aBuf, bytes))): - raise ValueError('Expected a bytes object, not a unicode object') - from . import universaldetector - u = universaldetector.UniversalDetector() - u.reset() - u.feed(aBuf) +def detect(byte_str): + if not isinstance(byte_str, _bin_type): + raise TypeError('Expected object of {0} type, got: {1}' + ''.format(_bin_type, type(byte_str))) + + u = UniversalDetector() + u.feed(byte_str) u.close() return u.result diff --git a/lib/chardet/big5freq.py b/lib/chardet/big5freq.py index 65bffc04b0d948522d4ebdf6be154aca27ccd9b0..38f32517aa8f6cf5970f7ceddd1a415289184c3e 100644 --- a/lib/chardet/big5freq.py +++ b/lib/chardet/big5freq.py @@ -45,7 +45,7 @@ BIG5_TYPICAL_DISTRIBUTION_RATIO = 0.75 #Char to FreqOrder table BIG5_TABLE_SIZE = 5376 -Big5CharToFreqOrder = ( +BIG5_CHAR_TO_FREQ_ORDER = ( 1,1801,1506, 255,1431, 198, 9, 82, 6,5008, 177, 202,3681,1256,2821, 110, # 16 3814, 33,3274, 261, 76, 44,2114, 16,2946,2187,1176, 659,3971, 26,3451,2653, # 32 1198,3972,3350,4202, 410,2215, 302, 590, 361,1964, 8, 204, 58,4510,5009,1932, # 48 @@ -381,545 +381,6 @@ Big5CharToFreqOrder = ( 938,3941, 553,2680, 116,5783,3942,3667,5784,3545,2681,2783,3438,3344,2820,5785, # 5328 3668,2943,4160,1747,2944,2983,5786,5787, 207,5788,4809,5789,4810,2521,5790,3033, # 5344 890,3669,3943,5791,1878,3798,3439,5792,2186,2358,3440,1652,5793,5794,5795, 941, # 5360 -2299, 208,3546,4161,2020, 330,4438,3944,2906,2499,3799,4439,4811,5796,5797,5798, # 5376 #last 512 -#Everything below is of no interest for detection purpose -2522,1613,4812,5799,3345,3945,2523,5800,4162,5801,1637,4163,2471,4813,3946,5802, # 5392 -2500,3034,3800,5803,5804,2195,4814,5805,2163,5806,5807,5808,5809,5810,5811,5812, # 5408 -5813,5814,5815,5816,5817,5818,5819,5820,5821,5822,5823,5824,5825,5826,5827,5828, # 5424 -5829,5830,5831,5832,5833,5834,5835,5836,5837,5838,5839,5840,5841,5842,5843,5844, # 5440 -5845,5846,5847,5848,5849,5850,5851,5852,5853,5854,5855,5856,5857,5858,5859,5860, # 5456 -5861,5862,5863,5864,5865,5866,5867,5868,5869,5870,5871,5872,5873,5874,5875,5876, # 5472 -5877,5878,5879,5880,5881,5882,5883,5884,5885,5886,5887,5888,5889,5890,5891,5892, # 5488 -5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904,5905,5906,5907,5908, # 5504 -5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920,5921,5922,5923,5924, # 5520 -5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,5936,5937,5938,5939,5940, # 5536 -5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951,5952,5953,5954,5955,5956, # 5552 -5957,5958,5959,5960,5961,5962,5963,5964,5965,5966,5967,5968,5969,5970,5971,5972, # 5568 -5973,5974,5975,5976,5977,5978,5979,5980,5981,5982,5983,5984,5985,5986,5987,5988, # 5584 -5989,5990,5991,5992,5993,5994,5995,5996,5997,5998,5999,6000,6001,6002,6003,6004, # 5600 -6005,6006,6007,6008,6009,6010,6011,6012,6013,6014,6015,6016,6017,6018,6019,6020, # 5616 -6021,6022,6023,6024,6025,6026,6027,6028,6029,6030,6031,6032,6033,6034,6035,6036, # 5632 -6037,6038,6039,6040,6041,6042,6043,6044,6045,6046,6047,6048,6049,6050,6051,6052, # 5648 -6053,6054,6055,6056,6057,6058,6059,6060,6061,6062,6063,6064,6065,6066,6067,6068, # 5664 -6069,6070,6071,6072,6073,6074,6075,6076,6077,6078,6079,6080,6081,6082,6083,6084, # 5680 -6085,6086,6087,6088,6089,6090,6091,6092,6093,6094,6095,6096,6097,6098,6099,6100, # 5696 -6101,6102,6103,6104,6105,6106,6107,6108,6109,6110,6111,6112,6113,6114,6115,6116, # 5712 -6117,6118,6119,6120,6121,6122,6123,6124,6125,6126,6127,6128,6129,6130,6131,6132, # 5728 -6133,6134,6135,6136,6137,6138,6139,6140,6141,6142,6143,6144,6145,6146,6147,6148, # 5744 -6149,6150,6151,6152,6153,6154,6155,6156,6157,6158,6159,6160,6161,6162,6163,6164, # 5760 -6165,6166,6167,6168,6169,6170,6171,6172,6173,6174,6175,6176,6177,6178,6179,6180, # 5776 -6181,6182,6183,6184,6185,6186,6187,6188,6189,6190,6191,6192,6193,6194,6195,6196, # 5792 -6197,6198,6199,6200,6201,6202,6203,6204,6205,6206,6207,6208,6209,6210,6211,6212, # 5808 -6213,6214,6215,6216,6217,6218,6219,6220,6221,6222,6223,3670,6224,6225,6226,6227, # 5824 -6228,6229,6230,6231,6232,6233,6234,6235,6236,6237,6238,6239,6240,6241,6242,6243, # 5840 -6244,6245,6246,6247,6248,6249,6250,6251,6252,6253,6254,6255,6256,6257,6258,6259, # 5856 -6260,6261,6262,6263,6264,6265,6266,6267,6268,6269,6270,6271,6272,6273,6274,6275, # 5872 -6276,6277,6278,6279,6280,6281,6282,6283,6284,6285,4815,6286,6287,6288,6289,6290, # 5888 -6291,6292,4816,6293,6294,6295,6296,6297,6298,6299,6300,6301,6302,6303,6304,6305, # 5904 -6306,6307,6308,6309,6310,6311,4817,4818,6312,6313,6314,6315,6316,6317,6318,4819, # 5920 -6319,6320,6321,6322,6323,6324,6325,6326,6327,6328,6329,6330,6331,6332,6333,6334, # 5936 -6335,6336,6337,4820,6338,6339,6340,6341,6342,6343,6344,6345,6346,6347,6348,6349, # 5952 -6350,6351,6352,6353,6354,6355,6356,6357,6358,6359,6360,6361,6362,6363,6364,6365, # 5968 -6366,6367,6368,6369,6370,6371,6372,6373,6374,6375,6376,6377,6378,6379,6380,6381, # 5984 -6382,6383,6384,6385,6386,6387,6388,6389,6390,6391,6392,6393,6394,6395,6396,6397, # 6000 -6398,6399,6400,6401,6402,6403,6404,6405,6406,6407,6408,6409,6410,3441,6411,6412, # 6016 -6413,6414,6415,6416,6417,6418,6419,6420,6421,6422,6423,6424,6425,4440,6426,6427, # 6032 -6428,6429,6430,6431,6432,6433,6434,6435,6436,6437,6438,6439,6440,6441,6442,6443, # 6048 -6444,6445,6446,6447,6448,6449,6450,6451,6452,6453,6454,4821,6455,6456,6457,6458, # 6064 -6459,6460,6461,6462,6463,6464,6465,6466,6467,6468,6469,6470,6471,6472,6473,6474, # 6080 -6475,6476,6477,3947,3948,6478,6479,6480,6481,3272,4441,6482,6483,6484,6485,4442, # 6096 -6486,6487,6488,6489,6490,6491,6492,6493,6494,6495,6496,4822,6497,6498,6499,6500, # 6112 -6501,6502,6503,6504,6505,6506,6507,6508,6509,6510,6511,6512,6513,6514,6515,6516, # 6128 -6517,6518,6519,6520,6521,6522,6523,6524,6525,6526,6527,6528,6529,6530,6531,6532, # 6144 -6533,6534,6535,6536,6537,6538,6539,6540,6541,6542,6543,6544,6545,6546,6547,6548, # 6160 -6549,6550,6551,6552,6553,6554,6555,6556,2784,6557,4823,6558,6559,6560,6561,6562, # 6176 -6563,6564,6565,6566,6567,6568,6569,3949,6570,6571,6572,4824,6573,6574,6575,6576, # 6192 -6577,6578,6579,6580,6581,6582,6583,4825,6584,6585,6586,3950,2785,6587,6588,6589, # 6208 -6590,6591,6592,6593,6594,6595,6596,6597,6598,6599,6600,6601,6602,6603,6604,6605, # 6224 -6606,6607,6608,6609,6610,6611,6612,4826,6613,6614,6615,4827,6616,6617,6618,6619, # 6240 -6620,6621,6622,6623,6624,6625,4164,6626,6627,6628,6629,6630,6631,6632,6633,6634, # 6256 -3547,6635,4828,6636,6637,6638,6639,6640,6641,6642,3951,2984,6643,6644,6645,6646, # 6272 -6647,6648,6649,4165,6650,4829,6651,6652,4830,6653,6654,6655,6656,6657,6658,6659, # 6288 -6660,6661,6662,4831,6663,6664,6665,6666,6667,6668,6669,6670,6671,4166,6672,4832, # 6304 -3952,6673,6674,6675,6676,4833,6677,6678,6679,4167,6680,6681,6682,3198,6683,6684, # 6320 -6685,6686,6687,6688,6689,6690,6691,6692,6693,6694,6695,6696,6697,4834,6698,6699, # 6336 -6700,6701,6702,6703,6704,6705,6706,6707,6708,6709,6710,6711,6712,6713,6714,6715, # 6352 -6716,6717,6718,6719,6720,6721,6722,6723,6724,6725,6726,6727,6728,6729,6730,6731, # 6368 -6732,6733,6734,4443,6735,6736,6737,6738,6739,6740,6741,6742,6743,6744,6745,4444, # 6384 -6746,6747,6748,6749,6750,6751,6752,6753,6754,6755,6756,6757,6758,6759,6760,6761, # 6400 -6762,6763,6764,6765,6766,6767,6768,6769,6770,6771,6772,6773,6774,6775,6776,6777, # 6416 -6778,6779,6780,6781,4168,6782,6783,3442,6784,6785,6786,6787,6788,6789,6790,6791, # 6432 -4169,6792,6793,6794,6795,6796,6797,6798,6799,6800,6801,6802,6803,6804,6805,6806, # 6448 -6807,6808,6809,6810,6811,4835,6812,6813,6814,4445,6815,6816,4446,6817,6818,6819, # 6464 -6820,6821,6822,6823,6824,6825,6826,6827,6828,6829,6830,6831,6832,6833,6834,6835, # 6480 -3548,6836,6837,6838,6839,6840,6841,6842,6843,6844,6845,6846,4836,6847,6848,6849, # 6496 -6850,6851,6852,6853,6854,3953,6855,6856,6857,6858,6859,6860,6861,6862,6863,6864, # 6512 -6865,6866,6867,6868,6869,6870,6871,6872,6873,6874,6875,6876,6877,3199,6878,6879, # 6528 -6880,6881,6882,4447,6883,6884,6885,6886,6887,6888,6889,6890,6891,6892,6893,6894, # 6544 -6895,6896,6897,6898,6899,6900,6901,6902,6903,6904,4170,6905,6906,6907,6908,6909, # 6560 -6910,6911,6912,6913,6914,6915,6916,6917,6918,6919,6920,6921,6922,6923,6924,6925, # 6576 -6926,6927,4837,6928,6929,6930,6931,6932,6933,6934,6935,6936,3346,6937,6938,4838, # 6592 -6939,6940,6941,4448,6942,6943,6944,6945,6946,4449,6947,6948,6949,6950,6951,6952, # 6608 -6953,6954,6955,6956,6957,6958,6959,6960,6961,6962,6963,6964,6965,6966,6967,6968, # 6624 -6969,6970,6971,6972,6973,6974,6975,6976,6977,6978,6979,6980,6981,6982,6983,6984, # 6640 -6985,6986,6987,6988,6989,6990,6991,6992,6993,6994,3671,6995,6996,6997,6998,4839, # 6656 -6999,7000,7001,7002,3549,7003,7004,7005,7006,7007,7008,7009,7010,7011,7012,7013, # 6672 -7014,7015,7016,7017,7018,7019,7020,7021,7022,7023,7024,7025,7026,7027,7028,7029, # 6688 -7030,4840,7031,7032,7033,7034,7035,7036,7037,7038,4841,7039,7040,7041,7042,7043, # 6704 -7044,7045,7046,7047,7048,7049,7050,7051,7052,7053,7054,7055,7056,7057,7058,7059, # 6720 -7060,7061,7062,7063,7064,7065,7066,7067,7068,7069,7070,2985,7071,7072,7073,7074, # 6736 -7075,7076,7077,7078,7079,7080,4842,7081,7082,7083,7084,7085,7086,7087,7088,7089, # 6752 -7090,7091,7092,7093,7094,7095,7096,7097,7098,7099,7100,7101,7102,7103,7104,7105, # 6768 -7106,7107,7108,7109,7110,7111,7112,7113,7114,7115,7116,7117,7118,4450,7119,7120, # 6784 -7121,7122,7123,7124,7125,7126,7127,7128,7129,7130,7131,7132,7133,7134,7135,7136, # 6800 -7137,7138,7139,7140,7141,7142,7143,4843,7144,7145,7146,7147,7148,7149,7150,7151, # 6816 -7152,7153,7154,7155,7156,7157,7158,7159,7160,7161,7162,7163,7164,7165,7166,7167, # 6832 -7168,7169,7170,7171,7172,7173,7174,7175,7176,7177,7178,7179,7180,7181,7182,7183, # 6848 -7184,7185,7186,7187,7188,4171,4172,7189,7190,7191,7192,7193,7194,7195,7196,7197, # 6864 -7198,7199,7200,7201,7202,7203,7204,7205,7206,7207,7208,7209,7210,7211,7212,7213, # 6880 -7214,7215,7216,7217,7218,7219,7220,7221,7222,7223,7224,7225,7226,7227,7228,7229, # 6896 -7230,7231,7232,7233,7234,7235,7236,7237,7238,7239,7240,7241,7242,7243,7244,7245, # 6912 -7246,7247,7248,7249,7250,7251,7252,7253,7254,7255,7256,7257,7258,7259,7260,7261, # 6928 -7262,7263,7264,7265,7266,7267,7268,7269,7270,7271,7272,7273,7274,7275,7276,7277, # 6944 -7278,7279,7280,7281,7282,7283,7284,7285,7286,7287,7288,7289,7290,7291,7292,7293, # 6960 -7294,7295,7296,4844,7297,7298,7299,7300,7301,7302,7303,7304,7305,7306,7307,7308, # 6976 -7309,7310,7311,7312,7313,7314,7315,7316,4451,7317,7318,7319,7320,7321,7322,7323, # 6992 -7324,7325,7326,7327,7328,7329,7330,7331,7332,7333,7334,7335,7336,7337,7338,7339, # 7008 -7340,7341,7342,7343,7344,7345,7346,7347,7348,7349,7350,7351,7352,7353,4173,7354, # 7024 -7355,4845,7356,7357,7358,7359,7360,7361,7362,7363,7364,7365,7366,7367,7368,7369, # 7040 -7370,7371,7372,7373,7374,7375,7376,7377,7378,7379,7380,7381,7382,7383,7384,7385, # 7056 -7386,7387,7388,4846,7389,7390,7391,7392,7393,7394,7395,7396,7397,7398,7399,7400, # 7072 -7401,7402,7403,7404,7405,3672,7406,7407,7408,7409,7410,7411,7412,7413,7414,7415, # 7088 -7416,7417,7418,7419,7420,7421,7422,7423,7424,7425,7426,7427,7428,7429,7430,7431, # 7104 -7432,7433,7434,7435,7436,7437,7438,7439,7440,7441,7442,7443,7444,7445,7446,7447, # 7120 -7448,7449,7450,7451,7452,7453,4452,7454,3200,7455,7456,7457,7458,7459,7460,7461, # 7136 -7462,7463,7464,7465,7466,7467,7468,7469,7470,7471,7472,7473,7474,4847,7475,7476, # 7152 -7477,3133,7478,7479,7480,7481,7482,7483,7484,7485,7486,7487,7488,7489,7490,7491, # 7168 -7492,7493,7494,7495,7496,7497,7498,7499,7500,7501,7502,3347,7503,7504,7505,7506, # 7184 -7507,7508,7509,7510,7511,7512,7513,7514,7515,7516,7517,7518,7519,7520,7521,4848, # 7200 -7522,7523,7524,7525,7526,7527,7528,7529,7530,7531,7532,7533,7534,7535,7536,7537, # 7216 -7538,7539,7540,7541,7542,7543,7544,7545,7546,7547,7548,7549,3801,4849,7550,7551, # 7232 -7552,7553,7554,7555,7556,7557,7558,7559,7560,7561,7562,7563,7564,7565,7566,7567, # 7248 -7568,7569,3035,7570,7571,7572,7573,7574,7575,7576,7577,7578,7579,7580,7581,7582, # 7264 -7583,7584,7585,7586,7587,7588,7589,7590,7591,7592,7593,7594,7595,7596,7597,7598, # 7280 -7599,7600,7601,7602,7603,7604,7605,7606,7607,7608,7609,7610,7611,7612,7613,7614, # 7296 -7615,7616,4850,7617,7618,3802,7619,7620,7621,7622,7623,7624,7625,7626,7627,7628, # 7312 -7629,7630,7631,7632,4851,7633,7634,7635,7636,7637,7638,7639,7640,7641,7642,7643, # 7328 -7644,7645,7646,7647,7648,7649,7650,7651,7652,7653,7654,7655,7656,7657,7658,7659, # 7344 -7660,7661,7662,7663,7664,7665,7666,7667,7668,7669,7670,4453,7671,7672,7673,7674, # 7360 -7675,7676,7677,7678,7679,7680,7681,7682,7683,7684,7685,7686,7687,7688,7689,7690, # 7376 -7691,7692,7693,7694,7695,7696,7697,3443,7698,7699,7700,7701,7702,4454,7703,7704, # 7392 -7705,7706,7707,7708,7709,7710,7711,7712,7713,2472,7714,7715,7716,7717,7718,7719, # 7408 -7720,7721,7722,7723,7724,7725,7726,7727,7728,7729,7730,7731,3954,7732,7733,7734, # 7424 -7735,7736,7737,7738,7739,7740,7741,7742,7743,7744,7745,7746,7747,7748,7749,7750, # 7440 -3134,7751,7752,4852,7753,7754,7755,4853,7756,7757,7758,7759,7760,4174,7761,7762, # 7456 -7763,7764,7765,7766,7767,7768,7769,7770,7771,7772,7773,7774,7775,7776,7777,7778, # 7472 -7779,7780,7781,7782,7783,7784,7785,7786,7787,7788,7789,7790,7791,7792,7793,7794, # 7488 -7795,7796,7797,7798,7799,7800,7801,7802,7803,7804,7805,4854,7806,7807,7808,7809, # 7504 -7810,7811,7812,7813,7814,7815,7816,7817,7818,7819,7820,7821,7822,7823,7824,7825, # 7520 -4855,7826,7827,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837,7838,7839,7840, # 7536 -7841,7842,7843,7844,7845,7846,7847,3955,7848,7849,7850,7851,7852,7853,7854,7855, # 7552 -7856,7857,7858,7859,7860,3444,7861,7862,7863,7864,7865,7866,7867,7868,7869,7870, # 7568 -7871,7872,7873,7874,7875,7876,7877,7878,7879,7880,7881,7882,7883,7884,7885,7886, # 7584 -7887,7888,7889,7890,7891,4175,7892,7893,7894,7895,7896,4856,4857,7897,7898,7899, # 7600 -7900,2598,7901,7902,7903,7904,7905,7906,7907,7908,4455,7909,7910,7911,7912,7913, # 7616 -7914,3201,7915,7916,7917,7918,7919,7920,7921,4858,7922,7923,7924,7925,7926,7927, # 7632 -7928,7929,7930,7931,7932,7933,7934,7935,7936,7937,7938,7939,7940,7941,7942,7943, # 7648 -7944,7945,7946,7947,7948,7949,7950,7951,7952,7953,7954,7955,7956,7957,7958,7959, # 7664 -7960,7961,7962,7963,7964,7965,7966,7967,7968,7969,7970,7971,7972,7973,7974,7975, # 7680 -7976,7977,7978,7979,7980,7981,4859,7982,7983,7984,7985,7986,7987,7988,7989,7990, # 7696 -7991,7992,7993,7994,7995,7996,4860,7997,7998,7999,8000,8001,8002,8003,8004,8005, # 7712 -8006,8007,8008,8009,8010,8011,8012,8013,8014,8015,8016,4176,8017,8018,8019,8020, # 7728 -8021,8022,8023,4861,8024,8025,8026,8027,8028,8029,8030,8031,8032,8033,8034,8035, # 7744 -8036,4862,4456,8037,8038,8039,8040,4863,8041,8042,8043,8044,8045,8046,8047,8048, # 7760 -8049,8050,8051,8052,8053,8054,8055,8056,8057,8058,8059,8060,8061,8062,8063,8064, # 7776 -8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079,8080, # 7792 -8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095,8096, # 7808 -8097,8098,8099,4864,4177,8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110, # 7824 -8111,8112,8113,8114,8115,8116,8117,8118,8119,8120,4178,8121,8122,8123,8124,8125, # 7840 -8126,8127,8128,8129,8130,8131,8132,8133,8134,8135,8136,8137,8138,8139,8140,8141, # 7856 -8142,8143,8144,8145,4865,4866,8146,8147,8148,8149,8150,8151,8152,8153,8154,8155, # 7872 -8156,8157,8158,8159,8160,8161,8162,8163,8164,8165,4179,8166,8167,8168,8169,8170, # 7888 -8171,8172,8173,8174,8175,8176,8177,8178,8179,8180,8181,4457,8182,8183,8184,8185, # 7904 -8186,8187,8188,8189,8190,8191,8192,8193,8194,8195,8196,8197,8198,8199,8200,8201, # 7920 -8202,8203,8204,8205,8206,8207,8208,8209,8210,8211,8212,8213,8214,8215,8216,8217, # 7936 -8218,8219,8220,8221,8222,8223,8224,8225,8226,8227,8228,8229,8230,8231,8232,8233, # 7952 -8234,8235,8236,8237,8238,8239,8240,8241,8242,8243,8244,8245,8246,8247,8248,8249, # 7968 -8250,8251,8252,8253,8254,8255,8256,3445,8257,8258,8259,8260,8261,8262,4458,8263, # 7984 -8264,8265,8266,8267,8268,8269,8270,8271,8272,4459,8273,8274,8275,8276,3550,8277, # 8000 -8278,8279,8280,8281,8282,8283,8284,8285,8286,8287,8288,8289,4460,8290,8291,8292, # 8016 -8293,8294,8295,8296,8297,8298,8299,8300,8301,8302,8303,8304,8305,8306,8307,4867, # 8032 -8308,8309,8310,8311,8312,3551,8313,8314,8315,8316,8317,8318,8319,8320,8321,8322, # 8048 -8323,8324,8325,8326,4868,8327,8328,8329,8330,8331,8332,8333,8334,8335,8336,8337, # 8064 -8338,8339,8340,8341,8342,8343,8344,8345,8346,8347,8348,8349,8350,8351,8352,8353, # 8080 -8354,8355,8356,8357,8358,8359,8360,8361,8362,8363,4869,4461,8364,8365,8366,8367, # 8096 -8368,8369,8370,4870,8371,8372,8373,8374,8375,8376,8377,8378,8379,8380,8381,8382, # 8112 -8383,8384,8385,8386,8387,8388,8389,8390,8391,8392,8393,8394,8395,8396,8397,8398, # 8128 -8399,8400,8401,8402,8403,8404,8405,8406,8407,8408,8409,8410,4871,8411,8412,8413, # 8144 -8414,8415,8416,8417,8418,8419,8420,8421,8422,4462,8423,8424,8425,8426,8427,8428, # 8160 -8429,8430,8431,8432,8433,2986,8434,8435,8436,8437,8438,8439,8440,8441,8442,8443, # 8176 -8444,8445,8446,8447,8448,8449,8450,8451,8452,8453,8454,8455,8456,8457,8458,8459, # 8192 -8460,8461,8462,8463,8464,8465,8466,8467,8468,8469,8470,8471,8472,8473,8474,8475, # 8208 -8476,8477,8478,4180,8479,8480,8481,8482,8483,8484,8485,8486,8487,8488,8489,8490, # 8224 -8491,8492,8493,8494,8495,8496,8497,8498,8499,8500,8501,8502,8503,8504,8505,8506, # 8240 -8507,8508,8509,8510,8511,8512,8513,8514,8515,8516,8517,8518,8519,8520,8521,8522, # 8256 -8523,8524,8525,8526,8527,8528,8529,8530,8531,8532,8533,8534,8535,8536,8537,8538, # 8272 -8539,8540,8541,8542,8543,8544,8545,8546,8547,8548,8549,8550,8551,8552,8553,8554, # 8288 -8555,8556,8557,8558,8559,8560,8561,8562,8563,8564,4872,8565,8566,8567,8568,8569, # 8304 -8570,8571,8572,8573,4873,8574,8575,8576,8577,8578,8579,8580,8581,8582,8583,8584, # 8320 -8585,8586,8587,8588,8589,8590,8591,8592,8593,8594,8595,8596,8597,8598,8599,8600, # 8336 -8601,8602,8603,8604,8605,3803,8606,8607,8608,8609,8610,8611,8612,8613,4874,3804, # 8352 -8614,8615,8616,8617,8618,8619,8620,8621,3956,8622,8623,8624,8625,8626,8627,8628, # 8368 -8629,8630,8631,8632,8633,8634,8635,8636,8637,8638,2865,8639,8640,8641,8642,8643, # 8384 -8644,8645,8646,8647,8648,8649,8650,8651,8652,8653,8654,8655,8656,4463,8657,8658, # 8400 -8659,4875,4876,8660,8661,8662,8663,8664,8665,8666,8667,8668,8669,8670,8671,8672, # 8416 -8673,8674,8675,8676,8677,8678,8679,8680,8681,4464,8682,8683,8684,8685,8686,8687, # 8432 -8688,8689,8690,8691,8692,8693,8694,8695,8696,8697,8698,8699,8700,8701,8702,8703, # 8448 -8704,8705,8706,8707,8708,8709,2261,8710,8711,8712,8713,8714,8715,8716,8717,8718, # 8464 -8719,8720,8721,8722,8723,8724,8725,8726,8727,8728,8729,8730,8731,8732,8733,4181, # 8480 -8734,8735,8736,8737,8738,8739,8740,8741,8742,8743,8744,8745,8746,8747,8748,8749, # 8496 -8750,8751,8752,8753,8754,8755,8756,8757,8758,8759,8760,8761,8762,8763,4877,8764, # 8512 -8765,8766,8767,8768,8769,8770,8771,8772,8773,8774,8775,8776,8777,8778,8779,8780, # 8528 -8781,8782,8783,8784,8785,8786,8787,8788,4878,8789,4879,8790,8791,8792,4880,8793, # 8544 -8794,8795,8796,8797,8798,8799,8800,8801,4881,8802,8803,8804,8805,8806,8807,8808, # 8560 -8809,8810,8811,8812,8813,8814,8815,3957,8816,8817,8818,8819,8820,8821,8822,8823, # 8576 -8824,8825,8826,8827,8828,8829,8830,8831,8832,8833,8834,8835,8836,8837,8838,8839, # 8592 -8840,8841,8842,8843,8844,8845,8846,8847,4882,8848,8849,8850,8851,8852,8853,8854, # 8608 -8855,8856,8857,8858,8859,8860,8861,8862,8863,8864,8865,8866,8867,8868,8869,8870, # 8624 -8871,8872,8873,8874,8875,8876,8877,8878,8879,8880,8881,8882,8883,8884,3202,8885, # 8640 -8886,8887,8888,8889,8890,8891,8892,8893,8894,8895,8896,8897,8898,8899,8900,8901, # 8656 -8902,8903,8904,8905,8906,8907,8908,8909,8910,8911,8912,8913,8914,8915,8916,8917, # 8672 -8918,8919,8920,8921,8922,8923,8924,4465,8925,8926,8927,8928,8929,8930,8931,8932, # 8688 -4883,8933,8934,8935,8936,8937,8938,8939,8940,8941,8942,8943,2214,8944,8945,8946, # 8704 -8947,8948,8949,8950,8951,8952,8953,8954,8955,8956,8957,8958,8959,8960,8961,8962, # 8720 -8963,8964,8965,4884,8966,8967,8968,8969,8970,8971,8972,8973,8974,8975,8976,8977, # 8736 -8978,8979,8980,8981,8982,8983,8984,8985,8986,8987,8988,8989,8990,8991,8992,4885, # 8752 -8993,8994,8995,8996,8997,8998,8999,9000,9001,9002,9003,9004,9005,9006,9007,9008, # 8768 -9009,9010,9011,9012,9013,9014,9015,9016,9017,9018,9019,9020,9021,4182,9022,9023, # 8784 -9024,9025,9026,9027,9028,9029,9030,9031,9032,9033,9034,9035,9036,9037,9038,9039, # 8800 -9040,9041,9042,9043,9044,9045,9046,9047,9048,9049,9050,9051,9052,9053,9054,9055, # 8816 -9056,9057,9058,9059,9060,9061,9062,9063,4886,9064,9065,9066,9067,9068,9069,4887, # 8832 -9070,9071,9072,9073,9074,9075,9076,9077,9078,9079,9080,9081,9082,9083,9084,9085, # 8848 -9086,9087,9088,9089,9090,9091,9092,9093,9094,9095,9096,9097,9098,9099,9100,9101, # 8864 -9102,9103,9104,9105,9106,9107,9108,9109,9110,9111,9112,9113,9114,9115,9116,9117, # 8880 -9118,9119,9120,9121,9122,9123,9124,9125,9126,9127,9128,9129,9130,9131,9132,9133, # 8896 -9134,9135,9136,9137,9138,9139,9140,9141,3958,9142,9143,9144,9145,9146,9147,9148, # 8912 -9149,9150,9151,4888,9152,9153,9154,9155,9156,9157,9158,9159,9160,9161,9162,9163, # 8928 -9164,9165,9166,9167,9168,9169,9170,9171,9172,9173,9174,9175,4889,9176,9177,9178, # 8944 -9179,9180,9181,9182,9183,9184,9185,9186,9187,9188,9189,9190,9191,9192,9193,9194, # 8960 -9195,9196,9197,9198,9199,9200,9201,9202,9203,4890,9204,9205,9206,9207,9208,9209, # 8976 -9210,9211,9212,9213,9214,9215,9216,9217,9218,9219,9220,9221,9222,4466,9223,9224, # 8992 -9225,9226,9227,9228,9229,9230,9231,9232,9233,9234,9235,9236,9237,9238,9239,9240, # 9008 -9241,9242,9243,9244,9245,4891,9246,9247,9248,9249,9250,9251,9252,9253,9254,9255, # 9024 -9256,9257,4892,9258,9259,9260,9261,4893,4894,9262,9263,9264,9265,9266,9267,9268, # 9040 -9269,9270,9271,9272,9273,4467,9274,9275,9276,9277,9278,9279,9280,9281,9282,9283, # 9056 -9284,9285,3673,9286,9287,9288,9289,9290,9291,9292,9293,9294,9295,9296,9297,9298, # 9072 -9299,9300,9301,9302,9303,9304,9305,9306,9307,9308,9309,9310,9311,9312,9313,9314, # 9088 -9315,9316,9317,9318,9319,9320,9321,9322,4895,9323,9324,9325,9326,9327,9328,9329, # 9104 -9330,9331,9332,9333,9334,9335,9336,9337,9338,9339,9340,9341,9342,9343,9344,9345, # 9120 -9346,9347,4468,9348,9349,9350,9351,9352,9353,9354,9355,9356,9357,9358,9359,9360, # 9136 -9361,9362,9363,9364,9365,9366,9367,9368,9369,9370,9371,9372,9373,4896,9374,4469, # 9152 -9375,9376,9377,9378,9379,4897,9380,9381,9382,9383,9384,9385,9386,9387,9388,9389, # 9168 -9390,9391,9392,9393,9394,9395,9396,9397,9398,9399,9400,9401,9402,9403,9404,9405, # 9184 -9406,4470,9407,2751,9408,9409,3674,3552,9410,9411,9412,9413,9414,9415,9416,9417, # 9200 -9418,9419,9420,9421,4898,9422,9423,9424,9425,9426,9427,9428,9429,3959,9430,9431, # 9216 -9432,9433,9434,9435,9436,4471,9437,9438,9439,9440,9441,9442,9443,9444,9445,9446, # 9232 -9447,9448,9449,9450,3348,9451,9452,9453,9454,9455,9456,9457,9458,9459,9460,9461, # 9248 -9462,9463,9464,9465,9466,9467,9468,9469,9470,9471,9472,4899,9473,9474,9475,9476, # 9264 -9477,4900,9478,9479,9480,9481,9482,9483,9484,9485,9486,9487,9488,3349,9489,9490, # 9280 -9491,9492,9493,9494,9495,9496,9497,9498,9499,9500,9501,9502,9503,9504,9505,9506, # 9296 -9507,9508,9509,9510,9511,9512,9513,9514,9515,9516,9517,9518,9519,9520,4901,9521, # 9312 -9522,9523,9524,9525,9526,4902,9527,9528,9529,9530,9531,9532,9533,9534,9535,9536, # 9328 -9537,9538,9539,9540,9541,9542,9543,9544,9545,9546,9547,9548,9549,9550,9551,9552, # 9344 -9553,9554,9555,9556,9557,9558,9559,9560,9561,9562,9563,9564,9565,9566,9567,9568, # 9360 -9569,9570,9571,9572,9573,9574,9575,9576,9577,9578,9579,9580,9581,9582,9583,9584, # 9376 -3805,9585,9586,9587,9588,9589,9590,9591,9592,9593,9594,9595,9596,9597,9598,9599, # 9392 -9600,9601,9602,4903,9603,9604,9605,9606,9607,4904,9608,9609,9610,9611,9612,9613, # 9408 -9614,4905,9615,9616,9617,9618,9619,9620,9621,9622,9623,9624,9625,9626,9627,9628, # 9424 -9629,9630,9631,9632,4906,9633,9634,9635,9636,9637,9638,9639,9640,9641,9642,9643, # 9440 -4907,9644,9645,9646,9647,9648,9649,9650,9651,9652,9653,9654,9655,9656,9657,9658, # 9456 -9659,9660,9661,9662,9663,9664,9665,9666,9667,9668,9669,9670,9671,9672,4183,9673, # 9472 -9674,9675,9676,9677,4908,9678,9679,9680,9681,4909,9682,9683,9684,9685,9686,9687, # 9488 -9688,9689,9690,4910,9691,9692,9693,3675,9694,9695,9696,2945,9697,9698,9699,9700, # 9504 -9701,9702,9703,9704,9705,4911,9706,9707,9708,9709,9710,9711,9712,9713,9714,9715, # 9520 -9716,9717,9718,9719,9720,9721,9722,9723,9724,9725,9726,9727,9728,9729,9730,9731, # 9536 -9732,9733,9734,9735,4912,9736,9737,9738,9739,9740,4913,9741,9742,9743,9744,9745, # 9552 -9746,9747,9748,9749,9750,9751,9752,9753,9754,9755,9756,9757,9758,4914,9759,9760, # 9568 -9761,9762,9763,9764,9765,9766,9767,9768,9769,9770,9771,9772,9773,9774,9775,9776, # 9584 -9777,9778,9779,9780,9781,9782,4915,9783,9784,9785,9786,9787,9788,9789,9790,9791, # 9600 -9792,9793,4916,9794,9795,9796,9797,9798,9799,9800,9801,9802,9803,9804,9805,9806, # 9616 -9807,9808,9809,9810,9811,9812,9813,9814,9815,9816,9817,9818,9819,9820,9821,9822, # 9632 -9823,9824,9825,9826,9827,9828,9829,9830,9831,9832,9833,9834,9835,9836,9837,9838, # 9648 -9839,9840,9841,9842,9843,9844,9845,9846,9847,9848,9849,9850,9851,9852,9853,9854, # 9664 -9855,9856,9857,9858,9859,9860,9861,9862,9863,9864,9865,9866,9867,9868,4917,9869, # 9680 -9870,9871,9872,9873,9874,9875,9876,9877,9878,9879,9880,9881,9882,9883,9884,9885, # 9696 -9886,9887,9888,9889,9890,9891,9892,4472,9893,9894,9895,9896,9897,3806,9898,9899, # 9712 -9900,9901,9902,9903,9904,9905,9906,9907,9908,9909,9910,9911,9912,9913,9914,4918, # 9728 -9915,9916,9917,4919,9918,9919,9920,9921,4184,9922,9923,9924,9925,9926,9927,9928, # 9744 -9929,9930,9931,9932,9933,9934,9935,9936,9937,9938,9939,9940,9941,9942,9943,9944, # 9760 -9945,9946,4920,9947,9948,9949,9950,9951,9952,9953,9954,9955,4185,9956,9957,9958, # 9776 -9959,9960,9961,9962,9963,9964,9965,4921,9966,9967,9968,4473,9969,9970,9971,9972, # 9792 -9973,9974,9975,9976,9977,4474,9978,9979,9980,9981,9982,9983,9984,9985,9986,9987, # 9808 -9988,9989,9990,9991,9992,9993,9994,9995,9996,9997,9998,9999,10000,10001,10002,10003, # 9824 -10004,10005,10006,10007,10008,10009,10010,10011,10012,10013,10014,10015,10016,10017,10018,10019, # 9840 -10020,10021,4922,10022,4923,10023,10024,10025,10026,10027,10028,10029,10030,10031,10032,10033, # 9856 -10034,10035,10036,10037,10038,10039,10040,10041,10042,10043,10044,10045,10046,10047,10048,4924, # 9872 -10049,10050,10051,10052,10053,10054,10055,10056,10057,10058,10059,10060,10061,10062,10063,10064, # 9888 -10065,10066,10067,10068,10069,10070,10071,10072,10073,10074,10075,10076,10077,10078,10079,10080, # 9904 -10081,10082,10083,10084,10085,10086,10087,4475,10088,10089,10090,10091,10092,10093,10094,10095, # 9920 -10096,10097,4476,10098,10099,10100,10101,10102,10103,10104,10105,10106,10107,10108,10109,10110, # 9936 -10111,2174,10112,10113,10114,10115,10116,10117,10118,10119,10120,10121,10122,10123,10124,10125, # 9952 -10126,10127,10128,10129,10130,10131,10132,10133,10134,10135,10136,10137,10138,10139,10140,3807, # 9968 -4186,4925,10141,10142,10143,10144,10145,10146,10147,4477,4187,10148,10149,10150,10151,10152, # 9984 -10153,4188,10154,10155,10156,10157,10158,10159,10160,10161,4926,10162,10163,10164,10165,10166, #10000 -10167,10168,10169,10170,10171,10172,10173,10174,10175,10176,10177,10178,10179,10180,10181,10182, #10016 -10183,10184,10185,10186,10187,10188,10189,10190,10191,10192,3203,10193,10194,10195,10196,10197, #10032 -10198,10199,10200,4478,10201,10202,10203,10204,4479,10205,10206,10207,10208,10209,10210,10211, #10048 -10212,10213,10214,10215,10216,10217,10218,10219,10220,10221,10222,10223,10224,10225,10226,10227, #10064 -10228,10229,10230,10231,10232,10233,10234,4927,10235,10236,10237,10238,10239,10240,10241,10242, #10080 -10243,10244,10245,10246,10247,10248,10249,10250,10251,10252,10253,10254,10255,10256,10257,10258, #10096 -10259,10260,10261,10262,10263,10264,10265,10266,10267,10268,10269,10270,10271,10272,10273,4480, #10112 -4928,4929,10274,10275,10276,10277,10278,10279,10280,10281,10282,10283,10284,10285,10286,10287, #10128 -10288,10289,10290,10291,10292,10293,10294,10295,10296,10297,10298,10299,10300,10301,10302,10303, #10144 -10304,10305,10306,10307,10308,10309,10310,10311,10312,10313,10314,10315,10316,10317,10318,10319, #10160 -10320,10321,10322,10323,10324,10325,10326,10327,10328,10329,10330,10331,10332,10333,10334,4930, #10176 -10335,10336,10337,10338,10339,10340,10341,10342,4931,10343,10344,10345,10346,10347,10348,10349, #10192 -10350,10351,10352,10353,10354,10355,3088,10356,2786,10357,10358,10359,10360,4189,10361,10362, #10208 -10363,10364,10365,10366,10367,10368,10369,10370,10371,10372,10373,10374,10375,4932,10376,10377, #10224 -10378,10379,10380,10381,10382,10383,10384,10385,10386,10387,10388,10389,10390,10391,10392,4933, #10240 -10393,10394,10395,4934,10396,10397,10398,10399,10400,10401,10402,10403,10404,10405,10406,10407, #10256 -10408,10409,10410,10411,10412,3446,10413,10414,10415,10416,10417,10418,10419,10420,10421,10422, #10272 -10423,4935,10424,10425,10426,10427,10428,10429,10430,4936,10431,10432,10433,10434,10435,10436, #10288 -10437,10438,10439,10440,10441,10442,10443,4937,10444,10445,10446,10447,4481,10448,10449,10450, #10304 -10451,10452,10453,10454,10455,10456,10457,10458,10459,10460,10461,10462,10463,10464,10465,10466, #10320 -10467,10468,10469,10470,10471,10472,10473,10474,10475,10476,10477,10478,10479,10480,10481,10482, #10336 -10483,10484,10485,10486,10487,10488,10489,10490,10491,10492,10493,10494,10495,10496,10497,10498, #10352 -10499,10500,10501,10502,10503,10504,10505,4938,10506,10507,10508,10509,10510,2552,10511,10512, #10368 -10513,10514,10515,10516,3447,10517,10518,10519,10520,10521,10522,10523,10524,10525,10526,10527, #10384 -10528,10529,10530,10531,10532,10533,10534,10535,10536,10537,10538,10539,10540,10541,10542,10543, #10400 -4482,10544,4939,10545,10546,10547,10548,10549,10550,10551,10552,10553,10554,10555,10556,10557, #10416 -10558,10559,10560,10561,10562,10563,10564,10565,10566,10567,3676,4483,10568,10569,10570,10571, #10432 -10572,3448,10573,10574,10575,10576,10577,10578,10579,10580,10581,10582,10583,10584,10585,10586, #10448 -10587,10588,10589,10590,10591,10592,10593,10594,10595,10596,10597,10598,10599,10600,10601,10602, #10464 -10603,10604,10605,10606,10607,10608,10609,10610,10611,10612,10613,10614,10615,10616,10617,10618, #10480 -10619,10620,10621,10622,10623,10624,10625,10626,10627,4484,10628,10629,10630,10631,10632,4940, #10496 -10633,10634,10635,10636,10637,10638,10639,10640,10641,10642,10643,10644,10645,10646,10647,10648, #10512 -10649,10650,10651,10652,10653,10654,10655,10656,4941,10657,10658,10659,2599,10660,10661,10662, #10528 -10663,10664,10665,10666,3089,10667,10668,10669,10670,10671,10672,10673,10674,10675,10676,10677, #10544 -10678,10679,10680,4942,10681,10682,10683,10684,10685,10686,10687,10688,10689,10690,10691,10692, #10560 -10693,10694,10695,10696,10697,4485,10698,10699,10700,10701,10702,10703,10704,4943,10705,3677, #10576 -10706,10707,10708,10709,10710,10711,10712,4944,10713,10714,10715,10716,10717,10718,10719,10720, #10592 -10721,10722,10723,10724,10725,10726,10727,10728,4945,10729,10730,10731,10732,10733,10734,10735, #10608 -10736,10737,10738,10739,10740,10741,10742,10743,10744,10745,10746,10747,10748,10749,10750,10751, #10624 -10752,10753,10754,10755,10756,10757,10758,10759,10760,10761,4946,10762,10763,10764,10765,10766, #10640 -10767,4947,4948,10768,10769,10770,10771,10772,10773,10774,10775,10776,10777,10778,10779,10780, #10656 -10781,10782,10783,10784,10785,10786,10787,10788,10789,10790,10791,10792,10793,10794,10795,10796, #10672 -10797,10798,10799,10800,10801,10802,10803,10804,10805,10806,10807,10808,10809,10810,10811,10812, #10688 -10813,10814,10815,10816,10817,10818,10819,10820,10821,10822,10823,10824,10825,10826,10827,10828, #10704 -10829,10830,10831,10832,10833,10834,10835,10836,10837,10838,10839,10840,10841,10842,10843,10844, #10720 -10845,10846,10847,10848,10849,10850,10851,10852,10853,10854,10855,10856,10857,10858,10859,10860, #10736 -10861,10862,10863,10864,10865,10866,10867,10868,10869,10870,10871,10872,10873,10874,10875,10876, #10752 -10877,10878,4486,10879,10880,10881,10882,10883,10884,10885,4949,10886,10887,10888,10889,10890, #10768 -10891,10892,10893,10894,10895,10896,10897,10898,10899,10900,10901,10902,10903,10904,10905,10906, #10784 -10907,10908,10909,10910,10911,10912,10913,10914,10915,10916,10917,10918,10919,4487,10920,10921, #10800 -10922,10923,10924,10925,10926,10927,10928,10929,10930,10931,10932,4950,10933,10934,10935,10936, #10816 -10937,10938,10939,10940,10941,10942,10943,10944,10945,10946,10947,10948,10949,4488,10950,10951, #10832 -10952,10953,10954,10955,10956,10957,10958,10959,4190,10960,10961,10962,10963,10964,10965,10966, #10848 -10967,10968,10969,10970,10971,10972,10973,10974,10975,10976,10977,10978,10979,10980,10981,10982, #10864 -10983,10984,10985,10986,10987,10988,10989,10990,10991,10992,10993,10994,10995,10996,10997,10998, #10880 -10999,11000,11001,11002,11003,11004,11005,11006,3960,11007,11008,11009,11010,11011,11012,11013, #10896 -11014,11015,11016,11017,11018,11019,11020,11021,11022,11023,11024,11025,11026,11027,11028,11029, #10912 -11030,11031,11032,4951,11033,11034,11035,11036,11037,11038,11039,11040,11041,11042,11043,11044, #10928 -11045,11046,11047,4489,11048,11049,11050,11051,4952,11052,11053,11054,11055,11056,11057,11058, #10944 -4953,11059,11060,11061,11062,11063,11064,11065,11066,11067,11068,11069,11070,11071,4954,11072, #10960 -11073,11074,11075,11076,11077,11078,11079,11080,11081,11082,11083,11084,11085,11086,11087,11088, #10976 -11089,11090,11091,11092,11093,11094,11095,11096,11097,11098,11099,11100,11101,11102,11103,11104, #10992 -11105,11106,11107,11108,11109,11110,11111,11112,11113,11114,11115,3808,11116,11117,11118,11119, #11008 -11120,11121,11122,11123,11124,11125,11126,11127,11128,11129,11130,11131,11132,11133,11134,4955, #11024 -11135,11136,11137,11138,11139,11140,11141,11142,11143,11144,11145,11146,11147,11148,11149,11150, #11040 -11151,11152,11153,11154,11155,11156,11157,11158,11159,11160,11161,4956,11162,11163,11164,11165, #11056 -11166,11167,11168,11169,11170,11171,11172,11173,11174,11175,11176,11177,11178,11179,11180,4957, #11072 -11181,11182,11183,11184,11185,11186,4958,11187,11188,11189,11190,11191,11192,11193,11194,11195, #11088 -11196,11197,11198,11199,11200,3678,11201,11202,11203,11204,11205,11206,4191,11207,11208,11209, #11104 -11210,11211,11212,11213,11214,11215,11216,11217,11218,11219,11220,11221,11222,11223,11224,11225, #11120 -11226,11227,11228,11229,11230,11231,11232,11233,11234,11235,11236,11237,11238,11239,11240,11241, #11136 -11242,11243,11244,11245,11246,11247,11248,11249,11250,11251,4959,11252,11253,11254,11255,11256, #11152 -11257,11258,11259,11260,11261,11262,11263,11264,11265,11266,11267,11268,11269,11270,11271,11272, #11168 -11273,11274,11275,11276,11277,11278,11279,11280,11281,11282,11283,11284,11285,11286,11287,11288, #11184 -11289,11290,11291,11292,11293,11294,11295,11296,11297,11298,11299,11300,11301,11302,11303,11304, #11200 -11305,11306,11307,11308,11309,11310,11311,11312,11313,11314,3679,11315,11316,11317,11318,4490, #11216 -11319,11320,11321,11322,11323,11324,11325,11326,11327,11328,11329,11330,11331,11332,11333,11334, #11232 -11335,11336,11337,11338,11339,11340,11341,11342,11343,11344,11345,11346,11347,4960,11348,11349, #11248 -11350,11351,11352,11353,11354,11355,11356,11357,11358,11359,11360,11361,11362,11363,11364,11365, #11264 -11366,11367,11368,11369,11370,11371,11372,11373,11374,11375,11376,11377,3961,4961,11378,11379, #11280 -11380,11381,11382,11383,11384,11385,11386,11387,11388,11389,11390,11391,11392,11393,11394,11395, #11296 -11396,11397,4192,11398,11399,11400,11401,11402,11403,11404,11405,11406,11407,11408,11409,11410, #11312 -11411,4962,11412,11413,11414,11415,11416,11417,11418,11419,11420,11421,11422,11423,11424,11425, #11328 -11426,11427,11428,11429,11430,11431,11432,11433,11434,11435,11436,11437,11438,11439,11440,11441, #11344 -11442,11443,11444,11445,11446,11447,11448,11449,11450,11451,11452,11453,11454,11455,11456,11457, #11360 -11458,11459,11460,11461,11462,11463,11464,11465,11466,11467,11468,11469,4963,11470,11471,4491, #11376 -11472,11473,11474,11475,4964,11476,11477,11478,11479,11480,11481,11482,11483,11484,11485,11486, #11392 -11487,11488,11489,11490,11491,11492,4965,11493,11494,11495,11496,11497,11498,11499,11500,11501, #11408 -11502,11503,11504,11505,11506,11507,11508,11509,11510,11511,11512,11513,11514,11515,11516,11517, #11424 -11518,11519,11520,11521,11522,11523,11524,11525,11526,11527,11528,11529,3962,11530,11531,11532, #11440 -11533,11534,11535,11536,11537,11538,11539,11540,11541,11542,11543,11544,11545,11546,11547,11548, #11456 -11549,11550,11551,11552,11553,11554,11555,11556,11557,11558,11559,11560,11561,11562,11563,11564, #11472 -4193,4194,11565,11566,11567,11568,11569,11570,11571,11572,11573,11574,11575,11576,11577,11578, #11488 -11579,11580,11581,11582,11583,11584,11585,11586,11587,11588,11589,11590,11591,4966,4195,11592, #11504 -11593,11594,11595,11596,11597,11598,11599,11600,11601,11602,11603,11604,3090,11605,11606,11607, #11520 -11608,11609,11610,4967,11611,11612,11613,11614,11615,11616,11617,11618,11619,11620,11621,11622, #11536 -11623,11624,11625,11626,11627,11628,11629,11630,11631,11632,11633,11634,11635,11636,11637,11638, #11552 -11639,11640,11641,11642,11643,11644,11645,11646,11647,11648,11649,11650,11651,11652,11653,11654, #11568 -11655,11656,11657,11658,11659,11660,11661,11662,11663,11664,11665,11666,11667,11668,11669,11670, #11584 -11671,11672,11673,11674,4968,11675,11676,11677,11678,11679,11680,11681,11682,11683,11684,11685, #11600 -11686,11687,11688,11689,11690,11691,11692,11693,3809,11694,11695,11696,11697,11698,11699,11700, #11616 -11701,11702,11703,11704,11705,11706,11707,11708,11709,11710,11711,11712,11713,11714,11715,11716, #11632 -11717,11718,3553,11719,11720,11721,11722,11723,11724,11725,11726,11727,11728,11729,11730,4969, #11648 -11731,11732,11733,11734,11735,11736,11737,11738,11739,11740,4492,11741,11742,11743,11744,11745, #11664 -11746,11747,11748,11749,11750,11751,11752,4970,11753,11754,11755,11756,11757,11758,11759,11760, #11680 -11761,11762,11763,11764,11765,11766,11767,11768,11769,11770,11771,11772,11773,11774,11775,11776, #11696 -11777,11778,11779,11780,11781,11782,11783,11784,11785,11786,11787,11788,11789,11790,4971,11791, #11712 -11792,11793,11794,11795,11796,11797,4972,11798,11799,11800,11801,11802,11803,11804,11805,11806, #11728 -11807,11808,11809,11810,4973,11811,11812,11813,11814,11815,11816,11817,11818,11819,11820,11821, #11744 -11822,11823,11824,11825,11826,11827,11828,11829,11830,11831,11832,11833,11834,3680,3810,11835, #11760 -11836,4974,11837,11838,11839,11840,11841,11842,11843,11844,11845,11846,11847,11848,11849,11850, #11776 -11851,11852,11853,11854,11855,11856,11857,11858,11859,11860,11861,11862,11863,11864,11865,11866, #11792 -11867,11868,11869,11870,11871,11872,11873,11874,11875,11876,11877,11878,11879,11880,11881,11882, #11808 -11883,11884,4493,11885,11886,11887,11888,11889,11890,11891,11892,11893,11894,11895,11896,11897, #11824 -11898,11899,11900,11901,11902,11903,11904,11905,11906,11907,11908,11909,11910,11911,11912,11913, #11840 -11914,11915,4975,11916,11917,11918,11919,11920,11921,11922,11923,11924,11925,11926,11927,11928, #11856 -11929,11930,11931,11932,11933,11934,11935,11936,11937,11938,11939,11940,11941,11942,11943,11944, #11872 -11945,11946,11947,11948,11949,4976,11950,11951,11952,11953,11954,11955,11956,11957,11958,11959, #11888 -11960,11961,11962,11963,11964,11965,11966,11967,11968,11969,11970,11971,11972,11973,11974,11975, #11904 -11976,11977,11978,11979,11980,11981,11982,11983,11984,11985,11986,11987,4196,11988,11989,11990, #11920 -11991,11992,4977,11993,11994,11995,11996,11997,11998,11999,12000,12001,12002,12003,12004,12005, #11936 -12006,12007,12008,12009,12010,12011,12012,12013,12014,12015,12016,12017,12018,12019,12020,12021, #11952 -12022,12023,12024,12025,12026,12027,12028,12029,12030,12031,12032,12033,12034,12035,12036,12037, #11968 -12038,12039,12040,12041,12042,12043,12044,12045,12046,12047,12048,12049,12050,12051,12052,12053, #11984 -12054,12055,12056,12057,12058,12059,12060,12061,4978,12062,12063,12064,12065,12066,12067,12068, #12000 -12069,12070,12071,12072,12073,12074,12075,12076,12077,12078,12079,12080,12081,12082,12083,12084, #12016 -12085,12086,12087,12088,12089,12090,12091,12092,12093,12094,12095,12096,12097,12098,12099,12100, #12032 -12101,12102,12103,12104,12105,12106,12107,12108,12109,12110,12111,12112,12113,12114,12115,12116, #12048 -12117,12118,12119,12120,12121,12122,12123,4979,12124,12125,12126,12127,12128,4197,12129,12130, #12064 -12131,12132,12133,12134,12135,12136,12137,12138,12139,12140,12141,12142,12143,12144,12145,12146, #12080 -12147,12148,12149,12150,12151,12152,12153,12154,4980,12155,12156,12157,12158,12159,12160,4494, #12096 -12161,12162,12163,12164,3811,12165,12166,12167,12168,12169,4495,12170,12171,4496,12172,12173, #12112 -12174,12175,12176,3812,12177,12178,12179,12180,12181,12182,12183,12184,12185,12186,12187,12188, #12128 -12189,12190,12191,12192,12193,12194,12195,12196,12197,12198,12199,12200,12201,12202,12203,12204, #12144 -12205,12206,12207,12208,12209,12210,12211,12212,12213,12214,12215,12216,12217,12218,12219,12220, #12160 -12221,4981,12222,12223,12224,12225,12226,12227,12228,12229,12230,12231,12232,12233,12234,12235, #12176 -4982,12236,12237,12238,12239,12240,12241,12242,12243,12244,12245,4983,12246,12247,12248,12249, #12192 -4984,12250,12251,12252,12253,12254,12255,12256,12257,12258,12259,12260,12261,12262,12263,12264, #12208 -4985,12265,4497,12266,12267,12268,12269,12270,12271,12272,12273,12274,12275,12276,12277,12278, #12224 -12279,12280,12281,12282,12283,12284,12285,12286,12287,4986,12288,12289,12290,12291,12292,12293, #12240 -12294,12295,12296,2473,12297,12298,12299,12300,12301,12302,12303,12304,12305,12306,12307,12308, #12256 -12309,12310,12311,12312,12313,12314,12315,12316,12317,12318,12319,3963,12320,12321,12322,12323, #12272 -12324,12325,12326,12327,12328,12329,12330,12331,12332,4987,12333,12334,12335,12336,12337,12338, #12288 -12339,12340,12341,12342,12343,12344,12345,12346,12347,12348,12349,12350,12351,12352,12353,12354, #12304 -12355,12356,12357,12358,12359,3964,12360,12361,12362,12363,12364,12365,12366,12367,12368,12369, #12320 -12370,3965,12371,12372,12373,12374,12375,12376,12377,12378,12379,12380,12381,12382,12383,12384, #12336 -12385,12386,12387,12388,12389,12390,12391,12392,12393,12394,12395,12396,12397,12398,12399,12400, #12352 -12401,12402,12403,12404,12405,12406,12407,12408,4988,12409,12410,12411,12412,12413,12414,12415, #12368 -12416,12417,12418,12419,12420,12421,12422,12423,12424,12425,12426,12427,12428,12429,12430,12431, #12384 -12432,12433,12434,12435,12436,12437,12438,3554,12439,12440,12441,12442,12443,12444,12445,12446, #12400 -12447,12448,12449,12450,12451,12452,12453,12454,12455,12456,12457,12458,12459,12460,12461,12462, #12416 -12463,12464,4989,12465,12466,12467,12468,12469,12470,12471,12472,12473,12474,12475,12476,12477, #12432 -12478,12479,12480,4990,12481,12482,12483,12484,12485,12486,12487,12488,12489,4498,12490,12491, #12448 -12492,12493,12494,12495,12496,12497,12498,12499,12500,12501,12502,12503,12504,12505,12506,12507, #12464 -12508,12509,12510,12511,12512,12513,12514,12515,12516,12517,12518,12519,12520,12521,12522,12523, #12480 -12524,12525,12526,12527,12528,12529,12530,12531,12532,12533,12534,12535,12536,12537,12538,12539, #12496 -12540,12541,12542,12543,12544,12545,12546,12547,12548,12549,12550,12551,4991,12552,12553,12554, #12512 -12555,12556,12557,12558,12559,12560,12561,12562,12563,12564,12565,12566,12567,12568,12569,12570, #12528 -12571,12572,12573,12574,12575,12576,12577,12578,3036,12579,12580,12581,12582,12583,3966,12584, #12544 -12585,12586,12587,12588,12589,12590,12591,12592,12593,12594,12595,12596,12597,12598,12599,12600, #12560 -12601,12602,12603,12604,12605,12606,12607,12608,12609,12610,12611,12612,12613,12614,12615,12616, #12576 -12617,12618,12619,12620,12621,12622,12623,12624,12625,12626,12627,12628,12629,12630,12631,12632, #12592 -12633,12634,12635,12636,12637,12638,12639,12640,12641,12642,12643,12644,12645,12646,4499,12647, #12608 -12648,12649,12650,12651,12652,12653,12654,12655,12656,12657,12658,12659,12660,12661,12662,12663, #12624 -12664,12665,12666,12667,12668,12669,12670,12671,12672,12673,12674,12675,12676,12677,12678,12679, #12640 -12680,12681,12682,12683,12684,12685,12686,12687,12688,12689,12690,12691,12692,12693,12694,12695, #12656 -12696,12697,12698,4992,12699,12700,12701,12702,12703,12704,12705,12706,12707,12708,12709,12710, #12672 -12711,12712,12713,12714,12715,12716,12717,12718,12719,12720,12721,12722,12723,12724,12725,12726, #12688 -12727,12728,12729,12730,12731,12732,12733,12734,12735,12736,12737,12738,12739,12740,12741,12742, #12704 -12743,12744,12745,12746,12747,12748,12749,12750,12751,12752,12753,12754,12755,12756,12757,12758, #12720 -12759,12760,12761,12762,12763,12764,12765,12766,12767,12768,12769,12770,12771,12772,12773,12774, #12736 -12775,12776,12777,12778,4993,2175,12779,12780,12781,12782,12783,12784,12785,12786,4500,12787, #12752 -12788,12789,12790,12791,12792,12793,12794,12795,12796,12797,12798,12799,12800,12801,12802,12803, #12768 -12804,12805,12806,12807,12808,12809,12810,12811,12812,12813,12814,12815,12816,12817,12818,12819, #12784 -12820,12821,12822,12823,12824,12825,12826,4198,3967,12827,12828,12829,12830,12831,12832,12833, #12800 -12834,12835,12836,12837,12838,12839,12840,12841,12842,12843,12844,12845,12846,12847,12848,12849, #12816 -12850,12851,12852,12853,12854,12855,12856,12857,12858,12859,12860,12861,4199,12862,12863,12864, #12832 -12865,12866,12867,12868,12869,12870,12871,12872,12873,12874,12875,12876,12877,12878,12879,12880, #12848 -12881,12882,12883,12884,12885,12886,12887,4501,12888,12889,12890,12891,12892,12893,12894,12895, #12864 -12896,12897,12898,12899,12900,12901,12902,12903,12904,12905,12906,12907,12908,12909,12910,12911, #12880 -12912,4994,12913,12914,12915,12916,12917,12918,12919,12920,12921,12922,12923,12924,12925,12926, #12896 -12927,12928,12929,12930,12931,12932,12933,12934,12935,12936,12937,12938,12939,12940,12941,12942, #12912 -12943,12944,12945,12946,12947,12948,12949,12950,12951,12952,12953,12954,12955,12956,1772,12957, #12928 -12958,12959,12960,12961,12962,12963,12964,12965,12966,12967,12968,12969,12970,12971,12972,12973, #12944 -12974,12975,12976,12977,12978,12979,12980,12981,12982,12983,12984,12985,12986,12987,12988,12989, #12960 -12990,12991,12992,12993,12994,12995,12996,12997,4502,12998,4503,12999,13000,13001,13002,13003, #12976 -4504,13004,13005,13006,13007,13008,13009,13010,13011,13012,13013,13014,13015,13016,13017,13018, #12992 -13019,13020,13021,13022,13023,13024,13025,13026,13027,13028,13029,3449,13030,13031,13032,13033, #13008 -13034,13035,13036,13037,13038,13039,13040,13041,13042,13043,13044,13045,13046,13047,13048,13049, #13024 -13050,13051,13052,13053,13054,13055,13056,13057,13058,13059,13060,13061,13062,13063,13064,13065, #13040 -13066,13067,13068,13069,13070,13071,13072,13073,13074,13075,13076,13077,13078,13079,13080,13081, #13056 -13082,13083,13084,13085,13086,13087,13088,13089,13090,13091,13092,13093,13094,13095,13096,13097, #13072 -13098,13099,13100,13101,13102,13103,13104,13105,13106,13107,13108,13109,13110,13111,13112,13113, #13088 -13114,13115,13116,13117,13118,3968,13119,4995,13120,13121,13122,13123,13124,13125,13126,13127, #13104 -4505,13128,13129,13130,13131,13132,13133,13134,4996,4506,13135,13136,13137,13138,13139,4997, #13120 -13140,13141,13142,13143,13144,13145,13146,13147,13148,13149,13150,13151,13152,13153,13154,13155, #13136 -13156,13157,13158,13159,4998,13160,13161,13162,13163,13164,13165,13166,13167,13168,13169,13170, #13152 -13171,13172,13173,13174,13175,13176,4999,13177,13178,13179,13180,13181,13182,13183,13184,13185, #13168 -13186,13187,13188,13189,13190,13191,13192,13193,13194,13195,13196,13197,13198,13199,13200,13201, #13184 -13202,13203,13204,13205,13206,5000,13207,13208,13209,13210,13211,13212,13213,13214,13215,13216, #13200 -13217,13218,13219,13220,13221,13222,13223,13224,13225,13226,13227,4200,5001,13228,13229,13230, #13216 -13231,13232,13233,13234,13235,13236,13237,13238,13239,13240,3969,13241,13242,13243,13244,3970, #13232 -13245,13246,13247,13248,13249,13250,13251,13252,13253,13254,13255,13256,13257,13258,13259,13260, #13248 -13261,13262,13263,13264,13265,13266,13267,13268,3450,13269,13270,13271,13272,13273,13274,13275, #13264 -13276,5002,13277,13278,13279,13280,13281,13282,13283,13284,13285,13286,13287,13288,13289,13290, #13280 -13291,13292,13293,13294,13295,13296,13297,13298,13299,13300,13301,13302,3813,13303,13304,13305, #13296 -13306,13307,13308,13309,13310,13311,13312,13313,13314,13315,13316,13317,13318,13319,13320,13321, #13312 -13322,13323,13324,13325,13326,13327,13328,4507,13329,13330,13331,13332,13333,13334,13335,13336, #13328 -13337,13338,13339,13340,13341,5003,13342,13343,13344,13345,13346,13347,13348,13349,13350,13351, #13344 -13352,13353,13354,13355,13356,13357,13358,13359,13360,13361,13362,13363,13364,13365,13366,13367, #13360 -5004,13368,13369,13370,13371,13372,13373,13374,13375,13376,13377,13378,13379,13380,13381,13382, #13376 -13383,13384,13385,13386,13387,13388,13389,13390,13391,13392,13393,13394,13395,13396,13397,13398, #13392 -13399,13400,13401,13402,13403,13404,13405,13406,13407,13408,13409,13410,13411,13412,13413,13414, #13408 -13415,13416,13417,13418,13419,13420,13421,13422,13423,13424,13425,13426,13427,13428,13429,13430, #13424 -13431,13432,4508,13433,13434,13435,4201,13436,13437,13438,13439,13440,13441,13442,13443,13444, #13440 -13445,13446,13447,13448,13449,13450,13451,13452,13453,13454,13455,13456,13457,5005,13458,13459, #13456 -13460,13461,13462,13463,13464,13465,13466,13467,13468,13469,13470,4509,13471,13472,13473,13474, #13472 -13475,13476,13477,13478,13479,13480,13481,13482,13483,13484,13485,13486,13487,13488,13489,13490, #13488 -13491,13492,13493,13494,13495,13496,13497,13498,13499,13500,13501,13502,13503,13504,13505,13506, #13504 -13507,13508,13509,13510,13511,13512,13513,13514,13515,13516,13517,13518,13519,13520,13521,13522, #13520 -13523,13524,13525,13526,13527,13528,13529,13530,13531,13532,13533,13534,13535,13536,13537,13538, #13536 -13539,13540,13541,13542,13543,13544,13545,13546,13547,13548,13549,13550,13551,13552,13553,13554, #13552 -13555,13556,13557,13558,13559,13560,13561,13562,13563,13564,13565,13566,13567,13568,13569,13570, #13568 -13571,13572,13573,13574,13575,13576,13577,13578,13579,13580,13581,13582,13583,13584,13585,13586, #13584 -13587,13588,13589,13590,13591,13592,13593,13594,13595,13596,13597,13598,13599,13600,13601,13602, #13600 -13603,13604,13605,13606,13607,13608,13609,13610,13611,13612,13613,13614,13615,13616,13617,13618, #13616 -13619,13620,13621,13622,13623,13624,13625,13626,13627,13628,13629,13630,13631,13632,13633,13634, #13632 -13635,13636,13637,13638,13639,13640,13641,13642,5006,13643,13644,13645,13646,13647,13648,13649, #13648 -13650,13651,5007,13652,13653,13654,13655,13656,13657,13658,13659,13660,13661,13662,13663,13664, #13664 -13665,13666,13667,13668,13669,13670,13671,13672,13673,13674,13675,13676,13677,13678,13679,13680, #13680 -13681,13682,13683,13684,13685,13686,13687,13688,13689,13690,13691,13692,13693,13694,13695,13696, #13696 -13697,13698,13699,13700,13701,13702,13703,13704,13705,13706,13707,13708,13709,13710,13711,13712, #13712 -13713,13714,13715,13716,13717,13718,13719,13720,13721,13722,13723,13724,13725,13726,13727,13728, #13728 -13729,13730,13731,13732,13733,13734,13735,13736,13737,13738,13739,13740,13741,13742,13743,13744, #13744 -13745,13746,13747,13748,13749,13750,13751,13752,13753,13754,13755,13756,13757,13758,13759,13760, #13760 -13761,13762,13763,13764,13765,13766,13767,13768,13769,13770,13771,13772,13773,13774,3273,13775, #13776 -13776,13777,13778,13779,13780,13781,13782,13783,13784,13785,13786,13787,13788,13789,13790,13791, #13792 -13792,13793,13794,13795,13796,13797,13798,13799,13800,13801,13802,13803,13804,13805,13806,13807, #13808 -13808,13809,13810,13811,13812,13813,13814,13815,13816,13817,13818,13819,13820,13821,13822,13823, #13824 -13824,13825,13826,13827,13828,13829,13830,13831,13832,13833,13834,13835,13836,13837,13838,13839, #13840 -13840,13841,13842,13843,13844,13845,13846,13847,13848,13849,13850,13851,13852,13853,13854,13855, #13856 -13856,13857,13858,13859,13860,13861,13862,13863,13864,13865,13866,13867,13868,13869,13870,13871, #13872 -13872,13873,13874,13875,13876,13877,13878,13879,13880,13881,13882,13883,13884,13885,13886,13887, #13888 -13888,13889,13890,13891,13892,13893,13894,13895,13896,13897,13898,13899,13900,13901,13902,13903, #13904 -13904,13905,13906,13907,13908,13909,13910,13911,13912,13913,13914,13915,13916,13917,13918,13919, #13920 -13920,13921,13922,13923,13924,13925,13926,13927,13928,13929,13930,13931,13932,13933,13934,13935, #13936 -13936,13937,13938,13939,13940,13941,13942,13943,13944,13945,13946,13947,13948,13949,13950,13951, #13952 -13952,13953,13954,13955,13956,13957,13958,13959,13960,13961,13962,13963,13964,13965,13966,13967, #13968 -13968,13969,13970,13971,13972) #13973 +2299, 208,3546,4161,2020, 330,4438,3944,2906,2499,3799,4439,4811,5796,5797,5798, # 5376 +) -# flake8: noqa diff --git a/lib/chardet/big5prober.py b/lib/chardet/big5prober.py index becce81e5e8c2b6b672f9dfb69ce4177128cf590..e4da9e170181fbb475122ad780e6c2a119dc5cdc 100644 --- a/lib/chardet/big5prober.py +++ b/lib/chardet/big5prober.py @@ -28,15 +28,16 @@ from .mbcharsetprober import MultiByteCharSetProber from .codingstatemachine import CodingStateMachine from .chardistribution import Big5DistributionAnalysis -from .mbcssm import Big5SMModel +from .mbcssm import BIG5_SM_MODEL class Big5Prober(MultiByteCharSetProber): def __init__(self): - MultiByteCharSetProber.__init__(self) - self._mCodingSM = CodingStateMachine(Big5SMModel) - self._mDistributionAnalyzer = Big5DistributionAnalysis() + super(Big5Prober, self).__init__() + self.coding_sm = CodingStateMachine(BIG5_SM_MODEL) + self.distribution_analyzer = Big5DistributionAnalysis() self.reset() - def get_charset_name(self): + @property + def charset_name(self): return "Big5" diff --git a/lib/chardet/chardetect.py b/lib/chardet/chardetect.py deleted file mode 100644 index ecd0163be726bf513048c69d47770527829e8143..0000000000000000000000000000000000000000 --- a/lib/chardet/chardetect.py +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env python -""" -Script which takes one or more file paths and reports on their detected -encodings - -Example:: - - % chardetect somefile someotherfile - somefile: windows-1252 with confidence 0.5 - someotherfile: ascii with confidence 1.0 - -If no paths are provided, it takes its input from stdin. - -""" -from io import open -from sys import argv, stdin - -from chardet.universaldetector import UniversalDetector - - -def description_of(file, name='stdin'): - """Return a string describing the probable encoding of a file.""" - u = UniversalDetector() - for line in file: - u.feed(line) - u.close() - result = u.result - if result['encoding']: - return '%s: %s with confidence %s' % (name, - result['encoding'], - result['confidence']) - else: - return '%s: no result' % name - - -def main(): - if len(argv) <= 1: - print(description_of(stdin)) - else: - for path in argv[1:]: - with open(path, 'rb') as f: - print(description_of(f, path)) - - -if __name__ == '__main__': - main() diff --git a/lib/chardet/chardistribution.py b/lib/chardet/chardistribution.py index 4e64a00befb85883ab60e23fe7c630c151149eb1..d8e9e149cd707dddb3f74b078f8020e875f1d463 100644 --- a/lib/chardet/chardistribution.py +++ b/lib/chardet/chardistribution.py @@ -25,82 +25,85 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from .euctwfreq import (EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE, +from .euctwfreq import (EUCTW_CHAR_TO_FREQ_ORDER, EUCTW_TABLE_SIZE, EUCTW_TYPICAL_DISTRIBUTION_RATIO) -from .euckrfreq import (EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE, +from .euckrfreq import (EUCKR_CHAR_TO_FREQ_ORDER, EUCKR_TABLE_SIZE, EUCKR_TYPICAL_DISTRIBUTION_RATIO) -from .gb2312freq import (GB2312CharToFreqOrder, GB2312_TABLE_SIZE, +from .gb2312freq import (GB2312_CHAR_TO_FREQ_ORDER, GB2312_TABLE_SIZE, GB2312_TYPICAL_DISTRIBUTION_RATIO) -from .big5freq import (Big5CharToFreqOrder, BIG5_TABLE_SIZE, +from .big5freq import (BIG5_CHAR_TO_FREQ_ORDER, BIG5_TABLE_SIZE, BIG5_TYPICAL_DISTRIBUTION_RATIO) -from .jisfreq import (JISCharToFreqOrder, JIS_TABLE_SIZE, +from .jisfreq import (JIS_CHAR_TO_FREQ_ORDER, JIS_TABLE_SIZE, JIS_TYPICAL_DISTRIBUTION_RATIO) from .compat import wrap_ord -ENOUGH_DATA_THRESHOLD = 1024 -SURE_YES = 0.99 -SURE_NO = 0.01 -MINIMUM_DATA_THRESHOLD = 3 +class CharDistributionAnalysis(object): + ENOUGH_DATA_THRESHOLD = 1024 + SURE_YES = 0.99 + SURE_NO = 0.01 + MINIMUM_DATA_THRESHOLD = 3 -class CharDistributionAnalysis: def __init__(self): # Mapping table to get frequency order from char order (get from # GetOrder()) - self._mCharToFreqOrder = None - self._mTableSize = None # Size of above table + self._char_to_freq_order = None + self._table_size = None # Size of above table # This is a constant value which varies from language to language, # used in calculating confidence. See # http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html # for further detail. - self._mTypicalDistributionRatio = None + self.typical_distribution_ratio = None + self._done = None + self._total_chars = None + self._freq_chars = None self.reset() def reset(self): """reset analyser, clear any state""" # If this flag is set to True, detection is done and conclusion has # been made - self._mDone = False - self._mTotalChars = 0 # Total characters encountered + self._done = False + self._total_chars = 0 # Total characters encountered # The number of characters whose frequency order is less than 512 - self._mFreqChars = 0 + self._freq_chars = 0 - def feed(self, aBuf, aCharLen): + def feed(self, char, char_len): """feed a character with known length""" - if aCharLen == 2: + if char_len == 2: # we only care about 2-bytes character in our distribution analysis - order = self.get_order(aBuf) + order = self.get_order(char) else: order = -1 if order >= 0: - self._mTotalChars += 1 + self._total_chars += 1 # order is valid - if order < self._mTableSize: - if 512 > self._mCharToFreqOrder[order]: - self._mFreqChars += 1 + if order < self._table_size: + if 512 > self._char_to_freq_order[order]: + self._freq_chars += 1 def get_confidence(self): """return confidence based on existing data""" # if we didn't receive any character in our consideration range, # return negative answer - if self._mTotalChars <= 0 or self._mFreqChars <= MINIMUM_DATA_THRESHOLD: - return SURE_NO + if self._total_chars <= 0 or self._freq_chars <= self.MINIMUM_DATA_THRESHOLD: + return self.SURE_NO - if self._mTotalChars != self._mFreqChars: - r = (self._mFreqChars / ((self._mTotalChars - self._mFreqChars) - * self._mTypicalDistributionRatio)) - if r < SURE_YES: + if self._total_chars != self._freq_chars: + r = (self._freq_chars / ((self._total_chars - self._freq_chars) + * self.typical_distribution_ratio)) + if r < self.SURE_YES: return r # normalize confidence (we don't want to be 100% sure) - return SURE_YES + return self.SURE_YES def got_enough_data(self): # It is not necessary to receive all data to draw conclusion. # For charset detection, certain amount of data is enough - return self._mTotalChars > ENOUGH_DATA_THRESHOLD + return self._total_chars > self.ENOUGH_DATA_THRESHOLD - def get_order(self, aBuf): + def get_order(self, byte_str): # We do not handle characters based on the original encoding string, # but convert this encoding string to a number, here called order. # This allows multiple encodings of a language to share one frequency @@ -110,55 +113,55 @@ class CharDistributionAnalysis: class EUCTWDistributionAnalysis(CharDistributionAnalysis): def __init__(self): - CharDistributionAnalysis.__init__(self) - self._mCharToFreqOrder = EUCTWCharToFreqOrder - self._mTableSize = EUCTW_TABLE_SIZE - self._mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO + super(EUCTWDistributionAnalysis, self).__init__() + self._char_to_freq_order = EUCTW_CHAR_TO_FREQ_ORDER + self._table_size = EUCTW_TABLE_SIZE + self.typical_distribution_ratio = EUCTW_TYPICAL_DISTRIBUTION_RATIO - def get_order(self, aBuf): + def get_order(self, byte_str): # for euc-TW encoding, we are interested # first byte range: 0xc4 -- 0xfe # second byte range: 0xa1 -- 0xfe # no validation needed here. State machine has done that - first_char = wrap_ord(aBuf[0]) + first_char = wrap_ord(byte_str[0]) if first_char >= 0xC4: - return 94 * (first_char - 0xC4) + wrap_ord(aBuf[1]) - 0xA1 + return 94 * (first_char - 0xC4) + wrap_ord(byte_str[1]) - 0xA1 else: return -1 class EUCKRDistributionAnalysis(CharDistributionAnalysis): def __init__(self): - CharDistributionAnalysis.__init__(self) - self._mCharToFreqOrder = EUCKRCharToFreqOrder - self._mTableSize = EUCKR_TABLE_SIZE - self._mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO + super(EUCKRDistributionAnalysis, self).__init__() + self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER + self._table_size = EUCKR_TABLE_SIZE + self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO - def get_order(self, aBuf): + def get_order(self, byte_str): # for euc-KR encoding, we are interested # first byte range: 0xb0 -- 0xfe # second byte range: 0xa1 -- 0xfe # no validation needed here. State machine has done that - first_char = wrap_ord(aBuf[0]) + first_char = wrap_ord(byte_str[0]) if first_char >= 0xB0: - return 94 * (first_char - 0xB0) + wrap_ord(aBuf[1]) - 0xA1 + return 94 * (first_char - 0xB0) + wrap_ord(byte_str[1]) - 0xA1 else: return -1 class GB2312DistributionAnalysis(CharDistributionAnalysis): def __init__(self): - CharDistributionAnalysis.__init__(self) - self._mCharToFreqOrder = GB2312CharToFreqOrder - self._mTableSize = GB2312_TABLE_SIZE - self._mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO + super(GB2312DistributionAnalysis, self).__init__() + self._char_to_freq_order = GB2312_CHAR_TO_FREQ_ORDER + self._table_size = GB2312_TABLE_SIZE + self.typical_distribution_ratio = GB2312_TYPICAL_DISTRIBUTION_RATIO - def get_order(self, aBuf): + def get_order(self, byte_str): # for GB2312 encoding, we are interested # first byte range: 0xb0 -- 0xfe # second byte range: 0xa1 -- 0xfe # no validation needed here. State machine has done that - first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1]) + first_char, second_char = wrap_ord(byte_str[0]), wrap_ord(byte_str[1]) if (first_char >= 0xB0) and (second_char >= 0xA1): return 94 * (first_char - 0xB0) + second_char - 0xA1 else: @@ -167,17 +170,17 @@ class GB2312DistributionAnalysis(CharDistributionAnalysis): class Big5DistributionAnalysis(CharDistributionAnalysis): def __init__(self): - CharDistributionAnalysis.__init__(self) - self._mCharToFreqOrder = Big5CharToFreqOrder - self._mTableSize = BIG5_TABLE_SIZE - self._mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO + super(Big5DistributionAnalysis, self).__init__() + self._char_to_freq_order = BIG5_CHAR_TO_FREQ_ORDER + self._table_size = BIG5_TABLE_SIZE + self.typical_distribution_ratio = BIG5_TYPICAL_DISTRIBUTION_RATIO - def get_order(self, aBuf): + def get_order(self, byte_str): # for big5 encoding, we are interested # first byte range: 0xa4 -- 0xfe # second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe # no validation needed here. State machine has done that - first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1]) + first_char, second_char = wrap_ord(byte_str[0]), wrap_ord(byte_str[1]) if first_char >= 0xA4: if second_char >= 0xA1: return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63 @@ -189,17 +192,17 @@ class Big5DistributionAnalysis(CharDistributionAnalysis): class SJISDistributionAnalysis(CharDistributionAnalysis): def __init__(self): - CharDistributionAnalysis.__init__(self) - self._mCharToFreqOrder = JISCharToFreqOrder - self._mTableSize = JIS_TABLE_SIZE - self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO + super(SJISDistributionAnalysis, self).__init__() + self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER + self._table_size = JIS_TABLE_SIZE + self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO - def get_order(self, aBuf): + def get_order(self, byte_str): # for sjis encoding, we are interested # first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe # second byte range: 0x40 -- 0x7e, 0x81 -- oxfe # no validation needed here. State machine has done that - first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1]) + first_char, second_char = wrap_ord(byte_str[0]), wrap_ord(byte_str[1]) if (first_char >= 0x81) and (first_char <= 0x9F): order = 188 * (first_char - 0x81) elif (first_char >= 0xE0) and (first_char <= 0xEF): @@ -214,18 +217,18 @@ class SJISDistributionAnalysis(CharDistributionAnalysis): class EUCJPDistributionAnalysis(CharDistributionAnalysis): def __init__(self): - CharDistributionAnalysis.__init__(self) - self._mCharToFreqOrder = JISCharToFreqOrder - self._mTableSize = JIS_TABLE_SIZE - self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO + super(EUCJPDistributionAnalysis, self).__init__() + self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER + self._table_size = JIS_TABLE_SIZE + self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO - def get_order(self, aBuf): + def get_order(self, byte_str): # for euc-JP encoding, we are interested # first byte range: 0xa0 -- 0xfe # second byte range: 0xa1 -- 0xfe # no validation needed here. State machine has done that - char = wrap_ord(aBuf[0]) + char = wrap_ord(byte_str[0]) if char >= 0xA0: - return 94 * (char - 0xA1) + wrap_ord(aBuf[1]) - 0xa1 + return 94 * (char - 0xA1) + wrap_ord(byte_str[1]) - 0xa1 else: return -1 diff --git a/lib/chardet/charsetgroupprober.py b/lib/chardet/charsetgroupprober.py index 85e7a1c67db9343fcdce9f5a7dced9e067b99158..80c82334342cc7071e0e854a597838a24bf31319 100644 --- a/lib/chardet/charsetgroupprober.py +++ b/lib/chardet/charsetgroupprober.py @@ -1,11 +1,11 @@ ######################## BEGIN LICENSE BLOCK ######################## # The Original Code is Mozilla Communicator client code. -# +# # The Initial Developer of the Original Code is # Netscape Communications Corporation. # Portions created by the Initial Developer are Copyright (C) 1998 # the Initial Developer. All Rights Reserved. -# +# # Contributor(s): # Mark Pilgrim - port to Python # @@ -13,94 +13,86 @@ # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. -# +# # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. -# +# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from . import constants -import sys +from .enums import ProbingState from .charsetprober import CharSetProber class CharSetGroupProber(CharSetProber): - def __init__(self): - CharSetProber.__init__(self) - self._mActiveNum = 0 - self._mProbers = [] - self._mBestGuessProber = None + def __init__(self, lang_filter=None): + super(CharSetGroupProber, self).__init__(lang_filter=lang_filter) + self._active_num = 0 + self.probers = [] + self._best_guess_prober = None def reset(self): - CharSetProber.reset(self) - self._mActiveNum = 0 - for prober in self._mProbers: + super(CharSetGroupProber, self).reset() + self._active_num = 0 + for prober in self.probers: if prober: prober.reset() prober.active = True - self._mActiveNum += 1 - self._mBestGuessProber = None + self._active_num += 1 + self._best_guess_prober = None - def get_charset_name(self): - if not self._mBestGuessProber: + @property + def charset_name(self): + if not self._best_guess_prober: self.get_confidence() - if not self._mBestGuessProber: + if not self._best_guess_prober: return None -# self._mBestGuessProber = self._mProbers[0] - return self._mBestGuessProber.get_charset_name() + return self._best_guess_prober.charset_name - def feed(self, aBuf): - for prober in self._mProbers: + def feed(self, byte_str): + for prober in self.probers: if not prober: continue if not prober.active: continue - st = prober.feed(aBuf) - if not st: + state = prober.feed(byte_str) + if not state: continue - if st == constants.eFoundIt: - self._mBestGuessProber = prober - return self.get_state() - elif st == constants.eNotMe: + if state == ProbingState.found_it: + self._best_guess_prober = prober + return self.state + elif state == ProbingState.not_me: prober.active = False - self._mActiveNum -= 1 - if self._mActiveNum <= 0: - self._mState = constants.eNotMe - return self.get_state() - return self.get_state() + self._active_num -= 1 + if self._active_num <= 0: + self._state = ProbingState.not_me + return self.state + return self.state def get_confidence(self): - st = self.get_state() - if st == constants.eFoundIt: + state = self.state + if state == ProbingState.found_it: return 0.99 - elif st == constants.eNotMe: + elif state == ProbingState.not_me: return 0.01 - bestConf = 0.0 - self._mBestGuessProber = None - for prober in self._mProbers: + best_conf = 0.0 + self._best_guess_prober = None + for prober in self.probers: if not prober: continue if not prober.active: - if constants._debug: - sys.stderr.write(prober.get_charset_name() - + ' not active\n') + self.logger.debug('%s not active', prober.charset_name) continue - cf = prober.get_confidence() - if constants._debug: - sys.stderr.write('%s confidence = %s\n' % - (prober.get_charset_name(), cf)) - if bestConf < cf: - bestConf = cf - self._mBestGuessProber = prober - if not self._mBestGuessProber: + conf = prober.get_confidence() + self.logger.debug('%s confidence = %s', prober.charset_name, conf) + if best_conf < conf: + best_conf = conf + self._best_guess_prober = prober + if not self._best_guess_prober: return 0.0 - return bestConf -# else: -# self._mBestGuessProber = self._mProbers[0] -# return self._mBestGuessProber.get_confidence() + return best_conf diff --git a/lib/chardet/charsetprober.py b/lib/chardet/charsetprober.py index 97581712c1c2cfd31f89f396be24b27f657e1dd6..bc5f9c241d08ee706f7039edc1ebe3e206f9f0c1 100644 --- a/lib/chardet/charsetprober.py +++ b/lib/chardet/charsetprober.py @@ -26,37 +26,121 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from . import constants +import logging import re +from io import BytesIO +from .enums import ProbingState -class CharSetProber: - def __init__(self): - pass + +class CharSetProber(object): + + SHORTCUT_THRESHOLD = 0.95 + + def __init__(self, lang_filter=None): + self._state = None + self.lang_filter = lang_filter + self.logger = logging.getLogger(__name__) def reset(self): - self._mState = constants.eDetecting + self._state = ProbingState.detecting - def get_charset_name(self): + @property + def charset_name(self): return None - def feed(self, aBuf): + def feed(self, buf): pass - def get_state(self): - return self._mState + @property + def state(self): + return self._state def get_confidence(self): return 0.0 - def filter_high_bit_only(self, aBuf): - aBuf = re.sub(b'([\x00-\x7F])+', b' ', aBuf) - return aBuf + @staticmethod + def filter_high_byte_only(buf): + buf = re.sub(b'([\x00-\x7F])+', b' ', buf) + return buf + + @staticmethod + def filter_international_words(buf): + """ + We define three types of bytes: + alphabet: english alphabets [a-zA-Z] + international: international characters [\x80-\xFF] + marker: everything else [^a-zA-Z\x80-\xFF] + + The input buffer can be thought to contain a series of words delimited + by markers. This function works to filter all words that contain at + least one international character. All contiguous sequences of markers + are replaced by a single space ascii character. + + This filter applies to all scripts which do not use English characters. + """ + filtered = BytesIO() + + # This regex expression filters out only words that have at-least one + # international character. The word may include one marker character at + # the end. + words = re.findall( + b'[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?', buf) + + for word in words: + filtered.write(word[:-1]) + + # If the last character in the word is a marker, replace it with a + # space as markers shouldn't affect our analysis (they are used + # similarly across all languages and may thus have similar + # frequencies). + last_char = word[-1:] + if not last_char.isalpha() and last_char < b'\x80': + last_char = b' ' + filtered.write(last_char) + + return filtered.getvalue() + + @staticmethod + def filter_with_english_letters(buf): + """ + Returns a copy of ``buf`` that retains only the sequences of English + alphabet and high byte characters that are not between <> characters. + Also retains English alphabet and high byte characters immediately + before occurrences of >. + + This filter can be applied to all scripts which contain both English + characters and extended ASCII characters, but is currently only used by + ``Latin1Prober``. + """ + filtered = BytesIO() + in_tag = False + prev = 0 + + for curr in range(len(buf)): + # Slice here to get bytes instead of an int with Python 3 + buf_char = buf[curr:curr + 1] + # Check if we're coming out of or entering an HTML tag + if buf_char == b'>': + in_tag = False + elif buf_char == b'<': + in_tag = True + + # If current character is not extended-ASCII and not alphabetic... + if buf_char < b'\x80' and not buf_char.isalpha(): + # ...and we're not in a tag + if curr > prev and not in_tag: + # Keep everything after last non-extended-ASCII, + # non-alphabetic character + filtered.write(buf[prev:curr]) + # Output a space to delimit stretch we kept + filtered.write(b' ') + prev = curr + 1 - def filter_without_english_letters(self, aBuf): - aBuf = re.sub(b'([A-Za-z])+', b' ', aBuf) - return aBuf + # If we're not in a tag... + if not in_tag: + # Keep everything after last non-extended-ASCII, non-alphabetic + # character + filtered.write(buf[prev:]) - def filter_with_english_letters(self, aBuf): - # TODO - return aBuf + return filtered.getvalue() diff --git a/lib/chardet/cli/__init__.py b/lib/chardet/cli/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/lib/chardet/cli/__init__.py @@ -0,0 +1 @@ + diff --git a/lib/chardet/cli/chardetect.py b/lib/chardet/cli/chardetect.py new file mode 100755 index 0000000000000000000000000000000000000000..e01e9c259e8402b75bd0974afa28a8132e4cd6dc --- /dev/null +++ b/lib/chardet/cli/chardetect.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python +""" +Script which takes one or more file paths and reports on their detected +encodings + +Example:: + + % chardetect somefile someotherfile + somefile: windows-1252 with confidence 0.5 + someotherfile: ascii with confidence 1.0 + +If no paths are provided, it takes its input from stdin. + +""" + +from __future__ import absolute_import, print_function, unicode_literals + +import argparse +import sys +from io import open + +from chardet import __version__ +from chardet.compat import PY2 +from chardet.universaldetector import UniversalDetector + + + + +def description_of(lines, name='stdin'): + """ + Return a string describing the probable encoding of a file or + list of strings. + + :param lines: The lines to get the encoding of. + :type lines: Iterable of bytes + :param name: Name of file or collection of lines + :type name: str + """ + u = UniversalDetector() + for line in lines: + u.feed(line) + u.close() + result = u.result + if PY2: + name = name.decode(sys.getfilesystemencoding(), 'ignore') + if result['encoding']: + return '{0}: {1} with confidence {2}'.format(name, result['encoding'], + result['confidence']) + else: + return '{0}: no result'.format(name) + + +def main(argv=None): + """ + Handles command line arguments and gets things started. + + :param argv: List of arguments, as if specified on the command-line. + If None, ``sys.argv[1:]`` is used instead. + :type argv: list of str + """ + # Get command line arguments + parser = argparse.ArgumentParser( + description="Takes one or more file paths and reports their detected \ + encodings") + parser.add_argument('input', + help='File whose encoding we would like to determine. \ + (default: stdin)', + type=argparse.FileType('rb'), nargs='*', + default=[sys.stdin if PY2 else sys.stdin.buffer]) + parser.add_argument('--version', action='version', + version='%(prog)s {0}'.format(__version__)) + args = parser.parse_args(argv) + + for f in args.input: + if f.isatty(): + print("You are running chardetect interactively. Press " + + "CTRL-D twice at the start of a blank line to signal the " + + "end of your input. If you want help, run chardetect " + + "--help\n", file=sys.stderr) + print(description_of(f, f.name)) + + +if __name__ == '__main__': + main() diff --git a/lib/chardet/codingstatemachine.py b/lib/chardet/codingstatemachine.py index 8dd8c917983f6e34718fe95888e1bf18b07392ef..24a890d759d7f978b204f2bf38586f1dc838293e 100644 --- a/lib/chardet/codingstatemachine.py +++ b/lib/chardet/codingstatemachine.py @@ -25,37 +25,61 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from .constants import eStart +import logging + +from .enums import MachineState from .compat import wrap_ord -class CodingStateMachine: +class CodingStateMachine(object): + """ + A state machine to verify a byte sequence for a particular encoding. For + each byte the detector receives, it will feed that byte to every active + state machine available, one byte at a time. The state machine changes its + state based on its previous state and the byte it receives. There are 3 + states in a state machine that are of interest to an auto-detector: + + START state: This is the state to start with, or a legal byte sequence + (i.e. a valid code point) for character has been identified. + + ME state: This indicates that the state machine identified a byte sequence + that is specific to the charset it is designed for and that + there is no other possible encoding which can contain this byte + sequence. This will to lead to an immediate positive answer for + the detector. + + ERROR state: This indicates the state machine identified an illegal byte + sequence for that encoding. This will lead to an immediate + negative answer for this encoding. Detector will exclude this + encoding from consideration from here on. + """ def __init__(self, sm): - self._mModel = sm - self._mCurrentBytePos = 0 - self._mCurrentCharLen = 0 + self._model = sm + self._curr_byte_pos = 0 + self._curr_char_len = 0 + self._curr_state = None + self.logger = logging.getLogger(__name__) self.reset() def reset(self): - self._mCurrentState = eStart + self._curr_state = MachineState.start def next_state(self, c): # for each byte we get its class # if it is first byte, we also get byte length - # PY3K: aBuf is a byte stream, so c is an int, not a byte - byteCls = self._mModel['classTable'][wrap_ord(c)] - if self._mCurrentState == eStart: - self._mCurrentBytePos = 0 - self._mCurrentCharLen = self._mModel['charLenTable'][byteCls] - # from byte's class and stateTable, we get its next state - curr_state = (self._mCurrentState * self._mModel['classFactor'] - + byteCls) - self._mCurrentState = self._mModel['stateTable'][curr_state] - self._mCurrentBytePos += 1 - return self._mCurrentState + byte_class = self._model['class_table'][wrap_ord(c)] + if self._curr_state == MachineState.start: + self._curr_byte_pos = 0 + self._curr_char_len = self._model['char_len_table'][byte_class] + # from byte's class and state_table, we get its next state + curr_state = (self._curr_state * self._model['class_factor'] + + byte_class) + self._curr_state = self._model['state_table'][curr_state] + self._curr_byte_pos += 1 + return self._curr_state def get_current_charlen(self): - return self._mCurrentCharLen + return self._curr_char_len def get_coding_state_machine(self): - return self._mModel['name'] + return self._model['name'] diff --git a/lib/chardet/compat.py b/lib/chardet/compat.py index d9e30addf9b5fd5f2677a4e799e17ce5fd788c9a..8480eb56e2a518191787eb2a50f9b0a4c95aecc5 100644 --- a/lib/chardet/compat.py +++ b/lib/chardet/compat.py @@ -1,6 +1,7 @@ ######################## BEGIN LICENSE BLOCK ######################## # Contributor(s): -# Ian Cordasco - port to Python +# Dan Blanchard +# Ian Cordasco # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -22,13 +23,21 @@ import sys if sys.version_info < (3, 0): + PY2 = True + PY3 = False base_str = (str, unicode) + text_type = unicode + bin_type = str else: + PY2 = False + PY3 = True base_str = (bytes, str) + text_type = str + bin_type = (bytes, bytearray) def wrap_ord(a): - if sys.version_info < (3, 0) and isinstance(a, base_str): + if PY2 and isinstance(a, base_str): return ord(a) else: return a diff --git a/lib/chardet/constants.py b/lib/chardet/constants.py deleted file mode 100644 index e4d148b3c5b995f810428ce6e44643147d406b15..0000000000000000000000000000000000000000 --- a/lib/chardet/constants.py +++ /dev/null @@ -1,39 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Universal charset detector code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 2001 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# Shy Shalom - original C code -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -_debug = 0 - -eDetecting = 0 -eFoundIt = 1 -eNotMe = 2 - -eStart = 0 -eError = 1 -eItsMe = 2 - -SHORTCUT_THRESHOLD = 0.95 diff --git a/lib/chardet/cp949prober.py b/lib/chardet/cp949prober.py index ff4272f82a0bad7585a6130f089618d09f34f02c..f1f463483b6b6a0ee9cdca807ed82e3f736e9f31 100644 --- a/lib/chardet/cp949prober.py +++ b/lib/chardet/cp949prober.py @@ -25,20 +25,21 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from .mbcharsetprober import MultiByteCharSetProber -from .codingstatemachine import CodingStateMachine from .chardistribution import EUCKRDistributionAnalysis -from .mbcssm import CP949SMModel +from .codingstatemachine import CodingStateMachine +from .mbcharsetprober import MultiByteCharSetProber +from .mbcssm import CP949_SM_MODEL class CP949Prober(MultiByteCharSetProber): def __init__(self): - MultiByteCharSetProber.__init__(self) - self._mCodingSM = CodingStateMachine(CP949SMModel) + super(CP949Prober, self).__init__() + self.coding_sm = CodingStateMachine(CP949_SM_MODEL) # NOTE: CP949 is a superset of EUC-KR, so the distribution should be # not different. - self._mDistributionAnalyzer = EUCKRDistributionAnalysis() + self.distribution_analyzer = EUCKRDistributionAnalysis() self.reset() - def get_charset_name(self): + @property + def charset_name(self): return "CP949" diff --git a/lib/chardet/enums.py b/lib/chardet/enums.py new file mode 100644 index 0000000000000000000000000000000000000000..49b3e678124154ee9aef754877546a9d18fff675 --- /dev/null +++ b/lib/chardet/enums.py @@ -0,0 +1,52 @@ +""" +All of the Enums that are used throughout the chardet package. + +:author: Dan Blanchard (dblanchard@ets.org) +""" + +try: + from enum import IntEnum +except ImportError: + from enum34 import IntEnum + + +class InputState(IntEnum): + """ + This enum represents the different states a universal detector can be in. + """ + pure_ascii = 0 + esc_ascii = 1 + high_byte = 2 + + +class LanguageFilter(IntEnum): + """ + This enum represents the different language filters we can apply to a + ``UniversalDetector``. + """ + chinese_simplified = 0x01 + chinese_traditional = 0x02 + japanese = 0x04 + korean = 0x08 + non_cjk = 0x10 + all = 0x1F + chinese = chinese_simplified | chinese_traditional + cjk = chinese | japanese | korean + + +class ProbingState(IntEnum): + """ + This enum represents the different states a prober can be in. + """ + detecting = 0 + found_it = 1 + not_me = 2 + + +class MachineState(IntEnum): + """ + This enum represents the different states a state machine can be in. + """ + start = 0 + error = 1 + its_me = 2 diff --git a/lib/chardet/escprober.py b/lib/chardet/escprober.py index 80a844ff34c3af1fb6a247a4ec4034c64d0fc33d..63111d0a4fef6b5d7f75d3a58cb67812c01c15d4 100644 --- a/lib/chardet/escprober.py +++ b/lib/chardet/escprober.py @@ -25,62 +25,71 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from . import constants -from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel, - ISO2022KRSMModel) from .charsetprober import CharSetProber from .codingstatemachine import CodingStateMachine from .compat import wrap_ord +from .enums import LanguageFilter, ProbingState, MachineState +from .escsm import (HZ_SM_MODEL, ISO2022CN_SM_MODEL, ISO2022JP_SM_MODEL, + ISO2022KR_SM_MODEL) class EscCharSetProber(CharSetProber): - def __init__(self): - CharSetProber.__init__(self) - self._mCodingSM = [ - CodingStateMachine(HZSMModel), - CodingStateMachine(ISO2022CNSMModel), - CodingStateMachine(ISO2022JPSMModel), - CodingStateMachine(ISO2022KRSMModel) - ] + """ + This CharSetProber uses a "code scheme" approach for detecting encodings, + whereby easily recognizable escape or shift sequences are relied on to + identify these encodings. + """ + + def __init__(self, lang_filter=None): + super(EscCharSetProber, self).__init__(lang_filter=lang_filter) + self.coding_sm = [] + if self.lang_filter & LanguageFilter.chinese_simplified: + self.coding_sm.append(CodingStateMachine(HZ_SM_MODEL)) + self.coding_sm.append(CodingStateMachine(ISO2022CN_SM_MODEL)) + if self.lang_filter & LanguageFilter.japanese: + self.coding_sm.append(CodingStateMachine(ISO2022JP_SM_MODEL)) + if self.lang_filter & LanguageFilter.korean: + self.coding_sm.append(CodingStateMachine(ISO2022KR_SM_MODEL)) + self.active_sm_count = None + self._detected_charset = None + self._state = None self.reset() def reset(self): - CharSetProber.reset(self) - for codingSM in self._mCodingSM: - if not codingSM: + super(EscCharSetProber, self).reset() + for coding_sm in self.coding_sm: + if not coding_sm: continue - codingSM.active = True - codingSM.reset() - self._mActiveSM = len(self._mCodingSM) - self._mDetectedCharset = None + coding_sm.active = True + coding_sm.reset() + self.active_sm_count = len(self.coding_sm) + self._detected_charset = None - def get_charset_name(self): - return self._mDetectedCharset + @property + def charset_name(self): + return self._detected_charset def get_confidence(self): - if self._mDetectedCharset: + if self._detected_charset: return 0.99 else: return 0.00 - def feed(self, aBuf): - for c in aBuf: - # PY3K: aBuf is a byte array, so c is an int, not a byte - for codingSM in self._mCodingSM: - if not codingSM: - continue - if not codingSM.active: + def feed(self, byte_str): + for c in byte_str: + for coding_sm in self.coding_sm: + if not coding_sm or not coding_sm.active: continue - codingState = codingSM.next_state(wrap_ord(c)) - if codingState == constants.eError: - codingSM.active = False - self._mActiveSM -= 1 - if self._mActiveSM <= 0: - self._mState = constants.eNotMe - return self.get_state() - elif codingState == constants.eItsMe: - self._mState = constants.eFoundIt - self._mDetectedCharset = codingSM.get_coding_state_machine() # nopep8 - return self.get_state() + coding_state = coding_sm.next_state(wrap_ord(c)) + if coding_state == MachineState.error: + coding_sm.active = False + self.active_sm_count -= 1 + if self.active_sm_count <= 0: + self._state = ProbingState.not_me + return self.state + elif coding_state == MachineState.its_me: + self._state = ProbingState.found_it + self._detected_charset = coding_sm.get_coding_state_machine() + return self.state - return self.get_state() + return self.state diff --git a/lib/chardet/escsm.py b/lib/chardet/escsm.py index bd302b4c61db77866944bcee33902d4b20125284..4ae9f8ae941b995c9730228fb54aa7006e4e5571 100644 --- a/lib/chardet/escsm.py +++ b/lib/chardet/escsm.py @@ -25,9 +25,9 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from .constants import eStart, eError, eItsMe +from .enums import MachineState -HZ_cls = ( +HZ_CLS = ( 1,0,0,0,0,0,0,0, # 00 - 07 0,0,0,0,0,0,0,0, # 08 - 0f 0,0,0,0,0,0,0,0, # 10 - 17 @@ -62,24 +62,24 @@ HZ_cls = ( 1,1,1,1,1,1,1,1, # f8 - ff ) -HZ_st = ( -eStart,eError, 3,eStart,eStart,eStart,eError,eError,# 00-07 -eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f -eItsMe,eItsMe,eError,eError,eStart,eStart, 4,eError,# 10-17 - 5,eError, 6,eError, 5, 5, 4,eError,# 18-1f - 4,eError, 4, 4, 4,eError, 4,eError,# 20-27 - 4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart,# 28-2f +HZ_ST = ( +MachineState.start,MachineState.error, 3,MachineState.start,MachineState.start,MachineState.start,MachineState.error,MachineState.error,# 00-07 +MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,# 08-0f +MachineState.its_me,MachineState.its_me,MachineState.error,MachineState.error,MachineState.start,MachineState.start, 4,MachineState.error,# 10-17 + 5,MachineState.error, 6,MachineState.error, 5, 5, 4,MachineState.error,# 18-1f + 4,MachineState.error, 4, 4, 4,MachineState.error, 4,MachineState.error,# 20-27 + 4,MachineState.its_me,MachineState.start,MachineState.start,MachineState.start,MachineState.start,MachineState.start,MachineState.start,# 28-2f ) -HZCharLenTable = (0, 0, 0, 0, 0, 0) +HZ_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0) -HZSMModel = {'classTable': HZ_cls, - 'classFactor': 6, - 'stateTable': HZ_st, - 'charLenTable': HZCharLenTable, - 'name': "HZ-GB-2312"} +HZ_SM_MODEL = {'class_table': HZ_CLS, + 'class_factor': 6, + 'state_table': HZ_ST, + 'char_len_table': HZ_CHAR_LEN_TABLE, + 'name': "HZ-GB-2312"} -ISO2022CN_cls = ( +ISO2022CN_CLS = ( 2,0,0,0,0,0,0,0, # 00 - 07 0,0,0,0,0,0,0,0, # 08 - 0f 0,0,0,0,0,0,0,0, # 10 - 17 @@ -114,26 +114,26 @@ ISO2022CN_cls = ( 2,2,2,2,2,2,2,2, # f8 - ff ) -ISO2022CN_st = ( -eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07 -eStart,eError,eError,eError,eError,eError,eError,eError,# 08-0f -eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17 -eItsMe,eItsMe,eItsMe,eError,eError,eError, 4,eError,# 18-1f -eError,eError,eError,eItsMe,eError,eError,eError,eError,# 20-27 - 5, 6,eError,eError,eError,eError,eError,eError,# 28-2f -eError,eError,eError,eItsMe,eError,eError,eError,eError,# 30-37 -eError,eError,eError,eError,eError,eItsMe,eError,eStart,# 38-3f +ISO2022CN_ST = ( +MachineState.start, 3,MachineState.error,MachineState.start,MachineState.start,MachineState.start,MachineState.start,MachineState.start,# 00-07 +MachineState.start,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,# 08-0f +MachineState.error,MachineState.error,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,# 10-17 +MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.error,MachineState.error,MachineState.error, 4,MachineState.error,# 18-1f +MachineState.error,MachineState.error,MachineState.error,MachineState.its_me,MachineState.error,MachineState.error,MachineState.error,MachineState.error,# 20-27 + 5, 6,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,# 28-2f +MachineState.error,MachineState.error,MachineState.error,MachineState.its_me,MachineState.error,MachineState.error,MachineState.error,MachineState.error,# 30-37 +MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.its_me,MachineState.error,MachineState.start,# 38-3f ) -ISO2022CNCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0) +ISO2022CN_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0) -ISO2022CNSMModel = {'classTable': ISO2022CN_cls, - 'classFactor': 9, - 'stateTable': ISO2022CN_st, - 'charLenTable': ISO2022CNCharLenTable, - 'name': "ISO-2022-CN"} +ISO2022CN_SM_MODEL = {'class_table': ISO2022CN_CLS, + 'class_factor': 9, + 'state_table': ISO2022CN_ST, + 'char_len_table': ISO2022CN_CHAR_LEN_TABLE, + 'name': "ISO-2022-CN"} -ISO2022JP_cls = ( +ISO2022JP_CLS = ( 2,0,0,0,0,0,0,0, # 00 - 07 0,0,0,0,0,0,2,2, # 08 - 0f 0,0,0,0,0,0,0,0, # 10 - 17 @@ -168,27 +168,27 @@ ISO2022JP_cls = ( 2,2,2,2,2,2,2,2, # f8 - ff ) -ISO2022JP_st = ( -eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07 -eStart,eStart,eError,eError,eError,eError,eError,eError,# 08-0f -eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17 -eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,# 18-1f -eError, 5,eError,eError,eError, 4,eError,eError,# 20-27 -eError,eError,eError, 6,eItsMe,eError,eItsMe,eError,# 28-2f -eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,# 30-37 -eError,eError,eError,eItsMe,eError,eError,eError,eError,# 38-3f -eError,eError,eError,eError,eItsMe,eError,eStart,eStart,# 40-47 +ISO2022JP_ST = ( +MachineState.start, 3,MachineState.error,MachineState.start,MachineState.start,MachineState.start,MachineState.start,MachineState.start,# 00-07 +MachineState.start,MachineState.start,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,# 08-0f +MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,# 10-17 +MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.error,MachineState.error,# 18-1f +MachineState.error, 5,MachineState.error,MachineState.error,MachineState.error, 4,MachineState.error,MachineState.error,# 20-27 +MachineState.error,MachineState.error,MachineState.error, 6,MachineState.its_me,MachineState.error,MachineState.its_me,MachineState.error,# 28-2f +MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.its_me,MachineState.its_me,# 30-37 +MachineState.error,MachineState.error,MachineState.error,MachineState.its_me,MachineState.error,MachineState.error,MachineState.error,MachineState.error,# 38-3f +MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.its_me,MachineState.error,MachineState.start,MachineState.start,# 40-47 ) -ISO2022JPCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0) +ISO2022JP_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0) -ISO2022JPSMModel = {'classTable': ISO2022JP_cls, - 'classFactor': 10, - 'stateTable': ISO2022JP_st, - 'charLenTable': ISO2022JPCharLenTable, - 'name': "ISO-2022-JP"} +ISO2022JP_SM_MODEL = {'class_table': ISO2022JP_CLS, + 'class_factor': 10, + 'state_table': ISO2022JP_ST, + 'char_len_table': ISO2022JP_CHAR_LEN_TABLE, + 'name': "ISO-2022-JP"} -ISO2022KR_cls = ( +ISO2022KR_CLS = ( 2,0,0,0,0,0,0,0, # 00 - 07 0,0,0,0,0,0,0,0, # 08 - 0f 0,0,0,0,0,0,0,0, # 10 - 17 @@ -223,20 +223,20 @@ ISO2022KR_cls = ( 2,2,2,2,2,2,2,2, # f8 - ff ) -ISO2022KR_st = ( -eStart, 3,eError,eStart,eStart,eStart,eError,eError,# 00-07 -eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f -eItsMe,eItsMe,eError,eError,eError, 4,eError,eError,# 10-17 -eError,eError,eError,eError, 5,eError,eError,eError,# 18-1f -eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart,# 20-27 +ISO2022KR_ST = ( +MachineState.start, 3,MachineState.error,MachineState.start,MachineState.start,MachineState.start,MachineState.error,MachineState.error,# 00-07 +MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,# 08-0f +MachineState.its_me,MachineState.its_me,MachineState.error,MachineState.error,MachineState.error, 4,MachineState.error,MachineState.error,# 10-17 +MachineState.error,MachineState.error,MachineState.error,MachineState.error, 5,MachineState.error,MachineState.error,MachineState.error,# 18-1f +MachineState.error,MachineState.error,MachineState.error,MachineState.its_me,MachineState.start,MachineState.start,MachineState.start,MachineState.start,# 20-27 ) -ISO2022KRCharLenTable = (0, 0, 0, 0, 0, 0) +ISO2022KR_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0) + +ISO2022KR_SM_MODEL = {'class_table': ISO2022KR_CLS, + 'class_factor': 6, + 'state_table': ISO2022KR_ST, + 'char_len_table': ISO2022KR_CHAR_LEN_TABLE, + 'name': "ISO-2022-KR"} -ISO2022KRSMModel = {'classTable': ISO2022KR_cls, - 'classFactor': 6, - 'stateTable': ISO2022KR_st, - 'charLenTable': ISO2022KRCharLenTable, - 'name': "ISO-2022-KR"} -# flake8: noqa diff --git a/lib/chardet/eucjpprober.py b/lib/chardet/eucjpprober.py index 8e64fdcc266349de08ded1163ad366a14de92848..341d6e66284f074cfc2045a4ddc0fcac63ea005f 100644 --- a/lib/chardet/eucjpprober.py +++ b/lib/chardet/eucjpprober.py @@ -25,66 +25,64 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -import sys -from . import constants +from .enums import ProbingState, MachineState from .mbcharsetprober import MultiByteCharSetProber from .codingstatemachine import CodingStateMachine from .chardistribution import EUCJPDistributionAnalysis from .jpcntx import EUCJPContextAnalysis -from .mbcssm import EUCJPSMModel +from .mbcssm import EUCJP_SM_MODEL class EUCJPProber(MultiByteCharSetProber): def __init__(self): - MultiByteCharSetProber.__init__(self) - self._mCodingSM = CodingStateMachine(EUCJPSMModel) - self._mDistributionAnalyzer = EUCJPDistributionAnalysis() - self._mContextAnalyzer = EUCJPContextAnalysis() + super(EUCJPProber, self).__init__() + self.coding_sm = CodingStateMachine(EUCJP_SM_MODEL) + self.distribution_analyzer = EUCJPDistributionAnalysis() + self.context_analyzer = EUCJPContextAnalysis() self.reset() def reset(self): - MultiByteCharSetProber.reset(self) - self._mContextAnalyzer.reset() + super(EUCJPProber, self).reset() + self.context_analyzer.reset() - def get_charset_name(self): + @property + def charset_name(self): return "EUC-JP" - def feed(self, aBuf): - aLen = len(aBuf) - for i in range(0, aLen): - # PY3K: aBuf is a byte array, so aBuf[i] is an int, not a byte - codingState = self._mCodingSM.next_state(aBuf[i]) - if codingState == constants.eError: - if constants._debug: - sys.stderr.write(self.get_charset_name() - + ' prober hit error at byte ' + str(i) - + '\n') - self._mState = constants.eNotMe + def feed(self, byte_str): + for i in range(len(byte_str)): + # PY3K: byte_str is a byte array, so byte_str[i] is an int, not a byte + coding_state = self.coding_sm.next_state(byte_str[i]) + if coding_state == MachineState.error: + self.logger.debug('%s prober hit error at byte %s', + self.charset_name, i) + self._state = ProbingState.not_me break - elif codingState == constants.eItsMe: - self._mState = constants.eFoundIt + elif coding_state == MachineState.its_me: + self._state = ProbingState.found_it break - elif codingState == constants.eStart: - charLen = self._mCodingSM.get_current_charlen() + elif coding_state == MachineState.start: + char_len = self.coding_sm.get_current_charlen() if i == 0: - self._mLastChar[1] = aBuf[0] - self._mContextAnalyzer.feed(self._mLastChar, charLen) - self._mDistributionAnalyzer.feed(self._mLastChar, charLen) + self._last_char[1] = byte_str[0] + self.context_analyzer.feed(self._last_char, char_len) + self.distribution_analyzer.feed(self._last_char, char_len) else: - self._mContextAnalyzer.feed(aBuf[i - 1:i + 1], charLen) - self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1], - charLen) + self.context_analyzer.feed(byte_str[i - 1:i + 1], + char_len) + self.distribution_analyzer.feed(byte_str[i - 1:i + 1], + char_len) - self._mLastChar[0] = aBuf[aLen - 1] + self._last_char[0] = byte_str[-1] - if self.get_state() == constants.eDetecting: - if (self._mContextAnalyzer.got_enough_data() and - (self.get_confidence() > constants.SHORTCUT_THRESHOLD)): - self._mState = constants.eFoundIt + if self.state == ProbingState.detecting: + if (self.context_analyzer.got_enough_data() and + (self.get_confidence() > self.SHORTCUT_THRESHOLD)): + self._state = ProbingState.found_it - return self.get_state() + return self.state def get_confidence(self): - contxtCf = self._mContextAnalyzer.get_confidence() - distribCf = self._mDistributionAnalyzer.get_confidence() - return max(contxtCf, distribCf) + context_conf = self.context_analyzer.get_confidence() + distrib_conf = self.distribution_analyzer.get_confidence() + return max(context_conf, distrib_conf) diff --git a/lib/chardet/euckrfreq.py b/lib/chardet/euckrfreq.py index a179e4c21c055614476dbc22160cd9394e18fe5a..b68078cb9680de4cca65b1145632a37c5e751c38 100644 --- a/lib/chardet/euckrfreq.py +++ b/lib/chardet/euckrfreq.py @@ -13,12 +13,12 @@ # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. -# +# # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. -# +# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA @@ -35,15 +35,15 @@ # # Idea Distribution Ratio = 0.98653 / (1-0.98653) = 73.24 # Random Distribution Ration = 512 / (2350-512) = 0.279. -# -# Typical Distribution Ratio +# +# Typical Distribution Ratio EUCKR_TYPICAL_DISTRIBUTION_RATIO = 6.0 EUCKR_TABLE_SIZE = 2352 -# Char to FreqOrder table , -EUCKRCharToFreqOrder = ( \ +# Char to FreqOrder table , +EUCKR_CHAR_TO_FREQ_ORDER = ( 13, 130, 120,1396, 481,1719,1720, 328, 609, 212,1721, 707, 400, 299,1722, 87, 1397,1723, 104, 536,1117,1203,1724,1267, 685,1268, 508,1725,1726,1727,1728,1398, 1399,1729,1730,1731, 141, 621, 326,1057, 368,1732, 267, 488, 20,1733,1269,1734, @@ -191,406 +191,5 @@ EUCKRCharToFreqOrder = ( \ 1009,2620,2621,2622,1538, 690,1328,2623, 955,2624,1539,2625,2626, 772,2627,2628, 2629,2630,2631, 924, 648, 863, 603,2632,2633, 934,1540, 864, 865,2634, 642,1042, 670,1190,2635,2636,2637,2638, 168,2639, 652, 873, 542,1054,1541,2640,2641,2642, # 512, 256 -#Everything below is of no interest for detection purpose -2643,2644,2645,2646,2647,2648,2649,2650,2651,2652,2653,2654,2655,2656,2657,2658, -2659,2660,2661,2662,2663,2664,2665,2666,2667,2668,2669,2670,2671,2672,2673,2674, -2675,2676,2677,2678,2679,2680,2681,2682,2683,2684,2685,2686,2687,2688,2689,2690, -2691,2692,2693,2694,2695,2696,2697,2698,2699,1542, 880,2700,2701,2702,2703,2704, -2705,2706,2707,2708,2709,2710,2711,2712,2713,2714,2715,2716,2717,2718,2719,2720, -2721,2722,2723,2724,2725,1543,2726,2727,2728,2729,2730,2731,2732,1544,2733,2734, -2735,2736,2737,2738,2739,2740,2741,2742,2743,2744,2745,2746,2747,2748,2749,2750, -2751,2752,2753,2754,1545,2755,2756,2757,2758,2759,2760,2761,2762,2763,2764,2765, -2766,1546,2767,1547,2768,2769,2770,2771,2772,2773,2774,2775,2776,2777,2778,2779, -2780,2781,2782,2783,2784,2785,2786,1548,2787,2788,2789,1109,2790,2791,2792,2793, -2794,2795,2796,2797,2798,2799,2800,2801,2802,2803,2804,2805,2806,2807,2808,2809, -2810,2811,2812,1329,2813,2814,2815,2816,2817,2818,2819,2820,2821,2822,2823,2824, -2825,2826,2827,2828,2829,2830,2831,2832,2833,2834,2835,2836,2837,2838,2839,2840, -2841,2842,2843,2844,2845,2846,2847,2848,2849,2850,2851,2852,2853,2854,2855,2856, -1549,2857,2858,2859,2860,1550,2861,2862,1551,2863,2864,2865,2866,2867,2868,2869, -2870,2871,2872,2873,2874,1110,1330,2875,2876,2877,2878,2879,2880,2881,2882,2883, -2884,2885,2886,2887,2888,2889,2890,2891,2892,2893,2894,2895,2896,2897,2898,2899, -2900,2901,2902,2903,2904,2905,2906,2907,2908,2909,2910,2911,2912,2913,2914,2915, -2916,2917,2918,2919,2920,2921,2922,2923,2924,2925,2926,2927,2928,2929,2930,1331, -2931,2932,2933,2934,2935,2936,2937,2938,2939,2940,2941,2942,2943,1552,2944,2945, -2946,2947,2948,2949,2950,2951,2952,2953,2954,2955,2956,2957,2958,2959,2960,2961, -2962,2963,2964,1252,2965,2966,2967,2968,2969,2970,2971,2972,2973,2974,2975,2976, -2977,2978,2979,2980,2981,2982,2983,2984,2985,2986,2987,2988,2989,2990,2991,2992, -2993,2994,2995,2996,2997,2998,2999,3000,3001,3002,3003,3004,3005,3006,3007,3008, -3009,3010,3011,3012,1553,3013,3014,3015,3016,3017,1554,3018,1332,3019,3020,3021, -3022,3023,3024,3025,3026,3027,3028,3029,3030,3031,3032,3033,3034,3035,3036,3037, -3038,3039,3040,3041,3042,3043,3044,3045,3046,3047,3048,3049,3050,1555,3051,3052, -3053,1556,1557,3054,3055,3056,3057,3058,3059,3060,3061,3062,3063,3064,3065,3066, -3067,1558,3068,3069,3070,3071,3072,3073,3074,3075,3076,1559,3077,3078,3079,3080, -3081,3082,3083,1253,3084,3085,3086,3087,3088,3089,3090,3091,3092,3093,3094,3095, -3096,3097,3098,3099,3100,3101,3102,3103,3104,3105,3106,3107,3108,1152,3109,3110, -3111,3112,3113,1560,3114,3115,3116,3117,1111,3118,3119,3120,3121,3122,3123,3124, -3125,3126,3127,3128,3129,3130,3131,3132,3133,3134,3135,3136,3137,3138,3139,3140, -3141,3142,3143,3144,3145,3146,3147,3148,3149,3150,3151,3152,3153,3154,3155,3156, -3157,3158,3159,3160,3161,3162,3163,3164,3165,3166,3167,3168,3169,3170,3171,3172, -3173,3174,3175,3176,1333,3177,3178,3179,3180,3181,3182,3183,3184,3185,3186,3187, -3188,3189,1561,3190,3191,1334,3192,3193,3194,3195,3196,3197,3198,3199,3200,3201, -3202,3203,3204,3205,3206,3207,3208,3209,3210,3211,3212,3213,3214,3215,3216,3217, -3218,3219,3220,3221,3222,3223,3224,3225,3226,3227,3228,3229,3230,3231,3232,3233, -3234,1562,3235,3236,3237,3238,3239,3240,3241,3242,3243,3244,3245,3246,3247,3248, -3249,3250,3251,3252,3253,3254,3255,3256,3257,3258,3259,3260,3261,3262,3263,3264, -3265,3266,3267,3268,3269,3270,3271,3272,3273,3274,3275,3276,3277,1563,3278,3279, -3280,3281,3282,3283,3284,3285,3286,3287,3288,3289,3290,3291,3292,3293,3294,3295, -3296,3297,3298,3299,3300,3301,3302,3303,3304,3305,3306,3307,3308,3309,3310,3311, -3312,3313,3314,3315,3316,3317,3318,3319,3320,3321,3322,3323,3324,3325,3326,3327, -3328,3329,3330,3331,3332,3333,3334,3335,3336,3337,3338,3339,3340,3341,3342,3343, -3344,3345,3346,3347,3348,3349,3350,3351,3352,3353,3354,3355,3356,3357,3358,3359, -3360,3361,3362,3363,3364,1335,3365,3366,3367,3368,3369,3370,3371,3372,3373,3374, -3375,3376,3377,3378,3379,3380,3381,3382,3383,3384,3385,3386,3387,1336,3388,3389, -3390,3391,3392,3393,3394,3395,3396,3397,3398,3399,3400,3401,3402,3403,3404,3405, -3406,3407,3408,3409,3410,3411,3412,3413,3414,1337,3415,3416,3417,3418,3419,1338, -3420,3421,3422,1564,1565,3423,3424,3425,3426,3427,3428,3429,3430,3431,1254,3432, -3433,3434,1339,3435,3436,3437,3438,3439,1566,3440,3441,3442,3443,3444,3445,3446, -3447,3448,3449,3450,3451,3452,3453,3454,1255,3455,3456,3457,3458,3459,1567,1191, -3460,1568,1569,3461,3462,3463,1570,3464,3465,3466,3467,3468,1571,3469,3470,3471, -3472,3473,1572,3474,3475,3476,3477,3478,3479,3480,3481,3482,3483,3484,3485,3486, -1340,3487,3488,3489,3490,3491,3492,1021,3493,3494,3495,3496,3497,3498,1573,3499, -1341,3500,3501,3502,3503,3504,3505,3506,3507,3508,3509,3510,3511,1342,3512,3513, -3514,3515,3516,1574,1343,3517,3518,3519,1575,3520,1576,3521,3522,3523,3524,3525, -3526,3527,3528,3529,3530,3531,3532,3533,3534,3535,3536,3537,3538,3539,3540,3541, -3542,3543,3544,3545,3546,3547,3548,3549,3550,3551,3552,3553,3554,3555,3556,3557, -3558,3559,3560,3561,3562,3563,3564,3565,3566,3567,3568,3569,3570,3571,3572,3573, -3574,3575,3576,3577,3578,3579,3580,1577,3581,3582,1578,3583,3584,3585,3586,3587, -3588,3589,3590,3591,3592,3593,3594,3595,3596,3597,3598,3599,3600,3601,3602,3603, -3604,1579,3605,3606,3607,3608,3609,3610,3611,3612,3613,3614,3615,3616,3617,3618, -3619,3620,3621,3622,3623,3624,3625,3626,3627,3628,3629,1580,3630,3631,1581,3632, -3633,3634,3635,3636,3637,3638,3639,3640,3641,3642,3643,3644,3645,3646,3647,3648, -3649,3650,3651,3652,3653,3654,3655,3656,1582,3657,3658,3659,3660,3661,3662,3663, -3664,3665,3666,3667,3668,3669,3670,3671,3672,3673,3674,3675,3676,3677,3678,3679, -3680,3681,3682,3683,3684,3685,3686,3687,3688,3689,3690,3691,3692,3693,3694,3695, -3696,3697,3698,3699,3700,1192,3701,3702,3703,3704,1256,3705,3706,3707,3708,1583, -1257,3709,3710,3711,3712,3713,3714,3715,3716,1584,3717,3718,3719,3720,3721,3722, -3723,3724,3725,3726,3727,3728,3729,3730,3731,3732,3733,3734,3735,3736,3737,3738, -3739,3740,3741,3742,3743,3744,3745,1344,3746,3747,3748,3749,3750,3751,3752,3753, -3754,3755,3756,1585,3757,3758,3759,3760,3761,3762,3763,3764,3765,3766,1586,3767, -3768,3769,3770,3771,3772,3773,3774,3775,3776,3777,3778,1345,3779,3780,3781,3782, -3783,3784,3785,3786,3787,3788,3789,3790,3791,3792,3793,3794,3795,1346,1587,3796, -3797,1588,3798,3799,3800,3801,3802,3803,3804,3805,3806,1347,3807,3808,3809,3810, -3811,1589,3812,3813,3814,3815,3816,3817,3818,3819,3820,3821,1590,3822,3823,1591, -1348,3824,3825,3826,3827,3828,3829,3830,1592,3831,3832,1593,3833,3834,3835,3836, -3837,3838,3839,3840,3841,3842,3843,3844,1349,3845,3846,3847,3848,3849,3850,3851, -3852,3853,3854,3855,3856,3857,3858,1594,3859,3860,3861,3862,3863,3864,3865,3866, -3867,3868,3869,1595,3870,3871,3872,3873,1596,3874,3875,3876,3877,3878,3879,3880, -3881,3882,3883,3884,3885,3886,1597,3887,3888,3889,3890,3891,3892,3893,3894,3895, -1598,3896,3897,3898,1599,1600,3899,1350,3900,1351,3901,3902,1352,3903,3904,3905, -3906,3907,3908,3909,3910,3911,3912,3913,3914,3915,3916,3917,3918,3919,3920,3921, -3922,3923,3924,1258,3925,3926,3927,3928,3929,3930,3931,1193,3932,1601,3933,3934, -3935,3936,3937,3938,3939,3940,3941,3942,3943,1602,3944,3945,3946,3947,3948,1603, -3949,3950,3951,3952,3953,3954,3955,3956,3957,3958,3959,3960,3961,3962,3963,3964, -3965,1604,3966,3967,3968,3969,3970,3971,3972,3973,3974,3975,3976,3977,1353,3978, -3979,3980,3981,3982,3983,3984,3985,3986,3987,3988,3989,3990,3991,1354,3992,3993, -3994,3995,3996,3997,3998,3999,4000,4001,4002,4003,4004,4005,4006,4007,4008,4009, -4010,4011,4012,4013,4014,4015,4016,4017,4018,4019,4020,4021,4022,4023,1355,4024, -4025,4026,4027,4028,4029,4030,4031,4032,4033,4034,4035,4036,4037,4038,4039,4040, -1605,4041,4042,4043,4044,4045,4046,4047,4048,4049,4050,4051,4052,4053,4054,4055, -4056,4057,4058,4059,4060,1606,4061,4062,4063,4064,1607,4065,4066,4067,4068,4069, -4070,4071,4072,4073,4074,4075,4076,1194,4077,4078,1608,4079,4080,4081,4082,4083, -4084,4085,4086,4087,1609,4088,4089,4090,4091,4092,4093,4094,4095,4096,4097,4098, -4099,4100,4101,4102,4103,4104,4105,4106,4107,4108,1259,4109,4110,4111,4112,4113, -4114,4115,4116,4117,4118,4119,4120,4121,4122,4123,4124,1195,4125,4126,4127,1610, -4128,4129,4130,4131,4132,4133,4134,4135,4136,4137,1356,4138,4139,4140,4141,4142, -4143,4144,1611,4145,4146,4147,4148,4149,4150,4151,4152,4153,4154,4155,4156,4157, -4158,4159,4160,4161,4162,4163,4164,4165,4166,4167,4168,4169,4170,4171,4172,4173, -4174,4175,4176,4177,4178,4179,4180,4181,4182,4183,4184,4185,4186,4187,4188,4189, -4190,4191,4192,4193,4194,4195,4196,4197,4198,4199,4200,4201,4202,4203,4204,4205, -4206,4207,4208,4209,4210,4211,4212,4213,4214,4215,4216,4217,4218,4219,1612,4220, -4221,4222,4223,4224,4225,4226,4227,1357,4228,1613,4229,4230,4231,4232,4233,4234, -4235,4236,4237,4238,4239,4240,4241,4242,4243,1614,4244,4245,4246,4247,4248,4249, -4250,4251,4252,4253,4254,4255,4256,4257,4258,4259,4260,4261,4262,4263,4264,4265, -4266,4267,4268,4269,4270,1196,1358,4271,4272,4273,4274,4275,4276,4277,4278,4279, -4280,4281,4282,4283,4284,4285,4286,4287,1615,4288,4289,4290,4291,4292,4293,4294, -4295,4296,4297,4298,4299,4300,4301,4302,4303,4304,4305,4306,4307,4308,4309,4310, -4311,4312,4313,4314,4315,4316,4317,4318,4319,4320,4321,4322,4323,4324,4325,4326, -4327,4328,4329,4330,4331,4332,4333,4334,1616,4335,4336,4337,4338,4339,4340,4341, -4342,4343,4344,4345,4346,4347,4348,4349,4350,4351,4352,4353,4354,4355,4356,4357, -4358,4359,4360,1617,4361,4362,4363,4364,4365,1618,4366,4367,4368,4369,4370,4371, -4372,4373,4374,4375,4376,4377,4378,4379,4380,4381,4382,4383,4384,4385,4386,4387, -4388,4389,4390,4391,4392,4393,4394,4395,4396,4397,4398,4399,4400,4401,4402,4403, -4404,4405,4406,4407,4408,4409,4410,4411,4412,4413,4414,4415,4416,1619,4417,4418, -4419,4420,4421,4422,4423,4424,4425,1112,4426,4427,4428,4429,4430,1620,4431,4432, -4433,4434,4435,4436,4437,4438,4439,4440,4441,4442,1260,1261,4443,4444,4445,4446, -4447,4448,4449,4450,4451,4452,4453,4454,4455,1359,4456,4457,4458,4459,4460,4461, -4462,4463,4464,4465,1621,4466,4467,4468,4469,4470,4471,4472,4473,4474,4475,4476, -4477,4478,4479,4480,4481,4482,4483,4484,4485,4486,4487,4488,4489,1055,4490,4491, -4492,4493,4494,4495,4496,4497,4498,4499,4500,4501,4502,4503,4504,4505,4506,4507, -4508,4509,4510,4511,4512,4513,4514,4515,4516,4517,4518,1622,4519,4520,4521,1623, -4522,4523,4524,4525,4526,4527,4528,4529,4530,4531,4532,4533,4534,4535,1360,4536, -4537,4538,4539,4540,4541,4542,4543, 975,4544,4545,4546,4547,4548,4549,4550,4551, -4552,4553,4554,4555,4556,4557,4558,4559,4560,4561,4562,4563,4564,4565,4566,4567, -4568,4569,4570,4571,1624,4572,4573,4574,4575,4576,1625,4577,4578,4579,4580,4581, -4582,4583,4584,1626,4585,4586,4587,4588,4589,4590,4591,4592,4593,4594,4595,1627, -4596,4597,4598,4599,4600,4601,4602,4603,4604,4605,4606,4607,4608,4609,4610,4611, -4612,4613,4614,4615,1628,4616,4617,4618,4619,4620,4621,4622,4623,4624,4625,4626, -4627,4628,4629,4630,4631,4632,4633,4634,4635,4636,4637,4638,4639,4640,4641,4642, -4643,4644,4645,4646,4647,4648,4649,1361,4650,4651,4652,4653,4654,4655,4656,4657, -4658,4659,4660,4661,1362,4662,4663,4664,4665,4666,4667,4668,4669,4670,4671,4672, -4673,4674,4675,4676,4677,4678,4679,4680,4681,4682,1629,4683,4684,4685,4686,4687, -1630,4688,4689,4690,4691,1153,4692,4693,4694,1113,4695,4696,4697,4698,4699,4700, -4701,4702,4703,4704,4705,4706,4707,4708,4709,4710,4711,1197,4712,4713,4714,4715, -4716,4717,4718,4719,4720,4721,4722,4723,4724,4725,4726,4727,4728,4729,4730,4731, -4732,4733,4734,4735,1631,4736,1632,4737,4738,4739,4740,4741,4742,4743,4744,1633, -4745,4746,4747,4748,4749,1262,4750,4751,4752,4753,4754,1363,4755,4756,4757,4758, -4759,4760,4761,4762,4763,4764,4765,4766,4767,4768,1634,4769,4770,4771,4772,4773, -4774,4775,4776,4777,4778,1635,4779,4780,4781,4782,4783,4784,4785,4786,4787,4788, -4789,1636,4790,4791,4792,4793,4794,4795,4796,4797,4798,4799,4800,4801,4802,4803, -4804,4805,4806,1637,4807,4808,4809,1638,4810,4811,4812,4813,4814,4815,4816,4817, -4818,1639,4819,4820,4821,4822,4823,4824,4825,4826,4827,4828,4829,4830,4831,4832, -4833,1077,4834,4835,4836,4837,4838,4839,4840,4841,4842,4843,4844,4845,4846,4847, -4848,4849,4850,4851,4852,4853,4854,4855,4856,4857,4858,4859,4860,4861,4862,4863, -4864,4865,4866,4867,4868,4869,4870,4871,4872,4873,4874,4875,4876,4877,4878,4879, -4880,4881,4882,4883,1640,4884,4885,1641,4886,4887,4888,4889,4890,4891,4892,4893, -4894,4895,4896,4897,4898,4899,4900,4901,4902,4903,4904,4905,4906,4907,4908,4909, -4910,4911,1642,4912,4913,4914,1364,4915,4916,4917,4918,4919,4920,4921,4922,4923, -4924,4925,4926,4927,4928,4929,4930,4931,1643,4932,4933,4934,4935,4936,4937,4938, -4939,4940,4941,4942,4943,4944,4945,4946,4947,4948,4949,4950,4951,4952,4953,4954, -4955,4956,4957,4958,4959,4960,4961,4962,4963,4964,4965,4966,4967,4968,4969,4970, -4971,4972,4973,4974,4975,4976,4977,4978,4979,4980,1644,4981,4982,4983,4984,1645, -4985,4986,1646,4987,4988,4989,4990,4991,4992,4993,4994,4995,4996,4997,4998,4999, -5000,5001,5002,5003,5004,5005,1647,5006,1648,5007,5008,5009,5010,5011,5012,1078, -5013,5014,5015,5016,5017,5018,5019,5020,5021,5022,5023,5024,5025,5026,5027,5028, -1365,5029,5030,5031,5032,5033,5034,5035,5036,5037,5038,5039,1649,5040,5041,5042, -5043,5044,5045,1366,5046,5047,5048,5049,5050,5051,5052,5053,5054,5055,1650,5056, -5057,5058,5059,5060,5061,5062,5063,5064,5065,5066,5067,5068,5069,5070,5071,5072, -5073,5074,5075,5076,5077,1651,5078,5079,5080,5081,5082,5083,5084,5085,5086,5087, -5088,5089,5090,5091,5092,5093,5094,5095,5096,5097,5098,5099,5100,5101,5102,5103, -5104,5105,5106,5107,5108,5109,5110,1652,5111,5112,5113,5114,5115,5116,5117,5118, -1367,5119,5120,5121,5122,5123,5124,5125,5126,5127,5128,5129,1653,5130,5131,5132, -5133,5134,5135,5136,5137,5138,5139,5140,5141,5142,5143,5144,5145,5146,5147,5148, -5149,1368,5150,1654,5151,1369,5152,5153,5154,5155,5156,5157,5158,5159,5160,5161, -5162,5163,5164,5165,5166,5167,5168,5169,5170,5171,5172,5173,5174,5175,5176,5177, -5178,1370,5179,5180,5181,5182,5183,5184,5185,5186,5187,5188,5189,5190,5191,5192, -5193,5194,5195,5196,5197,5198,1655,5199,5200,5201,5202,1656,5203,5204,5205,5206, -1371,5207,1372,5208,5209,5210,5211,1373,5212,5213,1374,5214,5215,5216,5217,5218, -5219,5220,5221,5222,5223,5224,5225,5226,5227,5228,5229,5230,5231,5232,5233,5234, -5235,5236,5237,5238,5239,5240,5241,5242,5243,5244,5245,5246,5247,1657,5248,5249, -5250,5251,1658,1263,5252,5253,5254,5255,5256,1375,5257,5258,5259,5260,5261,5262, -5263,5264,5265,5266,5267,5268,5269,5270,5271,5272,5273,5274,5275,5276,5277,5278, -5279,5280,5281,5282,5283,1659,5284,5285,5286,5287,5288,5289,5290,5291,5292,5293, -5294,5295,5296,5297,5298,5299,5300,1660,5301,5302,5303,5304,5305,5306,5307,5308, -5309,5310,5311,5312,5313,5314,5315,5316,5317,5318,5319,5320,5321,1376,5322,5323, -5324,5325,5326,5327,5328,5329,5330,5331,5332,5333,1198,5334,5335,5336,5337,5338, -5339,5340,5341,5342,5343,1661,5344,5345,5346,5347,5348,5349,5350,5351,5352,5353, -5354,5355,5356,5357,5358,5359,5360,5361,5362,5363,5364,5365,5366,5367,5368,5369, -5370,5371,5372,5373,5374,5375,5376,5377,5378,5379,5380,5381,5382,5383,5384,5385, -5386,5387,5388,5389,5390,5391,5392,5393,5394,5395,5396,5397,5398,1264,5399,5400, -5401,5402,5403,5404,5405,5406,5407,5408,5409,5410,5411,5412,1662,5413,5414,5415, -5416,1663,5417,5418,5419,5420,5421,5422,5423,5424,5425,5426,5427,5428,5429,5430, -5431,5432,5433,5434,5435,5436,5437,5438,1664,5439,5440,5441,5442,5443,5444,5445, -5446,5447,5448,5449,5450,5451,5452,5453,5454,5455,5456,5457,5458,5459,5460,5461, -5462,5463,5464,5465,5466,5467,5468,5469,5470,5471,5472,5473,5474,5475,5476,5477, -5478,1154,5479,5480,5481,5482,5483,5484,5485,1665,5486,5487,5488,5489,5490,5491, -5492,5493,5494,5495,5496,5497,5498,5499,5500,5501,5502,5503,5504,5505,5506,5507, -5508,5509,5510,5511,5512,5513,5514,5515,5516,5517,5518,5519,5520,5521,5522,5523, -5524,5525,5526,5527,5528,5529,5530,5531,5532,5533,5534,5535,5536,5537,5538,5539, -5540,5541,5542,5543,5544,5545,5546,5547,5548,1377,5549,5550,5551,5552,5553,5554, -5555,5556,5557,5558,5559,5560,5561,5562,5563,5564,5565,5566,5567,5568,5569,5570, -1114,5571,5572,5573,5574,5575,5576,5577,5578,5579,5580,5581,5582,5583,5584,5585, -5586,5587,5588,5589,5590,5591,5592,1378,5593,5594,5595,5596,5597,5598,5599,5600, -5601,5602,5603,5604,5605,5606,5607,5608,5609,5610,5611,5612,5613,5614,1379,5615, -5616,5617,5618,5619,5620,5621,5622,5623,5624,5625,5626,5627,5628,5629,5630,5631, -5632,5633,5634,1380,5635,5636,5637,5638,5639,5640,5641,5642,5643,5644,5645,5646, -5647,5648,5649,1381,1056,5650,5651,5652,5653,5654,5655,5656,5657,5658,5659,5660, -1666,5661,5662,5663,5664,5665,5666,5667,5668,1667,5669,1668,5670,5671,5672,5673, -5674,5675,5676,5677,5678,1155,5679,5680,5681,5682,5683,5684,5685,5686,5687,5688, -5689,5690,5691,5692,5693,5694,5695,5696,5697,5698,1669,5699,5700,5701,5702,5703, -5704,5705,1670,5706,5707,5708,5709,5710,1671,5711,5712,5713,5714,1382,5715,5716, -5717,5718,5719,5720,5721,5722,5723,5724,5725,1672,5726,5727,1673,1674,5728,5729, -5730,5731,5732,5733,5734,5735,5736,1675,5737,5738,5739,5740,5741,5742,5743,5744, -1676,5745,5746,5747,5748,5749,5750,5751,1383,5752,5753,5754,5755,5756,5757,5758, -5759,5760,5761,5762,5763,5764,5765,5766,5767,5768,1677,5769,5770,5771,5772,5773, -1678,5774,5775,5776, 998,5777,5778,5779,5780,5781,5782,5783,5784,5785,1384,5786, -5787,5788,5789,5790,5791,5792,5793,5794,5795,5796,5797,5798,5799,5800,1679,5801, -5802,5803,1115,1116,5804,5805,5806,5807,5808,5809,5810,5811,5812,5813,5814,5815, -5816,5817,5818,5819,5820,5821,5822,5823,5824,5825,5826,5827,5828,5829,5830,5831, -5832,5833,5834,5835,5836,5837,5838,5839,5840,5841,5842,5843,5844,5845,5846,5847, -5848,5849,5850,5851,5852,5853,5854,5855,1680,5856,5857,5858,5859,5860,5861,5862, -5863,5864,1681,5865,5866,5867,1682,5868,5869,5870,5871,5872,5873,5874,5875,5876, -5877,5878,5879,1683,5880,1684,5881,5882,5883,5884,1685,5885,5886,5887,5888,5889, -5890,5891,5892,5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904,5905, -5906,5907,1686,5908,5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920, -5921,5922,5923,5924,5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,1687, -5936,5937,5938,5939,5940,5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951, -5952,1688,1689,5953,1199,5954,5955,5956,5957,5958,5959,5960,5961,1690,5962,5963, -5964,5965,5966,5967,5968,5969,5970,5971,5972,5973,5974,5975,5976,5977,5978,5979, -5980,5981,1385,5982,1386,5983,5984,5985,5986,5987,5988,5989,5990,5991,5992,5993, -5994,5995,5996,5997,5998,5999,6000,6001,6002,6003,6004,6005,6006,6007,6008,6009, -6010,6011,6012,6013,6014,6015,6016,6017,6018,6019,6020,6021,6022,6023,6024,6025, -6026,6027,1265,6028,6029,1691,6030,6031,6032,6033,6034,6035,6036,6037,6038,6039, -6040,6041,6042,6043,6044,6045,6046,6047,6048,6049,6050,6051,6052,6053,6054,6055, -6056,6057,6058,6059,6060,6061,6062,6063,6064,6065,6066,6067,6068,6069,6070,6071, -6072,6073,6074,6075,6076,6077,6078,6079,6080,6081,6082,6083,6084,1692,6085,6086, -6087,6088,6089,6090,6091,6092,6093,6094,6095,6096,6097,6098,6099,6100,6101,6102, -6103,6104,6105,6106,6107,6108,6109,6110,6111,6112,6113,6114,6115,6116,6117,6118, -6119,6120,6121,6122,6123,6124,6125,6126,6127,6128,6129,6130,6131,1693,6132,6133, -6134,6135,6136,1694,6137,6138,6139,6140,6141,1695,6142,6143,6144,6145,6146,6147, -6148,6149,6150,6151,6152,6153,6154,6155,6156,6157,6158,6159,6160,6161,6162,6163, -6164,6165,6166,6167,6168,6169,6170,6171,6172,6173,6174,6175,6176,6177,6178,6179, -6180,6181,6182,6183,6184,6185,1696,6186,6187,6188,6189,6190,6191,6192,6193,6194, -6195,6196,6197,6198,6199,6200,6201,6202,6203,6204,6205,6206,6207,6208,6209,6210, -6211,6212,6213,6214,6215,6216,6217,6218,6219,1697,6220,6221,6222,6223,6224,6225, -6226,6227,6228,6229,6230,6231,6232,6233,6234,6235,6236,6237,6238,6239,6240,6241, -6242,6243,6244,6245,6246,6247,6248,6249,6250,6251,6252,6253,1698,6254,6255,6256, -6257,6258,6259,6260,6261,6262,6263,1200,6264,6265,6266,6267,6268,6269,6270,6271, #1024 -6272,6273,6274,6275,6276,6277,6278,6279,6280,6281,6282,6283,6284,6285,6286,6287, -6288,6289,6290,6291,6292,6293,6294,6295,6296,6297,6298,6299,6300,6301,6302,1699, -6303,6304,1700,6305,6306,6307,6308,6309,6310,6311,6312,6313,6314,6315,6316,6317, -6318,6319,6320,6321,6322,6323,6324,6325,6326,6327,6328,6329,6330,6331,6332,6333, -6334,6335,6336,6337,6338,6339,1701,6340,6341,6342,6343,6344,1387,6345,6346,6347, -6348,6349,6350,6351,6352,6353,6354,6355,6356,6357,6358,6359,6360,6361,6362,6363, -6364,6365,6366,6367,6368,6369,6370,6371,6372,6373,6374,6375,6376,6377,6378,6379, -6380,6381,6382,6383,6384,6385,6386,6387,6388,6389,6390,6391,6392,6393,6394,6395, -6396,6397,6398,6399,6400,6401,6402,6403,6404,6405,6406,6407,6408,6409,6410,6411, -6412,6413,1702,6414,6415,6416,6417,6418,6419,6420,6421,6422,1703,6423,6424,6425, -6426,6427,6428,6429,6430,6431,6432,6433,6434,6435,6436,6437,6438,1704,6439,6440, -6441,6442,6443,6444,6445,6446,6447,6448,6449,6450,6451,6452,6453,6454,6455,6456, -6457,6458,6459,6460,6461,6462,6463,6464,6465,6466,6467,6468,6469,6470,6471,6472, -6473,6474,6475,6476,6477,6478,6479,6480,6481,6482,6483,6484,6485,6486,6487,6488, -6489,6490,6491,6492,6493,6494,6495,6496,6497,6498,6499,6500,6501,6502,6503,1266, -6504,6505,6506,6507,6508,6509,6510,6511,6512,6513,6514,6515,6516,6517,6518,6519, -6520,6521,6522,6523,6524,6525,6526,6527,6528,6529,6530,6531,6532,6533,6534,6535, -6536,6537,6538,6539,6540,6541,6542,6543,6544,6545,6546,6547,6548,6549,6550,6551, -1705,1706,6552,6553,6554,6555,6556,6557,6558,6559,6560,6561,6562,6563,6564,6565, -6566,6567,6568,6569,6570,6571,6572,6573,6574,6575,6576,6577,6578,6579,6580,6581, -6582,6583,6584,6585,6586,6587,6588,6589,6590,6591,6592,6593,6594,6595,6596,6597, -6598,6599,6600,6601,6602,6603,6604,6605,6606,6607,6608,6609,6610,6611,6612,6613, -6614,6615,6616,6617,6618,6619,6620,6621,6622,6623,6624,6625,6626,6627,6628,6629, -6630,6631,6632,6633,6634,6635,6636,6637,1388,6638,6639,6640,6641,6642,6643,6644, -1707,6645,6646,6647,6648,6649,6650,6651,6652,6653,6654,6655,6656,6657,6658,6659, -6660,6661,6662,6663,1708,6664,6665,6666,6667,6668,6669,6670,6671,6672,6673,6674, -1201,6675,6676,6677,6678,6679,6680,6681,6682,6683,6684,6685,6686,6687,6688,6689, -6690,6691,6692,6693,6694,6695,6696,6697,6698,6699,6700,6701,6702,6703,6704,6705, -6706,6707,6708,6709,6710,6711,6712,6713,6714,6715,6716,6717,6718,6719,6720,6721, -6722,6723,6724,6725,1389,6726,6727,6728,6729,6730,6731,6732,6733,6734,6735,6736, -1390,1709,6737,6738,6739,6740,6741,6742,1710,6743,6744,6745,6746,1391,6747,6748, -6749,6750,6751,6752,6753,6754,6755,6756,6757,1392,6758,6759,6760,6761,6762,6763, -6764,6765,6766,6767,6768,6769,6770,6771,6772,6773,6774,6775,6776,6777,6778,6779, -6780,1202,6781,6782,6783,6784,6785,6786,6787,6788,6789,6790,6791,6792,6793,6794, -6795,6796,6797,6798,6799,6800,6801,6802,6803,6804,6805,6806,6807,6808,6809,1711, -6810,6811,6812,6813,6814,6815,6816,6817,6818,6819,6820,6821,6822,6823,6824,6825, -6826,6827,6828,6829,6830,6831,6832,6833,6834,6835,6836,1393,6837,6838,6839,6840, -6841,6842,6843,6844,6845,6846,6847,6848,6849,6850,6851,6852,6853,6854,6855,6856, -6857,6858,6859,6860,6861,6862,6863,6864,6865,6866,6867,6868,6869,6870,6871,6872, -6873,6874,6875,6876,6877,6878,6879,6880,6881,6882,6883,6884,6885,6886,6887,6888, -6889,6890,6891,6892,6893,6894,6895,6896,6897,6898,6899,6900,6901,6902,1712,6903, -6904,6905,6906,6907,6908,6909,6910,1713,6911,6912,6913,6914,6915,6916,6917,6918, -6919,6920,6921,6922,6923,6924,6925,6926,6927,6928,6929,6930,6931,6932,6933,6934, -6935,6936,6937,6938,6939,6940,6941,6942,6943,6944,6945,6946,6947,6948,6949,6950, -6951,6952,6953,6954,6955,6956,6957,6958,6959,6960,6961,6962,6963,6964,6965,6966, -6967,6968,6969,6970,6971,6972,6973,6974,1714,6975,6976,6977,6978,6979,6980,6981, -6982,6983,6984,6985,6986,6987,6988,1394,6989,6990,6991,6992,6993,6994,6995,6996, -6997,6998,6999,7000,1715,7001,7002,7003,7004,7005,7006,7007,7008,7009,7010,7011, -7012,7013,7014,7015,7016,7017,7018,7019,7020,7021,7022,7023,7024,7025,7026,7027, -7028,1716,7029,7030,7031,7032,7033,7034,7035,7036,7037,7038,7039,7040,7041,7042, -7043,7044,7045,7046,7047,7048,7049,7050,7051,7052,7053,7054,7055,7056,7057,7058, -7059,7060,7061,7062,7063,7064,7065,7066,7067,7068,7069,7070,7071,7072,7073,7074, -7075,7076,7077,7078,7079,7080,7081,7082,7083,7084,7085,7086,7087,7088,7089,7090, -7091,7092,7093,7094,7095,7096,7097,7098,7099,7100,7101,7102,7103,7104,7105,7106, -7107,7108,7109,7110,7111,7112,7113,7114,7115,7116,7117,7118,7119,7120,7121,7122, -7123,7124,7125,7126,7127,7128,7129,7130,7131,7132,7133,7134,7135,7136,7137,7138, -7139,7140,7141,7142,7143,7144,7145,7146,7147,7148,7149,7150,7151,7152,7153,7154, -7155,7156,7157,7158,7159,7160,7161,7162,7163,7164,7165,7166,7167,7168,7169,7170, -7171,7172,7173,7174,7175,7176,7177,7178,7179,7180,7181,7182,7183,7184,7185,7186, -7187,7188,7189,7190,7191,7192,7193,7194,7195,7196,7197,7198,7199,7200,7201,7202, -7203,7204,7205,7206,7207,1395,7208,7209,7210,7211,7212,7213,1717,7214,7215,7216, -7217,7218,7219,7220,7221,7222,7223,7224,7225,7226,7227,7228,7229,7230,7231,7232, -7233,7234,7235,7236,7237,7238,7239,7240,7241,7242,7243,7244,7245,7246,7247,7248, -7249,7250,7251,7252,7253,7254,7255,7256,7257,7258,7259,7260,7261,7262,7263,7264, -7265,7266,7267,7268,7269,7270,7271,7272,7273,7274,7275,7276,7277,7278,7279,7280, -7281,7282,7283,7284,7285,7286,7287,7288,7289,7290,7291,7292,7293,7294,7295,7296, -7297,7298,7299,7300,7301,7302,7303,7304,7305,7306,7307,7308,7309,7310,7311,7312, -7313,1718,7314,7315,7316,7317,7318,7319,7320,7321,7322,7323,7324,7325,7326,7327, -7328,7329,7330,7331,7332,7333,7334,7335,7336,7337,7338,7339,7340,7341,7342,7343, -7344,7345,7346,7347,7348,7349,7350,7351,7352,7353,7354,7355,7356,7357,7358,7359, -7360,7361,7362,7363,7364,7365,7366,7367,7368,7369,7370,7371,7372,7373,7374,7375, -7376,7377,7378,7379,7380,7381,7382,7383,7384,7385,7386,7387,7388,7389,7390,7391, -7392,7393,7394,7395,7396,7397,7398,7399,7400,7401,7402,7403,7404,7405,7406,7407, -7408,7409,7410,7411,7412,7413,7414,7415,7416,7417,7418,7419,7420,7421,7422,7423, -7424,7425,7426,7427,7428,7429,7430,7431,7432,7433,7434,7435,7436,7437,7438,7439, -7440,7441,7442,7443,7444,7445,7446,7447,7448,7449,7450,7451,7452,7453,7454,7455, -7456,7457,7458,7459,7460,7461,7462,7463,7464,7465,7466,7467,7468,7469,7470,7471, -7472,7473,7474,7475,7476,7477,7478,7479,7480,7481,7482,7483,7484,7485,7486,7487, -7488,7489,7490,7491,7492,7493,7494,7495,7496,7497,7498,7499,7500,7501,7502,7503, -7504,7505,7506,7507,7508,7509,7510,7511,7512,7513,7514,7515,7516,7517,7518,7519, -7520,7521,7522,7523,7524,7525,7526,7527,7528,7529,7530,7531,7532,7533,7534,7535, -7536,7537,7538,7539,7540,7541,7542,7543,7544,7545,7546,7547,7548,7549,7550,7551, -7552,7553,7554,7555,7556,7557,7558,7559,7560,7561,7562,7563,7564,7565,7566,7567, -7568,7569,7570,7571,7572,7573,7574,7575,7576,7577,7578,7579,7580,7581,7582,7583, -7584,7585,7586,7587,7588,7589,7590,7591,7592,7593,7594,7595,7596,7597,7598,7599, -7600,7601,7602,7603,7604,7605,7606,7607,7608,7609,7610,7611,7612,7613,7614,7615, -7616,7617,7618,7619,7620,7621,7622,7623,7624,7625,7626,7627,7628,7629,7630,7631, -7632,7633,7634,7635,7636,7637,7638,7639,7640,7641,7642,7643,7644,7645,7646,7647, -7648,7649,7650,7651,7652,7653,7654,7655,7656,7657,7658,7659,7660,7661,7662,7663, -7664,7665,7666,7667,7668,7669,7670,7671,7672,7673,7674,7675,7676,7677,7678,7679, -7680,7681,7682,7683,7684,7685,7686,7687,7688,7689,7690,7691,7692,7693,7694,7695, -7696,7697,7698,7699,7700,7701,7702,7703,7704,7705,7706,7707,7708,7709,7710,7711, -7712,7713,7714,7715,7716,7717,7718,7719,7720,7721,7722,7723,7724,7725,7726,7727, -7728,7729,7730,7731,7732,7733,7734,7735,7736,7737,7738,7739,7740,7741,7742,7743, -7744,7745,7746,7747,7748,7749,7750,7751,7752,7753,7754,7755,7756,7757,7758,7759, -7760,7761,7762,7763,7764,7765,7766,7767,7768,7769,7770,7771,7772,7773,7774,7775, -7776,7777,7778,7779,7780,7781,7782,7783,7784,7785,7786,7787,7788,7789,7790,7791, -7792,7793,7794,7795,7796,7797,7798,7799,7800,7801,7802,7803,7804,7805,7806,7807, -7808,7809,7810,7811,7812,7813,7814,7815,7816,7817,7818,7819,7820,7821,7822,7823, -7824,7825,7826,7827,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837,7838,7839, -7840,7841,7842,7843,7844,7845,7846,7847,7848,7849,7850,7851,7852,7853,7854,7855, -7856,7857,7858,7859,7860,7861,7862,7863,7864,7865,7866,7867,7868,7869,7870,7871, -7872,7873,7874,7875,7876,7877,7878,7879,7880,7881,7882,7883,7884,7885,7886,7887, -7888,7889,7890,7891,7892,7893,7894,7895,7896,7897,7898,7899,7900,7901,7902,7903, -7904,7905,7906,7907,7908,7909,7910,7911,7912,7913,7914,7915,7916,7917,7918,7919, -7920,7921,7922,7923,7924,7925,7926,7927,7928,7929,7930,7931,7932,7933,7934,7935, -7936,7937,7938,7939,7940,7941,7942,7943,7944,7945,7946,7947,7948,7949,7950,7951, -7952,7953,7954,7955,7956,7957,7958,7959,7960,7961,7962,7963,7964,7965,7966,7967, -7968,7969,7970,7971,7972,7973,7974,7975,7976,7977,7978,7979,7980,7981,7982,7983, -7984,7985,7986,7987,7988,7989,7990,7991,7992,7993,7994,7995,7996,7997,7998,7999, -8000,8001,8002,8003,8004,8005,8006,8007,8008,8009,8010,8011,8012,8013,8014,8015, -8016,8017,8018,8019,8020,8021,8022,8023,8024,8025,8026,8027,8028,8029,8030,8031, -8032,8033,8034,8035,8036,8037,8038,8039,8040,8041,8042,8043,8044,8045,8046,8047, -8048,8049,8050,8051,8052,8053,8054,8055,8056,8057,8058,8059,8060,8061,8062,8063, -8064,8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079, -8080,8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095, -8096,8097,8098,8099,8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110,8111, -8112,8113,8114,8115,8116,8117,8118,8119,8120,8121,8122,8123,8124,8125,8126,8127, -8128,8129,8130,8131,8132,8133,8134,8135,8136,8137,8138,8139,8140,8141,8142,8143, -8144,8145,8146,8147,8148,8149,8150,8151,8152,8153,8154,8155,8156,8157,8158,8159, -8160,8161,8162,8163,8164,8165,8166,8167,8168,8169,8170,8171,8172,8173,8174,8175, -8176,8177,8178,8179,8180,8181,8182,8183,8184,8185,8186,8187,8188,8189,8190,8191, -8192,8193,8194,8195,8196,8197,8198,8199,8200,8201,8202,8203,8204,8205,8206,8207, -8208,8209,8210,8211,8212,8213,8214,8215,8216,8217,8218,8219,8220,8221,8222,8223, -8224,8225,8226,8227,8228,8229,8230,8231,8232,8233,8234,8235,8236,8237,8238,8239, -8240,8241,8242,8243,8244,8245,8246,8247,8248,8249,8250,8251,8252,8253,8254,8255, -8256,8257,8258,8259,8260,8261,8262,8263,8264,8265,8266,8267,8268,8269,8270,8271, -8272,8273,8274,8275,8276,8277,8278,8279,8280,8281,8282,8283,8284,8285,8286,8287, -8288,8289,8290,8291,8292,8293,8294,8295,8296,8297,8298,8299,8300,8301,8302,8303, -8304,8305,8306,8307,8308,8309,8310,8311,8312,8313,8314,8315,8316,8317,8318,8319, -8320,8321,8322,8323,8324,8325,8326,8327,8328,8329,8330,8331,8332,8333,8334,8335, -8336,8337,8338,8339,8340,8341,8342,8343,8344,8345,8346,8347,8348,8349,8350,8351, -8352,8353,8354,8355,8356,8357,8358,8359,8360,8361,8362,8363,8364,8365,8366,8367, -8368,8369,8370,8371,8372,8373,8374,8375,8376,8377,8378,8379,8380,8381,8382,8383, -8384,8385,8386,8387,8388,8389,8390,8391,8392,8393,8394,8395,8396,8397,8398,8399, -8400,8401,8402,8403,8404,8405,8406,8407,8408,8409,8410,8411,8412,8413,8414,8415, -8416,8417,8418,8419,8420,8421,8422,8423,8424,8425,8426,8427,8428,8429,8430,8431, -8432,8433,8434,8435,8436,8437,8438,8439,8440,8441,8442,8443,8444,8445,8446,8447, -8448,8449,8450,8451,8452,8453,8454,8455,8456,8457,8458,8459,8460,8461,8462,8463, -8464,8465,8466,8467,8468,8469,8470,8471,8472,8473,8474,8475,8476,8477,8478,8479, -8480,8481,8482,8483,8484,8485,8486,8487,8488,8489,8490,8491,8492,8493,8494,8495, -8496,8497,8498,8499,8500,8501,8502,8503,8504,8505,8506,8507,8508,8509,8510,8511, -8512,8513,8514,8515,8516,8517,8518,8519,8520,8521,8522,8523,8524,8525,8526,8527, -8528,8529,8530,8531,8532,8533,8534,8535,8536,8537,8538,8539,8540,8541,8542,8543, -8544,8545,8546,8547,8548,8549,8550,8551,8552,8553,8554,8555,8556,8557,8558,8559, -8560,8561,8562,8563,8564,8565,8566,8567,8568,8569,8570,8571,8572,8573,8574,8575, -8576,8577,8578,8579,8580,8581,8582,8583,8584,8585,8586,8587,8588,8589,8590,8591, -8592,8593,8594,8595,8596,8597,8598,8599,8600,8601,8602,8603,8604,8605,8606,8607, -8608,8609,8610,8611,8612,8613,8614,8615,8616,8617,8618,8619,8620,8621,8622,8623, -8624,8625,8626,8627,8628,8629,8630,8631,8632,8633,8634,8635,8636,8637,8638,8639, -8640,8641,8642,8643,8644,8645,8646,8647,8648,8649,8650,8651,8652,8653,8654,8655, -8656,8657,8658,8659,8660,8661,8662,8663,8664,8665,8666,8667,8668,8669,8670,8671, -8672,8673,8674,8675,8676,8677,8678,8679,8680,8681,8682,8683,8684,8685,8686,8687, -8688,8689,8690,8691,8692,8693,8694,8695,8696,8697,8698,8699,8700,8701,8702,8703, -8704,8705,8706,8707,8708,8709,8710,8711,8712,8713,8714,8715,8716,8717,8718,8719, -8720,8721,8722,8723,8724,8725,8726,8727,8728,8729,8730,8731,8732,8733,8734,8735, -8736,8737,8738,8739,8740,8741) +) -# flake8: noqa diff --git a/lib/chardet/euckrprober.py b/lib/chardet/euckrprober.py index 5982a46b606746ad6a613a9e78d44f1ca3fd7069..b01959f0ddf8977f10a5141ac4aa77a71c0399d9 100644 --- a/lib/chardet/euckrprober.py +++ b/lib/chardet/euckrprober.py @@ -28,15 +28,16 @@ from .mbcharsetprober import MultiByteCharSetProber from .codingstatemachine import CodingStateMachine from .chardistribution import EUCKRDistributionAnalysis -from .mbcssm import EUCKRSMModel +from .mbcssm import EUCKR_SM_MODEL class EUCKRProber(MultiByteCharSetProber): def __init__(self): - MultiByteCharSetProber.__init__(self) - self._mCodingSM = CodingStateMachine(EUCKRSMModel) - self._mDistributionAnalyzer = EUCKRDistributionAnalysis() + super(EUCKRProber, self).__init__() + self.coding_sm = CodingStateMachine(EUCKR_SM_MODEL) + self.distribution_analyzer = EUCKRDistributionAnalysis() self.reset() - def get_charset_name(self): + @property + def charset_name(self): return "EUC-KR" diff --git a/lib/chardet/euctwfreq.py b/lib/chardet/euctwfreq.py index 576e7504dca6178504756182934f5bcad58c5cbe..57c989ebd481dec61121eb8d666de638fdbb4c23 100644 --- a/lib/chardet/euctwfreq.py +++ b/lib/chardet/euctwfreq.py @@ -46,383 +46,342 @@ EUCTW_TYPICAL_DISTRIBUTION_RATIO = 0.75 # Char to FreqOrder table , EUCTW_TABLE_SIZE = 8102 -EUCTWCharToFreqOrder = ( - 1,1800,1506, 255,1431, 198, 9, 82, 6,7310, 177, 202,3615,1256,2808, 110, # 2742 -3735, 33,3241, 261, 76, 44,2113, 16,2931,2184,1176, 659,3868, 26,3404,2643, # 2758 -1198,3869,3313,4060, 410,2211, 302, 590, 361,1963, 8, 204, 58,4296,7311,1931, # 2774 - 63,7312,7313, 317,1614, 75, 222, 159,4061,2412,1480,7314,3500,3068, 224,2809, # 2790 -3616, 3, 10,3870,1471, 29,2774,1135,2852,1939, 873, 130,3242,1123, 312,7315, # 2806 -4297,2051, 507, 252, 682,7316, 142,1914, 124, 206,2932, 34,3501,3173, 64, 604, # 2822 -7317,2494,1976,1977, 155,1990, 645, 641,1606,7318,3405, 337, 72, 406,7319, 80, # 2838 - 630, 238,3174,1509, 263, 939,1092,2644, 756,1440,1094,3406, 449, 69,2969, 591, # 2854 - 179,2095, 471, 115,2034,1843, 60, 50,2970, 134, 806,1868, 734,2035,3407, 180, # 2870 - 995,1607, 156, 537,2893, 688,7320, 319,1305, 779,2144, 514,2374, 298,4298, 359, # 2886 -2495, 90,2707,1338, 663, 11, 906,1099,2545, 20,2436, 182, 532,1716,7321, 732, # 2902 -1376,4062,1311,1420,3175, 25,2312,1056, 113, 399, 382,1949, 242,3408,2467, 529, # 2918 -3243, 475,1447,3617,7322, 117, 21, 656, 810,1297,2295,2329,3502,7323, 126,4063, # 2934 - 706, 456, 150, 613,4299, 71,1118,2036,4064, 145,3069, 85, 835, 486,2114,1246, # 2950 -1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,7324,2127,2354, 347,3736, 221, # 2966 -3503,3110,7325,1955,1153,4065, 83, 296,1199,3070, 192, 624, 93,7326, 822,1897, # 2982 -2810,3111, 795,2064, 991,1554,1542,1592, 27, 43,2853, 859, 139,1456, 860,4300, # 2998 - 437, 712,3871, 164,2392,3112, 695, 211,3017,2096, 195,3872,1608,3504,3505,3618, # 3014 -3873, 234, 811,2971,2097,3874,2229,1441,3506,1615,2375, 668,2076,1638, 305, 228, # 3030 -1664,4301, 467, 415,7327, 262,2098,1593, 239, 108, 300, 200,1033, 512,1247,2077, # 3046 -7328,7329,2173,3176,3619,2673, 593, 845,1062,3244, 88,1723,2037,3875,1950, 212, # 3062 - 266, 152, 149, 468,1898,4066,4302, 77, 187,7330,3018, 37, 5,2972,7331,3876, # 3078 -7332,7333, 39,2517,4303,2894,3177,2078, 55, 148, 74,4304, 545, 483,1474,1029, # 3094 -1665, 217,1869,1531,3113,1104,2645,4067, 24, 172,3507, 900,3877,3508,3509,4305, # 3110 - 32,1408,2811,1312, 329, 487,2355,2247,2708, 784,2674, 4,3019,3314,1427,1788, # 3126 - 188, 109, 499,7334,3620,1717,1789, 888,1217,3020,4306,7335,3510,7336,3315,1520, # 3142 -3621,3878, 196,1034, 775,7337,7338, 929,1815, 249, 439, 38,7339,1063,7340, 794, # 3158 -3879,1435,2296, 46, 178,3245,2065,7341,2376,7342, 214,1709,4307, 804, 35, 707, # 3174 - 324,3622,1601,2546, 140, 459,4068,7343,7344,1365, 839, 272, 978,2257,2572,3409, # 3190 -2128,1363,3623,1423, 697, 100,3071, 48, 70,1231, 495,3114,2193,7345,1294,7346, # 3206 -2079, 462, 586,1042,3246, 853, 256, 988, 185,2377,3410,1698, 434,1084,7347,3411, # 3222 - 314,2615,2775,4308,2330,2331, 569,2280, 637,1816,2518, 757,1162,1878,1616,3412, # 3238 - 287,1577,2115, 768,4309,1671,2854,3511,2519,1321,3737, 909,2413,7348,4069, 933, # 3254 -3738,7349,2052,2356,1222,4310, 765,2414,1322, 786,4311,7350,1919,1462,1677,2895, # 3270 -1699,7351,4312,1424,2437,3115,3624,2590,3316,1774,1940,3413,3880,4070, 309,1369, # 3286 -1130,2812, 364,2230,1653,1299,3881,3512,3882,3883,2646, 525,1085,3021, 902,2000, # 3302 -1475, 964,4313, 421,1844,1415,1057,2281, 940,1364,3116, 376,4314,4315,1381, 7, # 3318 -2520, 983,2378, 336,1710,2675,1845, 321,3414, 559,1131,3022,2742,1808,1132,1313, # 3334 - 265,1481,1857,7352, 352,1203,2813,3247, 167,1089, 420,2814, 776, 792,1724,3513, # 3350 -4071,2438,3248,7353,4072,7354, 446, 229, 333,2743, 901,3739,1200,1557,4316,2647, # 3366 -1920, 395,2744,2676,3740,4073,1835, 125, 916,3178,2616,4317,7355,7356,3741,7357, # 3382 -7358,7359,4318,3117,3625,1133,2547,1757,3415,1510,2313,1409,3514,7360,2145, 438, # 3398 -2591,2896,2379,3317,1068, 958,3023, 461, 311,2855,2677,4074,1915,3179,4075,1978, # 3414 - 383, 750,2745,2617,4076, 274, 539, 385,1278,1442,7361,1154,1964, 384, 561, 210, # 3430 - 98,1295,2548,3515,7362,1711,2415,1482,3416,3884,2897,1257, 129,7363,3742, 642, # 3446 - 523,2776,2777,2648,7364, 141,2231,1333, 68, 176, 441, 876, 907,4077, 603,2592, # 3462 - 710, 171,3417, 404, 549, 18,3118,2393,1410,3626,1666,7365,3516,4319,2898,4320, # 3478 -7366,2973, 368,7367, 146, 366, 99, 871,3627,1543, 748, 807,1586,1185, 22,2258, # 3494 - 379,3743,3180,7368,3181, 505,1941,2618,1991,1382,2314,7369, 380,2357, 218, 702, # 3510 -1817,1248,3418,3024,3517,3318,3249,7370,2974,3628, 930,3250,3744,7371, 59,7372, # 3526 - 585, 601,4078, 497,3419,1112,1314,4321,1801,7373,1223,1472,2174,7374, 749,1836, # 3542 - 690,1899,3745,1772,3885,1476, 429,1043,1790,2232,2116, 917,4079, 447,1086,1629, # 3558 -7375, 556,7376,7377,2020,1654, 844,1090, 105, 550, 966,1758,2815,1008,1782, 686, # 3574 -1095,7378,2282, 793,1602,7379,3518,2593,4322,4080,2933,2297,4323,3746, 980,2496, # 3590 - 544, 353, 527,4324, 908,2678,2899,7380, 381,2619,1942,1348,7381,1341,1252, 560, # 3606 -3072,7382,3420,2856,7383,2053, 973, 886,2080, 143,4325,7384,7385, 157,3886, 496, # 3622 -4081, 57, 840, 540,2038,4326,4327,3421,2117,1445, 970,2259,1748,1965,2081,4082, # 3638 -3119,1234,1775,3251,2816,3629, 773,1206,2129,1066,2039,1326,3887,1738,1725,4083, # 3654 - 279,3120, 51,1544,2594, 423,1578,2130,2066, 173,4328,1879,7386,7387,1583, 264, # 3670 - 610,3630,4329,2439, 280, 154,7388,7389,7390,1739, 338,1282,3073, 693,2857,1411, # 3686 -1074,3747,2440,7391,4330,7392,7393,1240, 952,2394,7394,2900,1538,2679, 685,1483, # 3702 -4084,2468,1436, 953,4085,2054,4331, 671,2395, 79,4086,2441,3252, 608, 567,2680, # 3718 -3422,4087,4088,1691, 393,1261,1791,2396,7395,4332,7396,7397,7398,7399,1383,1672, # 3734 -3748,3182,1464, 522,1119, 661,1150, 216, 675,4333,3888,1432,3519, 609,4334,2681, # 3750 -2397,7400,7401,7402,4089,3025, 0,7403,2469, 315, 231,2442, 301,3319,4335,2380, # 3766 -7404, 233,4090,3631,1818,4336,4337,7405, 96,1776,1315,2082,7406, 257,7407,1809, # 3782 -3632,2709,1139,1819,4091,2021,1124,2163,2778,1777,2649,7408,3074, 363,1655,3183, # 3798 -7409,2975,7410,7411,7412,3889,1567,3890, 718, 103,3184, 849,1443, 341,3320,2934, # 3814 -1484,7413,1712, 127, 67, 339,4092,2398, 679,1412, 821,7414,7415, 834, 738, 351, # 3830 -2976,2146, 846, 235,1497,1880, 418,1992,3749,2710, 186,1100,2147,2746,3520,1545, # 3846 -1355,2935,2858,1377, 583,3891,4093,2573,2977,7416,1298,3633,1078,2549,3634,2358, # 3862 - 78,3750,3751, 267,1289,2099,2001,1594,4094, 348, 369,1274,2194,2175,1837,4338, # 3878 -1820,2817,3635,2747,2283,2002,4339,2936,2748, 144,3321, 882,4340,3892,2749,3423, # 3894 -4341,2901,7417,4095,1726, 320,7418,3893,3026, 788,2978,7419,2818,1773,1327,2859, # 3910 -3894,2819,7420,1306,4342,2003,1700,3752,3521,2359,2650, 787,2022, 506, 824,3636, # 3926 - 534, 323,4343,1044,3322,2023,1900, 946,3424,7421,1778,1500,1678,7422,1881,4344, # 3942 - 165, 243,4345,3637,2521, 123, 683,4096, 764,4346, 36,3895,1792, 589,2902, 816, # 3958 - 626,1667,3027,2233,1639,1555,1622,3753,3896,7423,3897,2860,1370,1228,1932, 891, # 3974 -2083,2903, 304,4097,7424, 292,2979,2711,3522, 691,2100,4098,1115,4347, 118, 662, # 3990 -7425, 611,1156, 854,2381,1316,2861, 2, 386, 515,2904,7426,7427,3253, 868,2234, # 4006 -1486, 855,2651, 785,2212,3028,7428,1040,3185,3523,7429,3121, 448,7430,1525,7431, # 4022 -2164,4348,7432,3754,7433,4099,2820,3524,3122, 503, 818,3898,3123,1568, 814, 676, # 4038 -1444, 306,1749,7434,3755,1416,1030, 197,1428, 805,2821,1501,4349,7435,7436,7437, # 4054 -1993,7438,4350,7439,7440,2195, 13,2779,3638,2980,3124,1229,1916,7441,3756,2131, # 4070 -7442,4100,4351,2399,3525,7443,2213,1511,1727,1120,7444,7445, 646,3757,2443, 307, # 4086 -7446,7447,1595,3186,7448,7449,7450,3639,1113,1356,3899,1465,2522,2523,7451, 519, # 4102 -7452, 128,2132, 92,2284,1979,7453,3900,1512, 342,3125,2196,7454,2780,2214,1980, # 4118 -3323,7455, 290,1656,1317, 789, 827,2360,7456,3758,4352, 562, 581,3901,7457, 401, # 4134 -4353,2248, 94,4354,1399,2781,7458,1463,2024,4355,3187,1943,7459, 828,1105,4101, # 4150 -1262,1394,7460,4102, 605,4356,7461,1783,2862,7462,2822, 819,2101, 578,2197,2937, # 4166 -7463,1502, 436,3254,4103,3255,2823,3902,2905,3425,3426,7464,2712,2315,7465,7466, # 4182 -2332,2067, 23,4357, 193, 826,3759,2102, 699,1630,4104,3075, 390,1793,1064,3526, # 4198 -7467,1579,3076,3077,1400,7468,4105,1838,1640,2863,7469,4358,4359, 137,4106, 598, # 4214 -3078,1966, 780, 104, 974,2938,7470, 278, 899, 253, 402, 572, 504, 493,1339,7471, # 4230 -3903,1275,4360,2574,2550,7472,3640,3029,3079,2249, 565,1334,2713, 863, 41,7473, # 4246 -7474,4361,7475,1657,2333, 19, 463,2750,4107, 606,7476,2981,3256,1087,2084,1323, # 4262 -2652,2982,7477,1631,1623,1750,4108,2682,7478,2864, 791,2714,2653,2334, 232,2416, # 4278 -7479,2983,1498,7480,2654,2620, 755,1366,3641,3257,3126,2025,1609, 119,1917,3427, # 4294 - 862,1026,4109,7481,3904,3760,4362,3905,4363,2260,1951,2470,7482,1125, 817,4110, # 4310 -4111,3906,1513,1766,2040,1487,4112,3030,3258,2824,3761,3127,7483,7484,1507,7485, # 4326 -2683, 733, 40,1632,1106,2865, 345,4113, 841,2524, 230,4364,2984,1846,3259,3428, # 4342 -7486,1263, 986,3429,7487, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562,3907, # 4358 -3908,2939, 967,2751,2655,1349, 592,2133,1692,3324,2985,1994,4114,1679,3909,1901, # 4374 -2185,7488, 739,3642,2715,1296,1290,7489,4115,2198,2199,1921,1563,2595,2551,1870, # 4390 -2752,2986,7490, 435,7491, 343,1108, 596, 17,1751,4365,2235,3430,3643,7492,4366, # 4406 - 294,3527,2940,1693, 477, 979, 281,2041,3528, 643,2042,3644,2621,2782,2261,1031, # 4422 -2335,2134,2298,3529,4367, 367,1249,2552,7493,3530,7494,4368,1283,3325,2004, 240, # 4438 -1762,3326,4369,4370, 836,1069,3128, 474,7495,2148,2525, 268,3531,7496,3188,1521, # 4454 -1284,7497,1658,1546,4116,7498,3532,3533,7499,4117,3327,2684,1685,4118, 961,1673, # 4470 -2622, 190,2005,2200,3762,4371,4372,7500, 570,2497,3645,1490,7501,4373,2623,3260, # 4486 -1956,4374, 584,1514, 396,1045,1944,7502,4375,1967,2444,7503,7504,4376,3910, 619, # 4502 -7505,3129,3261, 215,2006,2783,2553,3189,4377,3190,4378, 763,4119,3763,4379,7506, # 4518 -7507,1957,1767,2941,3328,3646,1174, 452,1477,4380,3329,3130,7508,2825,1253,2382, # 4534 -2186,1091,2285,4120, 492,7509, 638,1169,1824,2135,1752,3911, 648, 926,1021,1324, # 4550 -4381, 520,4382, 997, 847,1007, 892,4383,3764,2262,1871,3647,7510,2400,1784,4384, # 4566 -1952,2942,3080,3191,1728,4121,2043,3648,4385,2007,1701,3131,1551, 30,2263,4122, # 4582 -7511,2026,4386,3534,7512, 501,7513,4123, 594,3431,2165,1821,3535,3432,3536,3192, # 4598 - 829,2826,4124,7514,1680,3132,1225,4125,7515,3262,4387,4126,3133,2336,7516,4388, # 4614 -4127,7517,3912,3913,7518,1847,2383,2596,3330,7519,4389, 374,3914, 652,4128,4129, # 4630 - 375,1140, 798,7520,7521,7522,2361,4390,2264, 546,1659, 138,3031,2445,4391,7523, # 4646 -2250, 612,1848, 910, 796,3765,1740,1371, 825,3766,3767,7524,2906,2554,7525, 692, # 4662 - 444,3032,2624, 801,4392,4130,7526,1491, 244,1053,3033,4131,4132, 340,7527,3915, # 4678 -1041,2987, 293,1168, 87,1357,7528,1539, 959,7529,2236, 721, 694,4133,3768, 219, # 4694 -1478, 644,1417,3331,2656,1413,1401,1335,1389,3916,7530,7531,2988,2362,3134,1825, # 4710 - 730,1515, 184,2827, 66,4393,7532,1660,2943, 246,3332, 378,1457, 226,3433, 975, # 4726 -3917,2944,1264,3537, 674, 696,7533, 163,7534,1141,2417,2166, 713,3538,3333,4394, # 4742 -3918,7535,7536,1186, 15,7537,1079,1070,7538,1522,3193,3539, 276,1050,2716, 758, # 4758 -1126, 653,2945,3263,7539,2337, 889,3540,3919,3081,2989, 903,1250,4395,3920,3434, # 4774 -3541,1342,1681,1718, 766,3264, 286, 89,2946,3649,7540,1713,7541,2597,3334,2990, # 4790 -7542,2947,2215,3194,2866,7543,4396,2498,2526, 181, 387,1075,3921, 731,2187,3335, # 4806 -7544,3265, 310, 313,3435,2299, 770,4134, 54,3034, 189,4397,3082,3769,3922,7545, # 4822 -1230,1617,1849, 355,3542,4135,4398,3336, 111,4136,3650,1350,3135,3436,3035,4137, # 4838 -2149,3266,3543,7546,2784,3923,3924,2991, 722,2008,7547,1071, 247,1207,2338,2471, # 4854 -1378,4399,2009, 864,1437,1214,4400, 373,3770,1142,2216, 667,4401, 442,2753,2555, # 4870 -3771,3925,1968,4138,3267,1839, 837, 170,1107, 934,1336,1882,7548,7549,2118,4139, # 4886 -2828, 743,1569,7550,4402,4140, 582,2384,1418,3437,7551,1802,7552, 357,1395,1729, # 4902 -3651,3268,2418,1564,2237,7553,3083,3772,1633,4403,1114,2085,4141,1532,7554, 482, # 4918 -2446,4404,7555,7556,1492, 833,1466,7557,2717,3544,1641,2829,7558,1526,1272,3652, # 4934 -4142,1686,1794, 416,2556,1902,1953,1803,7559,3773,2785,3774,1159,2316,7560,2867, # 4950 -4405,1610,1584,3036,2419,2754, 443,3269,1163,3136,7561,7562,3926,7563,4143,2499, # 4966 -3037,4406,3927,3137,2103,1647,3545,2010,1872,4144,7564,4145, 431,3438,7565, 250, # 4982 - 97, 81,4146,7566,1648,1850,1558, 160, 848,7567, 866, 740,1694,7568,2201,2830, # 4998 -3195,4147,4407,3653,1687, 950,2472, 426, 469,3196,3654,3655,3928,7569,7570,1188, # 5014 - 424,1995, 861,3546,4148,3775,2202,2685, 168,1235,3547,4149,7571,2086,1674,4408, # 5030 -3337,3270, 220,2557,1009,7572,3776, 670,2992, 332,1208, 717,7573,7574,3548,2447, # 5046 -3929,3338,7575, 513,7576,1209,2868,3339,3138,4409,1080,7577,7578,7579,7580,2527, # 5062 -3656,3549, 815,1587,3930,3931,7581,3550,3439,3777,1254,4410,1328,3038,1390,3932, # 5078 -1741,3933,3778,3934,7582, 236,3779,2448,3271,7583,7584,3657,3780,1273,3781,4411, # 5094 -7585, 308,7586,4412, 245,4413,1851,2473,1307,2575, 430, 715,2136,2449,7587, 270, # 5110 - 199,2869,3935,7588,3551,2718,1753, 761,1754, 725,1661,1840,4414,3440,3658,7589, # 5126 -7590, 587, 14,3272, 227,2598, 326, 480,2265, 943,2755,3552, 291, 650,1883,7591, # 5142 -1702,1226, 102,1547, 62,3441, 904,4415,3442,1164,4150,7592,7593,1224,1548,2756, # 5158 - 391, 498,1493,7594,1386,1419,7595,2055,1177,4416, 813, 880,1081,2363, 566,1145, # 5174 -4417,2286,1001,1035,2558,2599,2238, 394,1286,7596,7597,2068,7598, 86,1494,1730, # 5190 -3936, 491,1588, 745, 897,2948, 843,3340,3937,2757,2870,3273,1768, 998,2217,2069, # 5206 - 397,1826,1195,1969,3659,2993,3341, 284,7599,3782,2500,2137,2119,1903,7600,3938, # 5222 -2150,3939,4151,1036,3443,1904, 114,2559,4152, 209,1527,7601,7602,2949,2831,2625, # 5238 -2385,2719,3139, 812,2560,7603,3274,7604,1559, 737,1884,3660,1210, 885, 28,2686, # 5254 -3553,3783,7605,4153,1004,1779,4418,7606, 346,1981,2218,2687,4419,3784,1742, 797, # 5270 -1642,3940,1933,1072,1384,2151, 896,3941,3275,3661,3197,2871,3554,7607,2561,1958, # 5286 -4420,2450,1785,7608,7609,7610,3942,4154,1005,1308,3662,4155,2720,4421,4422,1528, # 5302 -2600, 161,1178,4156,1982, 987,4423,1101,4157, 631,3943,1157,3198,2420,1343,1241, # 5318 -1016,2239,2562, 372, 877,2339,2501,1160, 555,1934, 911,3944,7611, 466,1170, 169, # 5334 -1051,2907,2688,3663,2474,2994,1182,2011,2563,1251,2626,7612, 992,2340,3444,1540, # 5350 -2721,1201,2070,2401,1996,2475,7613,4424, 528,1922,2188,1503,1873,1570,2364,3342, # 5366 -3276,7614, 557,1073,7615,1827,3445,2087,2266,3140,3039,3084, 767,3085,2786,4425, # 5382 -1006,4158,4426,2341,1267,2176,3664,3199, 778,3945,3200,2722,1597,2657,7616,4427, # 5398 -7617,3446,7618,7619,7620,3277,2689,1433,3278, 131, 95,1504,3946, 723,4159,3141, # 5414 -1841,3555,2758,2189,3947,2027,2104,3665,7621,2995,3948,1218,7622,3343,3201,3949, # 5430 -4160,2576, 248,1634,3785, 912,7623,2832,3666,3040,3786, 654, 53,7624,2996,7625, # 5446 -1688,4428, 777,3447,1032,3950,1425,7626, 191, 820,2120,2833, 971,4429, 931,3202, # 5462 - 135, 664, 783,3787,1997, 772,2908,1935,3951,3788,4430,2909,3203, 282,2723, 640, # 5478 -1372,3448,1127, 922, 325,3344,7627,7628, 711,2044,7629,7630,3952,2219,2787,1936, # 5494 -3953,3345,2220,2251,3789,2300,7631,4431,3790,1258,3279,3954,3204,2138,2950,3955, # 5510 -3956,7632,2221, 258,3205,4432, 101,1227,7633,3280,1755,7634,1391,3281,7635,2910, # 5526 -2056, 893,7636,7637,7638,1402,4161,2342,7639,7640,3206,3556,7641,7642, 878,1325, # 5542 -1780,2788,4433, 259,1385,2577, 744,1183,2267,4434,7643,3957,2502,7644, 684,1024, # 5558 -4162,7645, 472,3557,3449,1165,3282,3958,3959, 322,2152, 881, 455,1695,1152,1340, # 5574 - 660, 554,2153,4435,1058,4436,4163, 830,1065,3346,3960,4437,1923,7646,1703,1918, # 5590 -7647, 932,2268, 122,7648,4438, 947, 677,7649,3791,2627, 297,1905,1924,2269,4439, # 5606 -2317,3283,7650,7651,4164,7652,4165, 84,4166, 112, 989,7653, 547,1059,3961, 701, # 5622 -3558,1019,7654,4167,7655,3450, 942, 639, 457,2301,2451, 993,2951, 407, 851, 494, # 5638 -4440,3347, 927,7656,1237,7657,2421,3348, 573,4168, 680, 921,2911,1279,1874, 285, # 5654 - 790,1448,1983, 719,2167,7658,7659,4441,3962,3963,1649,7660,1541, 563,7661,1077, # 5670 -7662,3349,3041,3451, 511,2997,3964,3965,3667,3966,1268,2564,3350,3207,4442,4443, # 5686 -7663, 535,1048,1276,1189,2912,2028,3142,1438,1373,2834,2952,1134,2012,7664,4169, # 5702 -1238,2578,3086,1259,7665, 700,7666,2953,3143,3668,4170,7667,4171,1146,1875,1906, # 5718 -4444,2601,3967, 781,2422, 132,1589, 203, 147, 273,2789,2402, 898,1786,2154,3968, # 5734 -3969,7668,3792,2790,7669,7670,4445,4446,7671,3208,7672,1635,3793, 965,7673,1804, # 5750 -2690,1516,3559,1121,1082,1329,3284,3970,1449,3794, 65,1128,2835,2913,2759,1590, # 5766 -3795,7674,7675, 12,2658, 45, 976,2579,3144,4447, 517,2528,1013,1037,3209,7676, # 5782 -3796,2836,7677,3797,7678,3452,7679,2602, 614,1998,2318,3798,3087,2724,2628,7680, # 5798 -2580,4172, 599,1269,7681,1810,3669,7682,2691,3088, 759,1060, 489,1805,3351,3285, # 5814 -1358,7683,7684,2386,1387,1215,2629,2252, 490,7685,7686,4173,1759,2387,2343,7687, # 5830 -4448,3799,1907,3971,2630,1806,3210,4449,3453,3286,2760,2344, 874,7688,7689,3454, # 5846 -3670,1858, 91,2914,3671,3042,3800,4450,7690,3145,3972,2659,7691,3455,1202,1403, # 5862 -3801,2954,2529,1517,2503,4451,3456,2504,7692,4452,7693,2692,1885,1495,1731,3973, # 5878 -2365,4453,7694,2029,7695,7696,3974,2693,1216, 237,2581,4174,2319,3975,3802,4454, # 5894 -4455,2694,3560,3457, 445,4456,7697,7698,7699,7700,2761, 61,3976,3672,1822,3977, # 5910 -7701, 687,2045, 935, 925, 405,2660, 703,1096,1859,2725,4457,3978,1876,1367,2695, # 5926 -3352, 918,2105,1781,2476, 334,3287,1611,1093,4458, 564,3146,3458,3673,3353, 945, # 5942 -2631,2057,4459,7702,1925, 872,4175,7703,3459,2696,3089, 349,4176,3674,3979,4460, # 5958 -3803,4177,3675,2155,3980,4461,4462,4178,4463,2403,2046, 782,3981, 400, 251,4179, # 5974 -1624,7704,7705, 277,3676, 299,1265, 476,1191,3804,2121,4180,4181,1109, 205,7706, # 5990 -2582,1000,2156,3561,1860,7707,7708,7709,4464,7710,4465,2565, 107,2477,2157,3982, # 6006 -3460,3147,7711,1533, 541,1301, 158, 753,4182,2872,3562,7712,1696, 370,1088,4183, # 6022 -4466,3563, 579, 327, 440, 162,2240, 269,1937,1374,3461, 968,3043, 56,1396,3090, # 6038 -2106,3288,3354,7713,1926,2158,4467,2998,7714,3564,7715,7716,3677,4468,2478,7717, # 6054 -2791,7718,1650,4469,7719,2603,7720,7721,3983,2661,3355,1149,3356,3984,3805,3985, # 6070 -7722,1076, 49,7723, 951,3211,3289,3290, 450,2837, 920,7724,1811,2792,2366,4184, # 6086 -1908,1138,2367,3806,3462,7725,3212,4470,1909,1147,1518,2423,4471,3807,7726,4472, # 6102 -2388,2604, 260,1795,3213,7727,7728,3808,3291, 708,7729,3565,1704,7730,3566,1351, # 6118 -1618,3357,2999,1886, 944,4185,3358,4186,3044,3359,4187,7731,3678, 422, 413,1714, # 6134 -3292, 500,2058,2345,4188,2479,7732,1344,1910, 954,7733,1668,7734,7735,3986,2404, # 6150 -4189,3567,3809,4190,7736,2302,1318,2505,3091, 133,3092,2873,4473, 629, 31,2838, # 6166 -2697,3810,4474, 850, 949,4475,3987,2955,1732,2088,4191,1496,1852,7737,3988, 620, # 6182 -3214, 981,1242,3679,3360,1619,3680,1643,3293,2139,2452,1970,1719,3463,2168,7738, # 6198 -3215,7739,7740,3361,1828,7741,1277,4476,1565,2047,7742,1636,3568,3093,7743, 869, # 6214 -2839, 655,3811,3812,3094,3989,3000,3813,1310,3569,4477,7744,7745,7746,1733, 558, # 6230 -4478,3681, 335,1549,3045,1756,4192,3682,1945,3464,1829,1291,1192, 470,2726,2107, # 6246 -2793, 913,1054,3990,7747,1027,7748,3046,3991,4479, 982,2662,3362,3148,3465,3216, # 6262 -3217,1946,2794,7749, 571,4480,7750,1830,7751,3570,2583,1523,2424,7752,2089, 984, # 6278 -4481,3683,1959,7753,3684, 852, 923,2795,3466,3685, 969,1519, 999,2048,2320,1705, # 6294 -7754,3095, 615,1662, 151, 597,3992,2405,2321,1049, 275,4482,3686,4193, 568,3687, # 6310 -3571,2480,4194,3688,7755,2425,2270, 409,3218,7756,1566,2874,3467,1002, 769,2840, # 6326 - 194,2090,3149,3689,2222,3294,4195, 628,1505,7757,7758,1763,2177,3001,3993, 521, # 6342 -1161,2584,1787,2203,2406,4483,3994,1625,4196,4197, 412, 42,3096, 464,7759,2632, # 6358 -4484,3363,1760,1571,2875,3468,2530,1219,2204,3814,2633,2140,2368,4485,4486,3295, # 6374 -1651,3364,3572,7760,7761,3573,2481,3469,7762,3690,7763,7764,2271,2091, 460,7765, # 6390 -4487,7766,3002, 962, 588,3574, 289,3219,2634,1116, 52,7767,3047,1796,7768,7769, # 6406 -7770,1467,7771,1598,1143,3691,4198,1984,1734,1067,4488,1280,3365, 465,4489,1572, # 6422 - 510,7772,1927,2241,1812,1644,3575,7773,4490,3692,7774,7775,2663,1573,1534,7776, # 6438 -7777,4199, 536,1807,1761,3470,3815,3150,2635,7778,7779,7780,4491,3471,2915,1911, # 6454 -2796,7781,3296,1122, 377,3220,7782, 360,7783,7784,4200,1529, 551,7785,2059,3693, # 6470 -1769,2426,7786,2916,4201,3297,3097,2322,2108,2030,4492,1404, 136,1468,1479, 672, # 6486 -1171,3221,2303, 271,3151,7787,2762,7788,2049, 678,2727, 865,1947,4493,7789,2013, # 6502 -3995,2956,7790,2728,2223,1397,3048,3694,4494,4495,1735,2917,3366,3576,7791,3816, # 6518 - 509,2841,2453,2876,3817,7792,7793,3152,3153,4496,4202,2531,4497,2304,1166,1010, # 6534 - 552, 681,1887,7794,7795,2957,2958,3996,1287,1596,1861,3154, 358, 453, 736, 175, # 6550 - 478,1117, 905,1167,1097,7796,1853,1530,7797,1706,7798,2178,3472,2287,3695,3473, # 6566 -3577,4203,2092,4204,7799,3367,1193,2482,4205,1458,2190,2205,1862,1888,1421,3298, # 6582 -2918,3049,2179,3474, 595,2122,7800,3997,7801,7802,4206,1707,2636, 223,3696,1359, # 6598 - 751,3098, 183,3475,7803,2797,3003, 419,2369, 633, 704,3818,2389, 241,7804,7805, # 6614 -7806, 838,3004,3697,2272,2763,2454,3819,1938,2050,3998,1309,3099,2242,1181,7807, # 6630 -1136,2206,3820,2370,1446,4207,2305,4498,7808,7809,4208,1055,2605, 484,3698,7810, # 6646 -3999, 625,4209,2273,3368,1499,4210,4000,7811,4001,4211,3222,2274,2275,3476,7812, # 6662 -7813,2764, 808,2606,3699,3369,4002,4212,3100,2532, 526,3370,3821,4213, 955,7814, # 6678 -1620,4214,2637,2427,7815,1429,3700,1669,1831, 994, 928,7816,3578,1260,7817,7818, # 6694 -7819,1948,2288, 741,2919,1626,4215,2729,2455, 867,1184, 362,3371,1392,7820,7821, # 6710 -4003,4216,1770,1736,3223,2920,4499,4500,1928,2698,1459,1158,7822,3050,3372,2877, # 6726 -1292,1929,2506,2842,3701,1985,1187,2071,2014,2607,4217,7823,2566,2507,2169,3702, # 6742 -2483,3299,7824,3703,4501,7825,7826, 666,1003,3005,1022,3579,4218,7827,4502,1813, # 6758 -2253, 574,3822,1603, 295,1535, 705,3823,4219, 283, 858, 417,7828,7829,3224,4503, # 6774 -4504,3051,1220,1889,1046,2276,2456,4004,1393,1599, 689,2567, 388,4220,7830,2484, # 6790 - 802,7831,2798,3824,2060,1405,2254,7832,4505,3825,2109,1052,1345,3225,1585,7833, # 6806 - 809,7834,7835,7836, 575,2730,3477, 956,1552,1469,1144,2323,7837,2324,1560,2457, # 6822 -3580,3226,4005, 616,2207,3155,2180,2289,7838,1832,7839,3478,4506,7840,1319,3704, # 6838 -3705,1211,3581,1023,3227,1293,2799,7841,7842,7843,3826, 607,2306,3827, 762,2878, # 6854 -1439,4221,1360,7844,1485,3052,7845,4507,1038,4222,1450,2061,2638,4223,1379,4508, # 6870 -2585,7846,7847,4224,1352,1414,2325,2921,1172,7848,7849,3828,3829,7850,1797,1451, # 6886 -7851,7852,7853,7854,2922,4006,4007,2485,2346, 411,4008,4009,3582,3300,3101,4509, # 6902 -1561,2664,1452,4010,1375,7855,7856, 47,2959, 316,7857,1406,1591,2923,3156,7858, # 6918 -1025,2141,3102,3157, 354,2731, 884,2224,4225,2407, 508,3706, 726,3583, 996,2428, # 6934 -3584, 729,7859, 392,2191,1453,4011,4510,3707,7860,7861,2458,3585,2608,1675,2800, # 6950 - 919,2347,2960,2348,1270,4511,4012, 73,7862,7863, 647,7864,3228,2843,2255,1550, # 6966 -1346,3006,7865,1332, 883,3479,7866,7867,7868,7869,3301,2765,7870,1212, 831,1347, # 6982 -4226,4512,2326,3830,1863,3053, 720,3831,4513,4514,3832,7871,4227,7872,7873,4515, # 6998 -7874,7875,1798,4516,3708,2609,4517,3586,1645,2371,7876,7877,2924, 669,2208,2665, # 7014 -2429,7878,2879,7879,7880,1028,3229,7881,4228,2408,7882,2256,1353,7883,7884,4518, # 7030 -3158, 518,7885,4013,7886,4229,1960,7887,2142,4230,7888,7889,3007,2349,2350,3833, # 7046 - 516,1833,1454,4014,2699,4231,4519,2225,2610,1971,1129,3587,7890,2766,7891,2961, # 7062 -1422, 577,1470,3008,1524,3373,7892,7893, 432,4232,3054,3480,7894,2586,1455,2508, # 7078 -2226,1972,1175,7895,1020,2732,4015,3481,4520,7896,2733,7897,1743,1361,3055,3482, # 7094 -2639,4016,4233,4521,2290, 895, 924,4234,2170, 331,2243,3056, 166,1627,3057,1098, # 7110 -7898,1232,2880,2227,3374,4522, 657, 403,1196,2372, 542,3709,3375,1600,4235,3483, # 7126 -7899,4523,2767,3230, 576, 530,1362,7900,4524,2533,2666,3710,4017,7901, 842,3834, # 7142 -7902,2801,2031,1014,4018, 213,2700,3376, 665, 621,4236,7903,3711,2925,2430,7904, # 7158 -2431,3302,3588,3377,7905,4237,2534,4238,4525,3589,1682,4239,3484,1380,7906, 724, # 7174 -2277, 600,1670,7907,1337,1233,4526,3103,2244,7908,1621,4527,7909, 651,4240,7910, # 7190 -1612,4241,2611,7911,2844,7912,2734,2307,3058,7913, 716,2459,3059, 174,1255,2701, # 7206 -4019,3590, 548,1320,1398, 728,4020,1574,7914,1890,1197,3060,4021,7915,3061,3062, # 7222 -3712,3591,3713, 747,7916, 635,4242,4528,7917,7918,7919,4243,7920,7921,4529,7922, # 7238 -3378,4530,2432, 451,7923,3714,2535,2072,4244,2735,4245,4022,7924,1764,4531,7925, # 7254 -4246, 350,7926,2278,2390,2486,7927,4247,4023,2245,1434,4024, 488,4532, 458,4248, # 7270 -4025,3715, 771,1330,2391,3835,2568,3159,2159,2409,1553,2667,3160,4249,7928,2487, # 7286 -2881,2612,1720,2702,4250,3379,4533,7929,2536,4251,7930,3231,4252,2768,7931,2015, # 7302 -2736,7932,1155,1017,3716,3836,7933,3303,2308, 201,1864,4253,1430,7934,4026,7935, # 7318 -7936,7937,7938,7939,4254,1604,7940, 414,1865, 371,2587,4534,4535,3485,2016,3104, # 7334 -4536,1708, 960,4255, 887, 389,2171,1536,1663,1721,7941,2228,4027,2351,2926,1580, # 7350 -7942,7943,7944,1744,7945,2537,4537,4538,7946,4539,7947,2073,7948,7949,3592,3380, # 7366 -2882,4256,7950,4257,2640,3381,2802, 673,2703,2460, 709,3486,4028,3593,4258,7951, # 7382 -1148, 502, 634,7952,7953,1204,4540,3594,1575,4541,2613,3717,7954,3718,3105, 948, # 7398 -3232, 121,1745,3837,1110,7955,4259,3063,2509,3009,4029,3719,1151,1771,3838,1488, # 7414 -4030,1986,7956,2433,3487,7957,7958,2093,7959,4260,3839,1213,1407,2803, 531,2737, # 7430 -2538,3233,1011,1537,7960,2769,4261,3106,1061,7961,3720,3721,1866,2883,7962,2017, # 7446 - 120,4262,4263,2062,3595,3234,2309,3840,2668,3382,1954,4542,7963,7964,3488,1047, # 7462 -2704,1266,7965,1368,4543,2845, 649,3383,3841,2539,2738,1102,2846,2669,7966,7967, # 7478 -1999,7968,1111,3596,2962,7969,2488,3842,3597,2804,1854,3384,3722,7970,7971,3385, # 7494 -2410,2884,3304,3235,3598,7972,2569,7973,3599,2805,4031,1460, 856,7974,3600,7975, # 7510 -2885,2963,7976,2886,3843,7977,4264, 632,2510, 875,3844,1697,3845,2291,7978,7979, # 7526 -4544,3010,1239, 580,4545,4265,7980, 914, 936,2074,1190,4032,1039,2123,7981,7982, # 7542 -7983,3386,1473,7984,1354,4266,3846,7985,2172,3064,4033, 915,3305,4267,4268,3306, # 7558 -1605,1834,7986,2739, 398,3601,4269,3847,4034, 328,1912,2847,4035,3848,1331,4270, # 7574 -3011, 937,4271,7987,3602,4036,4037,3387,2160,4546,3388, 524, 742, 538,3065,1012, # 7590 -7988,7989,3849,2461,7990, 658,1103, 225,3850,7991,7992,4547,7993,4548,7994,3236, # 7606 -1243,7995,4038, 963,2246,4549,7996,2705,3603,3161,7997,7998,2588,2327,7999,4550, # 7622 -8000,8001,8002,3489,3307, 957,3389,2540,2032,1930,2927,2462, 870,2018,3604,1746, # 7638 -2770,2771,2434,2463,8003,3851,8004,3723,3107,3724,3490,3390,3725,8005,1179,3066, # 7654 -8006,3162,2373,4272,3726,2541,3163,3108,2740,4039,8007,3391,1556,2542,2292, 977, # 7670 -2887,2033,4040,1205,3392,8008,1765,3393,3164,2124,1271,1689, 714,4551,3491,8009, # 7686 -2328,3852, 533,4273,3605,2181, 617,8010,2464,3308,3492,2310,8011,8012,3165,8013, # 7702 -8014,3853,1987, 618, 427,2641,3493,3394,8015,8016,1244,1690,8017,2806,4274,4552, # 7718 -8018,3494,8019,8020,2279,1576, 473,3606,4275,3395, 972,8021,3607,8022,3067,8023, # 7734 -8024,4553,4554,8025,3727,4041,4042,8026, 153,4555, 356,8027,1891,2888,4276,2143, # 7750 - 408, 803,2352,8028,3854,8029,4277,1646,2570,2511,4556,4557,3855,8030,3856,4278, # 7766 -8031,2411,3396, 752,8032,8033,1961,2964,8034, 746,3012,2465,8035,4279,3728, 698, # 7782 -4558,1892,4280,3608,2543,4559,3609,3857,8036,3166,3397,8037,1823,1302,4043,2706, # 7798 -3858,1973,4281,8038,4282,3167, 823,1303,1288,1236,2848,3495,4044,3398, 774,3859, # 7814 -8039,1581,4560,1304,2849,3860,4561,8040,2435,2161,1083,3237,4283,4045,4284, 344, # 7830 -1173, 288,2311, 454,1683,8041,8042,1461,4562,4046,2589,8043,8044,4563, 985, 894, # 7846 -8045,3399,3168,8046,1913,2928,3729,1988,8047,2110,1974,8048,4047,8049,2571,1194, # 7862 - 425,8050,4564,3169,1245,3730,4285,8051,8052,2850,8053, 636,4565,1855,3861, 760, # 7878 -1799,8054,4286,2209,1508,4566,4048,1893,1684,2293,8055,8056,8057,4287,4288,2210, # 7894 - 479,8058,8059, 832,8060,4049,2489,8061,2965,2490,3731, 990,3109, 627,1814,2642, # 7910 -4289,1582,4290,2125,2111,3496,4567,8062, 799,4291,3170,8063,4568,2112,1737,3013, # 7926 -1018, 543, 754,4292,3309,1676,4569,4570,4050,8064,1489,8065,3497,8066,2614,2889, # 7942 -4051,8067,8068,2966,8069,8070,8071,8072,3171,4571,4572,2182,1722,8073,3238,3239, # 7958 -1842,3610,1715, 481, 365,1975,1856,8074,8075,1962,2491,4573,8076,2126,3611,3240, # 7974 - 433,1894,2063,2075,8077, 602,2741,8078,8079,8080,8081,8082,3014,1628,3400,8083, # 7990 -3172,4574,4052,2890,4575,2512,8084,2544,2772,8085,8086,8087,3310,4576,2891,8088, # 8006 -4577,8089,2851,4578,4579,1221,2967,4053,2513,8090,8091,8092,1867,1989,8093,8094, # 8022 -8095,1895,8096,8097,4580,1896,4054, 318,8098,2094,4055,4293,8099,8100, 485,8101, # 8038 - 938,3862, 553,2670, 116,8102,3863,3612,8103,3498,2671,2773,3401,3311,2807,8104, # 8054 -3613,2929,4056,1747,2930,2968,8105,8106, 207,8107,8108,2672,4581,2514,8109,3015, # 8070 - 890,3614,3864,8110,1877,3732,3402,8111,2183,2353,3403,1652,8112,8113,8114, 941, # 8086 -2294, 208,3499,4057,2019, 330,4294,3865,2892,2492,3733,4295,8115,8116,8117,8118, # 8102 -#Everything below is of no interest for detection purpose -2515,1613,4582,8119,3312,3866,2516,8120,4058,8121,1637,4059,2466,4583,3867,8122, # 8118 -2493,3016,3734,8123,8124,2192,8125,8126,2162,8127,8128,8129,8130,8131,8132,8133, # 8134 -8134,8135,8136,8137,8138,8139,8140,8141,8142,8143,8144,8145,8146,8147,8148,8149, # 8150 -8150,8151,8152,8153,8154,8155,8156,8157,8158,8159,8160,8161,8162,8163,8164,8165, # 8166 -8166,8167,8168,8169,8170,8171,8172,8173,8174,8175,8176,8177,8178,8179,8180,8181, # 8182 -8182,8183,8184,8185,8186,8187,8188,8189,8190,8191,8192,8193,8194,8195,8196,8197, # 8198 -8198,8199,8200,8201,8202,8203,8204,8205,8206,8207,8208,8209,8210,8211,8212,8213, # 8214 -8214,8215,8216,8217,8218,8219,8220,8221,8222,8223,8224,8225,8226,8227,8228,8229, # 8230 -8230,8231,8232,8233,8234,8235,8236,8237,8238,8239,8240,8241,8242,8243,8244,8245, # 8246 -8246,8247,8248,8249,8250,8251,8252,8253,8254,8255,8256,8257,8258,8259,8260,8261, # 8262 -8262,8263,8264,8265,8266,8267,8268,8269,8270,8271,8272,8273,8274,8275,8276,8277, # 8278 -8278,8279,8280,8281,8282,8283,8284,8285,8286,8287,8288,8289,8290,8291,8292,8293, # 8294 -8294,8295,8296,8297,8298,8299,8300,8301,8302,8303,8304,8305,8306,8307,8308,8309, # 8310 -8310,8311,8312,8313,8314,8315,8316,8317,8318,8319,8320,8321,8322,8323,8324,8325, # 8326 -8326,8327,8328,8329,8330,8331,8332,8333,8334,8335,8336,8337,8338,8339,8340,8341, # 8342 -8342,8343,8344,8345,8346,8347,8348,8349,8350,8351,8352,8353,8354,8355,8356,8357, # 8358 -8358,8359,8360,8361,8362,8363,8364,8365,8366,8367,8368,8369,8370,8371,8372,8373, # 8374 -8374,8375,8376,8377,8378,8379,8380,8381,8382,8383,8384,8385,8386,8387,8388,8389, # 8390 -8390,8391,8392,8393,8394,8395,8396,8397,8398,8399,8400,8401,8402,8403,8404,8405, # 8406 -8406,8407,8408,8409,8410,8411,8412,8413,8414,8415,8416,8417,8418,8419,8420,8421, # 8422 -8422,8423,8424,8425,8426,8427,8428,8429,8430,8431,8432,8433,8434,8435,8436,8437, # 8438 -8438,8439,8440,8441,8442,8443,8444,8445,8446,8447,8448,8449,8450,8451,8452,8453, # 8454 -8454,8455,8456,8457,8458,8459,8460,8461,8462,8463,8464,8465,8466,8467,8468,8469, # 8470 -8470,8471,8472,8473,8474,8475,8476,8477,8478,8479,8480,8481,8482,8483,8484,8485, # 8486 -8486,8487,8488,8489,8490,8491,8492,8493,8494,8495,8496,8497,8498,8499,8500,8501, # 8502 -8502,8503,8504,8505,8506,8507,8508,8509,8510,8511,8512,8513,8514,8515,8516,8517, # 8518 -8518,8519,8520,8521,8522,8523,8524,8525,8526,8527,8528,8529,8530,8531,8532,8533, # 8534 -8534,8535,8536,8537,8538,8539,8540,8541,8542,8543,8544,8545,8546,8547,8548,8549, # 8550 -8550,8551,8552,8553,8554,8555,8556,8557,8558,8559,8560,8561,8562,8563,8564,8565, # 8566 -8566,8567,8568,8569,8570,8571,8572,8573,8574,8575,8576,8577,8578,8579,8580,8581, # 8582 -8582,8583,8584,8585,8586,8587,8588,8589,8590,8591,8592,8593,8594,8595,8596,8597, # 8598 -8598,8599,8600,8601,8602,8603,8604,8605,8606,8607,8608,8609,8610,8611,8612,8613, # 8614 -8614,8615,8616,8617,8618,8619,8620,8621,8622,8623,8624,8625,8626,8627,8628,8629, # 8630 -8630,8631,8632,8633,8634,8635,8636,8637,8638,8639,8640,8641,8642,8643,8644,8645, # 8646 -8646,8647,8648,8649,8650,8651,8652,8653,8654,8655,8656,8657,8658,8659,8660,8661, # 8662 -8662,8663,8664,8665,8666,8667,8668,8669,8670,8671,8672,8673,8674,8675,8676,8677, # 8678 -8678,8679,8680,8681,8682,8683,8684,8685,8686,8687,8688,8689,8690,8691,8692,8693, # 8694 -8694,8695,8696,8697,8698,8699,8700,8701,8702,8703,8704,8705,8706,8707,8708,8709, # 8710 -8710,8711,8712,8713,8714,8715,8716,8717,8718,8719,8720,8721,8722,8723,8724,8725, # 8726 -8726,8727,8728,8729,8730,8731,8732,8733,8734,8735,8736,8737,8738,8739,8740,8741) # 8742 +EUCTW_CHAR_TO_FREQ_ORDER = ( + 1,1800,1506, 255,1431, 198, 9, 82, 6,7310, 177, 202,3615,1256,2808, 110, # 2742 +3735, 33,3241, 261, 76, 44,2113, 16,2931,2184,1176, 659,3868, 26,3404,2643, # 2758 +1198,3869,3313,4060, 410,2211, 302, 590, 361,1963, 8, 204, 58,4296,7311,1931, # 2774 + 63,7312,7313, 317,1614, 75, 222, 159,4061,2412,1480,7314,3500,3068, 224,2809, # 2790 +3616, 3, 10,3870,1471, 29,2774,1135,2852,1939, 873, 130,3242,1123, 312,7315, # 2806 +4297,2051, 507, 252, 682,7316, 142,1914, 124, 206,2932, 34,3501,3173, 64, 604, # 2822 +7317,2494,1976,1977, 155,1990, 645, 641,1606,7318,3405, 337, 72, 406,7319, 80, # 2838 + 630, 238,3174,1509, 263, 939,1092,2644, 756,1440,1094,3406, 449, 69,2969, 591, # 2854 + 179,2095, 471, 115,2034,1843, 60, 50,2970, 134, 806,1868, 734,2035,3407, 180, # 2870 + 995,1607, 156, 537,2893, 688,7320, 319,1305, 779,2144, 514,2374, 298,4298, 359, # 2886 +2495, 90,2707,1338, 663, 11, 906,1099,2545, 20,2436, 182, 532,1716,7321, 732, # 2902 +1376,4062,1311,1420,3175, 25,2312,1056, 113, 399, 382,1949, 242,3408,2467, 529, # 2918 +3243, 475,1447,3617,7322, 117, 21, 656, 810,1297,2295,2329,3502,7323, 126,4063, # 2934 + 706, 456, 150, 613,4299, 71,1118,2036,4064, 145,3069, 85, 835, 486,2114,1246, # 2950 +1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,7324,2127,2354, 347,3736, 221, # 2966 +3503,3110,7325,1955,1153,4065, 83, 296,1199,3070, 192, 624, 93,7326, 822,1897, # 2982 +2810,3111, 795,2064, 991,1554,1542,1592, 27, 43,2853, 859, 139,1456, 860,4300, # 2998 + 437, 712,3871, 164,2392,3112, 695, 211,3017,2096, 195,3872,1608,3504,3505,3618, # 3014 +3873, 234, 811,2971,2097,3874,2229,1441,3506,1615,2375, 668,2076,1638, 305, 228, # 3030 +1664,4301, 467, 415,7327, 262,2098,1593, 239, 108, 300, 200,1033, 512,1247,2077, # 3046 +7328,7329,2173,3176,3619,2673, 593, 845,1062,3244, 88,1723,2037,3875,1950, 212, # 3062 + 266, 152, 149, 468,1898,4066,4302, 77, 187,7330,3018, 37, 5,2972,7331,3876, # 3078 +7332,7333, 39,2517,4303,2894,3177,2078, 55, 148, 74,4304, 545, 483,1474,1029, # 3094 +1665, 217,1869,1531,3113,1104,2645,4067, 24, 172,3507, 900,3877,3508,3509,4305, # 3110 + 32,1408,2811,1312, 329, 487,2355,2247,2708, 784,2674, 4,3019,3314,1427,1788, # 3126 + 188, 109, 499,7334,3620,1717,1789, 888,1217,3020,4306,7335,3510,7336,3315,1520, # 3142 +3621,3878, 196,1034, 775,7337,7338, 929,1815, 249, 439, 38,7339,1063,7340, 794, # 3158 +3879,1435,2296, 46, 178,3245,2065,7341,2376,7342, 214,1709,4307, 804, 35, 707, # 3174 + 324,3622,1601,2546, 140, 459,4068,7343,7344,1365, 839, 272, 978,2257,2572,3409, # 3190 +2128,1363,3623,1423, 697, 100,3071, 48, 70,1231, 495,3114,2193,7345,1294,7346, # 3206 +2079, 462, 586,1042,3246, 853, 256, 988, 185,2377,3410,1698, 434,1084,7347,3411, # 3222 + 314,2615,2775,4308,2330,2331, 569,2280, 637,1816,2518, 757,1162,1878,1616,3412, # 3238 + 287,1577,2115, 768,4309,1671,2854,3511,2519,1321,3737, 909,2413,7348,4069, 933, # 3254 +3738,7349,2052,2356,1222,4310, 765,2414,1322, 786,4311,7350,1919,1462,1677,2895, # 3270 +1699,7351,4312,1424,2437,3115,3624,2590,3316,1774,1940,3413,3880,4070, 309,1369, # 3286 +1130,2812, 364,2230,1653,1299,3881,3512,3882,3883,2646, 525,1085,3021, 902,2000, # 3302 +1475, 964,4313, 421,1844,1415,1057,2281, 940,1364,3116, 376,4314,4315,1381, 7, # 3318 +2520, 983,2378, 336,1710,2675,1845, 321,3414, 559,1131,3022,2742,1808,1132,1313, # 3334 + 265,1481,1857,7352, 352,1203,2813,3247, 167,1089, 420,2814, 776, 792,1724,3513, # 3350 +4071,2438,3248,7353,4072,7354, 446, 229, 333,2743, 901,3739,1200,1557,4316,2647, # 3366 +1920, 395,2744,2676,3740,4073,1835, 125, 916,3178,2616,4317,7355,7356,3741,7357, # 3382 +7358,7359,4318,3117,3625,1133,2547,1757,3415,1510,2313,1409,3514,7360,2145, 438, # 3398 +2591,2896,2379,3317,1068, 958,3023, 461, 311,2855,2677,4074,1915,3179,4075,1978, # 3414 + 383, 750,2745,2617,4076, 274, 539, 385,1278,1442,7361,1154,1964, 384, 561, 210, # 3430 + 98,1295,2548,3515,7362,1711,2415,1482,3416,3884,2897,1257, 129,7363,3742, 642, # 3446 + 523,2776,2777,2648,7364, 141,2231,1333, 68, 176, 441, 876, 907,4077, 603,2592, # 3462 + 710, 171,3417, 404, 549, 18,3118,2393,1410,3626,1666,7365,3516,4319,2898,4320, # 3478 +7366,2973, 368,7367, 146, 366, 99, 871,3627,1543, 748, 807,1586,1185, 22,2258, # 3494 + 379,3743,3180,7368,3181, 505,1941,2618,1991,1382,2314,7369, 380,2357, 218, 702, # 3510 +1817,1248,3418,3024,3517,3318,3249,7370,2974,3628, 930,3250,3744,7371, 59,7372, # 3526 + 585, 601,4078, 497,3419,1112,1314,4321,1801,7373,1223,1472,2174,7374, 749,1836, # 3542 + 690,1899,3745,1772,3885,1476, 429,1043,1790,2232,2116, 917,4079, 447,1086,1629, # 3558 +7375, 556,7376,7377,2020,1654, 844,1090, 105, 550, 966,1758,2815,1008,1782, 686, # 3574 +1095,7378,2282, 793,1602,7379,3518,2593,4322,4080,2933,2297,4323,3746, 980,2496, # 3590 + 544, 353, 527,4324, 908,2678,2899,7380, 381,2619,1942,1348,7381,1341,1252, 560, # 3606 +3072,7382,3420,2856,7383,2053, 973, 886,2080, 143,4325,7384,7385, 157,3886, 496, # 3622 +4081, 57, 840, 540,2038,4326,4327,3421,2117,1445, 970,2259,1748,1965,2081,4082, # 3638 +3119,1234,1775,3251,2816,3629, 773,1206,2129,1066,2039,1326,3887,1738,1725,4083, # 3654 + 279,3120, 51,1544,2594, 423,1578,2130,2066, 173,4328,1879,7386,7387,1583, 264, # 3670 + 610,3630,4329,2439, 280, 154,7388,7389,7390,1739, 338,1282,3073, 693,2857,1411, # 3686 +1074,3747,2440,7391,4330,7392,7393,1240, 952,2394,7394,2900,1538,2679, 685,1483, # 3702 +4084,2468,1436, 953,4085,2054,4331, 671,2395, 79,4086,2441,3252, 608, 567,2680, # 3718 +3422,4087,4088,1691, 393,1261,1791,2396,7395,4332,7396,7397,7398,7399,1383,1672, # 3734 +3748,3182,1464, 522,1119, 661,1150, 216, 675,4333,3888,1432,3519, 609,4334,2681, # 3750 +2397,7400,7401,7402,4089,3025, 0,7403,2469, 315, 231,2442, 301,3319,4335,2380, # 3766 +7404, 233,4090,3631,1818,4336,4337,7405, 96,1776,1315,2082,7406, 257,7407,1809, # 3782 +3632,2709,1139,1819,4091,2021,1124,2163,2778,1777,2649,7408,3074, 363,1655,3183, # 3798 +7409,2975,7410,7411,7412,3889,1567,3890, 718, 103,3184, 849,1443, 341,3320,2934, # 3814 +1484,7413,1712, 127, 67, 339,4092,2398, 679,1412, 821,7414,7415, 834, 738, 351, # 3830 +2976,2146, 846, 235,1497,1880, 418,1992,3749,2710, 186,1100,2147,2746,3520,1545, # 3846 +1355,2935,2858,1377, 583,3891,4093,2573,2977,7416,1298,3633,1078,2549,3634,2358, # 3862 + 78,3750,3751, 267,1289,2099,2001,1594,4094, 348, 369,1274,2194,2175,1837,4338, # 3878 +1820,2817,3635,2747,2283,2002,4339,2936,2748, 144,3321, 882,4340,3892,2749,3423, # 3894 +4341,2901,7417,4095,1726, 320,7418,3893,3026, 788,2978,7419,2818,1773,1327,2859, # 3910 +3894,2819,7420,1306,4342,2003,1700,3752,3521,2359,2650, 787,2022, 506, 824,3636, # 3926 + 534, 323,4343,1044,3322,2023,1900, 946,3424,7421,1778,1500,1678,7422,1881,4344, # 3942 + 165, 243,4345,3637,2521, 123, 683,4096, 764,4346, 36,3895,1792, 589,2902, 816, # 3958 + 626,1667,3027,2233,1639,1555,1622,3753,3896,7423,3897,2860,1370,1228,1932, 891, # 3974 +2083,2903, 304,4097,7424, 292,2979,2711,3522, 691,2100,4098,1115,4347, 118, 662, # 3990 +7425, 611,1156, 854,2381,1316,2861, 2, 386, 515,2904,7426,7427,3253, 868,2234, # 4006 +1486, 855,2651, 785,2212,3028,7428,1040,3185,3523,7429,3121, 448,7430,1525,7431, # 4022 +2164,4348,7432,3754,7433,4099,2820,3524,3122, 503, 818,3898,3123,1568, 814, 676, # 4038 +1444, 306,1749,7434,3755,1416,1030, 197,1428, 805,2821,1501,4349,7435,7436,7437, # 4054 +1993,7438,4350,7439,7440,2195, 13,2779,3638,2980,3124,1229,1916,7441,3756,2131, # 4070 +7442,4100,4351,2399,3525,7443,2213,1511,1727,1120,7444,7445, 646,3757,2443, 307, # 4086 +7446,7447,1595,3186,7448,7449,7450,3639,1113,1356,3899,1465,2522,2523,7451, 519, # 4102 +7452, 128,2132, 92,2284,1979,7453,3900,1512, 342,3125,2196,7454,2780,2214,1980, # 4118 +3323,7455, 290,1656,1317, 789, 827,2360,7456,3758,4352, 562, 581,3901,7457, 401, # 4134 +4353,2248, 94,4354,1399,2781,7458,1463,2024,4355,3187,1943,7459, 828,1105,4101, # 4150 +1262,1394,7460,4102, 605,4356,7461,1783,2862,7462,2822, 819,2101, 578,2197,2937, # 4166 +7463,1502, 436,3254,4103,3255,2823,3902,2905,3425,3426,7464,2712,2315,7465,7466, # 4182 +2332,2067, 23,4357, 193, 826,3759,2102, 699,1630,4104,3075, 390,1793,1064,3526, # 4198 +7467,1579,3076,3077,1400,7468,4105,1838,1640,2863,7469,4358,4359, 137,4106, 598, # 4214 +3078,1966, 780, 104, 974,2938,7470, 278, 899, 253, 402, 572, 504, 493,1339,7471, # 4230 +3903,1275,4360,2574,2550,7472,3640,3029,3079,2249, 565,1334,2713, 863, 41,7473, # 4246 +7474,4361,7475,1657,2333, 19, 463,2750,4107, 606,7476,2981,3256,1087,2084,1323, # 4262 +2652,2982,7477,1631,1623,1750,4108,2682,7478,2864, 791,2714,2653,2334, 232,2416, # 4278 +7479,2983,1498,7480,2654,2620, 755,1366,3641,3257,3126,2025,1609, 119,1917,3427, # 4294 + 862,1026,4109,7481,3904,3760,4362,3905,4363,2260,1951,2470,7482,1125, 817,4110, # 4310 +4111,3906,1513,1766,2040,1487,4112,3030,3258,2824,3761,3127,7483,7484,1507,7485, # 4326 +2683, 733, 40,1632,1106,2865, 345,4113, 841,2524, 230,4364,2984,1846,3259,3428, # 4342 +7486,1263, 986,3429,7487, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562,3907, # 4358 +3908,2939, 967,2751,2655,1349, 592,2133,1692,3324,2985,1994,4114,1679,3909,1901, # 4374 +2185,7488, 739,3642,2715,1296,1290,7489,4115,2198,2199,1921,1563,2595,2551,1870, # 4390 +2752,2986,7490, 435,7491, 343,1108, 596, 17,1751,4365,2235,3430,3643,7492,4366, # 4406 + 294,3527,2940,1693, 477, 979, 281,2041,3528, 643,2042,3644,2621,2782,2261,1031, # 4422 +2335,2134,2298,3529,4367, 367,1249,2552,7493,3530,7494,4368,1283,3325,2004, 240, # 4438 +1762,3326,4369,4370, 836,1069,3128, 474,7495,2148,2525, 268,3531,7496,3188,1521, # 4454 +1284,7497,1658,1546,4116,7498,3532,3533,7499,4117,3327,2684,1685,4118, 961,1673, # 4470 +2622, 190,2005,2200,3762,4371,4372,7500, 570,2497,3645,1490,7501,4373,2623,3260, # 4486 +1956,4374, 584,1514, 396,1045,1944,7502,4375,1967,2444,7503,7504,4376,3910, 619, # 4502 +7505,3129,3261, 215,2006,2783,2553,3189,4377,3190,4378, 763,4119,3763,4379,7506, # 4518 +7507,1957,1767,2941,3328,3646,1174, 452,1477,4380,3329,3130,7508,2825,1253,2382, # 4534 +2186,1091,2285,4120, 492,7509, 638,1169,1824,2135,1752,3911, 648, 926,1021,1324, # 4550 +4381, 520,4382, 997, 847,1007, 892,4383,3764,2262,1871,3647,7510,2400,1784,4384, # 4566 +1952,2942,3080,3191,1728,4121,2043,3648,4385,2007,1701,3131,1551, 30,2263,4122, # 4582 +7511,2026,4386,3534,7512, 501,7513,4123, 594,3431,2165,1821,3535,3432,3536,3192, # 4598 + 829,2826,4124,7514,1680,3132,1225,4125,7515,3262,4387,4126,3133,2336,7516,4388, # 4614 +4127,7517,3912,3913,7518,1847,2383,2596,3330,7519,4389, 374,3914, 652,4128,4129, # 4630 + 375,1140, 798,7520,7521,7522,2361,4390,2264, 546,1659, 138,3031,2445,4391,7523, # 4646 +2250, 612,1848, 910, 796,3765,1740,1371, 825,3766,3767,7524,2906,2554,7525, 692, # 4662 + 444,3032,2624, 801,4392,4130,7526,1491, 244,1053,3033,4131,4132, 340,7527,3915, # 4678 +1041,2987, 293,1168, 87,1357,7528,1539, 959,7529,2236, 721, 694,4133,3768, 219, # 4694 +1478, 644,1417,3331,2656,1413,1401,1335,1389,3916,7530,7531,2988,2362,3134,1825, # 4710 + 730,1515, 184,2827, 66,4393,7532,1660,2943, 246,3332, 378,1457, 226,3433, 975, # 4726 +3917,2944,1264,3537, 674, 696,7533, 163,7534,1141,2417,2166, 713,3538,3333,4394, # 4742 +3918,7535,7536,1186, 15,7537,1079,1070,7538,1522,3193,3539, 276,1050,2716, 758, # 4758 +1126, 653,2945,3263,7539,2337, 889,3540,3919,3081,2989, 903,1250,4395,3920,3434, # 4774 +3541,1342,1681,1718, 766,3264, 286, 89,2946,3649,7540,1713,7541,2597,3334,2990, # 4790 +7542,2947,2215,3194,2866,7543,4396,2498,2526, 181, 387,1075,3921, 731,2187,3335, # 4806 +7544,3265, 310, 313,3435,2299, 770,4134, 54,3034, 189,4397,3082,3769,3922,7545, # 4822 +1230,1617,1849, 355,3542,4135,4398,3336, 111,4136,3650,1350,3135,3436,3035,4137, # 4838 +2149,3266,3543,7546,2784,3923,3924,2991, 722,2008,7547,1071, 247,1207,2338,2471, # 4854 +1378,4399,2009, 864,1437,1214,4400, 373,3770,1142,2216, 667,4401, 442,2753,2555, # 4870 +3771,3925,1968,4138,3267,1839, 837, 170,1107, 934,1336,1882,7548,7549,2118,4139, # 4886 +2828, 743,1569,7550,4402,4140, 582,2384,1418,3437,7551,1802,7552, 357,1395,1729, # 4902 +3651,3268,2418,1564,2237,7553,3083,3772,1633,4403,1114,2085,4141,1532,7554, 482, # 4918 +2446,4404,7555,7556,1492, 833,1466,7557,2717,3544,1641,2829,7558,1526,1272,3652, # 4934 +4142,1686,1794, 416,2556,1902,1953,1803,7559,3773,2785,3774,1159,2316,7560,2867, # 4950 +4405,1610,1584,3036,2419,2754, 443,3269,1163,3136,7561,7562,3926,7563,4143,2499, # 4966 +3037,4406,3927,3137,2103,1647,3545,2010,1872,4144,7564,4145, 431,3438,7565, 250, # 4982 + 97, 81,4146,7566,1648,1850,1558, 160, 848,7567, 866, 740,1694,7568,2201,2830, # 4998 +3195,4147,4407,3653,1687, 950,2472, 426, 469,3196,3654,3655,3928,7569,7570,1188, # 5014 + 424,1995, 861,3546,4148,3775,2202,2685, 168,1235,3547,4149,7571,2086,1674,4408, # 5030 +3337,3270, 220,2557,1009,7572,3776, 670,2992, 332,1208, 717,7573,7574,3548,2447, # 5046 +3929,3338,7575, 513,7576,1209,2868,3339,3138,4409,1080,7577,7578,7579,7580,2527, # 5062 +3656,3549, 815,1587,3930,3931,7581,3550,3439,3777,1254,4410,1328,3038,1390,3932, # 5078 +1741,3933,3778,3934,7582, 236,3779,2448,3271,7583,7584,3657,3780,1273,3781,4411, # 5094 +7585, 308,7586,4412, 245,4413,1851,2473,1307,2575, 430, 715,2136,2449,7587, 270, # 5110 + 199,2869,3935,7588,3551,2718,1753, 761,1754, 725,1661,1840,4414,3440,3658,7589, # 5126 +7590, 587, 14,3272, 227,2598, 326, 480,2265, 943,2755,3552, 291, 650,1883,7591, # 5142 +1702,1226, 102,1547, 62,3441, 904,4415,3442,1164,4150,7592,7593,1224,1548,2756, # 5158 + 391, 498,1493,7594,1386,1419,7595,2055,1177,4416, 813, 880,1081,2363, 566,1145, # 5174 +4417,2286,1001,1035,2558,2599,2238, 394,1286,7596,7597,2068,7598, 86,1494,1730, # 5190 +3936, 491,1588, 745, 897,2948, 843,3340,3937,2757,2870,3273,1768, 998,2217,2069, # 5206 + 397,1826,1195,1969,3659,2993,3341, 284,7599,3782,2500,2137,2119,1903,7600,3938, # 5222 +2150,3939,4151,1036,3443,1904, 114,2559,4152, 209,1527,7601,7602,2949,2831,2625, # 5238 +2385,2719,3139, 812,2560,7603,3274,7604,1559, 737,1884,3660,1210, 885, 28,2686, # 5254 +3553,3783,7605,4153,1004,1779,4418,7606, 346,1981,2218,2687,4419,3784,1742, 797, # 5270 +1642,3940,1933,1072,1384,2151, 896,3941,3275,3661,3197,2871,3554,7607,2561,1958, # 5286 +4420,2450,1785,7608,7609,7610,3942,4154,1005,1308,3662,4155,2720,4421,4422,1528, # 5302 +2600, 161,1178,4156,1982, 987,4423,1101,4157, 631,3943,1157,3198,2420,1343,1241, # 5318 +1016,2239,2562, 372, 877,2339,2501,1160, 555,1934, 911,3944,7611, 466,1170, 169, # 5334 +1051,2907,2688,3663,2474,2994,1182,2011,2563,1251,2626,7612, 992,2340,3444,1540, # 5350 +2721,1201,2070,2401,1996,2475,7613,4424, 528,1922,2188,1503,1873,1570,2364,3342, # 5366 +3276,7614, 557,1073,7615,1827,3445,2087,2266,3140,3039,3084, 767,3085,2786,4425, # 5382 +1006,4158,4426,2341,1267,2176,3664,3199, 778,3945,3200,2722,1597,2657,7616,4427, # 5398 +7617,3446,7618,7619,7620,3277,2689,1433,3278, 131, 95,1504,3946, 723,4159,3141, # 5414 +1841,3555,2758,2189,3947,2027,2104,3665,7621,2995,3948,1218,7622,3343,3201,3949, # 5430 +4160,2576, 248,1634,3785, 912,7623,2832,3666,3040,3786, 654, 53,7624,2996,7625, # 5446 +1688,4428, 777,3447,1032,3950,1425,7626, 191, 820,2120,2833, 971,4429, 931,3202, # 5462 + 135, 664, 783,3787,1997, 772,2908,1935,3951,3788,4430,2909,3203, 282,2723, 640, # 5478 +1372,3448,1127, 922, 325,3344,7627,7628, 711,2044,7629,7630,3952,2219,2787,1936, # 5494 +3953,3345,2220,2251,3789,2300,7631,4431,3790,1258,3279,3954,3204,2138,2950,3955, # 5510 +3956,7632,2221, 258,3205,4432, 101,1227,7633,3280,1755,7634,1391,3281,7635,2910, # 5526 +2056, 893,7636,7637,7638,1402,4161,2342,7639,7640,3206,3556,7641,7642, 878,1325, # 5542 +1780,2788,4433, 259,1385,2577, 744,1183,2267,4434,7643,3957,2502,7644, 684,1024, # 5558 +4162,7645, 472,3557,3449,1165,3282,3958,3959, 322,2152, 881, 455,1695,1152,1340, # 5574 + 660, 554,2153,4435,1058,4436,4163, 830,1065,3346,3960,4437,1923,7646,1703,1918, # 5590 +7647, 932,2268, 122,7648,4438, 947, 677,7649,3791,2627, 297,1905,1924,2269,4439, # 5606 +2317,3283,7650,7651,4164,7652,4165, 84,4166, 112, 989,7653, 547,1059,3961, 701, # 5622 +3558,1019,7654,4167,7655,3450, 942, 639, 457,2301,2451, 993,2951, 407, 851, 494, # 5638 +4440,3347, 927,7656,1237,7657,2421,3348, 573,4168, 680, 921,2911,1279,1874, 285, # 5654 + 790,1448,1983, 719,2167,7658,7659,4441,3962,3963,1649,7660,1541, 563,7661,1077, # 5670 +7662,3349,3041,3451, 511,2997,3964,3965,3667,3966,1268,2564,3350,3207,4442,4443, # 5686 +7663, 535,1048,1276,1189,2912,2028,3142,1438,1373,2834,2952,1134,2012,7664,4169, # 5702 +1238,2578,3086,1259,7665, 700,7666,2953,3143,3668,4170,7667,4171,1146,1875,1906, # 5718 +4444,2601,3967, 781,2422, 132,1589, 203, 147, 273,2789,2402, 898,1786,2154,3968, # 5734 +3969,7668,3792,2790,7669,7670,4445,4446,7671,3208,7672,1635,3793, 965,7673,1804, # 5750 +2690,1516,3559,1121,1082,1329,3284,3970,1449,3794, 65,1128,2835,2913,2759,1590, # 5766 +3795,7674,7675, 12,2658, 45, 976,2579,3144,4447, 517,2528,1013,1037,3209,7676, # 5782 +3796,2836,7677,3797,7678,3452,7679,2602, 614,1998,2318,3798,3087,2724,2628,7680, # 5798 +2580,4172, 599,1269,7681,1810,3669,7682,2691,3088, 759,1060, 489,1805,3351,3285, # 5814 +1358,7683,7684,2386,1387,1215,2629,2252, 490,7685,7686,4173,1759,2387,2343,7687, # 5830 +4448,3799,1907,3971,2630,1806,3210,4449,3453,3286,2760,2344, 874,7688,7689,3454, # 5846 +3670,1858, 91,2914,3671,3042,3800,4450,7690,3145,3972,2659,7691,3455,1202,1403, # 5862 +3801,2954,2529,1517,2503,4451,3456,2504,7692,4452,7693,2692,1885,1495,1731,3973, # 5878 +2365,4453,7694,2029,7695,7696,3974,2693,1216, 237,2581,4174,2319,3975,3802,4454, # 5894 +4455,2694,3560,3457, 445,4456,7697,7698,7699,7700,2761, 61,3976,3672,1822,3977, # 5910 +7701, 687,2045, 935, 925, 405,2660, 703,1096,1859,2725,4457,3978,1876,1367,2695, # 5926 +3352, 918,2105,1781,2476, 334,3287,1611,1093,4458, 564,3146,3458,3673,3353, 945, # 5942 +2631,2057,4459,7702,1925, 872,4175,7703,3459,2696,3089, 349,4176,3674,3979,4460, # 5958 +3803,4177,3675,2155,3980,4461,4462,4178,4463,2403,2046, 782,3981, 400, 251,4179, # 5974 +1624,7704,7705, 277,3676, 299,1265, 476,1191,3804,2121,4180,4181,1109, 205,7706, # 5990 +2582,1000,2156,3561,1860,7707,7708,7709,4464,7710,4465,2565, 107,2477,2157,3982, # 6006 +3460,3147,7711,1533, 541,1301, 158, 753,4182,2872,3562,7712,1696, 370,1088,4183, # 6022 +4466,3563, 579, 327, 440, 162,2240, 269,1937,1374,3461, 968,3043, 56,1396,3090, # 6038 +2106,3288,3354,7713,1926,2158,4467,2998,7714,3564,7715,7716,3677,4468,2478,7717, # 6054 +2791,7718,1650,4469,7719,2603,7720,7721,3983,2661,3355,1149,3356,3984,3805,3985, # 6070 +7722,1076, 49,7723, 951,3211,3289,3290, 450,2837, 920,7724,1811,2792,2366,4184, # 6086 +1908,1138,2367,3806,3462,7725,3212,4470,1909,1147,1518,2423,4471,3807,7726,4472, # 6102 +2388,2604, 260,1795,3213,7727,7728,3808,3291, 708,7729,3565,1704,7730,3566,1351, # 6118 +1618,3357,2999,1886, 944,4185,3358,4186,3044,3359,4187,7731,3678, 422, 413,1714, # 6134 +3292, 500,2058,2345,4188,2479,7732,1344,1910, 954,7733,1668,7734,7735,3986,2404, # 6150 +4189,3567,3809,4190,7736,2302,1318,2505,3091, 133,3092,2873,4473, 629, 31,2838, # 6166 +2697,3810,4474, 850, 949,4475,3987,2955,1732,2088,4191,1496,1852,7737,3988, 620, # 6182 +3214, 981,1242,3679,3360,1619,3680,1643,3293,2139,2452,1970,1719,3463,2168,7738, # 6198 +3215,7739,7740,3361,1828,7741,1277,4476,1565,2047,7742,1636,3568,3093,7743, 869, # 6214 +2839, 655,3811,3812,3094,3989,3000,3813,1310,3569,4477,7744,7745,7746,1733, 558, # 6230 +4478,3681, 335,1549,3045,1756,4192,3682,1945,3464,1829,1291,1192, 470,2726,2107, # 6246 +2793, 913,1054,3990,7747,1027,7748,3046,3991,4479, 982,2662,3362,3148,3465,3216, # 6262 +3217,1946,2794,7749, 571,4480,7750,1830,7751,3570,2583,1523,2424,7752,2089, 984, # 6278 +4481,3683,1959,7753,3684, 852, 923,2795,3466,3685, 969,1519, 999,2048,2320,1705, # 6294 +7754,3095, 615,1662, 151, 597,3992,2405,2321,1049, 275,4482,3686,4193, 568,3687, # 6310 +3571,2480,4194,3688,7755,2425,2270, 409,3218,7756,1566,2874,3467,1002, 769,2840, # 6326 + 194,2090,3149,3689,2222,3294,4195, 628,1505,7757,7758,1763,2177,3001,3993, 521, # 6342 +1161,2584,1787,2203,2406,4483,3994,1625,4196,4197, 412, 42,3096, 464,7759,2632, # 6358 +4484,3363,1760,1571,2875,3468,2530,1219,2204,3814,2633,2140,2368,4485,4486,3295, # 6374 +1651,3364,3572,7760,7761,3573,2481,3469,7762,3690,7763,7764,2271,2091, 460,7765, # 6390 +4487,7766,3002, 962, 588,3574, 289,3219,2634,1116, 52,7767,3047,1796,7768,7769, # 6406 +7770,1467,7771,1598,1143,3691,4198,1984,1734,1067,4488,1280,3365, 465,4489,1572, # 6422 + 510,7772,1927,2241,1812,1644,3575,7773,4490,3692,7774,7775,2663,1573,1534,7776, # 6438 +7777,4199, 536,1807,1761,3470,3815,3150,2635,7778,7779,7780,4491,3471,2915,1911, # 6454 +2796,7781,3296,1122, 377,3220,7782, 360,7783,7784,4200,1529, 551,7785,2059,3693, # 6470 +1769,2426,7786,2916,4201,3297,3097,2322,2108,2030,4492,1404, 136,1468,1479, 672, # 6486 +1171,3221,2303, 271,3151,7787,2762,7788,2049, 678,2727, 865,1947,4493,7789,2013, # 6502 +3995,2956,7790,2728,2223,1397,3048,3694,4494,4495,1735,2917,3366,3576,7791,3816, # 6518 + 509,2841,2453,2876,3817,7792,7793,3152,3153,4496,4202,2531,4497,2304,1166,1010, # 6534 + 552, 681,1887,7794,7795,2957,2958,3996,1287,1596,1861,3154, 358, 453, 736, 175, # 6550 + 478,1117, 905,1167,1097,7796,1853,1530,7797,1706,7798,2178,3472,2287,3695,3473, # 6566 +3577,4203,2092,4204,7799,3367,1193,2482,4205,1458,2190,2205,1862,1888,1421,3298, # 6582 +2918,3049,2179,3474, 595,2122,7800,3997,7801,7802,4206,1707,2636, 223,3696,1359, # 6598 + 751,3098, 183,3475,7803,2797,3003, 419,2369, 633, 704,3818,2389, 241,7804,7805, # 6614 +7806, 838,3004,3697,2272,2763,2454,3819,1938,2050,3998,1309,3099,2242,1181,7807, # 6630 +1136,2206,3820,2370,1446,4207,2305,4498,7808,7809,4208,1055,2605, 484,3698,7810, # 6646 +3999, 625,4209,2273,3368,1499,4210,4000,7811,4001,4211,3222,2274,2275,3476,7812, # 6662 +7813,2764, 808,2606,3699,3369,4002,4212,3100,2532, 526,3370,3821,4213, 955,7814, # 6678 +1620,4214,2637,2427,7815,1429,3700,1669,1831, 994, 928,7816,3578,1260,7817,7818, # 6694 +7819,1948,2288, 741,2919,1626,4215,2729,2455, 867,1184, 362,3371,1392,7820,7821, # 6710 +4003,4216,1770,1736,3223,2920,4499,4500,1928,2698,1459,1158,7822,3050,3372,2877, # 6726 +1292,1929,2506,2842,3701,1985,1187,2071,2014,2607,4217,7823,2566,2507,2169,3702, # 6742 +2483,3299,7824,3703,4501,7825,7826, 666,1003,3005,1022,3579,4218,7827,4502,1813, # 6758 +2253, 574,3822,1603, 295,1535, 705,3823,4219, 283, 858, 417,7828,7829,3224,4503, # 6774 +4504,3051,1220,1889,1046,2276,2456,4004,1393,1599, 689,2567, 388,4220,7830,2484, # 6790 + 802,7831,2798,3824,2060,1405,2254,7832,4505,3825,2109,1052,1345,3225,1585,7833, # 6806 + 809,7834,7835,7836, 575,2730,3477, 956,1552,1469,1144,2323,7837,2324,1560,2457, # 6822 +3580,3226,4005, 616,2207,3155,2180,2289,7838,1832,7839,3478,4506,7840,1319,3704, # 6838 +3705,1211,3581,1023,3227,1293,2799,7841,7842,7843,3826, 607,2306,3827, 762,2878, # 6854 +1439,4221,1360,7844,1485,3052,7845,4507,1038,4222,1450,2061,2638,4223,1379,4508, # 6870 +2585,7846,7847,4224,1352,1414,2325,2921,1172,7848,7849,3828,3829,7850,1797,1451, # 6886 +7851,7852,7853,7854,2922,4006,4007,2485,2346, 411,4008,4009,3582,3300,3101,4509, # 6902 +1561,2664,1452,4010,1375,7855,7856, 47,2959, 316,7857,1406,1591,2923,3156,7858, # 6918 +1025,2141,3102,3157, 354,2731, 884,2224,4225,2407, 508,3706, 726,3583, 996,2428, # 6934 +3584, 729,7859, 392,2191,1453,4011,4510,3707,7860,7861,2458,3585,2608,1675,2800, # 6950 + 919,2347,2960,2348,1270,4511,4012, 73,7862,7863, 647,7864,3228,2843,2255,1550, # 6966 +1346,3006,7865,1332, 883,3479,7866,7867,7868,7869,3301,2765,7870,1212, 831,1347, # 6982 +4226,4512,2326,3830,1863,3053, 720,3831,4513,4514,3832,7871,4227,7872,7873,4515, # 6998 +7874,7875,1798,4516,3708,2609,4517,3586,1645,2371,7876,7877,2924, 669,2208,2665, # 7014 +2429,7878,2879,7879,7880,1028,3229,7881,4228,2408,7882,2256,1353,7883,7884,4518, # 7030 +3158, 518,7885,4013,7886,4229,1960,7887,2142,4230,7888,7889,3007,2349,2350,3833, # 7046 + 516,1833,1454,4014,2699,4231,4519,2225,2610,1971,1129,3587,7890,2766,7891,2961, # 7062 +1422, 577,1470,3008,1524,3373,7892,7893, 432,4232,3054,3480,7894,2586,1455,2508, # 7078 +2226,1972,1175,7895,1020,2732,4015,3481,4520,7896,2733,7897,1743,1361,3055,3482, # 7094 +2639,4016,4233,4521,2290, 895, 924,4234,2170, 331,2243,3056, 166,1627,3057,1098, # 7110 +7898,1232,2880,2227,3374,4522, 657, 403,1196,2372, 542,3709,3375,1600,4235,3483, # 7126 +7899,4523,2767,3230, 576, 530,1362,7900,4524,2533,2666,3710,4017,7901, 842,3834, # 7142 +7902,2801,2031,1014,4018, 213,2700,3376, 665, 621,4236,7903,3711,2925,2430,7904, # 7158 +2431,3302,3588,3377,7905,4237,2534,4238,4525,3589,1682,4239,3484,1380,7906, 724, # 7174 +2277, 600,1670,7907,1337,1233,4526,3103,2244,7908,1621,4527,7909, 651,4240,7910, # 7190 +1612,4241,2611,7911,2844,7912,2734,2307,3058,7913, 716,2459,3059, 174,1255,2701, # 7206 +4019,3590, 548,1320,1398, 728,4020,1574,7914,1890,1197,3060,4021,7915,3061,3062, # 7222 +3712,3591,3713, 747,7916, 635,4242,4528,7917,7918,7919,4243,7920,7921,4529,7922, # 7238 +3378,4530,2432, 451,7923,3714,2535,2072,4244,2735,4245,4022,7924,1764,4531,7925, # 7254 +4246, 350,7926,2278,2390,2486,7927,4247,4023,2245,1434,4024, 488,4532, 458,4248, # 7270 +4025,3715, 771,1330,2391,3835,2568,3159,2159,2409,1553,2667,3160,4249,7928,2487, # 7286 +2881,2612,1720,2702,4250,3379,4533,7929,2536,4251,7930,3231,4252,2768,7931,2015, # 7302 +2736,7932,1155,1017,3716,3836,7933,3303,2308, 201,1864,4253,1430,7934,4026,7935, # 7318 +7936,7937,7938,7939,4254,1604,7940, 414,1865, 371,2587,4534,4535,3485,2016,3104, # 7334 +4536,1708, 960,4255, 887, 389,2171,1536,1663,1721,7941,2228,4027,2351,2926,1580, # 7350 +7942,7943,7944,1744,7945,2537,4537,4538,7946,4539,7947,2073,7948,7949,3592,3380, # 7366 +2882,4256,7950,4257,2640,3381,2802, 673,2703,2460, 709,3486,4028,3593,4258,7951, # 7382 +1148, 502, 634,7952,7953,1204,4540,3594,1575,4541,2613,3717,7954,3718,3105, 948, # 7398 +3232, 121,1745,3837,1110,7955,4259,3063,2509,3009,4029,3719,1151,1771,3838,1488, # 7414 +4030,1986,7956,2433,3487,7957,7958,2093,7959,4260,3839,1213,1407,2803, 531,2737, # 7430 +2538,3233,1011,1537,7960,2769,4261,3106,1061,7961,3720,3721,1866,2883,7962,2017, # 7446 + 120,4262,4263,2062,3595,3234,2309,3840,2668,3382,1954,4542,7963,7964,3488,1047, # 7462 +2704,1266,7965,1368,4543,2845, 649,3383,3841,2539,2738,1102,2846,2669,7966,7967, # 7478 +1999,7968,1111,3596,2962,7969,2488,3842,3597,2804,1854,3384,3722,7970,7971,3385, # 7494 +2410,2884,3304,3235,3598,7972,2569,7973,3599,2805,4031,1460, 856,7974,3600,7975, # 7510 +2885,2963,7976,2886,3843,7977,4264, 632,2510, 875,3844,1697,3845,2291,7978,7979, # 7526 +4544,3010,1239, 580,4545,4265,7980, 914, 936,2074,1190,4032,1039,2123,7981,7982, # 7542 +7983,3386,1473,7984,1354,4266,3846,7985,2172,3064,4033, 915,3305,4267,4268,3306, # 7558 +1605,1834,7986,2739, 398,3601,4269,3847,4034, 328,1912,2847,4035,3848,1331,4270, # 7574 +3011, 937,4271,7987,3602,4036,4037,3387,2160,4546,3388, 524, 742, 538,3065,1012, # 7590 +7988,7989,3849,2461,7990, 658,1103, 225,3850,7991,7992,4547,7993,4548,7994,3236, # 7606 +1243,7995,4038, 963,2246,4549,7996,2705,3603,3161,7997,7998,2588,2327,7999,4550, # 7622 +8000,8001,8002,3489,3307, 957,3389,2540,2032,1930,2927,2462, 870,2018,3604,1746, # 7638 +2770,2771,2434,2463,8003,3851,8004,3723,3107,3724,3490,3390,3725,8005,1179,3066, # 7654 +8006,3162,2373,4272,3726,2541,3163,3108,2740,4039,8007,3391,1556,2542,2292, 977, # 7670 +2887,2033,4040,1205,3392,8008,1765,3393,3164,2124,1271,1689, 714,4551,3491,8009, # 7686 +2328,3852, 533,4273,3605,2181, 617,8010,2464,3308,3492,2310,8011,8012,3165,8013, # 7702 +8014,3853,1987, 618, 427,2641,3493,3394,8015,8016,1244,1690,8017,2806,4274,4552, # 7718 +8018,3494,8019,8020,2279,1576, 473,3606,4275,3395, 972,8021,3607,8022,3067,8023, # 7734 +8024,4553,4554,8025,3727,4041,4042,8026, 153,4555, 356,8027,1891,2888,4276,2143, # 7750 + 408, 803,2352,8028,3854,8029,4277,1646,2570,2511,4556,4557,3855,8030,3856,4278, # 7766 +8031,2411,3396, 752,8032,8033,1961,2964,8034, 746,3012,2465,8035,4279,3728, 698, # 7782 +4558,1892,4280,3608,2543,4559,3609,3857,8036,3166,3397,8037,1823,1302,4043,2706, # 7798 +3858,1973,4281,8038,4282,3167, 823,1303,1288,1236,2848,3495,4044,3398, 774,3859, # 7814 +8039,1581,4560,1304,2849,3860,4561,8040,2435,2161,1083,3237,4283,4045,4284, 344, # 7830 +1173, 288,2311, 454,1683,8041,8042,1461,4562,4046,2589,8043,8044,4563, 985, 894, # 7846 +8045,3399,3168,8046,1913,2928,3729,1988,8047,2110,1974,8048,4047,8049,2571,1194, # 7862 + 425,8050,4564,3169,1245,3730,4285,8051,8052,2850,8053, 636,4565,1855,3861, 760, # 7878 +1799,8054,4286,2209,1508,4566,4048,1893,1684,2293,8055,8056,8057,4287,4288,2210, # 7894 + 479,8058,8059, 832,8060,4049,2489,8061,2965,2490,3731, 990,3109, 627,1814,2642, # 7910 +4289,1582,4290,2125,2111,3496,4567,8062, 799,4291,3170,8063,4568,2112,1737,3013, # 7926 +1018, 543, 754,4292,3309,1676,4569,4570,4050,8064,1489,8065,3497,8066,2614,2889, # 7942 +4051,8067,8068,2966,8069,8070,8071,8072,3171,4571,4572,2182,1722,8073,3238,3239, # 7958 +1842,3610,1715, 481, 365,1975,1856,8074,8075,1962,2491,4573,8076,2126,3611,3240, # 7974 + 433,1894,2063,2075,8077, 602,2741,8078,8079,8080,8081,8082,3014,1628,3400,8083, # 7990 +3172,4574,4052,2890,4575,2512,8084,2544,2772,8085,8086,8087,3310,4576,2891,8088, # 8006 +4577,8089,2851,4578,4579,1221,2967,4053,2513,8090,8091,8092,1867,1989,8093,8094, # 8022 +8095,1895,8096,8097,4580,1896,4054, 318,8098,2094,4055,4293,8099,8100, 485,8101, # 8038 + 938,3862, 553,2670, 116,8102,3863,3612,8103,3498,2671,2773,3401,3311,2807,8104, # 8054 +3613,2929,4056,1747,2930,2968,8105,8106, 207,8107,8108,2672,4581,2514,8109,3015, # 8070 + 890,3614,3864,8110,1877,3732,3402,8111,2183,2353,3403,1652,8112,8113,8114, 941, # 8086 +2294, 208,3499,4057,2019, 330,4294,3865,2892,2492,3733,4295,8115,8116,8117,8118, # 8102 +) -# flake8: noqa diff --git a/lib/chardet/euctwprober.py b/lib/chardet/euctwprober.py index fe652fe37a9707a8ecbf81be8ad3f1a6308ddc98..c1bcd049b301d5aa039ce1be1f649971730e0db5 100644 --- a/lib/chardet/euctwprober.py +++ b/lib/chardet/euctwprober.py @@ -13,12 +13,12 @@ # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. -# +# # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. -# +# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA @@ -28,14 +28,15 @@ from .mbcharsetprober import MultiByteCharSetProber from .codingstatemachine import CodingStateMachine from .chardistribution import EUCTWDistributionAnalysis -from .mbcssm import EUCTWSMModel +from .mbcssm import EUCTW_SM_MODEL class EUCTWProber(MultiByteCharSetProber): def __init__(self): - MultiByteCharSetProber.__init__(self) - self._mCodingSM = CodingStateMachine(EUCTWSMModel) - self._mDistributionAnalyzer = EUCTWDistributionAnalysis() + super(EUCTWProber, self).__init__() + self.coding_sm = CodingStateMachine(EUCTW_SM_MODEL) + self.distribution_analyzer = EUCTWDistributionAnalysis() self.reset() - def get_charset_name(self): + @property + def charset_name(self): return "EUC-TW" diff --git a/lib/chardet/gb2312freq.py b/lib/chardet/gb2312freq.py index 1238f510fc6afe2739f3f7a44359cfe471c154a3..697837bd9a87a77fc468fd1f10dd470d01701362 100644 --- a/lib/chardet/gb2312freq.py +++ b/lib/chardet/gb2312freq.py @@ -43,7 +43,7 @@ GB2312_TYPICAL_DISTRIBUTION_RATIO = 0.9 GB2312_TABLE_SIZE = 3760 -GB2312CharToFreqOrder = ( +GB2312_CHAR_TO_FREQ_ORDER = ( 1671, 749,1443,2364,3924,3807,2330,3921,1704,3463,2691,1511,1515, 572,3191,2205, 2361, 224,2558, 479,1711, 963,3162, 440,4060,1905,2966,2947,3580,2647,3961,3842, 2204, 869,4207, 970,2678,5626,2944,2956,1479,4048, 514,3595, 588,1346,2820,3409, @@ -278,195 +278,6 @@ GB2312CharToFreqOrder = ( 1718,1717,2655,3453,3143,4465, 161,2889,2980,2009,1421, 56,1908,1640,2387,2232, 1917,1874,2477,4921, 148, 83,3438, 592,4245,2882,1822,1055, 741, 115,1496,1624, 381,1638,4592,1020, 516,3214, 458, 947,4575,1432, 211,1514,2926,1865,2142, 189, - 852,1221,1400,1486, 882,2299,4036, 351, 28,1122, 700,6479,6480,6481,6482,6483, # last 512 -#Everything below is of no interest for detection purpose -5508,6484,3900,3414,3974,4441,4024,3537,4037,5628,5099,3633,6485,3148,6486,3636, -5509,3257,5510,5973,5445,5872,4941,4403,3174,4627,5873,6276,2286,4230,5446,5874, -5122,6102,6103,4162,5447,5123,5323,4849,6277,3980,3851,5066,4246,5774,5067,6278, -3001,2807,5695,3346,5775,5974,5158,5448,6487,5975,5976,5776,3598,6279,5696,4806, -4211,4154,6280,6488,6489,6490,6281,4212,5037,3374,4171,6491,4562,4807,4722,4827, -5977,6104,4532,4079,5159,5324,5160,4404,3858,5359,5875,3975,4288,4610,3486,4512, -5325,3893,5360,6282,6283,5560,2522,4231,5978,5186,5449,2569,3878,6284,5401,3578, -4415,6285,4656,5124,5979,2506,4247,4449,3219,3417,4334,4969,4329,6492,4576,4828, -4172,4416,4829,5402,6286,3927,3852,5361,4369,4830,4477,4867,5876,4173,6493,6105, -4657,6287,6106,5877,5450,6494,4155,4868,5451,3700,5629,4384,6288,6289,5878,3189, -4881,6107,6290,6495,4513,6496,4692,4515,4723,5100,3356,6497,6291,3810,4080,5561, -3570,4430,5980,6498,4355,5697,6499,4724,6108,6109,3764,4050,5038,5879,4093,3226, -6292,5068,5217,4693,3342,5630,3504,4831,4377,4466,4309,5698,4431,5777,6293,5778, -4272,3706,6110,5326,3752,4676,5327,4273,5403,4767,5631,6500,5699,5880,3475,5039, -6294,5562,5125,4348,4301,4482,4068,5126,4593,5700,3380,3462,5981,5563,3824,5404, -4970,5511,3825,4738,6295,6501,5452,4516,6111,5881,5564,6502,6296,5982,6503,4213, -4163,3454,6504,6112,4009,4450,6113,4658,6297,6114,3035,6505,6115,3995,4904,4739, -4563,4942,4110,5040,3661,3928,5362,3674,6506,5292,3612,4791,5565,4149,5983,5328, -5259,5021,4725,4577,4564,4517,4364,6298,5405,4578,5260,4594,4156,4157,5453,3592, -3491,6507,5127,5512,4709,4922,5984,5701,4726,4289,6508,4015,6116,5128,4628,3424, -4241,5779,6299,4905,6509,6510,5454,5702,5780,6300,4365,4923,3971,6511,5161,3270, -3158,5985,4100, 867,5129,5703,6117,5363,3695,3301,5513,4467,6118,6512,5455,4232, -4242,4629,6513,3959,4478,6514,5514,5329,5986,4850,5162,5566,3846,4694,6119,5456, -4869,5781,3779,6301,5704,5987,5515,4710,6302,5882,6120,4392,5364,5705,6515,6121, -6516,6517,3736,5988,5457,5989,4695,2457,5883,4551,5782,6303,6304,6305,5130,4971, -6122,5163,6123,4870,3263,5365,3150,4871,6518,6306,5783,5069,5706,3513,3498,4409, -5330,5632,5366,5458,5459,3991,5990,4502,3324,5991,5784,3696,4518,5633,4119,6519, -4630,5634,4417,5707,4832,5992,3418,6124,5993,5567,4768,5218,6520,4595,3458,5367, -6125,5635,6126,4202,6521,4740,4924,6307,3981,4069,4385,6308,3883,2675,4051,3834, -4302,4483,5568,5994,4972,4101,5368,6309,5164,5884,3922,6127,6522,6523,5261,5460, -5187,4164,5219,3538,5516,4111,3524,5995,6310,6311,5369,3181,3386,2484,5188,3464, -5569,3627,5708,6524,5406,5165,4677,4492,6312,4872,4851,5885,4468,5996,6313,5709, -5710,6128,2470,5886,6314,5293,4882,5785,3325,5461,5101,6129,5711,5786,6525,4906, -6526,6527,4418,5887,5712,4808,2907,3701,5713,5888,6528,3765,5636,5331,6529,6530, -3593,5889,3637,4943,3692,5714,5787,4925,6315,6130,5462,4405,6131,6132,6316,5262, -6531,6532,5715,3859,5716,5070,4696,5102,3929,5788,3987,4792,5997,6533,6534,3920, -4809,5000,5998,6535,2974,5370,6317,5189,5263,5717,3826,6536,3953,5001,4883,3190, -5463,5890,4973,5999,4741,6133,6134,3607,5570,6000,4711,3362,3630,4552,5041,6318, -6001,2950,2953,5637,4646,5371,4944,6002,2044,4120,3429,6319,6537,5103,4833,6538, -6539,4884,4647,3884,6003,6004,4758,3835,5220,5789,4565,5407,6540,6135,5294,4697, -4852,6320,6321,3206,4907,6541,6322,4945,6542,6136,6543,6323,6005,4631,3519,6544, -5891,6545,5464,3784,5221,6546,5571,4659,6547,6324,6137,5190,6548,3853,6549,4016, -4834,3954,6138,5332,3827,4017,3210,3546,4469,5408,5718,3505,4648,5790,5131,5638, -5791,5465,4727,4318,6325,6326,5792,4553,4010,4698,3439,4974,3638,4335,3085,6006, -5104,5042,5166,5892,5572,6327,4356,4519,5222,5573,5333,5793,5043,6550,5639,5071, -4503,6328,6139,6551,6140,3914,3901,5372,6007,5640,4728,4793,3976,3836,4885,6552, -4127,6553,4451,4102,5002,6554,3686,5105,6555,5191,5072,5295,4611,5794,5296,6556, -5893,5264,5894,4975,5466,5265,4699,4976,4370,4056,3492,5044,4886,6557,5795,4432, -4769,4357,5467,3940,4660,4290,6141,4484,4770,4661,3992,6329,4025,4662,5022,4632, -4835,4070,5297,4663,4596,5574,5132,5409,5895,6142,4504,5192,4664,5796,5896,3885, -5575,5797,5023,4810,5798,3732,5223,4712,5298,4084,5334,5468,6143,4052,4053,4336, -4977,4794,6558,5335,4908,5576,5224,4233,5024,4128,5469,5225,4873,6008,5045,4729, -4742,4633,3675,4597,6559,5897,5133,5577,5003,5641,5719,6330,6560,3017,2382,3854, -4406,4811,6331,4393,3964,4946,6561,2420,3722,6562,4926,4378,3247,1736,4442,6332, -5134,6333,5226,3996,2918,5470,4319,4003,4598,4743,4744,4485,3785,3902,5167,5004, -5373,4394,5898,6144,4874,1793,3997,6334,4085,4214,5106,5642,4909,5799,6009,4419, -4189,3330,5899,4165,4420,5299,5720,5227,3347,6145,4081,6335,2876,3930,6146,3293, -3786,3910,3998,5900,5300,5578,2840,6563,5901,5579,6147,3531,5374,6564,6565,5580, -4759,5375,6566,6148,3559,5643,6336,6010,5517,6337,6338,5721,5902,3873,6011,6339, -6567,5518,3868,3649,5722,6568,4771,4947,6569,6149,4812,6570,2853,5471,6340,6341, -5644,4795,6342,6012,5723,6343,5724,6013,4349,6344,3160,6150,5193,4599,4514,4493, -5168,4320,6345,4927,3666,4745,5169,5903,5005,4928,6346,5725,6014,4730,4203,5046, -4948,3395,5170,6015,4150,6016,5726,5519,6347,5047,3550,6151,6348,4197,4310,5904, -6571,5581,2965,6152,4978,3960,4291,5135,6572,5301,5727,4129,4026,5905,4853,5728, -5472,6153,6349,4533,2700,4505,5336,4678,3583,5073,2994,4486,3043,4554,5520,6350, -6017,5800,4487,6351,3931,4103,5376,6352,4011,4321,4311,4190,5136,6018,3988,3233, -4350,5906,5645,4198,6573,5107,3432,4191,3435,5582,6574,4139,5410,6353,5411,3944, -5583,5074,3198,6575,6354,4358,6576,5302,4600,5584,5194,5412,6577,6578,5585,5413, -5303,4248,5414,3879,4433,6579,4479,5025,4854,5415,6355,4760,4772,3683,2978,4700, -3797,4452,3965,3932,3721,4910,5801,6580,5195,3551,5907,3221,3471,3029,6019,3999, -5908,5909,5266,5267,3444,3023,3828,3170,4796,5646,4979,4259,6356,5647,5337,3694, -6357,5648,5338,4520,4322,5802,3031,3759,4071,6020,5586,4836,4386,5048,6581,3571, -4679,4174,4949,6154,4813,3787,3402,3822,3958,3215,3552,5268,4387,3933,4950,4359, -6021,5910,5075,3579,6358,4234,4566,5521,6359,3613,5049,6022,5911,3375,3702,3178, -4911,5339,4521,6582,6583,4395,3087,3811,5377,6023,6360,6155,4027,5171,5649,4421, -4249,2804,6584,2270,6585,4000,4235,3045,6156,5137,5729,4140,4312,3886,6361,4330, -6157,4215,6158,3500,3676,4929,4331,3713,4930,5912,4265,3776,3368,5587,4470,4855, -3038,4980,3631,6159,6160,4132,4680,6161,6362,3923,4379,5588,4255,6586,4121,6587, -6363,4649,6364,3288,4773,4774,6162,6024,6365,3543,6588,4274,3107,3737,5050,5803, -4797,4522,5589,5051,5730,3714,4887,5378,4001,4523,6163,5026,5522,4701,4175,2791, -3760,6589,5473,4224,4133,3847,4814,4815,4775,3259,5416,6590,2738,6164,6025,5304, -3733,5076,5650,4816,5590,6591,6165,6592,3934,5269,6593,3396,5340,6594,5804,3445, -3602,4042,4488,5731,5732,3525,5591,4601,5196,6166,6026,5172,3642,4612,3202,4506, -4798,6366,3818,5108,4303,5138,5139,4776,3332,4304,2915,3415,4434,5077,5109,4856, -2879,5305,4817,6595,5913,3104,3144,3903,4634,5341,3133,5110,5651,5805,6167,4057, -5592,2945,4371,5593,6596,3474,4182,6367,6597,6168,4507,4279,6598,2822,6599,4777, -4713,5594,3829,6169,3887,5417,6170,3653,5474,6368,4216,2971,5228,3790,4579,6369, -5733,6600,6601,4951,4746,4555,6602,5418,5475,6027,3400,4665,5806,6171,4799,6028, -5052,6172,3343,4800,4747,5006,6370,4556,4217,5476,4396,5229,5379,5477,3839,5914, -5652,5807,4714,3068,4635,5808,6173,5342,4192,5078,5419,5523,5734,6174,4557,6175, -4602,6371,6176,6603,5809,6372,5735,4260,3869,5111,5230,6029,5112,6177,3126,4681, -5524,5915,2706,3563,4748,3130,6178,4018,5525,6604,6605,5478,4012,4837,6606,4534, -4193,5810,4857,3615,5479,6030,4082,3697,3539,4086,5270,3662,4508,4931,5916,4912, -5811,5027,3888,6607,4397,3527,3302,3798,2775,2921,2637,3966,4122,4388,4028,4054, -1633,4858,5079,3024,5007,3982,3412,5736,6608,3426,3236,5595,3030,6179,3427,3336, -3279,3110,6373,3874,3039,5080,5917,5140,4489,3119,6374,5812,3405,4494,6031,4666, -4141,6180,4166,6032,5813,4981,6609,5081,4422,4982,4112,3915,5653,3296,3983,6375, -4266,4410,5654,6610,6181,3436,5082,6611,5380,6033,3819,5596,4535,5231,5306,5113, -6612,4952,5918,4275,3113,6613,6376,6182,6183,5814,3073,4731,4838,5008,3831,6614, -4888,3090,3848,4280,5526,5232,3014,5655,5009,5737,5420,5527,6615,5815,5343,5173, -5381,4818,6616,3151,4953,6617,5738,2796,3204,4360,2989,4281,5739,5174,5421,5197, -3132,5141,3849,5142,5528,5083,3799,3904,4839,5480,2880,4495,3448,6377,6184,5271, -5919,3771,3193,6034,6035,5920,5010,6036,5597,6037,6378,6038,3106,5422,6618,5423, -5424,4142,6619,4889,5084,4890,4313,5740,6620,3437,5175,5307,5816,4199,5198,5529, -5817,5199,5656,4913,5028,5344,3850,6185,2955,5272,5011,5818,4567,4580,5029,5921, -3616,5233,6621,6622,6186,4176,6039,6379,6380,3352,5200,5273,2908,5598,5234,3837, -5308,6623,6624,5819,4496,4323,5309,5201,6625,6626,4983,3194,3838,4167,5530,5922, -5274,6381,6382,3860,3861,5599,3333,4292,4509,6383,3553,5481,5820,5531,4778,6187, -3955,3956,4324,4389,4218,3945,4325,3397,2681,5923,4779,5085,4019,5482,4891,5382, -5383,6040,4682,3425,5275,4094,6627,5310,3015,5483,5657,4398,5924,3168,4819,6628, -5925,6629,5532,4932,4613,6041,6630,4636,6384,4780,4204,5658,4423,5821,3989,4683, -5822,6385,4954,6631,5345,6188,5425,5012,5384,3894,6386,4490,4104,6632,5741,5053, -6633,5823,5926,5659,5660,5927,6634,5235,5742,5824,4840,4933,4820,6387,4859,5928, -4955,6388,4143,3584,5825,5346,5013,6635,5661,6389,5014,5484,5743,4337,5176,5662, -6390,2836,6391,3268,6392,6636,6042,5236,6637,4158,6638,5744,5663,4471,5347,3663, -4123,5143,4293,3895,6639,6640,5311,5929,5826,3800,6189,6393,6190,5664,5348,3554, -3594,4749,4603,6641,5385,4801,6043,5827,4183,6642,5312,5426,4761,6394,5665,6191, -4715,2669,6643,6644,5533,3185,5427,5086,5930,5931,5386,6192,6044,6645,4781,4013, -5745,4282,4435,5534,4390,4267,6045,5746,4984,6046,2743,6193,3501,4087,5485,5932, -5428,4184,4095,5747,4061,5054,3058,3862,5933,5600,6646,5144,3618,6395,3131,5055, -5313,6396,4650,4956,3855,6194,3896,5202,4985,4029,4225,6195,6647,5828,5486,5829, -3589,3002,6648,6397,4782,5276,6649,6196,6650,4105,3803,4043,5237,5830,6398,4096, -3643,6399,3528,6651,4453,3315,4637,6652,3984,6197,5535,3182,3339,6653,3096,2660, -6400,6654,3449,5934,4250,4236,6047,6401,5831,6655,5487,3753,4062,5832,6198,6199, -6656,3766,6657,3403,4667,6048,6658,4338,2897,5833,3880,2797,3780,4326,6659,5748, -5015,6660,5387,4351,5601,4411,6661,3654,4424,5935,4339,4072,5277,4568,5536,6402, -6662,5238,6663,5349,5203,6200,5204,6201,5145,4536,5016,5056,4762,5834,4399,4957, -6202,6403,5666,5749,6664,4340,6665,5936,5177,5667,6666,6667,3459,4668,6404,6668, -6669,4543,6203,6670,4276,6405,4480,5537,6671,4614,5205,5668,6672,3348,2193,4763, -6406,6204,5937,5602,4177,5669,3419,6673,4020,6205,4443,4569,5388,3715,3639,6407, -6049,4058,6206,6674,5938,4544,6050,4185,4294,4841,4651,4615,5488,6207,6408,6051, -5178,3241,3509,5835,6208,4958,5836,4341,5489,5278,6209,2823,5538,5350,5206,5429, -6675,4638,4875,4073,3516,4684,4914,4860,5939,5603,5389,6052,5057,3237,5490,3791, -6676,6409,6677,4821,4915,4106,5351,5058,4243,5539,4244,5604,4842,4916,5239,3028, -3716,5837,5114,5605,5390,5940,5430,6210,4332,6678,5540,4732,3667,3840,6053,4305, -3408,5670,5541,6410,2744,5240,5750,6679,3234,5606,6680,5607,5671,3608,4283,4159, -4400,5352,4783,6681,6411,6682,4491,4802,6211,6412,5941,6413,6414,5542,5751,6683, -4669,3734,5942,6684,6415,5943,5059,3328,4670,4144,4268,6685,6686,6687,6688,4372, -3603,6689,5944,5491,4373,3440,6416,5543,4784,4822,5608,3792,4616,5838,5672,3514, -5391,6417,4892,6690,4639,6691,6054,5673,5839,6055,6692,6056,5392,6212,4038,5544, -5674,4497,6057,6693,5840,4284,5675,4021,4545,5609,6418,4454,6419,6213,4113,4472, -5314,3738,5087,5279,4074,5610,4959,4063,3179,4750,6058,6420,6214,3476,4498,4716, -5431,4960,4685,6215,5241,6694,6421,6216,6695,5841,5945,6422,3748,5946,5179,3905, -5752,5545,5947,4374,6217,4455,6423,4412,6218,4803,5353,6696,3832,5280,6219,4327, -4702,6220,6221,6059,4652,5432,6424,3749,4751,6425,5753,4986,5393,4917,5948,5030, -5754,4861,4733,6426,4703,6697,6222,4671,5949,4546,4961,5180,6223,5031,3316,5281, -6698,4862,4295,4934,5207,3644,6427,5842,5950,6428,6429,4570,5843,5282,6430,6224, -5088,3239,6060,6699,5844,5755,6061,6431,2701,5546,6432,5115,5676,4039,3993,3327, -4752,4425,5315,6433,3941,6434,5677,4617,4604,3074,4581,6225,5433,6435,6226,6062, -4823,5756,5116,6227,3717,5678,4717,5845,6436,5679,5846,6063,5847,6064,3977,3354, -6437,3863,5117,6228,5547,5394,4499,4524,6229,4605,6230,4306,4500,6700,5951,6065, -3693,5952,5089,4366,4918,6701,6231,5548,6232,6702,6438,4704,5434,6703,6704,5953, -4168,6705,5680,3420,6706,5242,4407,6066,3812,5757,5090,5954,4672,4525,3481,5681, -4618,5395,5354,5316,5955,6439,4962,6707,4526,6440,3465,4673,6067,6441,5682,6708, -5435,5492,5758,5683,4619,4571,4674,4804,4893,4686,5493,4753,6233,6068,4269,6442, -6234,5032,4705,5146,5243,5208,5848,6235,6443,4963,5033,4640,4226,6236,5849,3387, -6444,6445,4436,4437,5850,4843,5494,4785,4894,6709,4361,6710,5091,5956,3331,6237, -4987,5549,6069,6711,4342,3517,4473,5317,6070,6712,6071,4706,6446,5017,5355,6713, -6714,4988,5436,6447,4734,5759,6715,4735,4547,4456,4754,6448,5851,6449,6450,3547, -5852,5318,6451,6452,5092,4205,6716,6238,4620,4219,5611,6239,6072,4481,5760,5957, -5958,4059,6240,6453,4227,4537,6241,5761,4030,4186,5244,5209,3761,4457,4876,3337, -5495,5181,6242,5959,5319,5612,5684,5853,3493,5854,6073,4169,5613,5147,4895,6074, -5210,6717,5182,6718,3830,6243,2798,3841,6075,6244,5855,5614,3604,4606,5496,5685, -5118,5356,6719,6454,5960,5357,5961,6720,4145,3935,4621,5119,5962,4261,6721,6455, -4786,5963,4375,4582,6245,6246,6247,6076,5437,4877,5856,3376,4380,6248,4160,6722, -5148,6456,5211,6457,6723,4718,6458,6724,6249,5358,4044,3297,6459,6250,5857,5615, -5497,5245,6460,5498,6725,6251,6252,5550,3793,5499,2959,5396,6461,6462,4572,5093, -5500,5964,3806,4146,6463,4426,5762,5858,6077,6253,4755,3967,4220,5965,6254,4989, -5501,6464,4352,6726,6078,4764,2290,5246,3906,5438,5283,3767,4964,2861,5763,5094, -6255,6256,4622,5616,5859,5860,4707,6727,4285,4708,4824,5617,6257,5551,4787,5212, -4965,4935,4687,6465,6728,6466,5686,6079,3494,4413,2995,5247,5966,5618,6729,5967, -5764,5765,5687,5502,6730,6731,6080,5397,6467,4990,6258,6732,4538,5060,5619,6733, -4719,5688,5439,5018,5149,5284,5503,6734,6081,4607,6259,5120,3645,5861,4583,6260, -4584,4675,5620,4098,5440,6261,4863,2379,3306,4585,5552,5689,4586,5285,6735,4864, -6736,5286,6082,6737,4623,3010,4788,4381,4558,5621,4587,4896,3698,3161,5248,4353, -4045,6262,3754,5183,4588,6738,6263,6739,6740,5622,3936,6741,6468,6742,6264,5095, -6469,4991,5968,6743,4992,6744,6083,4897,6745,4256,5766,4307,3108,3968,4444,5287, -3889,4343,6084,4510,6085,4559,6086,4898,5969,6746,5623,5061,4919,5249,5250,5504, -5441,6265,5320,4878,3242,5862,5251,3428,6087,6747,4237,5624,5442,6266,5553,4539, -6748,2585,3533,5398,4262,6088,5150,4736,4438,6089,6267,5505,4966,6749,6268,6750, -6269,5288,5554,3650,6090,6091,4624,6092,5690,6751,5863,4270,5691,4277,5555,5864, -6752,5692,4720,4865,6470,5151,4688,4825,6753,3094,6754,6471,3235,4653,6755,5213, -5399,6756,3201,4589,5865,4967,6472,5866,6473,5019,3016,6757,5321,4756,3957,4573, -6093,4993,5767,4721,6474,6758,5625,6759,4458,6475,6270,6760,5556,4994,5214,5252, -6271,3875,5768,6094,5034,5506,4376,5769,6761,2120,6476,5253,5770,6762,5771,5970, -3990,5971,5557,5558,5772,6477,6095,2787,4641,5972,5121,6096,6097,6272,6763,3703, -5867,5507,6273,4206,6274,4789,6098,6764,3619,3646,3833,3804,2394,3788,4936,3978, -4866,4899,6099,6100,5559,6478,6765,3599,5868,6101,5869,5870,6275,6766,4527,6767) + 852,1221,1400,1486, 882,2299,4036, 351, 28,1122, 700,6479,6480,6481,6482,6483, #last 512 +) -# flake8: noqa diff --git a/lib/chardet/gb2312prober.py b/lib/chardet/gb2312prober.py index 0325a2d8614a1a3c30e3da3716b4bef8e96b3216..438ad55e047f5b15d307bd0a5b4f91e296caa782 100644 --- a/lib/chardet/gb2312prober.py +++ b/lib/chardet/gb2312prober.py @@ -13,12 +13,12 @@ # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. -# +# # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. -# +# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA @@ -28,14 +28,15 @@ from .mbcharsetprober import MultiByteCharSetProber from .codingstatemachine import CodingStateMachine from .chardistribution import GB2312DistributionAnalysis -from .mbcssm import GB2312SMModel +from .mbcssm import GB2312_SM_MODEL class GB2312Prober(MultiByteCharSetProber): def __init__(self): - MultiByteCharSetProber.__init__(self) - self._mCodingSM = CodingStateMachine(GB2312SMModel) - self._mDistributionAnalyzer = GB2312DistributionAnalysis() + super(GB2312Prober, self).__init__() + self.coding_sm = CodingStateMachine(GB2312_SM_MODEL) + self.distribution_analyzer = GB2312DistributionAnalysis() self.reset() - def get_charset_name(self): + @property + def charset_name(self): return "GB2312" diff --git a/lib/chardet/hebrewprober.py b/lib/chardet/hebrewprober.py index ba225c5ef43a8c32a1b9ae1b33914dee89c536a0..90f1133ec3be97594f9fd03414e8ce08123cf8ea 100644 --- a/lib/chardet/hebrewprober.py +++ b/lib/chardet/hebrewprober.py @@ -26,7 +26,7 @@ ######################### END LICENSE BLOCK ######################### from .charsetprober import CharSetProber -from .constants import eNotMe, eDetecting +from .enums import ProbingState from .compat import wrap_ord # This prober doesn't actually recognize a language or a charset. @@ -126,56 +126,59 @@ from .compat import wrap_ord # model probers scores. The answer is returned in the form of the name of the # charset identified, either "windows-1255" or "ISO-8859-8". -# windows-1255 / ISO-8859-8 code points of interest -FINAL_KAF = 0xea -NORMAL_KAF = 0xeb -FINAL_MEM = 0xed -NORMAL_MEM = 0xee -FINAL_NUN = 0xef -NORMAL_NUN = 0xf0 -FINAL_PE = 0xf3 -NORMAL_PE = 0xf4 -FINAL_TSADI = 0xf5 -NORMAL_TSADI = 0xf6 - -# Minimum Visual vs Logical final letter score difference. -# If the difference is below this, don't rely solely on the final letter score -# distance. -MIN_FINAL_CHAR_DISTANCE = 5 +class HebrewProber(CharSetProber): + # windows-1255 / ISO-8859-8 code points of interest + FINAL_KAF = 0xea + NORMAL_KAF = 0xeb + FINAL_MEM = 0xed + NORMAL_MEM = 0xee + FINAL_NUN = 0xef + NORMAL_NUN = 0xf0 + FINAL_PE = 0xf3 + NORMAL_PE = 0xf4 + FINAL_TSADI = 0xf5 + NORMAL_TSADI = 0xf6 -# Minimum Visual vs Logical model score difference. -# If the difference is below this, don't rely at all on the model score -# distance. -MIN_MODEL_DISTANCE = 0.01 + # Minimum Visual vs Logical final letter score difference. + # If the difference is below this, don't rely solely on the final letter score + # distance. + MIN_FINAL_CHAR_DISTANCE = 5 -VISUAL_HEBREW_NAME = "ISO-8859-8" -LOGICAL_HEBREW_NAME = "windows-1255" + # Minimum Visual vs Logical model score difference. + # If the difference is below this, don't rely at all on the model score + # distance. + MIN_MODEL_DISTANCE = 0.01 + VISUAL_HEBREW_NAME = "ISO-8859-8" + LOGICAL_HEBREW_NAME = "windows-1255" -class HebrewProber(CharSetProber): def __init__(self): - CharSetProber.__init__(self) - self._mLogicalProber = None - self._mVisualProber = None + super(HebrewProber, self).__init__() + self._final_char_logical_score = None + self._final_char_visual_score = None + self._prev = None + self._before_prev = None + self._logical_prober = None + self._visual_prober = None self.reset() def reset(self): - self._mFinalCharLogicalScore = 0 - self._mFinalCharVisualScore = 0 + self._final_char_logical_score = 0 + self._final_char_visual_score = 0 # The two last characters seen in the previous buffer, # mPrev and mBeforePrev are initialized to space in order to simulate # a word delimiter at the beginning of the data - self._mPrev = ' ' - self._mBeforePrev = ' ' + self._prev = ' ' + self._before_prev = ' ' # These probers are owned by the group prober. def set_model_probers(self, logicalProber, visualProber): - self._mLogicalProber = logicalProber - self._mVisualProber = visualProber + self._logical_prober = logicalProber + self._visual_prober = visualProber def is_final(self, c): - return wrap_ord(c) in [FINAL_KAF, FINAL_MEM, FINAL_NUN, FINAL_PE, - FINAL_TSADI] + return wrap_ord(c) in [self.FINAL_KAF, self.FINAL_MEM, self.FINAL_NUN, + self.FINAL_PE, self.FINAL_TSADI] def is_non_final(self, c): # The normal Tsadi is not a good Non-Final letter due to words like @@ -188,9 +191,10 @@ class HebrewProber(CharSetProber): # for example legally end with a Non-Final Pe or Kaf. However, the # benefit of these letters as Non-Final letters outweighs the damage # since these words are quite rare. - return wrap_ord(c) in [NORMAL_KAF, NORMAL_MEM, NORMAL_NUN, NORMAL_PE] + return wrap_ord(c) in [self.NORMAL_KAF, self.NORMAL_MEM, + self.NORMAL_NUN, self.NORMAL_PE] - def feed(self, aBuf): + def feed(self, byte_str): # Final letter analysis for logical-visual decision. # Look for evidence that the received buffer is either logical Hebrew # or visual Hebrew. @@ -217,67 +221,69 @@ class HebrewProber(CharSetProber): # We automatically filter out all 7-bit characters (replace them with # spaces) so the word boundary detection works properly. [MAP] - if self.get_state() == eNotMe: + if self.state == ProbingState.not_me: # Both model probers say it's not them. No reason to continue. - return eNotMe + return ProbingState.not_me - aBuf = self.filter_high_bit_only(aBuf) + byte_str = self.filter_high_byte_only(byte_str) - for cur in aBuf: + for cur in byte_str: if cur == ' ': # We stand on a space - a word just ended - if self._mBeforePrev != ' ': - # next-to-last char was not a space so self._mPrev is not a + if self._before_prev != ' ': + # next-to-last char was not a space so self._prev is not a # 1 letter word - if self.is_final(self._mPrev): + if self.is_final(self._prev): # case (1) [-2:not space][-1:final letter][cur:space] - self._mFinalCharLogicalScore += 1 - elif self.is_non_final(self._mPrev): + self._final_char_logical_score += 1 + elif self.is_non_final(self._prev): # case (2) [-2:not space][-1:Non-Final letter][ # cur:space] - self._mFinalCharVisualScore += 1 + self._final_char_visual_score += 1 else: # Not standing on a space - if ((self._mBeforePrev == ' ') and - (self.is_final(self._mPrev)) and (cur != ' ')): + if ((self._before_prev == ' ') and + (self.is_final(self._prev)) and (cur != ' ')): # case (3) [-2:space][-1:final letter][cur:not space] - self._mFinalCharVisualScore += 1 - self._mBeforePrev = self._mPrev - self._mPrev = cur + self._final_char_visual_score += 1 + self._before_prev = self._prev + self._prev = cur # Forever detecting, till the end or until both model probers return - # eNotMe (handled above) - return eDetecting + # ProbingState.not_me (handled above) + return ProbingState.detecting - def get_charset_name(self): + @property + def charset_name(self): # Make the decision: is it Logical or Visual? # If the final letter score distance is dominant enough, rely on it. - finalsub = self._mFinalCharLogicalScore - self._mFinalCharVisualScore - if finalsub >= MIN_FINAL_CHAR_DISTANCE: - return LOGICAL_HEBREW_NAME - if finalsub <= -MIN_FINAL_CHAR_DISTANCE: - return VISUAL_HEBREW_NAME + finalsub = self._final_char_logical_score - self._final_char_visual_score + if finalsub >= self.MIN_FINAL_CHAR_DISTANCE: + return self.LOGICAL_HEBREW_NAME + if finalsub <= -self.MIN_FINAL_CHAR_DISTANCE: + return self.VISUAL_HEBREW_NAME # It's not dominant enough, try to rely on the model scores instead. - modelsub = (self._mLogicalProber.get_confidence() - - self._mVisualProber.get_confidence()) - if modelsub > MIN_MODEL_DISTANCE: - return LOGICAL_HEBREW_NAME - if modelsub < -MIN_MODEL_DISTANCE: - return VISUAL_HEBREW_NAME + modelsub = (self._logical_prober.get_confidence() + - self._visual_prober.get_confidence()) + if modelsub > self.MIN_MODEL_DISTANCE: + return self.LOGICAL_HEBREW_NAME + if modelsub < -self.MIN_MODEL_DISTANCE: + return self.VISUAL_HEBREW_NAME # Still no good, back to final letter distance, maybe it'll save the # day. if finalsub < 0.0: - return VISUAL_HEBREW_NAME + return self.VISUAL_HEBREW_NAME # (finalsub > 0 - Logical) or (don't know what to do) default to # Logical. - return LOGICAL_HEBREW_NAME + return self.LOGICAL_HEBREW_NAME - def get_state(self): + @property + def state(self): # Remain active as long as any of the model probers are active. - if (self._mLogicalProber.get_state() == eNotMe) and \ - (self._mVisualProber.get_state() == eNotMe): - return eNotMe - return eDetecting + if (self._logical_prober.state == ProbingState.not_me) and \ + (self._visual_prober.state == ProbingState.not_me): + return ProbingState.not_me + return ProbingState.detecting diff --git a/lib/chardet/jisfreq.py b/lib/chardet/jisfreq.py index 064345b0867d4d23ef17a4d7d11663dcf3cf4adf..83fc082b545106d02622de20f2083e8a7562f96c 100644 --- a/lib/chardet/jisfreq.py +++ b/lib/chardet/jisfreq.py @@ -46,7 +46,7 @@ JIS_TYPICAL_DISTRIBUTION_RATIO = 3.0 # Char to FreqOrder table , JIS_TABLE_SIZE = 4368 -JISCharToFreqOrder = ( +JIS_CHAR_TO_FREQ_ORDER = ( 40, 1, 6, 182, 152, 180, 295,2127, 285, 381,3295,4304,3068,4606,3165,3510, # 16 3511,1822,2785,4607,1193,2226,5070,4608, 171,2996,1247, 18, 179,5071, 856,1661, # 32 1262,5072, 619, 127,3431,3512,3230,1899,1700, 232, 228,1294,1298, 284, 283,2041, # 48 @@ -320,250 +320,6 @@ JISCharToFreqOrder = ( 2413,2477,1216,2725,2159, 334,3840,1328,3624,2921,1525,4132, 564,1056, 891,4363, # 4336 1444,1698,2385,2251,3729,1365,2281,2235,1717,6188, 864,3841,2515, 444, 527,2767, # 4352 2922,3625, 544, 461,6189, 566, 209,2437,3398,2098,1065,2068,3331,3626,3257,2137, # 4368 #last 512 -#Everything below is of no interest for detection purpose -2138,2122,3730,2888,1995,1820,1044,6190,6191,6192,6193,6194,6195,6196,6197,6198, # 4384 -6199,6200,6201,6202,6203,6204,6205,4670,6206,6207,6208,6209,6210,6211,6212,6213, # 4400 -6214,6215,6216,6217,6218,6219,6220,6221,6222,6223,6224,6225,6226,6227,6228,6229, # 4416 -6230,6231,6232,6233,6234,6235,6236,6237,3187,6238,6239,3969,6240,6241,6242,6243, # 4432 -6244,4671,6245,6246,4672,6247,6248,4133,6249,6250,4364,6251,2923,2556,2613,4673, # 4448 -4365,3970,6252,6253,6254,6255,4674,6256,6257,6258,2768,2353,4366,4675,4676,3188, # 4464 -4367,3463,6259,4134,4677,4678,6260,2267,6261,3842,3332,4368,3543,6262,6263,6264, # 4480 -3013,1954,1928,4135,4679,6265,6266,2478,3091,6267,4680,4369,6268,6269,1699,6270, # 4496 -3544,4136,4681,6271,4137,6272,4370,2804,6273,6274,2593,3971,3972,4682,6275,2236, # 4512 -4683,6276,6277,4684,6278,6279,4138,3973,4685,6280,6281,3258,6282,6283,6284,6285, # 4528 -3974,4686,2841,3975,6286,6287,3545,6288,6289,4139,4687,4140,6290,4141,6291,4142, # 4544 -6292,6293,3333,6294,6295,6296,4371,6297,3399,6298,6299,4372,3976,6300,6301,6302, # 4560 -4373,6303,6304,3843,3731,6305,4688,4374,6306,6307,3259,2294,6308,3732,2530,4143, # 4576 -6309,4689,6310,6311,6312,3048,6313,6314,4690,3733,2237,6315,6316,2282,3334,6317, # 4592 -6318,3844,6319,6320,4691,6321,3400,4692,6322,4693,6323,3049,6324,4375,6325,3977, # 4608 -6326,6327,6328,3546,6329,4694,3335,6330,4695,4696,6331,6332,6333,6334,4376,3978, # 4624 -6335,4697,3979,4144,6336,3980,4698,6337,6338,6339,6340,6341,4699,4700,4701,6342, # 4640 -6343,4702,6344,6345,4703,6346,6347,4704,6348,4705,4706,3135,6349,4707,6350,4708, # 4656 -6351,4377,6352,4709,3734,4145,6353,2506,4710,3189,6354,3050,4711,3981,6355,3547, # 4672 -3014,4146,4378,3735,2651,3845,3260,3136,2224,1986,6356,3401,6357,4712,2594,3627, # 4688 -3137,2573,3736,3982,4713,3628,4714,4715,2682,3629,4716,6358,3630,4379,3631,6359, # 4704 -6360,6361,3983,6362,6363,6364,6365,4147,3846,4717,6366,6367,3737,2842,6368,4718, # 4720 -2628,6369,3261,6370,2386,6371,6372,3738,3984,4719,3464,4720,3402,6373,2924,3336, # 4736 -4148,2866,6374,2805,3262,4380,2704,2069,2531,3138,2806,2984,6375,2769,6376,4721, # 4752 -4722,3403,6377,6378,3548,6379,6380,2705,3092,1979,4149,2629,3337,2889,6381,3338, # 4768 -4150,2557,3339,4381,6382,3190,3263,3739,6383,4151,4723,4152,2558,2574,3404,3191, # 4784 -6384,6385,4153,6386,4724,4382,6387,6388,4383,6389,6390,4154,6391,4725,3985,6392, # 4800 -3847,4155,6393,6394,6395,6396,6397,3465,6398,4384,6399,6400,6401,6402,6403,6404, # 4816 -4156,6405,6406,6407,6408,2123,6409,6410,2326,3192,4726,6411,6412,6413,6414,4385, # 4832 -4157,6415,6416,4158,6417,3093,3848,6418,3986,6419,6420,3849,6421,6422,6423,4159, # 4848 -6424,6425,4160,6426,3740,6427,6428,6429,6430,3987,6431,4727,6432,2238,6433,6434, # 4864 -4386,3988,6435,6436,3632,6437,6438,2843,6439,6440,6441,6442,3633,6443,2958,6444, # 4880 -6445,3466,6446,2364,4387,3850,6447,4388,2959,3340,6448,3851,6449,4728,6450,6451, # 4896 -3264,4729,6452,3193,6453,4389,4390,2706,3341,4730,6454,3139,6455,3194,6456,3051, # 4912 -2124,3852,1602,4391,4161,3853,1158,3854,4162,3989,4392,3990,4731,4732,4393,2040, # 4928 -4163,4394,3265,6457,2807,3467,3855,6458,6459,6460,3991,3468,4733,4734,6461,3140, # 4944 -2960,6462,4735,6463,6464,6465,6466,4736,4737,4738,4739,6467,6468,4164,2403,3856, # 4960 -6469,6470,2770,2844,6471,4740,6472,6473,6474,6475,6476,6477,6478,3195,6479,4741, # 4976 -4395,6480,2867,6481,4742,2808,6482,2493,4165,6483,6484,6485,6486,2295,4743,6487, # 4992 -6488,6489,3634,6490,6491,6492,6493,6494,6495,6496,2985,4744,6497,6498,4745,6499, # 5008 -6500,2925,3141,4166,6501,6502,4746,6503,6504,4747,6505,6506,6507,2890,6508,6509, # 5024 -6510,6511,6512,6513,6514,6515,6516,6517,6518,6519,3469,4167,6520,6521,6522,4748, # 5040 -4396,3741,4397,4749,4398,3342,2125,4750,6523,4751,4752,4753,3052,6524,2961,4168, # 5056 -6525,4754,6526,4755,4399,2926,4169,6527,3857,6528,4400,4170,6529,4171,6530,6531, # 5072 -2595,6532,6533,6534,6535,3635,6536,6537,6538,6539,6540,6541,6542,4756,6543,6544, # 5088 -6545,6546,6547,6548,4401,6549,6550,6551,6552,4402,3405,4757,4403,6553,6554,6555, # 5104 -4172,3742,6556,6557,6558,3992,3636,6559,6560,3053,2726,6561,3549,4173,3054,4404, # 5120 -6562,6563,3993,4405,3266,3550,2809,4406,6564,6565,6566,4758,4759,6567,3743,6568, # 5136 -4760,3744,4761,3470,6569,6570,6571,4407,6572,3745,4174,6573,4175,2810,4176,3196, # 5152 -4762,6574,4177,6575,6576,2494,2891,3551,6577,6578,3471,6579,4408,6580,3015,3197, # 5168 -6581,3343,2532,3994,3858,6582,3094,3406,4409,6583,2892,4178,4763,4410,3016,4411, # 5184 -6584,3995,3142,3017,2683,6585,4179,6586,6587,4764,4412,6588,6589,4413,6590,2986, # 5200 -6591,2962,3552,6592,2963,3472,6593,6594,4180,4765,6595,6596,2225,3267,4414,6597, # 5216 -3407,3637,4766,6598,6599,3198,6600,4415,6601,3859,3199,6602,3473,4767,2811,4416, # 5232 -1856,3268,3200,2575,3996,3997,3201,4417,6603,3095,2927,6604,3143,6605,2268,6606, # 5248 -3998,3860,3096,2771,6607,6608,3638,2495,4768,6609,3861,6610,3269,2745,4769,4181, # 5264 -3553,6611,2845,3270,6612,6613,6614,3862,6615,6616,4770,4771,6617,3474,3999,4418, # 5280 -4419,6618,3639,3344,6619,4772,4182,6620,2126,6621,6622,6623,4420,4773,6624,3018, # 5296 -6625,4774,3554,6626,4183,2025,3746,6627,4184,2707,6628,4421,4422,3097,1775,4185, # 5312 -3555,6629,6630,2868,6631,6632,4423,6633,6634,4424,2414,2533,2928,6635,4186,2387, # 5328 -6636,4775,6637,4187,6638,1891,4425,3202,3203,6639,6640,4776,6641,3345,6642,6643, # 5344 -3640,6644,3475,3346,3641,4000,6645,3144,6646,3098,2812,4188,3642,3204,6647,3863, # 5360 -3476,6648,3864,6649,4426,4001,6650,6651,6652,2576,6653,4189,4777,6654,6655,6656, # 5376 -2846,6657,3477,3205,4002,6658,4003,6659,3347,2252,6660,6661,6662,4778,6663,6664, # 5392 -6665,6666,6667,6668,6669,4779,4780,2048,6670,3478,3099,6671,3556,3747,4004,6672, # 5408 -6673,6674,3145,4005,3748,6675,6676,6677,6678,6679,3408,6680,6681,6682,6683,3206, # 5424 -3207,6684,6685,4781,4427,6686,4782,4783,4784,6687,6688,6689,4190,6690,6691,3479, # 5440 -6692,2746,6693,4428,6694,6695,6696,6697,6698,6699,4785,6700,6701,3208,2727,6702, # 5456 -3146,6703,6704,3409,2196,6705,4429,6706,6707,6708,2534,1996,6709,6710,6711,2747, # 5472 -6712,6713,6714,4786,3643,6715,4430,4431,6716,3557,6717,4432,4433,6718,6719,6720, # 5488 -6721,3749,6722,4006,4787,6723,6724,3644,4788,4434,6725,6726,4789,2772,6727,6728, # 5504 -6729,6730,6731,2708,3865,2813,4435,6732,6733,4790,4791,3480,6734,6735,6736,6737, # 5520 -4436,3348,6738,3410,4007,6739,6740,4008,6741,6742,4792,3411,4191,6743,6744,6745, # 5536 -6746,6747,3866,6748,3750,6749,6750,6751,6752,6753,6754,6755,3867,6756,4009,6757, # 5552 -4793,4794,6758,2814,2987,6759,6760,6761,4437,6762,6763,6764,6765,3645,6766,6767, # 5568 -3481,4192,6768,3751,6769,6770,2174,6771,3868,3752,6772,6773,6774,4193,4795,4438, # 5584 -3558,4796,4439,6775,4797,6776,6777,4798,6778,4799,3559,4800,6779,6780,6781,3482, # 5600 -6782,2893,6783,6784,4194,4801,4010,6785,6786,4440,6787,4011,6788,6789,6790,6791, # 5616 -6792,6793,4802,6794,6795,6796,4012,6797,6798,6799,6800,3349,4803,3483,6801,4804, # 5632 -4195,6802,4013,6803,6804,4196,6805,4014,4015,6806,2847,3271,2848,6807,3484,6808, # 5648 -6809,6810,4441,6811,4442,4197,4443,3272,4805,6812,3412,4016,1579,6813,6814,4017, # 5664 -6815,3869,6816,2964,6817,4806,6818,6819,4018,3646,6820,6821,4807,4019,4020,6822, # 5680 -6823,3560,6824,6825,4021,4444,6826,4198,6827,6828,4445,6829,6830,4199,4808,6831, # 5696 -6832,6833,3870,3019,2458,6834,3753,3413,3350,6835,4809,3871,4810,3561,4446,6836, # 5712 -6837,4447,4811,4812,6838,2459,4448,6839,4449,6840,6841,4022,3872,6842,4813,4814, # 5728 -6843,6844,4815,4200,4201,4202,6845,4023,6846,6847,4450,3562,3873,6848,6849,4816, # 5744 -4817,6850,4451,4818,2139,6851,3563,6852,6853,3351,6854,6855,3352,4024,2709,3414, # 5760 -4203,4452,6856,4204,6857,6858,3874,3875,6859,6860,4819,6861,6862,6863,6864,4453, # 5776 -3647,6865,6866,4820,6867,6868,6869,6870,4454,6871,2869,6872,6873,4821,6874,3754, # 5792 -6875,4822,4205,6876,6877,6878,3648,4206,4455,6879,4823,6880,4824,3876,6881,3055, # 5808 -4207,6882,3415,6883,6884,6885,4208,4209,6886,4210,3353,6887,3354,3564,3209,3485, # 5824 -2652,6888,2728,6889,3210,3755,6890,4025,4456,6891,4825,6892,6893,6894,6895,4211, # 5840 -6896,6897,6898,4826,6899,6900,4212,6901,4827,6902,2773,3565,6903,4828,6904,6905, # 5856 -6906,6907,3649,3650,6908,2849,3566,6909,3567,3100,6910,6911,6912,6913,6914,6915, # 5872 -4026,6916,3355,4829,3056,4457,3756,6917,3651,6918,4213,3652,2870,6919,4458,6920, # 5888 -2438,6921,6922,3757,2774,4830,6923,3356,4831,4832,6924,4833,4459,3653,2507,6925, # 5904 -4834,2535,6926,6927,3273,4027,3147,6928,3568,6929,6930,6931,4460,6932,3877,4461, # 5920 -2729,3654,6933,6934,6935,6936,2175,4835,2630,4214,4028,4462,4836,4215,6937,3148, # 5936 -4216,4463,4837,4838,4217,6938,6939,2850,4839,6940,4464,6941,6942,6943,4840,6944, # 5952 -4218,3274,4465,6945,6946,2710,6947,4841,4466,6948,6949,2894,6950,6951,4842,6952, # 5968 -4219,3057,2871,6953,6954,6955,6956,4467,6957,2711,6958,6959,6960,3275,3101,4843, # 5984 -6961,3357,3569,6962,4844,6963,6964,4468,4845,3570,6965,3102,4846,3758,6966,4847, # 6000 -3878,4848,4849,4029,6967,2929,3879,4850,4851,6968,6969,1733,6970,4220,6971,6972, # 6016 -6973,6974,6975,6976,4852,6977,6978,6979,6980,6981,6982,3759,6983,6984,6985,3486, # 6032 -3487,6986,3488,3416,6987,6988,6989,6990,6991,6992,6993,6994,6995,6996,6997,4853, # 6048 -6998,6999,4030,7000,7001,3211,7002,7003,4221,7004,7005,3571,4031,7006,3572,7007, # 6064 -2614,4854,2577,7008,7009,2965,3655,3656,4855,2775,3489,3880,4222,4856,3881,4032, # 6080 -3882,3657,2730,3490,4857,7010,3149,7011,4469,4858,2496,3491,4859,2283,7012,7013, # 6096 -7014,2365,4860,4470,7015,7016,3760,7017,7018,4223,1917,7019,7020,7021,4471,7022, # 6112 -2776,4472,7023,7024,7025,7026,4033,7027,3573,4224,4861,4034,4862,7028,7029,1929, # 6128 -3883,4035,7030,4473,3058,7031,2536,3761,3884,7032,4036,7033,2966,2895,1968,4474, # 6144 -3276,4225,3417,3492,4226,2105,7034,7035,1754,2596,3762,4227,4863,4475,3763,4864, # 6160 -3764,2615,2777,3103,3765,3658,3418,4865,2296,3766,2815,7036,7037,7038,3574,2872, # 6176 -3277,4476,7039,4037,4477,7040,7041,4038,7042,7043,7044,7045,7046,7047,2537,7048, # 6192 -7049,7050,7051,7052,7053,7054,4478,7055,7056,3767,3659,4228,3575,7057,7058,4229, # 6208 -7059,7060,7061,3660,7062,3212,7063,3885,4039,2460,7064,7065,7066,7067,7068,7069, # 6224 -7070,7071,7072,7073,7074,4866,3768,4867,7075,7076,7077,7078,4868,3358,3278,2653, # 6240 -7079,7080,4479,3886,7081,7082,4869,7083,7084,7085,7086,7087,7088,2538,7089,7090, # 6256 -7091,4040,3150,3769,4870,4041,2896,3359,4230,2930,7092,3279,7093,2967,4480,3213, # 6272 -4481,3661,7094,7095,7096,7097,7098,7099,7100,7101,7102,2461,3770,7103,7104,4231, # 6288 -3151,7105,7106,7107,4042,3662,7108,7109,4871,3663,4872,4043,3059,7110,7111,7112, # 6304 -3493,2988,7113,4873,7114,7115,7116,3771,4874,7117,7118,4232,4875,7119,3576,2336, # 6320 -4876,7120,4233,3419,4044,4877,4878,4482,4483,4879,4484,4234,7121,3772,4880,1045, # 6336 -3280,3664,4881,4882,7122,7123,7124,7125,4883,7126,2778,7127,4485,4486,7128,4884, # 6352 -3214,3887,7129,7130,3215,7131,4885,4045,7132,7133,4046,7134,7135,7136,7137,7138, # 6368 -7139,7140,7141,7142,7143,4235,7144,4886,7145,7146,7147,4887,7148,7149,7150,4487, # 6384 -4047,4488,7151,7152,4888,4048,2989,3888,7153,3665,7154,4049,7155,7156,7157,7158, # 6400 -7159,7160,2931,4889,4890,4489,7161,2631,3889,4236,2779,7162,7163,4891,7164,3060, # 6416 -7165,1672,4892,7166,4893,4237,3281,4894,7167,7168,3666,7169,3494,7170,7171,4050, # 6432 -7172,7173,3104,3360,3420,4490,4051,2684,4052,7174,4053,7175,7176,7177,2253,4054, # 6448 -7178,7179,4895,7180,3152,3890,3153,4491,3216,7181,7182,7183,2968,4238,4492,4055, # 6464 -7184,2990,7185,2479,7186,7187,4493,7188,7189,7190,7191,7192,4896,7193,4897,2969, # 6480 -4494,4898,7194,3495,7195,7196,4899,4495,7197,3105,2731,7198,4900,7199,7200,7201, # 6496 -4056,7202,3361,7203,7204,4496,4901,4902,7205,4497,7206,7207,2315,4903,7208,4904, # 6512 -7209,4905,2851,7210,7211,3577,7212,3578,4906,7213,4057,3667,4907,7214,4058,2354, # 6528 -3891,2376,3217,3773,7215,7216,7217,7218,7219,4498,7220,4908,3282,2685,7221,3496, # 6544 -4909,2632,3154,4910,7222,2337,7223,4911,7224,7225,7226,4912,4913,3283,4239,4499, # 6560 -7227,2816,7228,7229,7230,7231,7232,7233,7234,4914,4500,4501,7235,7236,7237,2686, # 6576 -7238,4915,7239,2897,4502,7240,4503,7241,2516,7242,4504,3362,3218,7243,7244,7245, # 6592 -4916,7246,7247,4505,3363,7248,7249,7250,7251,3774,4506,7252,7253,4917,7254,7255, # 6608 -3284,2991,4918,4919,3219,3892,4920,3106,3497,4921,7256,7257,7258,4922,7259,4923, # 6624 -3364,4507,4508,4059,7260,4240,3498,7261,7262,4924,7263,2992,3893,4060,3220,7264, # 6640 -7265,7266,7267,7268,7269,4509,3775,7270,2817,7271,4061,4925,4510,3776,7272,4241, # 6656 -4511,3285,7273,7274,3499,7275,7276,7277,4062,4512,4926,7278,3107,3894,7279,7280, # 6672 -4927,7281,4513,7282,7283,3668,7284,7285,4242,4514,4243,7286,2058,4515,4928,4929, # 6688 -4516,7287,3286,4244,7288,4517,7289,7290,7291,3669,7292,7293,4930,4931,4932,2355, # 6704 -4933,7294,2633,4518,7295,4245,7296,7297,4519,7298,7299,4520,4521,4934,7300,4246, # 6720 -4522,7301,7302,7303,3579,7304,4247,4935,7305,4936,7306,7307,7308,7309,3777,7310, # 6736 -4523,7311,7312,7313,4248,3580,7314,4524,3778,4249,7315,3581,7316,3287,7317,3221, # 6752 -7318,4937,7319,7320,7321,7322,7323,7324,4938,4939,7325,4525,7326,7327,7328,4063, # 6768 -7329,7330,4940,7331,7332,4941,7333,4526,7334,3500,2780,1741,4942,2026,1742,7335, # 6784 -7336,3582,4527,2388,7337,7338,7339,4528,7340,4250,4943,7341,7342,7343,4944,7344, # 6800 -7345,7346,3020,7347,4945,7348,7349,7350,7351,3895,7352,3896,4064,3897,7353,7354, # 6816 -7355,4251,7356,7357,3898,7358,3779,7359,3780,3288,7360,7361,4529,7362,4946,4530, # 6832 -2027,7363,3899,4531,4947,3222,3583,7364,4948,7365,7366,7367,7368,4949,3501,4950, # 6848 -3781,4951,4532,7369,2517,4952,4252,4953,3155,7370,4954,4955,4253,2518,4533,7371, # 6864 -7372,2712,4254,7373,7374,7375,3670,4956,3671,7376,2389,3502,4065,7377,2338,7378, # 6880 -7379,7380,7381,3061,7382,4957,7383,7384,7385,7386,4958,4534,7387,7388,2993,7389, # 6896 -3062,7390,4959,7391,7392,7393,4960,3108,4961,7394,4535,7395,4962,3421,4536,7396, # 6912 -4963,7397,4964,1857,7398,4965,7399,7400,2176,3584,4966,7401,7402,3422,4537,3900, # 6928 -3585,7403,3782,7404,2852,7405,7406,7407,4538,3783,2654,3423,4967,4539,7408,3784, # 6944 -3586,2853,4540,4541,7409,3901,7410,3902,7411,7412,3785,3109,2327,3903,7413,7414, # 6960 -2970,4066,2932,7415,7416,7417,3904,3672,3424,7418,4542,4543,4544,7419,4968,7420, # 6976 -7421,4255,7422,7423,7424,7425,7426,4067,7427,3673,3365,4545,7428,3110,2559,3674, # 6992 -7429,7430,3156,7431,7432,3503,7433,3425,4546,7434,3063,2873,7435,3223,4969,4547, # 7008 -4548,2898,4256,4068,7436,4069,3587,3786,2933,3787,4257,4970,4971,3788,7437,4972, # 7024 -3064,7438,4549,7439,7440,7441,7442,7443,4973,3905,7444,2874,7445,7446,7447,7448, # 7040 -3021,7449,4550,3906,3588,4974,7450,7451,3789,3675,7452,2578,7453,4070,7454,7455, # 7056 -7456,4258,3676,7457,4975,7458,4976,4259,3790,3504,2634,4977,3677,4551,4260,7459, # 7072 -7460,7461,7462,3907,4261,4978,7463,7464,7465,7466,4979,4980,7467,7468,2213,4262, # 7088 -7469,7470,7471,3678,4981,7472,2439,7473,4263,3224,3289,7474,3908,2415,4982,7475, # 7104 -4264,7476,4983,2655,7477,7478,2732,4552,2854,2875,7479,7480,4265,7481,4553,4984, # 7120 -7482,7483,4266,7484,3679,3366,3680,2818,2781,2782,3367,3589,4554,3065,7485,4071, # 7136 -2899,7486,7487,3157,2462,4072,4555,4073,4985,4986,3111,4267,2687,3368,4556,4074, # 7152 -3791,4268,7488,3909,2783,7489,2656,1962,3158,4557,4987,1963,3159,3160,7490,3112, # 7168 -4988,4989,3022,4990,4991,3792,2855,7491,7492,2971,4558,7493,7494,4992,7495,7496, # 7184 -7497,7498,4993,7499,3426,4559,4994,7500,3681,4560,4269,4270,3910,7501,4075,4995, # 7200 -4271,7502,7503,4076,7504,4996,7505,3225,4997,4272,4077,2819,3023,7506,7507,2733, # 7216 -4561,7508,4562,7509,3369,3793,7510,3590,2508,7511,7512,4273,3113,2994,2616,7513, # 7232 -7514,7515,7516,7517,7518,2820,3911,4078,2748,7519,7520,4563,4998,7521,7522,7523, # 7248 -7524,4999,4274,7525,4564,3682,2239,4079,4565,7526,7527,7528,7529,5000,7530,7531, # 7264 -5001,4275,3794,7532,7533,7534,3066,5002,4566,3161,7535,7536,4080,7537,3162,7538, # 7280 -7539,4567,7540,7541,7542,7543,7544,7545,5003,7546,4568,7547,7548,7549,7550,7551, # 7296 -7552,7553,7554,7555,7556,5004,7557,7558,7559,5005,7560,3795,7561,4569,7562,7563, # 7312 -7564,2821,3796,4276,4277,4081,7565,2876,7566,5006,7567,7568,2900,7569,3797,3912, # 7328 -7570,7571,7572,4278,7573,7574,7575,5007,7576,7577,5008,7578,7579,4279,2934,7580, # 7344 -7581,5009,7582,4570,7583,4280,7584,7585,7586,4571,4572,3913,7587,4573,3505,7588, # 7360 -5010,7589,7590,7591,7592,3798,4574,7593,7594,5011,7595,4281,7596,7597,7598,4282, # 7376 -5012,7599,7600,5013,3163,7601,5014,7602,3914,7603,7604,2734,4575,4576,4577,7605, # 7392 -7606,7607,7608,7609,3506,5015,4578,7610,4082,7611,2822,2901,2579,3683,3024,4579, # 7408 -3507,7612,4580,7613,3226,3799,5016,7614,7615,7616,7617,7618,7619,7620,2995,3290, # 7424 -7621,4083,7622,5017,7623,7624,7625,7626,7627,4581,3915,7628,3291,7629,5018,7630, # 7440 -7631,7632,7633,4084,7634,7635,3427,3800,7636,7637,4582,7638,5019,4583,5020,7639, # 7456 -3916,7640,3801,5021,4584,4283,7641,7642,3428,3591,2269,7643,2617,7644,4585,3592, # 7472 -7645,4586,2902,7646,7647,3227,5022,7648,4587,7649,4284,7650,7651,7652,4588,2284, # 7488 -7653,5023,7654,7655,7656,4589,5024,3802,7657,7658,5025,3508,4590,7659,7660,7661, # 7504 -1969,5026,7662,7663,3684,1821,2688,7664,2028,2509,4285,7665,2823,1841,7666,2689, # 7520 -3114,7667,3917,4085,2160,5027,5028,2972,7668,5029,7669,7670,7671,3593,4086,7672, # 7536 -4591,4087,5030,3803,7673,7674,7675,7676,7677,7678,7679,4286,2366,4592,4593,3067, # 7552 -2328,7680,7681,4594,3594,3918,2029,4287,7682,5031,3919,3370,4288,4595,2856,7683, # 7568 -3509,7684,7685,5032,5033,7686,7687,3804,2784,7688,7689,7690,7691,3371,7692,7693, # 7584 -2877,5034,7694,7695,3920,4289,4088,7696,7697,7698,5035,7699,5036,4290,5037,5038, # 7600 -5039,7700,7701,7702,5040,5041,3228,7703,1760,7704,5042,3229,4596,2106,4089,7705, # 7616 -4597,2824,5043,2107,3372,7706,4291,4090,5044,7707,4091,7708,5045,3025,3805,4598, # 7632 -4292,4293,4294,3373,7709,4599,7710,5046,7711,7712,5047,5048,3806,7713,7714,7715, # 7648 -5049,7716,7717,7718,7719,4600,5050,7720,7721,7722,5051,7723,4295,3429,7724,7725, # 7664 -7726,7727,3921,7728,3292,5052,4092,7729,7730,7731,7732,7733,7734,7735,5053,5054, # 7680 -7736,7737,7738,7739,3922,3685,7740,7741,7742,7743,2635,5055,7744,5056,4601,7745, # 7696 -7746,2560,7747,7748,7749,7750,3923,7751,7752,7753,7754,7755,4296,2903,7756,7757, # 7712 -7758,7759,7760,3924,7761,5057,4297,7762,7763,5058,4298,7764,4093,7765,7766,5059, # 7728 -3925,7767,7768,7769,7770,7771,7772,7773,7774,7775,7776,3595,7777,4299,5060,4094, # 7744 -7778,3293,5061,7779,7780,4300,7781,7782,4602,7783,3596,7784,7785,3430,2367,7786, # 7760 -3164,5062,5063,4301,7787,7788,4095,5064,5065,7789,3374,3115,7790,7791,7792,7793, # 7776 -7794,7795,7796,3597,4603,7797,7798,3686,3116,3807,5066,7799,7800,5067,7801,7802, # 7792 -4604,4302,5068,4303,4096,7803,7804,3294,7805,7806,5069,4605,2690,7807,3026,7808, # 7808 -7809,7810,7811,7812,7813,7814,7815,7816,7817,7818,7819,7820,7821,7822,7823,7824, # 7824 -7825,7826,7827,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837,7838,7839,7840, # 7840 -7841,7842,7843,7844,7845,7846,7847,7848,7849,7850,7851,7852,7853,7854,7855,7856, # 7856 -7857,7858,7859,7860,7861,7862,7863,7864,7865,7866,7867,7868,7869,7870,7871,7872, # 7872 -7873,7874,7875,7876,7877,7878,7879,7880,7881,7882,7883,7884,7885,7886,7887,7888, # 7888 -7889,7890,7891,7892,7893,7894,7895,7896,7897,7898,7899,7900,7901,7902,7903,7904, # 7904 -7905,7906,7907,7908,7909,7910,7911,7912,7913,7914,7915,7916,7917,7918,7919,7920, # 7920 -7921,7922,7923,7924,3926,7925,7926,7927,7928,7929,7930,7931,7932,7933,7934,7935, # 7936 -7936,7937,7938,7939,7940,7941,7942,7943,7944,7945,7946,7947,7948,7949,7950,7951, # 7952 -7952,7953,7954,7955,7956,7957,7958,7959,7960,7961,7962,7963,7964,7965,7966,7967, # 7968 -7968,7969,7970,7971,7972,7973,7974,7975,7976,7977,7978,7979,7980,7981,7982,7983, # 7984 -7984,7985,7986,7987,7988,7989,7990,7991,7992,7993,7994,7995,7996,7997,7998,7999, # 8000 -8000,8001,8002,8003,8004,8005,8006,8007,8008,8009,8010,8011,8012,8013,8014,8015, # 8016 -8016,8017,8018,8019,8020,8021,8022,8023,8024,8025,8026,8027,8028,8029,8030,8031, # 8032 -8032,8033,8034,8035,8036,8037,8038,8039,8040,8041,8042,8043,8044,8045,8046,8047, # 8048 -8048,8049,8050,8051,8052,8053,8054,8055,8056,8057,8058,8059,8060,8061,8062,8063, # 8064 -8064,8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079, # 8080 -8080,8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095, # 8096 -8096,8097,8098,8099,8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110,8111, # 8112 -8112,8113,8114,8115,8116,8117,8118,8119,8120,8121,8122,8123,8124,8125,8126,8127, # 8128 -8128,8129,8130,8131,8132,8133,8134,8135,8136,8137,8138,8139,8140,8141,8142,8143, # 8144 -8144,8145,8146,8147,8148,8149,8150,8151,8152,8153,8154,8155,8156,8157,8158,8159, # 8160 -8160,8161,8162,8163,8164,8165,8166,8167,8168,8169,8170,8171,8172,8173,8174,8175, # 8176 -8176,8177,8178,8179,8180,8181,8182,8183,8184,8185,8186,8187,8188,8189,8190,8191, # 8192 -8192,8193,8194,8195,8196,8197,8198,8199,8200,8201,8202,8203,8204,8205,8206,8207, # 8208 -8208,8209,8210,8211,8212,8213,8214,8215,8216,8217,8218,8219,8220,8221,8222,8223, # 8224 -8224,8225,8226,8227,8228,8229,8230,8231,8232,8233,8234,8235,8236,8237,8238,8239, # 8240 -8240,8241,8242,8243,8244,8245,8246,8247,8248,8249,8250,8251,8252,8253,8254,8255, # 8256 -8256,8257,8258,8259,8260,8261,8262,8263,8264,8265,8266,8267,8268,8269,8270,8271) # 8272 +) + -# flake8: noqa diff --git a/lib/chardet/jpcntx.py b/lib/chardet/jpcntx.py index f7f69ba4cdaba120b5d8c6932e6f7eb4c60a0913..4c180def74be638a2b86c5a7c903989bec7ea719 100644 --- a/lib/chardet/jpcntx.py +++ b/lib/chardet/jpcntx.py @@ -27,12 +27,6 @@ from .compat import wrap_ord -NUM_OF_CATEGORY = 6 -DONT_KNOW = -1 -ENOUGH_REL_THRESHOLD = 100 -MAX_REL_THRESHOLD = 1000 -MINIMUM_DATA_THRESHOLD = 4 - # This is hiragana 2-char sequence table, the number in each cell represents its frequency category jp2CharContext = ( (0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1), @@ -120,24 +114,35 @@ jp2CharContext = ( (0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1), ) -class JapaneseContextAnalysis: +class JapaneseContextAnalysis(object): + NUM_OF_CATEGORY = 6 + DONT_KNOW = -1 + ENOUGH_REL_THRESHOLD = 100 + MAX_REL_THRESHOLD = 1000 + MINIMUM_DATA_THRESHOLD = 4 + def __init__(self): + self._total_rel = None + self._rel_sample = None + self._need_to_skip_char_num = None + self._last_char_order = None + self._done = None self.reset() def reset(self): - self._mTotalRel = 0 # total sequence received + self._total_rel = 0 # total sequence received # category counters, each interger counts sequence in its category - self._mRelSample = [0] * NUM_OF_CATEGORY + self._rel_sample = [0] * self.NUM_OF_CATEGORY # if last byte in current buffer is not the last byte of a character, # we need to know how many bytes to skip in next buffer - self._mNeedToSkipCharNum = 0 - self._mLastCharOrder = -1 # The order of previous char + self._need_to_skip_char_num = 0 + self._last_char_order = -1 # The order of previous char # If this flag is set to True, detection is done and conclusion has # been made - self._mDone = False + self._done = False - def feed(self, aBuf, aLen): - if self._mDone: + def feed(self, byte_str, num_bytes): + if self._done: return # The buffer we got is byte oriented, and a character may span in more than one @@ -147,73 +152,83 @@ class JapaneseContextAnalysis: # well and analyse the character once it is complete, but since a # character will not make much difference, by simply skipping # this character will simply our logic and improve performance. - i = self._mNeedToSkipCharNum - while i < aLen: - order, charLen = self.get_order(aBuf[i:i + 2]) - i += charLen - if i > aLen: - self._mNeedToSkipCharNum = i - aLen - self._mLastCharOrder = -1 + i = self._need_to_skip_char_num + while i < num_bytes: + order, char_len = self.get_order(byte_str[i:i + 2]) + i += char_len + if i > num_bytes: + self._need_to_skip_char_num = i - num_bytes + self._last_char_order = -1 else: - if (order != -1) and (self._mLastCharOrder != -1): - self._mTotalRel += 1 - if self._mTotalRel > MAX_REL_THRESHOLD: - self._mDone = True + if (order != -1) and (self._last_char_order != -1): + self._total_rel += 1 + if self._total_rel > self.MAX_REL_THRESHOLD: + self._done = True break - self._mRelSample[jp2CharContext[self._mLastCharOrder][order]] += 1 - self._mLastCharOrder = order + self._rel_sample[jp2CharContext[self._last_char_order][order]] += 1 + self._last_char_order = order def got_enough_data(self): - return self._mTotalRel > ENOUGH_REL_THRESHOLD + return self._total_rel > self.ENOUGH_REL_THRESHOLD def get_confidence(self): # This is just one way to calculate confidence. It works well for me. - if self._mTotalRel > MINIMUM_DATA_THRESHOLD: - return (self._mTotalRel - self._mRelSample[0]) / self._mTotalRel + if self._total_rel > self.MINIMUM_DATA_THRESHOLD: + return (self._total_rel - self._rel_sample[0]) / self._total_rel else: - return DONT_KNOW + return self.DONT_KNOW - def get_order(self, aBuf): + def get_order(self, byte_str): return -1, 1 class SJISContextAnalysis(JapaneseContextAnalysis): - def get_order(self, aBuf): - if not aBuf: + def __init__(self): + super(SJISContextAnalysis, self).__init__() + self._charset_name = "SHIFT_JIS" + + @property + def charset_name(self): + return self._charset_name + + def get_order(self, byte_str): + if not byte_str: return -1, 1 # find out current char's byte length - first_char = wrap_ord(aBuf[0]) - if ((0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC)): - charLen = 2 + first_char = wrap_ord(byte_str[0]) + if (0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC): + char_len = 2 + if (first_char == 0x87) or (0xFA <= first_char <= 0xFC): + self._charset_name = "CP932" else: - charLen = 1 + char_len = 1 # return its order if it is hiragana - if len(aBuf) > 1: - second_char = wrap_ord(aBuf[1]) + if len(byte_str) > 1: + second_char = wrap_ord(byte_str[1]) if (first_char == 202) and (0x9F <= second_char <= 0xF1): - return second_char - 0x9F, charLen + return second_char - 0x9F, char_len - return -1, charLen + return -1, char_len class EUCJPContextAnalysis(JapaneseContextAnalysis): - def get_order(self, aBuf): - if not aBuf: + def get_order(self, byte_str): + if not byte_str: return -1, 1 # find out current char's byte length - first_char = wrap_ord(aBuf[0]) + first_char = wrap_ord(byte_str[0]) if (first_char == 0x8E) or (0xA1 <= first_char <= 0xFE): - charLen = 2 + char_len = 2 elif first_char == 0x8F: - charLen = 3 + char_len = 3 else: - charLen = 1 + char_len = 1 # return its order if it is hiragana - if len(aBuf) > 1: - second_char = wrap_ord(aBuf[1]) + if len(byte_str) > 1: + second_char = wrap_ord(byte_str[1]) if (first_char == 0xA4) and (0xA1 <= second_char <= 0xF3): - return second_char - 0xA1, charLen + return second_char - 0xA1, char_len + + return -1, char_len - return -1, charLen -# flake8: noqa diff --git a/lib/chardet/langbulgarianmodel.py b/lib/chardet/langbulgarianmodel.py index e5788fc64a66e53a3dfe55b2a075d760e6904780..98b6d1aa0378cebd3f12e8f35308d15fa21f0f50 100644 --- a/lib/chardet/langbulgarianmodel.py +++ b/lib/chardet/langbulgarianmodel.py @@ -210,20 +210,20 @@ BulgarianLangModel = ( ) Latin5BulgarianModel = { - 'charToOrderMap': Latin5_BulgarianCharToOrderMap, - 'precedenceMatrix': BulgarianLangModel, - 'mTypicalPositiveRatio': 0.969392, - 'keepEnglishLetter': False, - 'charsetName': "ISO-8859-5" + 'char_to_order_map': Latin5_BulgarianCharToOrderMap, + 'precedence_matrix': BulgarianLangModel, + 'typical_positive_ratio': 0.969392, + 'keep_english_letter': False, + 'charset_name': "ISO-8859-5" } Win1251BulgarianModel = { - 'charToOrderMap': win1251BulgarianCharToOrderMap, - 'precedenceMatrix': BulgarianLangModel, - 'mTypicalPositiveRatio': 0.969392, - 'keepEnglishLetter': False, - 'charsetName': "windows-1251" + 'char_to_order_map': win1251BulgarianCharToOrderMap, + 'precedence_matrix': BulgarianLangModel, + 'typical_positive_ratio': 0.969392, + 'keep_english_letter': False, + 'charset_name': "windows-1251" } -# flake8: noqa + diff --git a/lib/chardet/langcyrillicmodel.py b/lib/chardet/langcyrillicmodel.py index a86f54bd5461039b1d9ecb24248f8b2c0fcfe5a1..ebc3dbea88f9bc609734997436bef6af430566e0 100644 --- a/lib/chardet/langcyrillicmodel.py +++ b/lib/chardet/langcyrillicmodel.py @@ -27,7 +27,7 @@ # KOI8-R language model # Character Mapping Table: -KOI8R_CharToOrderMap = ( +KOI8R_char_to_order_map = ( 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 @@ -46,7 +46,7 @@ KOI8R_CharToOrderMap = ( 35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, # f0 ) -win1251_CharToOrderMap = ( +win1251_char_to_order_map = ( 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 @@ -65,7 +65,7 @@ win1251_CharToOrderMap = ( 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, ) -latin5_CharToOrderMap = ( +latin5_char_to_order_map = ( 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 @@ -84,7 +84,7 @@ latin5_CharToOrderMap = ( 239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255, ) -macCyrillic_CharToOrderMap = ( +macCyrillic_char_to_order_map = ( 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 @@ -103,7 +103,7 @@ macCyrillic_CharToOrderMap = ( 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255, ) -IBM855_CharToOrderMap = ( +IBM855_char_to_order_map = ( 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 @@ -122,7 +122,7 @@ IBM855_CharToOrderMap = ( 250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255, ) -IBM866_CharToOrderMap = ( +IBM866_char_to_order_map = ( 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 @@ -279,51 +279,51 @@ RussianLangModel = ( ) Koi8rModel = { - 'charToOrderMap': KOI8R_CharToOrderMap, - 'precedenceMatrix': RussianLangModel, - 'mTypicalPositiveRatio': 0.976601, - 'keepEnglishLetter': False, - 'charsetName': "KOI8-R" + 'char_to_order_map': KOI8R_char_to_order_map, + 'precedence_matrix': RussianLangModel, + 'typical_positive_ratio': 0.976601, + 'keep_english_letter': False, + 'charset_name': "KOI8-R" } Win1251CyrillicModel = { - 'charToOrderMap': win1251_CharToOrderMap, - 'precedenceMatrix': RussianLangModel, - 'mTypicalPositiveRatio': 0.976601, - 'keepEnglishLetter': False, - 'charsetName': "windows-1251" + 'char_to_order_map': win1251_char_to_order_map, + 'precedence_matrix': RussianLangModel, + 'typical_positive_ratio': 0.976601, + 'keep_english_letter': False, + 'charset_name': "windows-1251" } Latin5CyrillicModel = { - 'charToOrderMap': latin5_CharToOrderMap, - 'precedenceMatrix': RussianLangModel, - 'mTypicalPositiveRatio': 0.976601, - 'keepEnglishLetter': False, - 'charsetName': "ISO-8859-5" + 'char_to_order_map': latin5_char_to_order_map, + 'precedence_matrix': RussianLangModel, + 'typical_positive_ratio': 0.976601, + 'keep_english_letter': False, + 'charset_name': "ISO-8859-5" } MacCyrillicModel = { - 'charToOrderMap': macCyrillic_CharToOrderMap, - 'precedenceMatrix': RussianLangModel, - 'mTypicalPositiveRatio': 0.976601, - 'keepEnglishLetter': False, - 'charsetName': "MacCyrillic" -}; + 'char_to_order_map': macCyrillic_char_to_order_map, + 'precedence_matrix': RussianLangModel, + 'typical_positive_ratio': 0.976601, + 'keep_english_letter': False, + 'charset_name': "MacCyrillic" +} Ibm866Model = { - 'charToOrderMap': IBM866_CharToOrderMap, - 'precedenceMatrix': RussianLangModel, - 'mTypicalPositiveRatio': 0.976601, - 'keepEnglishLetter': False, - 'charsetName': "IBM866" + 'char_to_order_map': IBM866_char_to_order_map, + 'precedence_matrix': RussianLangModel, + 'typical_positive_ratio': 0.976601, + 'keep_english_letter': False, + 'charset_name': "IBM866" } Ibm855Model = { - 'charToOrderMap': IBM855_CharToOrderMap, - 'precedenceMatrix': RussianLangModel, - 'mTypicalPositiveRatio': 0.976601, - 'keepEnglishLetter': False, - 'charsetName': "IBM855" + 'char_to_order_map': IBM855_char_to_order_map, + 'precedence_matrix': RussianLangModel, + 'typical_positive_ratio': 0.976601, + 'keep_english_letter': False, + 'charset_name': "IBM855" } -# flake8: noqa + diff --git a/lib/chardet/langgreekmodel.py b/lib/chardet/langgreekmodel.py index ddb5837655d9c3adb3113cccf57e6c700b4c7a92..8de8e03fb93a9f428caf2e48b76ee8997eb53d74 100644 --- a/lib/chardet/langgreekmodel.py +++ b/lib/chardet/langgreekmodel.py @@ -31,7 +31,7 @@ # 252: 0 - 9 # Character Mapping Table: -Latin7_CharToOrderMap = ( +Latin7_char_to_order_map = ( 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 @@ -50,7 +50,7 @@ Latin7_CharToOrderMap = ( 9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0 ) -win1253_CharToOrderMap = ( +win1253_char_to_order_map = ( 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 @@ -207,19 +207,19 @@ GreekLangModel = ( ) Latin7GreekModel = { - 'charToOrderMap': Latin7_CharToOrderMap, - 'precedenceMatrix': GreekLangModel, - 'mTypicalPositiveRatio': 0.982851, - 'keepEnglishLetter': False, - 'charsetName': "ISO-8859-7" + 'char_to_order_map': Latin7_char_to_order_map, + 'precedence_matrix': GreekLangModel, + 'typical_positive_ratio': 0.982851, + 'keep_english_letter': False, + 'charset_name': "ISO-8859-7" } Win1253GreekModel = { - 'charToOrderMap': win1253_CharToOrderMap, - 'precedenceMatrix': GreekLangModel, - 'mTypicalPositiveRatio': 0.982851, - 'keepEnglishLetter': False, - 'charsetName': "windows-1253" + 'char_to_order_map': win1253_char_to_order_map, + 'precedence_matrix': GreekLangModel, + 'typical_positive_ratio': 0.982851, + 'keep_english_letter': False, + 'charset_name': "windows-1253" } -# flake8: noqa + diff --git a/lib/chardet/langhebrewmodel.py b/lib/chardet/langhebrewmodel.py index 75f2bc7fe73f13242d5c351e00b6a15959afda5a..6a895b69f350b6d5192211f204c357ddfdf902e3 100644 --- a/lib/chardet/langhebrewmodel.py +++ b/lib/chardet/langhebrewmodel.py @@ -34,7 +34,7 @@ # Windows-1255 language model # Character Mapping Table: -win1255_CharToOrderMap = ( +WIN1255_CHAR_TO_ORDER_MAP = ( 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 @@ -59,7 +59,7 @@ win1255_CharToOrderMap = ( # first 1024 sequences: 1.5981% # rest sequences: 0.087% # negative sequences: 0.0015% -HebrewLangModel = ( +HEBREW_LANG_MODEL = ( 0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0, 3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2, @@ -191,11 +191,11 @@ HebrewLangModel = ( ) Win1255HebrewModel = { - 'charToOrderMap': win1255_CharToOrderMap, - 'precedenceMatrix': HebrewLangModel, - 'mTypicalPositiveRatio': 0.984004, - 'keepEnglishLetter': False, - 'charsetName': "windows-1255" + 'char_to_order_map': WIN1255_CHAR_TO_ORDER_MAP, + 'precedence_matrix': HEBREW_LANG_MODEL, + 'typical_positive_ratio': 0.984004, + 'keep_english_letter': False, + 'charset_name': "windows-1255" } -# flake8: noqa + diff --git a/lib/chardet/langhungarianmodel.py b/lib/chardet/langhungarianmodel.py index 49d2f0fe75169c5a73ae42b7a1d63114e647c639..5297844e45ddfa8c8d2809329ff32d6bfda5867e 100644 --- a/lib/chardet/langhungarianmodel.py +++ b/lib/chardet/langhungarianmodel.py @@ -207,19 +207,19 @@ HungarianLangModel = ( ) Latin2HungarianModel = { - 'charToOrderMap': Latin2_HungarianCharToOrderMap, - 'precedenceMatrix': HungarianLangModel, - 'mTypicalPositiveRatio': 0.947368, - 'keepEnglishLetter': True, - 'charsetName': "ISO-8859-2" + 'char_to_order_map': Latin2_HungarianCharToOrderMap, + 'precedence_matrix': HungarianLangModel, + 'typical_positive_ratio': 0.947368, + 'keep_english_letter': True, + 'charset_name': "ISO-8859-2" } Win1250HungarianModel = { - 'charToOrderMap': win1250HungarianCharToOrderMap, - 'precedenceMatrix': HungarianLangModel, - 'mTypicalPositiveRatio': 0.947368, - 'keepEnglishLetter': True, - 'charsetName': "windows-1250" + 'char_to_order_map': win1250HungarianCharToOrderMap, + 'precedence_matrix': HungarianLangModel, + 'typical_positive_ratio': 0.947368, + 'keep_english_letter': True, + 'charset_name': "windows-1250" } -# flake8: noqa + diff --git a/lib/chardet/langthaimodel.py b/lib/chardet/langthaimodel.py index 0508b1b1abc3f727fe54fdc6653e5310e0105e71..3758bc1653476ac57576c101e0d19f16d62bdc43 100644 --- a/lib/chardet/langthaimodel.py +++ b/lib/chardet/langthaimodel.py @@ -190,11 +190,11 @@ ThaiLangModel = ( ) TIS620ThaiModel = { - 'charToOrderMap': TIS620CharToOrderMap, - 'precedenceMatrix': ThaiLangModel, - 'mTypicalPositiveRatio': 0.926386, - 'keepEnglishLetter': False, - 'charsetName': "TIS-620" + 'char_to_order_map': TIS620CharToOrderMap, + 'precedence_matrix': ThaiLangModel, + 'typical_positive_ratio': 0.926386, + 'keep_english_letter': False, + 'charset_name': "TIS-620" } -# flake8: noqa + diff --git a/lib/chardet/langturkishmodel.py b/lib/chardet/langturkishmodel.py new file mode 100644 index 0000000000000000000000000000000000000000..c4e8099dd51e981580232e003ecef62cbc4510a2 --- /dev/null +++ b/lib/chardet/langturkishmodel.py @@ -0,0 +1,192 @@ +# -*- coding: utf-8 -*- +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Özgür Baskın - Turkish Language Model +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# Character Mapping Table: +Latin5_TurkishCharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255, 23, 37, 47, 39, 29, 52, 36, 45, 53, 60, 16, 49, 20, 46, 42, + 48, 69, 44, 35, 31, 51, 38, 62, 65, 43, 56,255,255,255,255,255, +255, 1, 21, 28, 12, 2, 18, 27, 25, 3, 24, 10, 5, 13, 4, 15, + 26, 64, 7, 8, 9, 14, 32, 57, 58, 11, 22,255,255,255,255,255, +180,179,178,177,176,175,174,173,172,171,170,169,168,167,166,165, +164,163,162,161,160,159,101,158,157,156,155,154,153,152,151,106, +150,149,148,147,146,145,144,100,143,142,141,140,139,138,137,136, + 94, 80, 93,135,105,134,133, 63,132,131,130,129,128,127,126,125, +124,104, 73, 99, 79, 85,123, 54,122, 98, 92,121,120, 91,103,119, + 68,118,117, 97,116,115, 50, 90,114,113,112,111, 55, 41, 40, 86, + 89, 70, 59, 78, 71, 82, 88, 33, 77, 66, 84, 83,110, 75, 61, 96, + 30, 67,109, 74, 87,102, 34, 95, 81,108, 76, 72, 17, 6, 19,107, +) + +TurkishLangModel = ( +3,2,3,3,3,1,3,3,3,3,3,3,3,3,2,1,1,3,3,1,3,3,0,3,3,3,3,3,0,3,1,3, +3,2,1,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,2,2,0,0,1,0,0,1, +3,2,2,3,3,0,3,3,3,3,3,3,3,2,3,1,0,3,3,1,3,3,0,3,3,3,3,3,0,3,0,3, +3,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,2,2,0,0,0,1,0,1, +3,3,2,3,3,0,3,3,3,3,3,3,3,2,3,1,1,3,3,0,3,3,1,2,3,3,3,3,0,3,0,3, +3,1,1,0,0,0,1,0,0,0,0,1,1,0,1,2,1,0,0,0,1,0,0,0,0,2,0,0,0,0,0,1, +3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,1,3,3,2,0,3,2,1,2,2,1,3,3,0,0,0,2, +2,2,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1, +3,3,3,2,3,3,1,2,3,3,3,3,3,3,3,1,3,2,1,0,3,2,0,1,2,3,3,2,1,0,0,2, +2,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,0,0,0, +1,0,1,3,3,1,3,3,3,3,3,3,3,1,2,0,0,2,3,0,2,3,0,0,2,2,2,3,0,3,0,1, +2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,0,3,2,0,2,3,2,3,3,1,0,0,2, +3,2,0,0,1,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,0,2,0,0,1, +3,3,3,2,3,3,2,3,3,3,3,2,3,3,3,0,3,3,0,0,2,1,0,0,2,3,2,2,0,0,0,2, +2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,2,0,0,1, +3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,0,3,2,0,1,3,2,1,1,3,2,3,2,1,0,0,2, +2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0, +3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,0,3,2,2,0,2,3,0,0,2,2,2,2,0,0,0,2, +3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,1,0,0,0, +3,3,3,3,3,3,3,2,2,2,2,3,2,3,3,0,3,3,1,1,2,2,0,0,2,2,3,2,0,0,1,3, +0,3,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1, +3,3,3,2,3,3,3,2,1,2,2,3,2,3,3,0,3,2,0,0,1,1,0,1,1,2,1,2,0,0,0,1, +0,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0, +3,3,3,2,3,3,2,3,2,2,2,3,3,3,3,1,3,1,1,0,3,2,1,1,3,3,2,3,1,0,0,1, +1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,1, +3,2,2,3,3,0,3,3,3,3,3,3,3,2,2,1,0,3,3,1,3,3,0,1,3,3,2,3,0,3,0,3, +2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, +2,2,2,3,3,0,3,3,3,3,3,3,3,3,3,0,0,3,2,0,3,3,0,3,2,3,3,3,0,3,1,3, +2,0,0,0,0,0,0,0,0,0,0,1,0,1,2,0,1,0,0,0,0,0,0,0,2,2,0,0,1,0,0,1, +3,3,3,1,2,3,3,1,0,0,1,0,0,3,3,2,3,0,0,2,0,0,2,0,2,0,0,0,2,0,2,0, +0,3,1,0,1,0,0,0,2,2,1,0,1,1,2,1,2,2,2,0,2,1,1,0,0,0,2,0,0,0,0,0, +1,2,1,3,3,0,3,3,3,3,3,2,3,0,0,0,0,2,3,0,2,3,1,0,2,3,1,3,0,3,0,2, +3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,1,3,3,2,2,3,2,2,0,1,2,3,0,1,2,1,0,1,0,0,0,1,0,2,2,0,0,0,1, +1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0, +3,3,3,1,3,3,1,1,3,3,1,1,3,3,1,0,2,1,2,0,2,1,0,0,1,1,2,1,0,0,0,2, +2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,1,0,2,1,3,0,0,2,0,0,3,3,0,3,0,0,1,0,1,2,0,0,1,1,2,2,0,1,0, +0,1,2,1,1,0,1,0,1,1,1,1,1,0,1,1,1,2,2,1,2,0,1,0,0,0,0,0,0,1,0,0, +3,3,3,2,3,2,3,3,0,2,2,2,3,3,3,0,3,0,0,0,2,2,0,1,2,1,1,1,0,0,0,1, +0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, +3,3,3,3,3,3,2,1,2,2,3,3,3,3,2,0,2,0,0,0,2,2,0,0,2,1,3,3,0,0,1,1, +1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0, +1,1,2,3,3,0,3,3,3,3,3,3,2,2,0,2,0,2,3,2,3,2,2,2,2,2,2,2,1,3,2,3, +2,0,2,1,2,2,2,2,1,1,2,2,1,2,2,1,2,0,0,2,1,1,0,2,1,0,0,1,0,0,0,1, +2,3,3,1,1,1,0,1,1,1,2,3,2,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0, +0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,2,2,2,3,2,3,2,2,1,3,3,3,0,2,1,2,0,2,1,0,0,1,1,1,1,1,0,0,1, +2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,2,0,1,0,0,0, +3,3,3,2,3,3,3,3,3,2,3,1,2,3,3,1,2,0,0,0,0,0,0,0,3,2,1,1,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, +3,3,3,2,2,3,3,2,1,1,1,1,1,3,3,0,3,1,0,0,1,1,0,0,3,1,2,1,0,0,0,0, +0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0, +3,3,3,2,2,3,2,2,2,3,2,1,1,3,3,0,3,0,0,0,0,1,0,0,3,1,1,2,0,0,0,1, +1,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +1,1,1,3,3,0,3,3,3,3,3,2,2,2,1,2,0,2,1,2,2,1,1,0,1,2,2,2,2,2,2,2, +0,0,2,1,2,1,2,1,0,1,1,3,1,2,1,1,2,0,0,2,0,1,0,1,0,1,0,0,0,1,0,1, +3,3,3,1,3,3,3,0,1,1,0,2,2,3,1,0,3,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,2,0,0,2,2,1,0,0,1,0,0,3,3,1,3,0,0,1,1,0,2,0,3,0,0,0,2,0,1,1, +0,1,2,0,1,2,2,0,2,2,2,2,1,0,2,1,1,0,2,0,2,1,2,0,0,0,0,0,0,0,0,0, +3,3,3,1,3,2,3,2,0,2,2,2,1,3,2,0,2,1,2,0,1,2,0,0,1,0,2,2,0,0,0,2, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0, +3,3,3,0,3,3,1,1,2,3,1,0,3,2,3,0,3,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0, +1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,3,3,0,3,3,2,3,3,2,2,0,0,0,0,1,2,0,1,3,0,0,0,3,1,1,0,3,0,2, +2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,1,2,2,1,0,3,1,1,1,1,3,3,2,3,0,0,1,0,1,2,0,2,2,0,2,2,0,2,1, +0,2,2,1,1,1,1,0,2,1,1,0,1,1,1,1,2,1,2,1,2,0,1,0,1,0,0,0,0,0,0,0, +3,3,3,0,1,1,3,0,0,1,1,0,0,2,2,0,3,0,0,1,1,0,1,0,0,0,0,0,2,0,0,0, +0,3,1,0,1,0,1,0,2,0,0,1,0,1,0,1,1,1,2,1,1,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,0,2,0,2,0,1,1,1,0,0,3,3,0,2,0,0,1,0,0,2,1,1,0,1,0,1,0,1,0, +0,2,0,1,2,0,2,0,2,1,1,0,1,0,2,1,1,0,2,1,1,0,1,0,0,0,1,1,0,0,0,0, +3,2,3,0,1,0,0,0,0,0,0,0,0,1,2,0,1,0,0,1,0,0,1,0,0,0,0,0,2,0,0,0, +0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,2,1,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,0,0,2,3,0,0,1,0,1,0,2,3,2,3,0,0,1,3,0,2,1,0,0,0,0,2,0,1,0, +0,2,1,0,0,1,1,0,2,1,0,0,1,0,0,1,1,0,1,1,2,0,1,0,0,0,0,1,0,0,0,0, +3,2,2,0,0,1,1,0,0,0,0,0,0,3,1,1,1,0,0,0,0,0,1,0,0,0,0,0,2,0,1,0, +0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0, +0,0,0,3,3,0,2,3,2,2,1,2,2,1,1,2,0,1,3,2,2,2,0,0,2,2,0,0,0,1,2,1, +3,0,2,1,1,0,1,1,1,0,1,2,2,2,1,1,2,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0, +0,1,1,2,3,0,3,3,3,2,2,2,2,1,0,1,0,1,0,1,2,2,0,0,2,2,1,3,1,1,2,1, +0,0,1,1,2,0,1,1,0,0,1,2,0,2,1,1,2,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0, +3,3,2,0,0,3,1,0,0,0,0,0,0,3,2,1,2,0,0,1,0,0,2,0,0,0,0,0,2,0,1,0, +0,2,1,1,0,0,1,0,1,2,0,0,1,1,0,0,2,1,1,1,1,0,2,0,0,0,0,0,0,0,0,0, +3,3,2,0,0,1,0,0,0,0,1,0,0,3,3,2,2,0,0,1,0,0,2,0,1,0,0,0,2,0,1,0, +0,0,1,1,0,0,2,0,2,1,0,0,1,1,2,1,2,0,2,1,2,1,1,1,0,0,1,1,0,0,0,0, +3,3,2,0,0,2,2,0,0,0,1,1,0,2,2,1,3,1,0,1,0,1,2,0,0,0,0,0,1,0,1,0, +0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,2,0,0,0,1,0,0,1,0,0,2,3,1,2,0,0,1,0,0,2,0,0,0,1,0,2,0,2,0, +0,1,1,2,2,1,2,0,2,1,1,0,0,1,1,0,1,1,1,1,2,1,1,0,0,0,0,0,0,0,0,0, +3,3,3,0,2,1,2,1,0,0,1,1,0,3,3,1,2,0,0,1,0,0,2,0,2,0,1,1,2,0,0,0, +0,0,1,1,1,1,2,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0, +3,3,3,0,2,2,3,2,0,0,1,0,0,2,3,1,0,0,0,0,0,0,2,0,2,0,0,0,2,0,0,0, +0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,0,0,0,0,0,0,0,1,0,0,2,2,2,2,0,0,1,0,0,2,0,0,0,0,0,2,0,1,0, +0,0,2,1,1,0,1,0,2,1,1,0,0,1,1,2,1,0,2,0,2,0,1,0,0,0,2,0,0,0,0,0, +0,0,0,2,2,0,2,1,1,1,1,2,2,0,0,1,0,1,0,0,1,3,0,0,0,0,1,0,0,2,1,0, +0,0,1,0,1,0,0,0,0,0,2,1,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, +2,0,0,2,3,0,2,3,1,2,2,0,2,0,0,2,0,2,1,1,1,2,1,0,0,1,2,1,1,2,1,0, +1,0,2,0,1,0,1,1,0,0,2,2,1,2,1,1,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,0,2,1,2,0,0,0,1,0,0,3,2,0,1,0,0,1,0,0,2,0,0,0,1,2,1,0,1,0, +0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0, +0,0,0,2,2,0,2,2,1,1,0,1,1,1,1,1,0,0,1,2,1,1,1,0,1,0,0,0,1,1,1,1, +0,0,2,1,0,1,1,1,0,1,1,2,1,2,1,1,2,0,1,1,2,1,0,2,0,0,0,0,0,0,0,0, +3,2,2,0,0,2,0,0,0,0,0,0,0,2,2,0,2,0,0,1,0,0,2,0,0,0,0,0,2,0,0,0, +0,2,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0, +0,0,0,3,2,0,2,2,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0, +2,0,1,0,1,0,1,1,0,0,1,2,0,1,0,1,1,0,0,1,0,1,0,2,0,0,0,0,0,0,0,0, +2,2,2,0,1,1,0,0,0,1,0,0,0,1,2,0,1,0,0,1,0,0,1,0,0,0,0,1,2,0,1,0, +0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,2,1,0,1,1,1,0,0,0,0,1,2,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, +1,1,2,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,1, +0,0,1,2,2,0,2,1,2,1,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,0,0,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, +2,2,2,0,0,0,1,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +) + +Latin5TurkishModel = { + 'char_to_order_map': Latin5_TurkishCharToOrderMap, + 'precedence_matrix': TurkishLangModel, + 'typical_positive_ratio': 0.970290, + 'keep_english_letter': True, + 'charset_name': "ISO-8859-9" +} diff --git a/lib/chardet/latin1prober.py b/lib/chardet/latin1prober.py index ad695f57a728421cd920cc3772fb31141d1e0b29..b04c23c2ab5abbedc0d546fac6591ec1a4ea043b 100644 --- a/lib/chardet/latin1prober.py +++ b/lib/chardet/latin1prober.py @@ -27,8 +27,8 @@ ######################### END LICENSE BLOCK ######################### from .charsetprober import CharSetProber -from .constants import eNotMe from .compat import wrap_ord +from .enums import ProbingState FREQ_CAT_NUM = 4 @@ -82,7 +82,7 @@ Latin1_CharToClass = ( # 2 : normal # 3 : very likely Latin1ClassModel = ( - # UDF OTH ASC ASS ACV ACO ASV ASO +# UDF OTH ASC ASS ACV ACO ASV ASO 0, 0, 0, 0, 0, 0, 0, 0, # UDF 0, 3, 3, 3, 3, 3, 3, 3, # OTH 0, 3, 3, 3, 3, 3, 3, 3, # ASC @@ -96,44 +96,47 @@ Latin1ClassModel = ( class Latin1Prober(CharSetProber): def __init__(self): - CharSetProber.__init__(self) + super(Latin1Prober, self).__init__() + self._last_char_class = None + self._freq_counter = None self.reset() def reset(self): - self._mLastCharClass = OTH - self._mFreqCounter = [0] * FREQ_CAT_NUM + self._last_char_class = OTH + self._freq_counter = [0] * FREQ_CAT_NUM CharSetProber.reset(self) - def get_charset_name(self): + @property + def charset_name(self): return "windows-1252" - def feed(self, aBuf): - aBuf = self.filter_with_english_letters(aBuf) - for c in aBuf: - charClass = Latin1_CharToClass[wrap_ord(c)] - freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM) - + charClass] + def feed(self, byte_str): + byte_str = self.filter_with_english_letters(byte_str) + for c in byte_str: + char_class = Latin1_CharToClass[wrap_ord(c)] + freq = Latin1ClassModel[(self._last_char_class * CLASS_NUM) + + char_class] if freq == 0: - self._mState = eNotMe + self._state = ProbingState.not_me break - self._mFreqCounter[freq] += 1 - self._mLastCharClass = charClass + self._freq_counter[freq] += 1 + self._last_char_class = char_class - return self.get_state() + return self.state def get_confidence(self): - if self.get_state() == eNotMe: + if self.state == ProbingState.not_me: return 0.01 - total = sum(self._mFreqCounter) + total = sum(self._freq_counter) if total < 0.01: confidence = 0.0 else: - confidence = ((self._mFreqCounter[3] / total) - - (self._mFreqCounter[1] * 20.0 / total)) + confidence = ((self._freq_counter[3] - self._freq_counter[1] * 20.0) + / total) if confidence < 0.0: confidence = 0.0 # lower the confidence of latin1 so that other more accurate # detector can take priority. - confidence = confidence * 0.5 + confidence = confidence * 0.73 return confidence diff --git a/lib/chardet/mbcharsetprober.py b/lib/chardet/mbcharsetprober.py index bb42f2fb5e8189d442f3db4d4461ea6b230824b3..1f66e61cef5a2e778da8b8a1f180861db024daa1 100644 --- a/lib/chardet/mbcharsetprober.py +++ b/lib/chardet/mbcharsetprober.py @@ -27,60 +27,61 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -import sys -from . import constants from .charsetprober import CharSetProber +from .enums import ProbingState, MachineState class MultiByteCharSetProber(CharSetProber): - def __init__(self): - CharSetProber.__init__(self) - self._mDistributionAnalyzer = None - self._mCodingSM = None - self._mLastChar = [0, 0] + """ + MultiByteCharSetProber + """ + + def __init__(self, lang_filter=None): + super(MultiByteCharSetProber, self).__init__(lang_filter=lang_filter) + self.distribution_analyzer = None + self.coding_sm = None + self._last_char = [0, 0] def reset(self): - CharSetProber.reset(self) - if self._mCodingSM: - self._mCodingSM.reset() - if self._mDistributionAnalyzer: - self._mDistributionAnalyzer.reset() - self._mLastChar = [0, 0] + super(MultiByteCharSetProber, self).reset() + if self.coding_sm: + self.coding_sm.reset() + if self.distribution_analyzer: + self.distribution_analyzer.reset() + self._last_char = [0, 0] - def get_charset_name(self): + @property + def charset_name(self): pass - def feed(self, aBuf): - aLen = len(aBuf) - for i in range(0, aLen): - codingState = self._mCodingSM.next_state(aBuf[i]) - if codingState == constants.eError: - if constants._debug: - sys.stderr.write(self.get_charset_name() - + ' prober hit error at byte ' + str(i) - + '\n') - self._mState = constants.eNotMe + def feed(self, byte_str): + for i in range(len(byte_str)): + coding_state = self.coding_sm.next_state(byte_str[i]) + if coding_state == MachineState.error: + self.logger.debug('%s prober hit error at byte %s', + self.charset_name, i) + self._state = ProbingState.not_me break - elif codingState == constants.eItsMe: - self._mState = constants.eFoundIt + elif coding_state == MachineState.its_me: + self._state = ProbingState.found_it break - elif codingState == constants.eStart: - charLen = self._mCodingSM.get_current_charlen() + elif coding_state == MachineState.start: + char_len = self.coding_sm.get_current_charlen() if i == 0: - self._mLastChar[1] = aBuf[0] - self._mDistributionAnalyzer.feed(self._mLastChar, charLen) + self._last_char[1] = byte_str[0] + self.distribution_analyzer.feed(self._last_char, char_len) else: - self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1], - charLen) + self.distribution_analyzer.feed(byte_str[i - 1:i + 1], + char_len) - self._mLastChar[0] = aBuf[aLen - 1] + self._last_char[0] = byte_str[-1] - if self.get_state() == constants.eDetecting: - if (self._mDistributionAnalyzer.got_enough_data() and - (self.get_confidence() > constants.SHORTCUT_THRESHOLD)): - self._mState = constants.eFoundIt + if self.state == ProbingState.detecting: + if (self.distribution_analyzer.got_enough_data() and + (self.get_confidence() > self.SHORTCUT_THRESHOLD)): + self._state = ProbingState.found_it - return self.get_state() + return self.state def get_confidence(self): - return self._mDistributionAnalyzer.get_confidence() + return self.distribution_analyzer.get_confidence() diff --git a/lib/chardet/mbcsgroupprober.py b/lib/chardet/mbcsgroupprober.py index 03c9dcf3eb8337dd69e68574e58c33012ebb2408..530abe75e0c00cbfcb2a310d872866f320977d0a 100644 --- a/lib/chardet/mbcsgroupprober.py +++ b/lib/chardet/mbcsgroupprober.py @@ -39,9 +39,9 @@ from .euctwprober import EUCTWProber class MBCSGroupProber(CharSetGroupProber): - def __init__(self): - CharSetGroupProber.__init__(self) - self._mProbers = [ + def __init__(self, lang_filter=None): + super(MBCSGroupProber, self).__init__(lang_filter=lang_filter) + self.probers = [ UTF8Prober(), SJISProber(), EUCJPProber(), diff --git a/lib/chardet/mbcssm.py b/lib/chardet/mbcssm.py index 3f93cfb045c01e072de4c448d2b5bb2e25438b9b..9a01af768864523ed209a1fd271f3d3e99929df3 100644 --- a/lib/chardet/mbcssm.py +++ b/lib/chardet/mbcssm.py @@ -25,11 +25,11 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from .constants import eStart, eError, eItsMe +from .enums import MachineState # BIG5 -BIG5_cls = ( +BIG5_CLS = ( 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value 1,1,1,1,1,1,0,0, # 08 - 0f 1,1,1,1,1,1,1,1, # 10 - 17 @@ -64,23 +64,23 @@ BIG5_cls = ( 3,3,3,3,3,3,3,0 # f8 - ff ) -BIG5_st = ( - eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07 - eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,#08-0f - eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart#10-17 +BIG5_ST = ( + MachineState.error,MachineState.start,MachineState.start, 3,MachineState.error,MachineState.error,MachineState.error,MachineState.error,#00-07 + MachineState.error,MachineState.error,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.error,#08-0f + MachineState.error,MachineState.start,MachineState.start,MachineState.start,MachineState.start,MachineState.start,MachineState.start,MachineState.start#10-17 ) -Big5CharLenTable = (0, 1, 1, 2, 0) +BIG5_CHAR_LEN_TABLE = (0, 1, 1, 2, 0) -Big5SMModel = {'classTable': BIG5_cls, - 'classFactor': 5, - 'stateTable': BIG5_st, - 'charLenTable': Big5CharLenTable, - 'name': 'Big5'} +BIG5_SM_MODEL = {'class_table': BIG5_CLS, + 'class_factor': 5, + 'state_table': BIG5_ST, + 'char_len_table': BIG5_CHAR_LEN_TABLE, + 'name': 'Big5'} # CP949 -CP949_cls = ( +CP949_CLS = ( 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0, # 00 - 0f 1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1, # 10 - 1f 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 20 - 2f @@ -99,28 +99,28 @@ CP949_cls = ( 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0, # f0 - ff ) -CP949_st = ( +CP949_ST = ( #cls= 0 1 2 3 4 5 6 7 8 9 # previous state = - eError,eStart, 3,eError,eStart,eStart, 4, 5,eError, 6, # eStart - eError,eError,eError,eError,eError,eError,eError,eError,eError,eError, # eError - eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe, # eItsMe - eError,eError,eStart,eStart,eError,eError,eError,eStart,eStart,eStart, # 3 - eError,eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 4 - eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 5 - eError,eStart,eStart,eStart,eStart,eError,eError,eStart,eStart,eStart, # 6 + MachineState.error,MachineState.start, 3,MachineState.error,MachineState.start,MachineState.start, 4, 5,MachineState.error, 6, # MachineState.start + MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error, # MachineState.error + MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me, # MachineState.its_me + MachineState.error,MachineState.error,MachineState.start,MachineState.start,MachineState.error,MachineState.error,MachineState.error,MachineState.start,MachineState.start,MachineState.start, # 3 + MachineState.error,MachineState.error,MachineState.start,MachineState.start,MachineState.start,MachineState.start,MachineState.start,MachineState.start,MachineState.start,MachineState.start, # 4 + MachineState.error,MachineState.start,MachineState.start,MachineState.start,MachineState.start,MachineState.start,MachineState.start,MachineState.start,MachineState.start,MachineState.start, # 5 + MachineState.error,MachineState.start,MachineState.start,MachineState.start,MachineState.start,MachineState.error,MachineState.error,MachineState.start,MachineState.start,MachineState.start, # 6 ) -CP949CharLenTable = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2) +CP949_CHAR_LEN_TABLE = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2) -CP949SMModel = {'classTable': CP949_cls, - 'classFactor': 10, - 'stateTable': CP949_st, - 'charLenTable': CP949CharLenTable, - 'name': 'CP949'} +CP949_SM_MODEL = {'class_table': CP949_CLS, + 'class_factor': 10, + 'state_table': CP949_ST, + 'char_len_table': CP949_CHAR_LEN_TABLE, + 'name': 'CP949'} # EUC-JP -EUCJP_cls = ( +EUCJP_CLS = ( 4,4,4,4,4,4,4,4, # 00 - 07 4,4,4,4,4,4,5,5, # 08 - 0f 4,4,4,4,4,4,4,4, # 10 - 17 @@ -155,25 +155,25 @@ EUCJP_cls = ( 0,0,0,0,0,0,0,5 # f8 - ff ) -EUCJP_st = ( - 3, 4, 3, 5,eStart,eError,eError,eError,#00-07 - eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f - eItsMe,eItsMe,eStart,eError,eStart,eError,eError,eError,#10-17 - eError,eError,eStart,eError,eError,eError, 3,eError,#18-1f - 3,eError,eError,eError,eStart,eStart,eStart,eStart#20-27 +EUCJP_ST = ( + 3, 4, 3, 5,MachineState.start,MachineState.error,MachineState.error,MachineState.error,#00-07 + MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,#08-0f + MachineState.its_me,MachineState.its_me,MachineState.start,MachineState.error,MachineState.start,MachineState.error,MachineState.error,MachineState.error,#10-17 + MachineState.error,MachineState.error,MachineState.start,MachineState.error,MachineState.error,MachineState.error, 3,MachineState.error,#18-1f + 3,MachineState.error,MachineState.error,MachineState.error,MachineState.start,MachineState.start,MachineState.start,MachineState.start#20-27 ) -EUCJPCharLenTable = (2, 2, 2, 3, 1, 0) +EUCJP_CHAR_LEN_TABLE = (2, 2, 2, 3, 1, 0) -EUCJPSMModel = {'classTable': EUCJP_cls, - 'classFactor': 6, - 'stateTable': EUCJP_st, - 'charLenTable': EUCJPCharLenTable, - 'name': 'EUC-JP'} +EUCJP_SM_MODEL = {'class_table': EUCJP_CLS, + 'class_factor': 6, + 'state_table': EUCJP_ST, + 'char_len_table': EUCJP_CHAR_LEN_TABLE, + 'name': 'EUC-JP'} # EUC-KR -EUCKR_cls = ( +EUCKR_CLS = ( 1,1,1,1,1,1,1,1, # 00 - 07 1,1,1,1,1,1,0,0, # 08 - 0f 1,1,1,1,1,1,1,1, # 10 - 17 @@ -208,22 +208,22 @@ EUCKR_cls = ( 2,2,2,2,2,2,2,0 # f8 - ff ) -EUCKR_st = ( - eError,eStart, 3,eError,eError,eError,eError,eError,#00-07 - eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart #08-0f +EUCKR_ST = ( + MachineState.error,MachineState.start, 3,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,#00-07 + MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.error,MachineState.error,MachineState.start,MachineState.start #08-0f ) -EUCKRCharLenTable = (0, 1, 2, 0) +EUCKR_CHAR_LEN_TABLE = (0, 1, 2, 0) -EUCKRSMModel = {'classTable': EUCKR_cls, - 'classFactor': 4, - 'stateTable': EUCKR_st, - 'charLenTable': EUCKRCharLenTable, +EUCKR_SM_MODEL = {'class_table': EUCKR_CLS, + 'class_factor': 4, + 'state_table': EUCKR_ST, + 'char_len_table': EUCKR_CHAR_LEN_TABLE, 'name': 'EUC-KR'} # EUC-TW -EUCTW_cls = ( +EUCTW_CLS = ( 2,2,2,2,2,2,2,2, # 00 - 07 2,2,2,2,2,2,0,0, # 08 - 0f 2,2,2,2,2,2,2,2, # 10 - 17 @@ -258,26 +258,26 @@ EUCTW_cls = ( 3,3,3,3,3,3,3,0 # f8 - ff ) -EUCTW_st = ( - eError,eError,eStart, 3, 3, 3, 4,eError,#00-07 - eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f - eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eStart,eError,#10-17 - eStart,eStart,eStart,eError,eError,eError,eError,eError,#18-1f - 5,eError,eError,eError,eStart,eError,eStart,eStart,#20-27 - eStart,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f +EUCTW_ST = ( + MachineState.error,MachineState.error,MachineState.start, 3, 3, 3, 4,MachineState.error,#00-07 + MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.its_me,MachineState.its_me,#08-0f + MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.error,MachineState.start,MachineState.error,#10-17 + MachineState.start,MachineState.start,MachineState.start,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,#18-1f + 5,MachineState.error,MachineState.error,MachineState.error,MachineState.start,MachineState.error,MachineState.start,MachineState.start,#20-27 + MachineState.start,MachineState.error,MachineState.start,MachineState.start,MachineState.start,MachineState.start,MachineState.start,MachineState.start #28-2f ) -EUCTWCharLenTable = (0, 0, 1, 2, 2, 2, 3) +EUCTW_CHAR_LEN_TABLE = (0, 0, 1, 2, 2, 2, 3) -EUCTWSMModel = {'classTable': EUCTW_cls, - 'classFactor': 7, - 'stateTable': EUCTW_st, - 'charLenTable': EUCTWCharLenTable, +EUCTW_SM_MODEL = {'class_table': EUCTW_CLS, + 'class_factor': 7, + 'state_table': EUCTW_ST, + 'char_len_table': EUCTW_CHAR_LEN_TABLE, 'name': 'x-euc-tw'} # GB2312 -GB2312_cls = ( +GB2312_CLS = ( 1,1,1,1,1,1,1,1, # 00 - 07 1,1,1,1,1,1,0,0, # 08 - 0f 1,1,1,1,1,1,1,1, # 10 - 17 @@ -312,13 +312,13 @@ GB2312_cls = ( 6,6,6,6,6,6,6,0 # f8 - ff ) -GB2312_st = ( - eError,eStart,eStart,eStart,eStart,eStart, 3,eError,#00-07 - eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f - eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,#10-17 - 4,eError,eStart,eStart,eError,eError,eError,eError,#18-1f - eError,eError, 5,eError,eError,eError,eItsMe,eError,#20-27 - eError,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f +GB2312_ST = ( + MachineState.error,MachineState.start,MachineState.start,MachineState.start,MachineState.start,MachineState.start, 3,MachineState.error,#00-07 + MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.its_me,MachineState.its_me,#08-0f + MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.error,MachineState.error,MachineState.start,#10-17 + 4,MachineState.error,MachineState.start,MachineState.start,MachineState.error,MachineState.error,MachineState.error,MachineState.error,#18-1f + MachineState.error,MachineState.error, 5,MachineState.error,MachineState.error,MachineState.error,MachineState.its_me,MachineState.error,#20-27 + MachineState.error,MachineState.error,MachineState.start,MachineState.start,MachineState.start,MachineState.start,MachineState.start,MachineState.start #28-2f ) # To be accurate, the length of class 6 can be either 2 or 4. @@ -326,17 +326,17 @@ GB2312_st = ( # it is used for frequency analysis only, and we are validing # each code range there as well. So it is safe to set it to be # 2 here. -GB2312CharLenTable = (0, 1, 1, 1, 1, 1, 2) +GB2312_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 1, 2) -GB2312SMModel = {'classTable': GB2312_cls, - 'classFactor': 7, - 'stateTable': GB2312_st, - 'charLenTable': GB2312CharLenTable, - 'name': 'GB2312'} +GB2312_SM_MODEL = {'class_table': GB2312_CLS, + 'class_factor': 7, + 'state_table': GB2312_ST, + 'char_len_table': GB2312_CHAR_LEN_TABLE, + 'name': 'GB2312'} # Shift_JIS -SJIS_cls = ( +SJIS_CLS = ( 1,1,1,1,1,1,1,1, # 00 - 07 1,1,1,1,1,1,0,0, # 08 - 0f 1,1,1,1,1,1,1,1, # 10 - 17 @@ -353,7 +353,7 @@ SJIS_cls = ( 2,2,2,2,2,2,2,2, # 68 - 6f 2,2,2,2,2,2,2,2, # 70 - 77 2,2,2,2,2,2,2,1, # 78 - 7f - 3,3,3,3,3,3,3,3, # 80 - 87 + 3,3,3,3,3,2,2,3, # 80 - 87 3,3,3,3,3,3,3,3, # 88 - 8f 3,3,3,3,3,3,3,3, # 90 - 97 3,3,3,3,3,3,3,3, # 98 - 9f @@ -369,28 +369,27 @@ SJIS_cls = ( 2,2,2,2,2,2,2,2, # d8 - df 3,3,3,3,3,3,3,3, # e0 - e7 3,3,3,3,3,4,4,4, # e8 - ef - 4,4,4,4,4,4,4,4, # f0 - f7 - 4,4,4,4,4,0,0,0 # f8 - ff -) + 3,3,3,3,3,3,3,3, # f0 - f7 + 3,3,3,3,3,0,0,0) # f8 - ff -SJIS_st = ( - eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07 - eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f - eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart #10-17 +SJIS_ST = ( + MachineState.error,MachineState.start,MachineState.start, 3,MachineState.error,MachineState.error,MachineState.error,MachineState.error,#00-07 + MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,#08-0f + MachineState.its_me,MachineState.its_me,MachineState.error,MachineState.error,MachineState.start,MachineState.start,MachineState.start,MachineState.start #10-17 ) -SJISCharLenTable = (0, 1, 1, 2, 0, 0) +SJIS_CHAR_LEN_TABLE = (0, 1, 1, 2, 0, 0) -SJISSMModel = {'classTable': SJIS_cls, - 'classFactor': 6, - 'stateTable': SJIS_st, - 'charLenTable': SJISCharLenTable, +SJIS_SM_MODEL = {'class_table': SJIS_CLS, + 'class_factor': 6, + 'state_table': SJIS_ST, + 'char_len_table': SJIS_CHAR_LEN_TABLE, 'name': 'Shift_JIS'} # UCS2-BE -UCS2BE_cls = ( +UCS2BE_CLS = ( 0,0,0,0,0,0,0,0, # 00 - 07 0,0,1,0,0,2,0,0, # 08 - 0f 0,0,0,0,0,0,0,0, # 10 - 17 @@ -425,27 +424,27 @@ UCS2BE_cls = ( 0,0,0,0,0,0,4,5 # f8 - ff ) -UCS2BE_st = ( - 5, 7, 7,eError, 4, 3,eError,eError,#00-07 - eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f - eItsMe,eItsMe, 6, 6, 6, 6,eError,eError,#10-17 - 6, 6, 6, 6, 6,eItsMe, 6, 6,#18-1f - 6, 6, 6, 6, 5, 7, 7,eError,#20-27 - 5, 8, 6, 6,eError, 6, 6, 6,#28-2f - 6, 6, 6, 6,eError,eError,eStart,eStart #30-37 +UCS2BE_ST = ( + 5, 7, 7,MachineState.error, 4, 3,MachineState.error,MachineState.error,#00-07 + MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,#08-0f + MachineState.its_me,MachineState.its_me, 6, 6, 6, 6,MachineState.error,MachineState.error,#10-17 + 6, 6, 6, 6, 6,MachineState.its_me, 6, 6,#18-1f + 6, 6, 6, 6, 5, 7, 7,MachineState.error,#20-27 + 5, 8, 6, 6,MachineState.error, 6, 6, 6,#28-2f + 6, 6, 6, 6,MachineState.error,MachineState.error,MachineState.start,MachineState.start #30-37 ) -UCS2BECharLenTable = (2, 2, 2, 0, 2, 2) +UCS2BE_CHAR_LEN_TABLE = (2, 2, 2, 0, 2, 2) -UCS2BESMModel = {'classTable': UCS2BE_cls, - 'classFactor': 6, - 'stateTable': UCS2BE_st, - 'charLenTable': UCS2BECharLenTable, - 'name': 'UTF-16BE'} +UCS2BE_SM_MODEL = {'class_table': UCS2BE_CLS, + 'class_factor': 6, + 'state_table': UCS2BE_ST, + 'char_len_table': UCS2BE_CHAR_LEN_TABLE, + 'name': 'UTF-16BE'} # UCS2-LE -UCS2LE_cls = ( +UCS2LE_CLS = ( 0,0,0,0,0,0,0,0, # 00 - 07 0,0,1,0,0,2,0,0, # 08 - 0f 0,0,0,0,0,0,0,0, # 10 - 17 @@ -480,27 +479,27 @@ UCS2LE_cls = ( 0,0,0,0,0,0,4,5 # f8 - ff ) -UCS2LE_st = ( - 6, 6, 7, 6, 4, 3,eError,eError,#00-07 - eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f - eItsMe,eItsMe, 5, 5, 5,eError,eItsMe,eError,#10-17 - 5, 5, 5,eError, 5,eError, 6, 6,#18-1f - 7, 6, 8, 8, 5, 5, 5,eError,#20-27 - 5, 5, 5,eError,eError,eError, 5, 5,#28-2f - 5, 5, 5,eError, 5,eError,eStart,eStart #30-37 +UCS2LE_ST = ( + 6, 6, 7, 6, 4, 3,MachineState.error,MachineState.error,#00-07 + MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,#08-0f + MachineState.its_me,MachineState.its_me, 5, 5, 5,MachineState.error,MachineState.its_me,MachineState.error,#10-17 + 5, 5, 5,MachineState.error, 5,MachineState.error, 6, 6,#18-1f + 7, 6, 8, 8, 5, 5, 5,MachineState.error,#20-27 + 5, 5, 5,MachineState.error,MachineState.error,MachineState.error, 5, 5,#28-2f + 5, 5, 5,MachineState.error, 5,MachineState.error,MachineState.start,MachineState.start #30-37 ) -UCS2LECharLenTable = (2, 2, 2, 2, 2, 2) +UCS2LE_CHAR_LEN_TABLE = (2, 2, 2, 2, 2, 2) -UCS2LESMModel = {'classTable': UCS2LE_cls, - 'classFactor': 6, - 'stateTable': UCS2LE_st, - 'charLenTable': UCS2LECharLenTable, +UCS2LE_SM_MODEL = {'class_table': UCS2LE_CLS, + 'class_factor': 6, + 'state_table': UCS2LE_ST, + 'char_len_table': UCS2LE_CHAR_LEN_TABLE, 'name': 'UTF-16LE'} # UTF-8 -UTF8_cls = ( +UTF8_CLS = ( 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value 1,1,1,1,1,1,0,0, # 08 - 0f 1,1,1,1,1,1,1,1, # 10 - 17 @@ -535,41 +534,39 @@ UTF8_cls = ( 12,13,13,13,14,15,0,0 # f8 - ff ) -UTF8_st = ( - eError,eStart,eError,eError,eError,eError, 12, 10,#00-07 +UTF8_ST = ( + MachineState.error,MachineState.start,MachineState.error,MachineState.error,MachineState.error,MachineState.error, 12, 10,#00-07 9, 11, 8, 7, 6, 5, 4, 3,#08-0f - eError,eError,eError,eError,eError,eError,eError,eError,#10-17 - eError,eError,eError,eError,eError,eError,eError,eError,#18-1f - eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#20-27 - eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#28-2f - eError,eError, 5, 5, 5, 5,eError,eError,#30-37 - eError,eError,eError,eError,eError,eError,eError,eError,#38-3f - eError,eError,eError, 5, 5, 5,eError,eError,#40-47 - eError,eError,eError,eError,eError,eError,eError,eError,#48-4f - eError,eError, 7, 7, 7, 7,eError,eError,#50-57 - eError,eError,eError,eError,eError,eError,eError,eError,#58-5f - eError,eError,eError,eError, 7, 7,eError,eError,#60-67 - eError,eError,eError,eError,eError,eError,eError,eError,#68-6f - eError,eError, 9, 9, 9, 9,eError,eError,#70-77 - eError,eError,eError,eError,eError,eError,eError,eError,#78-7f - eError,eError,eError,eError,eError, 9,eError,eError,#80-87 - eError,eError,eError,eError,eError,eError,eError,eError,#88-8f - eError,eError, 12, 12, 12, 12,eError,eError,#90-97 - eError,eError,eError,eError,eError,eError,eError,eError,#98-9f - eError,eError,eError,eError,eError, 12,eError,eError,#a0-a7 - eError,eError,eError,eError,eError,eError,eError,eError,#a8-af - eError,eError, 12, 12, 12,eError,eError,eError,#b0-b7 - eError,eError,eError,eError,eError,eError,eError,eError,#b8-bf - eError,eError,eStart,eStart,eStart,eStart,eError,eError,#c0-c7 - eError,eError,eError,eError,eError,eError,eError,eError #c8-cf + MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,#10-17 + MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,#18-1f + MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,#20-27 + MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,#28-2f + MachineState.error,MachineState.error, 5, 5, 5, 5,MachineState.error,MachineState.error,#30-37 + MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,#38-3f + MachineState.error,MachineState.error,MachineState.error, 5, 5, 5,MachineState.error,MachineState.error,#40-47 + MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,#48-4f + MachineState.error,MachineState.error, 7, 7, 7, 7,MachineState.error,MachineState.error,#50-57 + MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,#58-5f + MachineState.error,MachineState.error,MachineState.error,MachineState.error, 7, 7,MachineState.error,MachineState.error,#60-67 + MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,#68-6f + MachineState.error,MachineState.error, 9, 9, 9, 9,MachineState.error,MachineState.error,#70-77 + MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,#78-7f + MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error, 9,MachineState.error,MachineState.error,#80-87 + MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,#88-8f + MachineState.error,MachineState.error, 12, 12, 12, 12,MachineState.error,MachineState.error,#90-97 + MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,#98-9f + MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error, 12,MachineState.error,MachineState.error,#a0-a7 + MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,#a8-af + MachineState.error,MachineState.error, 12, 12, 12,MachineState.error,MachineState.error,MachineState.error,#b0-b7 + MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,#b8-bf + MachineState.error,MachineState.error,MachineState.start,MachineState.start,MachineState.start,MachineState.start,MachineState.error,MachineState.error,#c0-c7 + MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error #c8-cf ) -UTF8CharLenTable = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6) - -UTF8SMModel = {'classTable': UTF8_cls, - 'classFactor': 16, - 'stateTable': UTF8_st, - 'charLenTable': UTF8CharLenTable, - 'name': 'UTF-8'} +UTF8_CHAR_LEN_TABLE = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6) -# flake8: noqa +UTF8_SM_MODEL = {'class_table': UTF8_CLS, + 'class_factor': 16, + 'state_table': UTF8_ST, + 'char_len_table': UTF8_CHAR_LEN_TABLE, + 'name': 'UTF-8'} diff --git a/lib/chardet/sbcharsetprober.py b/lib/chardet/sbcharsetprober.py index 37291bd27a9d88f1746b21c10d6d8ef055f74110..03553732ec9138b001ea4f40735a67a960639812 100644 --- a/lib/chardet/sbcharsetprober.py +++ b/lib/chardet/sbcharsetprober.py @@ -26,95 +26,96 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -import sys -from . import constants from .charsetprober import CharSetProber from .compat import wrap_ord - -SAMPLE_SIZE = 64 -SB_ENOUGH_REL_THRESHOLD = 1024 -POSITIVE_SHORTCUT_THRESHOLD = 0.95 -NEGATIVE_SHORTCUT_THRESHOLD = 0.05 -SYMBOL_CAT_ORDER = 250 -NUMBER_OF_SEQ_CAT = 4 -POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1 -#NEGATIVE_CAT = 0 +from .enums import ProbingState class SingleByteCharSetProber(CharSetProber): - def __init__(self, model, reversed=False, nameProber=None): - CharSetProber.__init__(self) - self._mModel = model + SAMPLE_SIZE = 64 + SB_ENOUGH_REL_THRESHOLD = 1024 + POSITIVE_SHORTCUT_THRESHOLD = 0.95 + NEGATIVE_SHORTCUT_THRESHOLD = 0.05 + SYMBOL_CAT_ORDER = 250 + NUMBER_OF_SEQ_CAT = 4 + POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1 + + def __init__(self, model, reversed=False, name_prober=None): + super(SingleByteCharSetProber, self).__init__() + self._model = model # TRUE if we need to reverse every pair in the model lookup - self._mReversed = reversed + self._reversed = reversed # Optional auxiliary prober for name decision - self._mNameProber = nameProber + self._name_prober = name_prober + self._last_order = None + self._seq_counters = None + self._total_seqs = None + self._total_char = None + self._freq_char = None self.reset() def reset(self): - CharSetProber.reset(self) + super(SingleByteCharSetProber, self).reset() # char order of last character - self._mLastOrder = 255 - self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT - self._mTotalSeqs = 0 - self._mTotalChar = 0 + self._last_order = 255 + self._seq_counters = [0] * self.NUMBER_OF_SEQ_CAT + self._total_seqs = 0 + self._total_char = 0 # characters that fall in our sampling range - self._mFreqChar = 0 + self._freq_char = 0 - def get_charset_name(self): - if self._mNameProber: - return self._mNameProber.get_charset_name() + @property + def charset_name(self): + if self._name_prober: + return self._name_prober.charset_name else: - return self._mModel['charsetName'] + return self._model['charset_name'] - def feed(self, aBuf): - if not self._mModel['keepEnglishLetter']: - aBuf = self.filter_without_english_letters(aBuf) - aLen = len(aBuf) - if not aLen: - return self.get_state() - for c in aBuf: - order = self._mModel['charToOrderMap'][wrap_ord(c)] - if order < SYMBOL_CAT_ORDER: - self._mTotalChar += 1 - if order < SAMPLE_SIZE: - self._mFreqChar += 1 - if self._mLastOrder < SAMPLE_SIZE: - self._mTotalSeqs += 1 - if not self._mReversed: - i = (self._mLastOrder * SAMPLE_SIZE) + order - model = self._mModel['precedenceMatrix'][i] + def feed(self, byte_str): + if not self._model['keep_english_letter']: + byte_str = self.filter_international_words(byte_str) + num_bytes = len(byte_str) + if not num_bytes: + return self.state + for c in byte_str: + order = self._model['char_to_order_map'][wrap_ord(c)] + if order < self.SYMBOL_CAT_ORDER: + self._total_char += 1 + if order < self.SAMPLE_SIZE: + self._freq_char += 1 + if self._last_order < self.SAMPLE_SIZE: + self._total_seqs += 1 + if not self._reversed: + i = (self._last_order * self.SAMPLE_SIZE) + order + model = self._model['precedence_matrix'][i] else: # reverse the order of the letters in the lookup - i = (order * SAMPLE_SIZE) + self._mLastOrder - model = self._mModel['precedenceMatrix'][i] - self._mSeqCounters[model] += 1 - self._mLastOrder = order + i = (order * self.SAMPLE_SIZE) + self._last_order + model = self._model['precedence_matrix'][i] + self._seq_counters[model] += 1 + self._last_order = order - if self.get_state() == constants.eDetecting: - if self._mTotalSeqs > SB_ENOUGH_REL_THRESHOLD: + if self.state == ProbingState.detecting: + if self._total_seqs > self.SB_ENOUGH_REL_THRESHOLD: cf = self.get_confidence() - if cf > POSITIVE_SHORTCUT_THRESHOLD: - if constants._debug: - sys.stderr.write('%s confidence = %s, we have a' - 'winner\n' % - (self._mModel['charsetName'], cf)) - self._mState = constants.eFoundIt - elif cf < NEGATIVE_SHORTCUT_THRESHOLD: - if constants._debug: - sys.stderr.write('%s confidence = %s, below negative' - 'shortcut threshhold %s\n' % - (self._mModel['charsetName'], cf, - NEGATIVE_SHORTCUT_THRESHOLD)) - self._mState = constants.eNotMe + if cf > self.POSITIVE_SHORTCUT_THRESHOLD: + self.logger.debug('%s confidence = %s, we have a winner', + self._model['charset_name'], cf) + self._state = ProbingState.found_it + elif cf < self.NEGATIVE_SHORTCUT_THRESHOLD: + self.logger.debug('%s confidence = %s, below negative ' + 'shortcut threshhold %s', + self._model['charset_name'], cf, + self.NEGATIVE_SHORTCUT_THRESHOLD) + self._state = ProbingState.not_me - return self.get_state() + return self.state def get_confidence(self): r = 0.01 - if self._mTotalSeqs > 0: - r = ((1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs - / self._mModel['mTypicalPositiveRatio']) - r = r * self._mFreqChar / self._mTotalChar + if self._total_seqs > 0: + r = ((1.0 * self._seq_counters[self.POSITIVE_CAT]) / self._total_seqs + / self._model['typical_positive_ratio']) + r = r * self._freq_char / self._total_char if r >= 1.0: r = 0.99 return r diff --git a/lib/chardet/sbcsgroupprober.py b/lib/chardet/sbcsgroupprober.py index 1b6196cd16cf6a6d6daf42c48d2ca0327a69b91f..5c2bc53e0c216e8d2cd49fd5e678f7719378ae66 100644 --- a/lib/chardet/sbcsgroupprober.py +++ b/lib/chardet/sbcsgroupprober.py @@ -37,12 +37,13 @@ from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel from .langthaimodel import TIS620ThaiModel from .langhebrewmodel import Win1255HebrewModel from .hebrewprober import HebrewProber +from .langturkishmodel import Latin5TurkishModel class SBCSGroupProber(CharSetGroupProber): def __init__(self): - CharSetGroupProber.__init__(self) - self._mProbers = [ + super(SBCSGroupProber, self).__init__() + self.probers = [ SingleByteCharSetProber(Win1251CyrillicModel), SingleByteCharSetProber(Koi8rModel), SingleByteCharSetProber(Latin5CyrillicModel), @@ -53,17 +54,20 @@ class SBCSGroupProber(CharSetGroupProber): SingleByteCharSetProber(Win1253GreekModel), SingleByteCharSetProber(Latin5BulgarianModel), SingleByteCharSetProber(Win1251BulgarianModel), - SingleByteCharSetProber(Latin2HungarianModel), - SingleByteCharSetProber(Win1250HungarianModel), + # TODO: Restore Hungarian encodings (iso-8859-2 and windows-1250) + # after we retrain model. + # SingleByteCharSetProber(Latin2HungarianModel), + # SingleByteCharSetProber(Win1250HungarianModel), SingleByteCharSetProber(TIS620ThaiModel), + SingleByteCharSetProber(Latin5TurkishModel), ] - hebrewProber = HebrewProber() - logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, - False, hebrewProber) - visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, True, - hebrewProber) - hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber) - self._mProbers.extend([hebrewProber, logicalHebrewProber, - visualHebrewProber]) + hebrew_prober = HebrewProber() + logical_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel, + False, hebrew_prober) + visual_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel, True, + hebrew_prober) + hebrew_prober.set_model_probers(logical_hebrew_prober, visual_hebrew_prober) + self.probers.extend([hebrew_prober, logical_hebrew_prober, + visual_hebrew_prober]) self.reset() diff --git a/lib/chardet/sjisprober.py b/lib/chardet/sjisprober.py index b173614e6827b6f73eda8774a9e87f64e76a952d..33c5d83d742bdcacdc6c7fbabb4b6a5a1c30c299 100644 --- a/lib/chardet/sjisprober.py +++ b/lib/chardet/sjisprober.py @@ -25,67 +25,64 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -import sys from .mbcharsetprober import MultiByteCharSetProber from .codingstatemachine import CodingStateMachine from .chardistribution import SJISDistributionAnalysis from .jpcntx import SJISContextAnalysis -from .mbcssm import SJISSMModel -from . import constants +from .mbcssm import SJIS_SM_MODEL +from .enums import ProbingState, MachineState class SJISProber(MultiByteCharSetProber): def __init__(self): - MultiByteCharSetProber.__init__(self) - self._mCodingSM = CodingStateMachine(SJISSMModel) - self._mDistributionAnalyzer = SJISDistributionAnalysis() - self._mContextAnalyzer = SJISContextAnalysis() + super(SJISProber, self).__init__() + self.coding_sm = CodingStateMachine(SJIS_SM_MODEL) + self.distribution_analyzer = SJISDistributionAnalysis() + self.context_analyzer = SJISContextAnalysis() self.reset() def reset(self): - MultiByteCharSetProber.reset(self) - self._mContextAnalyzer.reset() + super(SJISProber, self).reset() + self.context_analyzer.reset() - def get_charset_name(self): - return "SHIFT_JIS" + @property + def charset_name(self): + return self.context_analyzer.charset_name - def feed(self, aBuf): - aLen = len(aBuf) - for i in range(0, aLen): - codingState = self._mCodingSM.next_state(aBuf[i]) - if codingState == constants.eError: - if constants._debug: - sys.stderr.write(self.get_charset_name() - + ' prober hit error at byte ' + str(i) - + '\n') - self._mState = constants.eNotMe + def feed(self, byte_str): + for i in range(len(byte_str)): + coding_state = self.coding_sm.next_state(byte_str[i]) + if coding_state == MachineState.error: + self.logger.debug('%s prober hit error at byte %s', + self.charset_name, i) + self._state = ProbingState.not_me break - elif codingState == constants.eItsMe: - self._mState = constants.eFoundIt + elif coding_state == MachineState.its_me: + self._state = ProbingState.found_it break - elif codingState == constants.eStart: - charLen = self._mCodingSM.get_current_charlen() + elif coding_state == MachineState.start: + char_len = self.coding_sm.get_current_charlen() if i == 0: - self._mLastChar[1] = aBuf[0] - self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:], - charLen) - self._mDistributionAnalyzer.feed(self._mLastChar, charLen) + self._last_char[1] = byte_str[0] + self.context_analyzer.feed(self._last_char[2 - char_len:], + char_len) + self.distribution_analyzer.feed(self._last_char, char_len) else: - self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3 - - charLen], charLen) - self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1], - charLen) + self.context_analyzer.feed(byte_str[i + 1 - char_len:i + 3 + - char_len], char_len) + self.distribution_analyzer.feed(byte_str[i - 1:i + 1], + char_len) - self._mLastChar[0] = aBuf[aLen - 1] + self._last_char[0] = byte_str[-1] - if self.get_state() == constants.eDetecting: - if (self._mContextAnalyzer.got_enough_data() and - (self.get_confidence() > constants.SHORTCUT_THRESHOLD)): - self._mState = constants.eFoundIt + if self.state == ProbingState.detecting: + if (self.context_analyzer.got_enough_data() and + (self.get_confidence() > self.SHORTCUT_THRESHOLD)): + self._state = ProbingState.found_it - return self.get_state() + return self.state def get_confidence(self): - contxtCf = self._mContextAnalyzer.get_confidence() - distribCf = self._mDistributionAnalyzer.get_confidence() - return max(contxtCf, distribCf) + context_conf = self.context_analyzer.get_confidence() + distrib_conf = self.distribution_analyzer.get_confidence() + return max(context_conf, distrib_conf) diff --git a/lib/chardet/universaldetector.py b/lib/chardet/universaldetector.py index 9a03ad3d89ac2be24738a70bcf2798608edd56ab..33712a73bec46ef00d38c6b1159406050972a59d 100644 --- a/lib/chardet/universaldetector.py +++ b/lib/chardet/universaldetector.py @@ -25,146 +25,212 @@ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +""" +Module containing the UniversalDetector detector class, which is the primary +class a user of ``chardet`` should use. + +:author: Mark Pilgrim (intial port to Python) +:author: Shy Shalom (original C code) +:author: Dan Blanchard (major refactoring for 3.0) +:author: Ian Cordasco +""" + -from . import constants -import sys import codecs -from .latin1prober import Latin1Prober # windows-1252 -from .mbcsgroupprober import MBCSGroupProber # multi-byte character sets -from .sbcsgroupprober import SBCSGroupProber # single-byte character sets -from .escprober import EscCharSetProber # ISO-2122, etc. +import logging import re -MINIMUM_THRESHOLD = 0.20 -ePureAscii = 0 -eEscAscii = 1 -eHighbyte = 2 +from .enums import InputState, LanguageFilter, ProbingState +from .escprober import EscCharSetProber +from .latin1prober import Latin1Prober +from .mbcsgroupprober import MBCSGroupProber +from .sbcsgroupprober import SBCSGroupProber + + +class UniversalDetector(object): + """ + The ``UniversalDetector`` class underlies the ``chardet.detect`` function + and coordinates all of the different charset probers. + + To get a ``dict`` containing an encoding and its confidence, you can simply + run: + + .. code:: + + u = UniversalDetector() + u.feed(some_bytes) + u.close() + detected = u.result + + """ + MINIMUM_THRESHOLD = 0.20 + HIGH_BYTE_DETECTOR = re.compile(b'[\x80-\xFF]') + ESC_DETECTOR = re.compile(b'(\033|~{)') -class UniversalDetector: - def __init__(self): - self._highBitDetector = re.compile(b'[\x80-\xFF]') - self._escDetector = re.compile(b'(\033|~{)') - self._mEscCharSetProber = None - self._mCharSetProbers = [] + def __init__(self, lang_filter=LanguageFilter.all): + self._esc_charset_prober = None + self._charset_probers = [] + self.result = None + self.done = None + self._got_data = None + self._input_state = None + self._last_char = None + self.lang_filter = lang_filter + self.logger = logging.getLogger(__name__) self.reset() def reset(self): + """ + Reset the UniversalDetector and all of its probers back to their + initial states. This is called by ``__init__``, so you only need to + call this directly in between analyses of different documents. + """ self.result = {'encoding': None, 'confidence': 0.0} self.done = False - self._mStart = True - self._mGotData = False - self._mInputState = ePureAscii - self._mLastChar = b'' - if self._mEscCharSetProber: - self._mEscCharSetProber.reset() - for prober in self._mCharSetProbers: + self._got_data = False + self._input_state = InputState.pure_ascii + self._last_char = b'' + if self._esc_charset_prober: + self._esc_charset_prober.reset() + for prober in self._charset_probers: prober.reset() - def feed(self, aBuf): + def feed(self, byte_str): + """ + Takes a chunk of a document and feeds it through all of the relevant + charset probers. + + After calling ``feed``, you can check the value of the ``done`` + attribute to see if you need to continue feeding the + ``UniversalDetector`` more data, or if it has made a prediction + (in the ``result`` attribute). + + .. note:: + You should always call ``close`` when you're done feeding in your + document if ``done`` is not already ``True``. + """ if self.done: return - aLen = len(aBuf) - if not aLen: + if not len(byte_str): return - if not self._mGotData: + # First check for known BOMs, since these are guaranteed to be correct + if not self._got_data: # If the data starts with BOM, we know it is UTF - if aBuf[:3] == codecs.BOM: + if byte_str.startswith(codecs.BOM_UTF8): # EF BB BF UTF-8 with BOM - self.result = {'encoding': "UTF-8", 'confidence': 1.0} - elif aBuf[:4] == codecs.BOM_UTF32_LE: + self.result = {'encoding': "UTF-8-SIG", 'confidence': 1.0} + elif byte_str.startswith(codecs.BOM_UTF32_LE): # FF FE 00 00 UTF-32, little-endian BOM self.result = {'encoding': "UTF-32LE", 'confidence': 1.0} - elif aBuf[:4] == codecs.BOM_UTF32_BE: + elif byte_str.startswith(codecs.BOM_UTF32_BE): # 00 00 FE FF UTF-32, big-endian BOM self.result = {'encoding': "UTF-32BE", 'confidence': 1.0} - elif aBuf[:4] == b'\xFE\xFF\x00\x00': + elif byte_str.startswith(b'\xFE\xFF\x00\x00'): # FE FF 00 00 UCS-4, unusual octet order BOM (3412) - self.result = { - 'encoding': "X-ISO-10646-UCS-4-3412", - 'confidence': 1.0 - } - elif aBuf[:4] == b'\x00\x00\xFF\xFE': + self.result = {'encoding': "X-ISO-10646-UCS-4-3412", + 'confidence': 1.0} + elif byte_str.startswith(b'\x00\x00\xFF\xFE'): # 00 00 FF FE UCS-4, unusual octet order BOM (2143) - self.result = { - 'encoding': "X-ISO-10646-UCS-4-2143", - 'confidence': 1.0 - } - elif aBuf[:2] == codecs.BOM_LE: + self.result = {'encoding': "X-ISO-10646-UCS-4-2143", + 'confidence': 1.0} + elif byte_str.startswith(codecs.BOM_LE): # FF FE UTF-16, little endian BOM self.result = {'encoding': "UTF-16LE", 'confidence': 1.0} - elif aBuf[:2] == codecs.BOM_BE: + elif byte_str.startswith(codecs.BOM_BE): # FE FF UTF-16, big endian BOM self.result = {'encoding': "UTF-16BE", 'confidence': 1.0} - self._mGotData = True - if self.result['encoding'] and (self.result['confidence'] > 0.0): - self.done = True - return - - if self._mInputState == ePureAscii: - if self._highBitDetector.search(aBuf): - self._mInputState = eHighbyte - elif ((self._mInputState == ePureAscii) and - self._escDetector.search(self._mLastChar + aBuf)): - self._mInputState = eEscAscii - - self._mLastChar = aBuf[-1:] - - if self._mInputState == eEscAscii: - if not self._mEscCharSetProber: - self._mEscCharSetProber = EscCharSetProber() - if self._mEscCharSetProber.feed(aBuf) == constants.eFoundIt: - self.result = {'encoding': self._mEscCharSetProber.get_charset_name(), - 'confidence': self._mEscCharSetProber.get_confidence()} + self._got_data = True + if self.result['encoding'] is not None: self.done = True - elif self._mInputState == eHighbyte: - if not self._mCharSetProbers: - self._mCharSetProbers = [MBCSGroupProber(), SBCSGroupProber(), - Latin1Prober()] - for prober in self._mCharSetProbers: - if prober.feed(aBuf) == constants.eFoundIt: - self.result = {'encoding': prober.get_charset_name(), + return + + # If none of those matched and we've only see ASCII so far, check + # for high bytes and escape sequences + if self._input_state == InputState.pure_ascii: + if self.HIGH_BYTE_DETECTOR.search(byte_str): + self._input_state = InputState.high_byte + elif self._input_state == InputState.pure_ascii and \ + self.ESC_DETECTOR.search(self._last_char + byte_str): + self._input_state = InputState.esc_ascii + + self._last_char = byte_str[-1:] + + # If we've seen escape sequences, use the EscCharSetProber, which + # uses a simple state machine to check for known escape sequences in + # HZ and ISO-2022 encodings, since those are the only encodings that + # use such sequences. + if self._input_state == InputState.esc_ascii: + if not self._esc_charset_prober: + self._esc_charset_prober = EscCharSetProber(self.lang_filter) + if self._esc_charset_prober.feed(byte_str) == ProbingState.found_it: + self.result = {'encoding': + self._esc_charset_prober.charset_name, + 'confidence': + self._esc_charset_prober.get_confidence()} + self.done = True + # If we've seen high bytes (i.e., those with values greater than 127), + # we need to do more complicated checks using all our multi-byte and + # single-byte probers that are left. The single-byte probers + # use character bigram distributions to determine the encoding, whereas + # the multi-byte probers use a combination of character unigram and + # bigram distributions. + elif self._input_state == InputState.high_byte: + if not self._charset_probers: + self._charset_probers = [MBCSGroupProber(self.lang_filter)] + # If we're checking non-CJK encodings, use single-byte prober + if self.lang_filter & LanguageFilter.non_cjk: + self._charset_probers.append(SBCSGroupProber()) + self._charset_probers.append(Latin1Prober()) + for prober in self._charset_probers: + if prober.feed(byte_str) == ProbingState.found_it: + self.result = {'encoding': prober.charset_name, 'confidence': prober.get_confidence()} self.done = True break def close(self): + """ + Stop analyzing the current document and come up with a final + prediction. + + :returns: The ``result`` attribute if a prediction was made, otherwise + ``None``. + """ if self.done: - return - if not self._mGotData: - if constants._debug: - sys.stderr.write('no data received!\n') + return self.result + if not self._got_data: + self.logger.debug('no data received!') return self.done = True - if self._mInputState == ePureAscii: + if self._input_state == InputState.pure_ascii: self.result = {'encoding': 'ascii', 'confidence': 1.0} return self.result - if self._mInputState == eHighbyte: + if self._input_state == InputState.high_byte: proberConfidence = None - maxProberConfidence = 0.0 - maxProber = None - for prober in self._mCharSetProbers: + max_prober_confidence = 0.0 + max_prober = None + for prober in self._charset_probers: if not prober: continue proberConfidence = prober.get_confidence() - if proberConfidence > maxProberConfidence: - maxProberConfidence = proberConfidence - maxProber = prober - if maxProber and (maxProberConfidence > MINIMUM_THRESHOLD): - self.result = {'encoding': maxProber.get_charset_name(), - 'confidence': maxProber.get_confidence()} + if proberConfidence > max_prober_confidence: + max_prober_confidence = proberConfidence + max_prober = prober + if max_prober and (max_prober_confidence > self.MINIMUM_THRESHOLD): + self.result = {'encoding': max_prober.charset_name, + 'confidence': max_prober.get_confidence()} return self.result - if constants._debug: - sys.stderr.write('no probers hit minimum threshhold\n') - for prober in self._mCharSetProbers[0].mProbers: + if self.logger.getEffectiveLevel() == logging.DEBUG: + self.logger.debug('no probers hit minimum threshhold') + for prober in self._charset_probers[0].mProbers: if not prober: continue - sys.stderr.write('%s confidence = %s\n' % - (prober.get_charset_name(), - prober.get_confidence())) + self.logger.debug('%s confidence = %s', prober.charset_name, + prober.get_confidence()) diff --git a/lib/chardet/utf8prober.py b/lib/chardet/utf8prober.py index 1c0bb5d8fdafd17f5c4b9c65b8a28fb5cfac544a..6f31f3cd375170e31fe0f545c09d5f78c2ee01da 100644 --- a/lib/chardet/utf8prober.py +++ b/lib/chardet/utf8prober.py @@ -25,52 +25,54 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from . import constants from .charsetprober import CharSetProber +from .enums import ProbingState, MachineState from .codingstatemachine import CodingStateMachine -from .mbcssm import UTF8SMModel +from .mbcssm import UTF8_SM_MODEL -ONE_CHAR_PROB = 0.5 class UTF8Prober(CharSetProber): + ONE_CHAR_PROB = 0.5 + def __init__(self): - CharSetProber.__init__(self) - self._mCodingSM = CodingStateMachine(UTF8SMModel) + super(UTF8Prober, self).__init__() + self.coding_sm = CodingStateMachine(UTF8_SM_MODEL) + self._num_mb_chars = None self.reset() def reset(self): - CharSetProber.reset(self) - self._mCodingSM.reset() - self._mNumOfMBChar = 0 + super(UTF8Prober, self).reset() + self.coding_sm.reset() + self._num_mb_chars = 0 - def get_charset_name(self): + @property + def charset_name(self): return "utf-8" - def feed(self, aBuf): - for c in aBuf: - codingState = self._mCodingSM.next_state(c) - if codingState == constants.eError: - self._mState = constants.eNotMe + def feed(self, byte_str): + for c in byte_str: + coding_state = self.coding_sm.next_state(c) + if coding_state == MachineState.error: + self._state = ProbingState.not_me break - elif codingState == constants.eItsMe: - self._mState = constants.eFoundIt + elif coding_state == MachineState.its_me: + self._state = ProbingState.found_it break - elif codingState == constants.eStart: - if self._mCodingSM.get_current_charlen() >= 2: - self._mNumOfMBChar += 1 + elif coding_state == MachineState.start: + if self.coding_sm.get_current_charlen() >= 2: + self._num_mb_chars += 1 - if self.get_state() == constants.eDetecting: - if self.get_confidence() > constants.SHORTCUT_THRESHOLD: - self._mState = constants.eFoundIt + if self.state == ProbingState.detecting: + if self.get_confidence() > self.SHORTCUT_THRESHOLD: + self._state = ProbingState.found_it - return self.get_state() + return self.state def get_confidence(self): unlike = 0.99 - if self._mNumOfMBChar < 6: - for i in range(0, self._mNumOfMBChar): - unlike = unlike * ONE_CHAR_PROB + if self._num_mb_chars < 6: + unlike *= self.ONE_CHAR_PROB ** self._num_mb_chars return 1.0 - unlike else: return unlike diff --git a/lib/chardet/version.py b/lib/chardet/version.py new file mode 100644 index 0000000000000000000000000000000000000000..8ed8c57894ac15f10f6fe043c79186b22d30a5ae --- /dev/null +++ b/lib/chardet/version.py @@ -0,0 +1,9 @@ +""" +This module exists only to simplify retrieving the version number of chardet +from within setup.py and from chardet subpackages. + +:author: Dan Blanchard (dblanchard@ets.org) +""" + +__version__ = "2.3.0" +VERSION = __version__.split('.') diff --git a/lib/enum34/LICENSE b/lib/enum34/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..9003b8850e7ef11e2e1dda0364370b82f4c769d6 --- /dev/null +++ b/lib/enum34/LICENSE @@ -0,0 +1,32 @@ +Copyright (c) 2013, Ethan Furman. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + Redistributions of source code must retain the above + copyright notice, this list of conditions and the + following disclaimer. + + Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials + provided with the distribution. + + Neither the name Ethan Furman nor the names of any + contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. diff --git a/lib/enum34/README b/lib/enum34/README new file mode 100644 index 0000000000000000000000000000000000000000..511af98413d117239d16b496460647f7ba42c629 --- /dev/null +++ b/lib/enum34/README @@ -0,0 +1,2 @@ +enum34 is the new Python stdlib enum module available in Python 3.4 +backported for previous versions of Python from 2.4 to 3.3. diff --git a/lib/enum34/__init__.py b/lib/enum34/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6a327a8a8f9b20c6062d65e5008457a007baff13 --- /dev/null +++ b/lib/enum34/__init__.py @@ -0,0 +1,790 @@ +"""Python Enumerations""" + +import sys as _sys + +__all__ = ['Enum', 'IntEnum', 'unique'] + +version = 1, 0, 4 + +pyver = float('%s.%s' % _sys.version_info[:2]) + +try: + any +except NameError: + def any(iterable): + for element in iterable: + if element: + return True + return False + +try: + from collections import OrderedDict +except ImportError: + OrderedDict = None + +try: + basestring +except NameError: + # In Python 2 basestring is the ancestor of both str and unicode + # in Python 3 it's just str, but was missing in 3.1 + basestring = str + +try: + unicode +except NameError: + # In Python 3 unicode no longer exists (it's just str) + unicode = str + +class _RouteClassAttributeToGetattr(object): + """Route attribute access on a class to __getattr__. + + This is a descriptor, used to define attributes that act differently when + accessed through an instance and through a class. Instance access remains + normal, but access to an attribute through a class will be routed to the + class's __getattr__ method; this is done by raising AttributeError. + + """ + def __init__(self, fget=None): + self.fget = fget + + def __get__(self, instance, ownerclass=None): + if instance is None: + raise AttributeError() + return self.fget(instance) + + def __set__(self, instance, value): + raise AttributeError("can't set attribute") + + def __delete__(self, instance): + raise AttributeError("can't delete attribute") + + +def _is_descriptor(obj): + """Returns True if obj is a descriptor, False otherwise.""" + return ( + hasattr(obj, '__get__') or + hasattr(obj, '__set__') or + hasattr(obj, '__delete__')) + + +def _is_dunder(name): + """Returns True if a __dunder__ name, False otherwise.""" + return (name[:2] == name[-2:] == '__' and + name[2:3] != '_' and + name[-3:-2] != '_' and + len(name) > 4) + + +def _is_sunder(name): + """Returns True if a _sunder_ name, False otherwise.""" + return (name[0] == name[-1] == '_' and + name[1:2] != '_' and + name[-2:-1] != '_' and + len(name) > 2) + + +def _make_class_unpicklable(cls): + """Make the given class un-picklable.""" + def _break_on_call_reduce(self, protocol=None): + raise TypeError('%r cannot be pickled' % self) + cls.__reduce_ex__ = _break_on_call_reduce + cls.__module__ = '<unknown>' + + +class _EnumDict(dict): + """Track enum member order and ensure member names are not reused. + + EnumMeta will use the names found in self._member_names as the + enumeration member names. + + """ + def __init__(self): + super(_EnumDict, self).__init__() + self._member_names = [] + + def __setitem__(self, key, value): + """Changes anything not dundered or not a descriptor. + + If a descriptor is added with the same name as an enum member, the name + is removed from _member_names (this may leave a hole in the numerical + sequence of values). + + If an enum member name is used twice, an error is raised; duplicate + values are not checked for. + + Single underscore (sunder) names are reserved. + + Note: in 3.x __order__ is simply discarded as a not necessary piece + leftover from 2.x + + """ + if pyver >= 3.0 and key == '__order__': + return + if _is_sunder(key): + raise ValueError('_names_ are reserved for future Enum use') + elif _is_dunder(key): + pass + elif key in self._member_names: + # descriptor overwriting an enum? + raise TypeError('Attempted to reuse key: %r' % key) + elif not _is_descriptor(value): + if key in self: + # enum overwriting a descriptor? + raise TypeError('Key already defined as: %r' % self[key]) + self._member_names.append(key) + super(_EnumDict, self).__setitem__(key, value) + + +# Dummy value for Enum as EnumMeta explicity checks for it, but of course until +# EnumMeta finishes running the first time the Enum class doesn't exist. This +# is also why there are checks in EnumMeta like `if Enum is not None` +Enum = None + + +class EnumMeta(type): + """Metaclass for Enum""" + @classmethod + def __prepare__(metacls, cls, bases): + return _EnumDict() + + def __new__(metacls, cls, bases, classdict): + # an Enum class is final once enumeration items have been defined; it + # cannot be mixed with other types (int, float, etc.) if it has an + # inherited __new__ unless a new __new__ is defined (or the resulting + # class will fail). + if type(classdict) is dict: + original_dict = classdict + classdict = _EnumDict() + for k, v in original_dict.items(): + classdict[k] = v + + member_type, first_enum = metacls._get_mixins_(bases) + __new__, save_new, use_args = metacls._find_new_(classdict, member_type, + first_enum) + # save enum items into separate mapping so they don't get baked into + # the new class + members = dict((k, classdict[k]) for k in classdict._member_names) + for name in classdict._member_names: + del classdict[name] + + # py2 support for definition order + __order__ = classdict.get('__order__') + if __order__ is None: + if pyver < 3.0: + try: + __order__ = [name for (name, value) in sorted(members.items(), key=lambda item: item[1])] + except TypeError: + __order__ = [name for name in sorted(members.keys())] + else: + __order__ = classdict._member_names + else: + del classdict['__order__'] + if pyver < 3.0: + __order__ = __order__.replace(',', ' ').split() + aliases = [name for name in members if name not in __order__] + __order__ += aliases + + # check for illegal enum names (any others?) + invalid_names = set(members) & set(['mro']) + if invalid_names: + raise ValueError('Invalid enum member name(s): %s' % ( + ', '.join(invalid_names), )) + + # create our new Enum type + enum_class = super(EnumMeta, metacls).__new__(metacls, cls, bases, classdict) + enum_class._member_names_ = [] # names in random order + if OrderedDict is not None: + enum_class._member_map_ = OrderedDict() + else: + enum_class._member_map_ = {} # name->value map + enum_class._member_type_ = member_type + + # Reverse value->name map for hashable values. + enum_class._value2member_map_ = {} + + # instantiate them, checking for duplicates as we go + # we instantiate first instead of checking for duplicates first in case + # a custom __new__ is doing something funky with the values -- such as + # auto-numbering ;) + if __new__ is None: + __new__ = enum_class.__new__ + for member_name in __order__: + value = members[member_name] + if not isinstance(value, tuple): + args = (value, ) + else: + args = value + if member_type is tuple: # special case for tuple enums + args = (args, ) # wrap it one more time + if not use_args or not args: + enum_member = __new__(enum_class) + if not hasattr(enum_member, '_value_'): + enum_member._value_ = value + else: + enum_member = __new__(enum_class, *args) + if not hasattr(enum_member, '_value_'): + enum_member._value_ = member_type(*args) + value = enum_member._value_ + enum_member._name_ = member_name + enum_member.__objclass__ = enum_class + enum_member.__init__(*args) + # If another member with the same value was already defined, the + # new member becomes an alias to the existing one. + for name, canonical_member in enum_class._member_map_.items(): + if canonical_member.value == enum_member._value_: + enum_member = canonical_member + break + else: + # Aliases don't appear in member names (only in __members__). + enum_class._member_names_.append(member_name) + enum_class._member_map_[member_name] = enum_member + try: + # This may fail if value is not hashable. We can't add the value + # to the map, and by-value lookups for this value will be + # linear. + enum_class._value2member_map_[value] = enum_member + except TypeError: + pass + + + # If a custom type is mixed into the Enum, and it does not know how + # to pickle itself, pickle.dumps will succeed but pickle.loads will + # fail. Rather than have the error show up later and possibly far + # from the source, sabotage the pickle protocol for this class so + # that pickle.dumps also fails. + # + # However, if the new class implements its own __reduce_ex__, do not + # sabotage -- it's on them to make sure it works correctly. We use + # __reduce_ex__ instead of any of the others as it is preferred by + # pickle over __reduce__, and it handles all pickle protocols. + unpicklable = False + if '__reduce_ex__' not in classdict: + if member_type is not object: + methods = ('__getnewargs_ex__', '__getnewargs__', + '__reduce_ex__', '__reduce__') + if not any(m in member_type.__dict__ for m in methods): + _make_class_unpicklable(enum_class) + unpicklable = True + + + # double check that repr and friends are not the mixin's or various + # things break (such as pickle) + for name in ('__repr__', '__str__', '__format__', '__reduce_ex__'): + class_method = getattr(enum_class, name) + obj_method = getattr(member_type, name, None) + enum_method = getattr(first_enum, name, None) + if name not in classdict and class_method is not enum_method: + if name == '__reduce_ex__' and unpicklable: + continue + setattr(enum_class, name, enum_method) + + # method resolution and int's are not playing nice + # Python's less than 2.6 use __cmp__ + + if pyver < 2.6: + + if issubclass(enum_class, int): + setattr(enum_class, '__cmp__', getattr(int, '__cmp__')) + + elif pyver < 3.0: + + if issubclass(enum_class, int): + for method in ( + '__le__', + '__lt__', + '__gt__', + '__ge__', + '__eq__', + '__ne__', + '__hash__', + ): + setattr(enum_class, method, getattr(int, method)) + + # replace any other __new__ with our own (as long as Enum is not None, + # anyway) -- again, this is to support pickle + if Enum is not None: + # if the user defined their own __new__, save it before it gets + # clobbered in case they subclass later + if save_new: + setattr(enum_class, '__member_new__', enum_class.__dict__['__new__']) + setattr(enum_class, '__new__', Enum.__dict__['__new__']) + return enum_class + + def __call__(cls, value, names=None, module=None, type=None): + """Either returns an existing member, or creates a new enum class. + + This method is used both when an enum class is given a value to match + to an enumeration member (i.e. Color(3)) and for the functional API + (i.e. Color = Enum('Color', names='red green blue')). + + When used for the functional API: `module`, if set, will be stored in + the new class' __module__ attribute; `type`, if set, will be mixed in + as the first base class. + + Note: if `module` is not set this routine will attempt to discover the + calling module by walking the frame stack; if this is unsuccessful + the resulting class will not be pickleable. + + """ + if names is None: # simple value lookup + return cls.__new__(cls, value) + # otherwise, functional API: we're creating a new Enum type + return cls._create_(value, names, module=module, type=type) + + def __contains__(cls, member): + return isinstance(member, cls) and member.name in cls._member_map_ + + def __delattr__(cls, attr): + # nicer error message when someone tries to delete an attribute + # (see issue19025). + if attr in cls._member_map_: + raise AttributeError( + "%s: cannot delete Enum member." % cls.__name__) + super(EnumMeta, cls).__delattr__(attr) + + def __dir__(self): + return (['__class__', '__doc__', '__members__', '__module__'] + + self._member_names_) + + @property + def __members__(cls): + """Returns a mapping of member name->value. + + This mapping lists all enum members, including aliases. Note that this + is a copy of the internal mapping. + + """ + return cls._member_map_.copy() + + def __getattr__(cls, name): + """Return the enum member matching `name` + + We use __getattr__ instead of descriptors or inserting into the enum + class' __dict__ in order to support `name` and `value` being both + properties for enum members (which live in the class' __dict__) and + enum members themselves. + + """ + if _is_dunder(name): + raise AttributeError(name) + try: + return cls._member_map_[name] + except KeyError: + raise AttributeError(name) + + def __getitem__(cls, name): + return cls._member_map_[name] + + def __iter__(cls): + return (cls._member_map_[name] for name in cls._member_names_) + + def __reversed__(cls): + return (cls._member_map_[name] for name in reversed(cls._member_names_)) + + def __len__(cls): + return len(cls._member_names_) + + def __repr__(cls): + return "<enum %r>" % cls.__name__ + + def __setattr__(cls, name, value): + """Block attempts to reassign Enum members. + + A simple assignment to the class namespace only changes one of the + several possible ways to get an Enum member from the Enum class, + resulting in an inconsistent Enumeration. + + """ + member_map = cls.__dict__.get('_member_map_', {}) + if name in member_map: + raise AttributeError('Cannot reassign members.') + super(EnumMeta, cls).__setattr__(name, value) + + def _create_(cls, class_name, names=None, module=None, type=None): + """Convenience method to create a new Enum class. + + `names` can be: + + * A string containing member names, separated either with spaces or + commas. Values are auto-numbered from 1. + * An iterable of member names. Values are auto-numbered from 1. + * An iterable of (member name, value) pairs. + * A mapping of member name -> value. + + """ + if pyver < 3.0: + # if class_name is unicode, attempt a conversion to ASCII + if isinstance(class_name, unicode): + try: + class_name = class_name.encode('ascii') + except UnicodeEncodeError: + raise TypeError('%r is not representable in ASCII' % class_name) + metacls = cls.__class__ + if type is None: + bases = (cls, ) + else: + bases = (type, cls) + classdict = metacls.__prepare__(class_name, bases) + __order__ = [] + + # special processing needed for names? + if isinstance(names, basestring): + names = names.replace(',', ' ').split() + if isinstance(names, (tuple, list)) and isinstance(names[0], basestring): + names = [(e, i+1) for (i, e) in enumerate(names)] + + # Here, names is either an iterable of (name, value) or a mapping. + for item in names: + if isinstance(item, basestring): + member_name, member_value = item, names[item] + else: + member_name, member_value = item + classdict[member_name] = member_value + __order__.append(member_name) + # only set __order__ in classdict if name/value was not from a mapping + if not isinstance(item, basestring): + classdict['__order__'] = ' '.join(__order__) + enum_class = metacls.__new__(metacls, class_name, bases, classdict) + + # TODO: replace the frame hack if a blessed way to know the calling + # module is ever developed + if module is None: + try: + module = _sys._getframe(2).f_globals['__name__'] + except (AttributeError, ValueError): + pass + if module is None: + _make_class_unpicklable(enum_class) + else: + enum_class.__module__ = module + + return enum_class + + @staticmethod + def _get_mixins_(bases): + """Returns the type for creating enum members, and the first inherited + enum class. + + bases: the tuple of bases that was given to __new__ + + """ + if not bases or Enum is None: + return object, Enum + + + # double check that we are not subclassing a class with existing + # enumeration members; while we're at it, see if any other data + # type has been mixed in so we can use the correct __new__ + member_type = first_enum = None + for base in bases: + if (base is not Enum and + issubclass(base, Enum) and + base._member_names_): + raise TypeError("Cannot extend enumerations") + # base is now the last base in bases + if not issubclass(base, Enum): + raise TypeError("new enumerations must be created as " + "`ClassName([mixin_type,] enum_type)`") + + # get correct mix-in type (either mix-in type of Enum subclass, or + # first base if last base is Enum) + if not issubclass(bases[0], Enum): + member_type = bases[0] # first data type + first_enum = bases[-1] # enum type + else: + for base in bases[0].__mro__: + # most common: (IntEnum, int, Enum, object) + # possible: (<Enum 'AutoIntEnum'>, <Enum 'IntEnum'>, + # <class 'int'>, <Enum 'Enum'>, + # <class 'object'>) + if issubclass(base, Enum): + if first_enum is None: + first_enum = base + else: + if member_type is None: + member_type = base + + return member_type, first_enum + + if pyver < 3.0: + @staticmethod + def _find_new_(classdict, member_type, first_enum): + """Returns the __new__ to be used for creating the enum members. + + classdict: the class dictionary given to __new__ + member_type: the data type whose __new__ will be used by default + first_enum: enumeration to check for an overriding __new__ + + """ + # now find the correct __new__, checking to see of one was defined + # by the user; also check earlier enum classes in case a __new__ was + # saved as __member_new__ + __new__ = classdict.get('__new__', None) + if __new__: + return None, True, True # __new__, save_new, use_args + + N__new__ = getattr(None, '__new__') + O__new__ = getattr(object, '__new__') + if Enum is None: + E__new__ = N__new__ + else: + E__new__ = Enum.__dict__['__new__'] + # check all possibles for __member_new__ before falling back to + # __new__ + for method in ('__member_new__', '__new__'): + for possible in (member_type, first_enum): + try: + target = possible.__dict__[method] + except (AttributeError, KeyError): + target = getattr(possible, method, None) + if target not in [ + None, + N__new__, + O__new__, + E__new__, + ]: + if method == '__member_new__': + classdict['__new__'] = target + return None, False, True + if isinstance(target, staticmethod): + target = target.__get__(member_type) + __new__ = target + break + if __new__ is not None: + break + else: + __new__ = object.__new__ + + # if a non-object.__new__ is used then whatever value/tuple was + # assigned to the enum member name will be passed to __new__ and to the + # new enum member's __init__ + if __new__ is object.__new__: + use_args = False + else: + use_args = True + + return __new__, False, use_args + else: + @staticmethod + def _find_new_(classdict, member_type, first_enum): + """Returns the __new__ to be used for creating the enum members. + + classdict: the class dictionary given to __new__ + member_type: the data type whose __new__ will be used by default + first_enum: enumeration to check for an overriding __new__ + + """ + # now find the correct __new__, checking to see of one was defined + # by the user; also check earlier enum classes in case a __new__ was + # saved as __member_new__ + __new__ = classdict.get('__new__', None) + + # should __new__ be saved as __member_new__ later? + save_new = __new__ is not None + + if __new__ is None: + # check all possibles for __member_new__ before falling back to + # __new__ + for method in ('__member_new__', '__new__'): + for possible in (member_type, first_enum): + target = getattr(possible, method, None) + if target not in ( + None, + None.__new__, + object.__new__, + Enum.__new__, + ): + __new__ = target + break + if __new__ is not None: + break + else: + __new__ = object.__new__ + + # if a non-object.__new__ is used then whatever value/tuple was + # assigned to the enum member name will be passed to __new__ and to the + # new enum member's __init__ + if __new__ is object.__new__: + use_args = False + else: + use_args = True + + return __new__, save_new, use_args + + +######################################################## +# In order to support Python 2 and 3 with a single +# codebase we have to create the Enum methods separately +# and then use the `type(name, bases, dict)` method to +# create the class. +######################################################## +temp_enum_dict = {} +temp_enum_dict['__doc__'] = "Generic enumeration.\n\n Derive from this class to define new enumerations.\n\n" + +def __new__(cls, value): + # all enum instances are actually created during class construction + # without calling this method; this method is called by the metaclass' + # __call__ (i.e. Color(3) ), and by pickle + if type(value) is cls: + # For lookups like Color(Color.red) + value = value.value + #return value + # by-value search for a matching enum member + # see if it's in the reverse mapping (for hashable values) + try: + if value in cls._value2member_map_: + return cls._value2member_map_[value] + except TypeError: + # not there, now do long search -- O(n) behavior + for member in cls._member_map_.values(): + if member.value == value: + return member + raise ValueError("%s is not a valid %s" % (value, cls.__name__)) +temp_enum_dict['__new__'] = __new__ +del __new__ + +def __repr__(self): + return "<%s.%s: %r>" % ( + self.__class__.__name__, self._name_, self._value_) +temp_enum_dict['__repr__'] = __repr__ +del __repr__ + +def __str__(self): + return "%s.%s" % (self.__class__.__name__, self._name_) +temp_enum_dict['__str__'] = __str__ +del __str__ + +def __dir__(self): + added_behavior = [ + m + for cls in self.__class__.mro() + for m in cls.__dict__ + if m[0] != '_' + ] + return (['__class__', '__doc__', '__module__', ] + added_behavior) +temp_enum_dict['__dir__'] = __dir__ +del __dir__ + +def __format__(self, format_spec): + # mixed-in Enums should use the mixed-in type's __format__, otherwise + # we can get strange results with the Enum name showing up instead of + # the value + + # pure Enum branch + if self._member_type_ is object: + cls = str + val = str(self) + # mix-in branch + else: + cls = self._member_type_ + val = self.value + return cls.__format__(val, format_spec) +temp_enum_dict['__format__'] = __format__ +del __format__ + + +#################################### +# Python's less than 2.6 use __cmp__ + +if pyver < 2.6: + + def __cmp__(self, other): + if type(other) is self.__class__: + if self is other: + return 0 + return -1 + return NotImplemented + raise TypeError("unorderable types: %s() and %s()" % (self.__class__.__name__, other.__class__.__name__)) + temp_enum_dict['__cmp__'] = __cmp__ + del __cmp__ + +else: + + def __le__(self, other): + raise TypeError("unorderable types: %s() <= %s()" % (self.__class__.__name__, other.__class__.__name__)) + temp_enum_dict['__le__'] = __le__ + del __le__ + + def __lt__(self, other): + raise TypeError("unorderable types: %s() < %s()" % (self.__class__.__name__, other.__class__.__name__)) + temp_enum_dict['__lt__'] = __lt__ + del __lt__ + + def __ge__(self, other): + raise TypeError("unorderable types: %s() >= %s()" % (self.__class__.__name__, other.__class__.__name__)) + temp_enum_dict['__ge__'] = __ge__ + del __ge__ + + def __gt__(self, other): + raise TypeError("unorderable types: %s() > %s()" % (self.__class__.__name__, other.__class__.__name__)) + temp_enum_dict['__gt__'] = __gt__ + del __gt__ + + +def __eq__(self, other): + if type(other) is self.__class__: + return self is other + return NotImplemented +temp_enum_dict['__eq__'] = __eq__ +del __eq__ + +def __ne__(self, other): + if type(other) is self.__class__: + return self is not other + return NotImplemented +temp_enum_dict['__ne__'] = __ne__ +del __ne__ + +def __hash__(self): + return hash(self._name_) +temp_enum_dict['__hash__'] = __hash__ +del __hash__ + +def __reduce_ex__(self, proto): + return self.__class__, (self._value_, ) +temp_enum_dict['__reduce_ex__'] = __reduce_ex__ +del __reduce_ex__ + +# _RouteClassAttributeToGetattr is used to provide access to the `name` +# and `value` properties of enum members while keeping some measure of +# protection from modification, while still allowing for an enumeration +# to have members named `name` and `value`. This works because enumeration +# members are not set directly on the enum class -- __getattr__ is +# used to look them up. + +@_RouteClassAttributeToGetattr +def name(self): + return self._name_ +temp_enum_dict['name'] = name +del name + +@_RouteClassAttributeToGetattr +def value(self): + return self._value_ +temp_enum_dict['value'] = value +del value + +Enum = EnumMeta('Enum', (object, ), temp_enum_dict) +del temp_enum_dict + +# Enum has now been created +########################### + +class IntEnum(int, Enum): + """Enum where members are also (and must be) ints""" + + +def unique(enumeration): + """Class decorator that ensures only unique members exist in an enumeration.""" + duplicates = [] + for name, member in enumeration.__members__.items(): + if name != member.name: + duplicates.append((name, member.name)) + if duplicates: + duplicate_names = ', '.join( + ["%s -> %s" % (alias, name) for (alias, name) in duplicates] + ) + raise ValueError('duplicate names found in %r: %s' % + (enumeration, duplicate_names) + ) + return enumeration diff --git a/lib/enum34/doc/enum.rst b/lib/enum34/doc/enum.rst new file mode 100644 index 0000000000000000000000000000000000000000..0d429bfc4c7e1579a321d74da7be3777dcb8bf88 --- /dev/null +++ b/lib/enum34/doc/enum.rst @@ -0,0 +1,725 @@ +``enum`` --- support for enumerations +======================================== + +.. :synopsis: enumerations are sets of symbolic names bound to unique, constant + values. +.. :moduleauthor:: Ethan Furman <ethan@stoneleaf.us> +.. :sectionauthor:: Barry Warsaw <barry@python.org>, +.. :sectionauthor:: Eli Bendersky <eliben@gmail.com>, +.. :sectionauthor:: Ethan Furman <ethan@stoneleaf.us> + +---------------- + +An enumeration is a set of symbolic names (members) bound to unique, constant +values. Within an enumeration, the members can be compared by identity, and +the enumeration itself can be iterated over. + + +Module Contents +--------------- + +This module defines two enumeration classes that can be used to define unique +sets of names and values: ``Enum`` and ``IntEnum``. It also defines +one decorator, ``unique``. + +``Enum`` + +Base class for creating enumerated constants. See section `Functional API`_ +for an alternate construction syntax. + +``IntEnum`` + +Base class for creating enumerated constants that are also subclasses of ``int``. + +``unique`` + +Enum class decorator that ensures only one name is bound to any one value. + + +Creating an Enum +---------------- + +Enumerations are created using the ``class`` syntax, which makes them +easy to read and write. An alternative creation method is described in +`Functional API`_. To define an enumeration, subclass ``Enum`` as +follows:: + + >>> from enum import Enum + >>> class Color(Enum): + ... red = 1 + ... green = 2 + ... blue = 3 + +Note: Nomenclature + + - The class ``Color`` is an *enumeration* (or *enum*) + - The attributes ``Color.red``, ``Color.green``, etc., are + *enumeration members* (or *enum members*). + - The enum members have *names* and *values* (the name of + ``Color.red`` is ``red``, the value of ``Color.blue`` is + ``3``, etc.) + +Note: + + Even though we use the ``class`` syntax to create Enums, Enums + are not normal Python classes. See `How are Enums different?`_ for + more details. + +Enumeration members have human readable string representations:: + + >>> print(Color.red) + Color.red + +...while their ``repr`` has more information:: + + >>> print(repr(Color.red)) + <Color.red: 1> + +The *type* of an enumeration member is the enumeration it belongs to:: + + >>> type(Color.red) + <enum 'Color'> + >>> isinstance(Color.green, Color) + True + >>> + +Enum members also have a property that contains just their item name:: + + >>> print(Color.red.name) + red + +Enumerations support iteration. In Python 3.x definition order is used; in +Python 2.x the definition order is not available, but class attribute +``__order__`` is supported; otherwise, value order is used:: + + >>> class Shake(Enum): + ... __order__ = 'vanilla chocolate cookies mint' # only needed in 2.x + ... vanilla = 7 + ... chocolate = 4 + ... cookies = 9 + ... mint = 3 + ... + >>> for shake in Shake: + ... print(shake) + ... + Shake.vanilla + Shake.chocolate + Shake.cookies + Shake.mint + +The ``__order__`` attribute is always removed, and in 3.x it is also ignored +(order is definition order); however, in the stdlib version it will be ignored +but not removed. + +Enumeration members are hashable, so they can be used in dictionaries and sets:: + + >>> apples = {} + >>> apples[Color.red] = 'red delicious' + >>> apples[Color.green] = 'granny smith' + >>> apples == {Color.red: 'red delicious', Color.green: 'granny smith'} + True + + +Programmatic access to enumeration members and their attributes +--------------------------------------------------------------- + +Sometimes it's useful to access members in enumerations programmatically (i.e. +situations where ``Color.red`` won't do because the exact color is not known +at program-writing time). ``Enum`` allows such access:: + + >>> Color(1) + <Color.red: 1> + >>> Color(3) + <Color.blue: 3> + +If you want to access enum members by *name*, use item access:: + + >>> Color['red'] + <Color.red: 1> + >>> Color['green'] + <Color.green: 2> + +If have an enum member and need its ``name`` or ``value``:: + + >>> member = Color.red + >>> member.name + 'red' + >>> member.value + 1 + + +Duplicating enum members and values +----------------------------------- + +Having two enum members (or any other attribute) with the same name is invalid; +in Python 3.x this would raise an error, but in Python 2.x the second member +simply overwrites the first:: + + >>> # python 2.x + >>> class Shape(Enum): + ... square = 2 + ... square = 3 + ... + >>> Shape.square + <Shape.square: 3> + + >>> # python 3.x + >>> class Shape(Enum): + ... square = 2 + ... square = 3 + Traceback (most recent call last): + ... + TypeError: Attempted to reuse key: 'square' + +However, two enum members are allowed to have the same value. Given two members +A and B with the same value (and A defined first), B is an alias to A. By-value +lookup of the value of A and B will return A. By-name lookup of B will also +return A:: + + >>> class Shape(Enum): + ... __order__ = 'square diamond circle alias_for_square' # only needed in 2.x + ... square = 2 + ... diamond = 1 + ... circle = 3 + ... alias_for_square = 2 + ... + >>> Shape.square + <Shape.square: 2> + >>> Shape.alias_for_square + <Shape.square: 2> + >>> Shape(2) + <Shape.square: 2> + + +Allowing aliases is not always desirable. ``unique`` can be used to ensure +that none exist in a particular enumeration:: + + >>> from enum import unique + >>> @unique + ... class Mistake(Enum): + ... __order__ = 'one two three four' # only needed in 2.x + ... one = 1 + ... two = 2 + ... three = 3 + ... four = 3 + Traceback (most recent call last): + ... + ValueError: duplicate names found in <enum 'Mistake'>: four -> three + +Iterating over the members of an enum does not provide the aliases:: + + >>> list(Shape) + [<Shape.square: 2>, <Shape.diamond: 1>, <Shape.circle: 3>] + +The special attribute ``__members__`` is a dictionary mapping names to members. +It includes all names defined in the enumeration, including the aliases:: + + >>> for name, member in sorted(Shape.__members__.items()): + ... name, member + ... + ('alias_for_square', <Shape.square: 2>) + ('circle', <Shape.circle: 3>) + ('diamond', <Shape.diamond: 1>) + ('square', <Shape.square: 2>) + +The ``__members__`` attribute can be used for detailed programmatic access to +the enumeration members. For example, finding all the aliases:: + + >>> [name for name, member in Shape.__members__.items() if member.name != name] + ['alias_for_square'] + +Comparisons +----------- + +Enumeration members are compared by identity:: + + >>> Color.red is Color.red + True + >>> Color.red is Color.blue + False + >>> Color.red is not Color.blue + True + +Ordered comparisons between enumeration values are *not* supported. Enum +members are not integers (but see `IntEnum`_ below):: + + >>> Color.red < Color.blue + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + TypeError: unorderable types: Color() < Color() + +.. warning:: + + In Python 2 *everything* is ordered, even though the ordering may not + make sense. If you want your enumerations to have a sensible ordering + check out the `OrderedEnum`_ recipe below. + + +Equality comparisons are defined though:: + + >>> Color.blue == Color.red + False + >>> Color.blue != Color.red + True + >>> Color.blue == Color.blue + True + +Comparisons against non-enumeration values will always compare not equal +(again, ``IntEnum`` was explicitly designed to behave differently, see +below):: + + >>> Color.blue == 2 + False + + +Allowed members and attributes of enumerations +---------------------------------------------- + +The examples above use integers for enumeration values. Using integers is +short and handy (and provided by default by the `Functional API`_), but not +strictly enforced. In the vast majority of use-cases, one doesn't care what +the actual value of an enumeration is. But if the value *is* important, +enumerations can have arbitrary values. + +Enumerations are Python classes, and can have methods and special methods as +usual. If we have this enumeration:: + + >>> class Mood(Enum): + ... funky = 1 + ... happy = 3 + ... + ... def describe(self): + ... # self is the member here + ... return self.name, self.value + ... + ... def __str__(self): + ... return 'my custom str! {0}'.format(self.value) + ... + ... @classmethod + ... def favorite_mood(cls): + ... # cls here is the enumeration + ... return cls.happy + +Then:: + + >>> Mood.favorite_mood() + <Mood.happy: 3> + >>> Mood.happy.describe() + ('happy', 3) + >>> str(Mood.funky) + 'my custom str! 1' + +The rules for what is allowed are as follows: _sunder_ names (starting and +ending with a single underscore) are reserved by enum and cannot be used; +all other attributes defined within an enumeration will become members of this +enumeration, with the exception of *__dunder__* names and descriptors (methods +are also descriptors). + +Note: + + If your enumeration defines ``__new__`` and/or ``__init__`` then + whatever value(s) were given to the enum member will be passed into + those methods. See `Planet`_ for an example. + + +Restricted subclassing of enumerations +-------------------------------------- + +Subclassing an enumeration is allowed only if the enumeration does not define +any members. So this is forbidden:: + + >>> class MoreColor(Color): + ... pink = 17 + Traceback (most recent call last): + ... + TypeError: Cannot extend enumerations + +But this is allowed:: + + >>> class Foo(Enum): + ... def some_behavior(self): + ... pass + ... + >>> class Bar(Foo): + ... happy = 1 + ... sad = 2 + ... + +Allowing subclassing of enums that define members would lead to a violation of +some important invariants of types and instances. On the other hand, it makes +sense to allow sharing some common behavior between a group of enumerations. +(See `OrderedEnum`_ for an example.) + + +Pickling +-------- + +Enumerations can be pickled and unpickled:: + + >>> from enum.test_enum import Fruit + >>> from pickle import dumps, loads + >>> Fruit.tomato is loads(dumps(Fruit.tomato, 2)) + True + +The usual restrictions for pickling apply: picklable enums must be defined in +the top level of a module, since unpickling requires them to be importable +from that module. + +Note: + + With pickle protocol version 4 (introduced in Python 3.4) it is possible + to easily pickle enums nested in other classes. + + + +Functional API +-------------- + +The ``Enum`` class is callable, providing the following functional API:: + + >>> Animal = Enum('Animal', 'ant bee cat dog') + >>> Animal + <enum 'Animal'> + >>> Animal.ant + <Animal.ant: 1> + >>> Animal.ant.value + 1 + >>> list(Animal) + [<Animal.ant: 1>, <Animal.bee: 2>, <Animal.cat: 3>, <Animal.dog: 4>] + +The semantics of this API resemble ``namedtuple``. The first argument +of the call to ``Enum`` is the name of the enumeration. + +The second argument is the *source* of enumeration member names. It can be a +whitespace-separated string of names, a sequence of names, a sequence of +2-tuples with key/value pairs, or a mapping (e.g. dictionary) of names to +values. The last two options enable assigning arbitrary values to +enumerations; the others auto-assign increasing integers starting with 1. A +new class derived from ``Enum`` is returned. In other words, the above +assignment to ``Animal`` is equivalent to:: + + >>> class Animals(Enum): + ... ant = 1 + ... bee = 2 + ... cat = 3 + ... dog = 4 + +Pickling enums created with the functional API can be tricky as frame stack +implementation details are used to try and figure out which module the +enumeration is being created in (e.g. it will fail if you use a utility +function in separate module, and also may not work on IronPython or Jython). +The solution is to specify the module name explicitly as follows:: + + >>> Animals = Enum('Animals', 'ant bee cat dog', module=__name__) + +Derived Enumerations +-------------------- + +IntEnum +^^^^^^^ + +A variation of ``Enum`` is provided which is also a subclass of +``int``. Members of an ``IntEnum`` can be compared to integers; +by extension, integer enumerations of different types can also be compared +to each other:: + + >>> from enum import IntEnum + >>> class Shape(IntEnum): + ... circle = 1 + ... square = 2 + ... + >>> class Request(IntEnum): + ... post = 1 + ... get = 2 + ... + >>> Shape == 1 + False + >>> Shape.circle == 1 + True + >>> Shape.circle == Request.post + True + +However, they still can't be compared to standard ``Enum`` enumerations:: + + >>> class Shape(IntEnum): + ... circle = 1 + ... square = 2 + ... + >>> class Color(Enum): + ... red = 1 + ... green = 2 + ... + >>> Shape.circle == Color.red + False + +``IntEnum`` values behave like integers in other ways you'd expect:: + + >>> int(Shape.circle) + 1 + >>> ['a', 'b', 'c'][Shape.circle] + 'b' + >>> [i for i in range(Shape.square)] + [0, 1] + +For the vast majority of code, ``Enum`` is strongly recommended, +since ``IntEnum`` breaks some semantic promises of an enumeration (by +being comparable to integers, and thus by transitivity to other +unrelated enumerations). It should be used only in special cases where +there's no other choice; for example, when integer constants are +replaced with enumerations and backwards compatibility is required with code +that still expects integers. + + +Others +^^^^^^ + +While ``IntEnum`` is part of the ``enum`` module, it would be very +simple to implement independently:: + + class IntEnum(int, Enum): + pass + +This demonstrates how similar derived enumerations can be defined; for example +a ``StrEnum`` that mixes in ``str`` instead of ``int``. + +Some rules: + +1. When subclassing ``Enum``, mix-in types must appear before + ``Enum`` itself in the sequence of bases, as in the ``IntEnum`` + example above. +2. While ``Enum`` can have members of any type, once you mix in an + additional type, all the members must have values of that type, e.g. + ``int`` above. This restriction does not apply to mix-ins which only + add methods and don't specify another data type such as ``int`` or + ``str``. +3. When another data type is mixed in, the ``value`` attribute is *not the + same* as the enum member itself, although it is equivalant and will compare + equal. +4. %-style formatting: ``%s`` and ``%r`` call ``Enum``'s ``__str__`` and + ``__repr__`` respectively; other codes (such as ``%i`` or ``%h`` for + IntEnum) treat the enum member as its mixed-in type. + + Note: Prior to Python 3.4 there is a bug in ``str``'s %-formatting: ``int`` + subclasses are printed as strings and not numbers when the ``%d``, ``%i``, + or ``%u`` codes are used. +5. ``str.__format__`` (or ``format``) will use the mixed-in + type's ``__format__``. If the ``Enum``'s ``str`` or + ``repr`` is desired use the ``!s`` or ``!r`` ``str`` format codes. + + +Decorators +---------- + +unique +^^^^^^ + +A ``class`` decorator specifically for enumerations. It searches an +enumeration's ``__members__`` gathering any aliases it finds; if any are +found ``ValueError`` is raised with the details:: + + >>> @unique + ... class NoDupes(Enum): + ... first = 'one' + ... second = 'two' + ... third = 'two' + Traceback (most recent call last): + ... + ValueError: duplicate names found in <enum 'NoDupes'>: third -> second + + +Interesting examples +-------------------- + +While ``Enum`` and ``IntEnum`` are expected to cover the majority of +use-cases, they cannot cover them all. Here are recipes for some different +types of enumerations that can be used directly, or as examples for creating +one's own. + + +AutoNumber +^^^^^^^^^^ + +Avoids having to specify the value for each enumeration member:: + + >>> class AutoNumber(Enum): + ... def __new__(cls): + ... value = len(cls.__members__) + 1 + ... obj = object.__new__(cls) + ... obj._value_ = value + ... return obj + ... + >>> class Color(AutoNumber): + ... __order__ = "red green blue" # only needed in 2.x + ... red = () + ... green = () + ... blue = () + ... + >>> Color.green.value == 2 + True + +Note: + + The `__new__` method, if defined, is used during creation of the Enum + members; it is then replaced by Enum's `__new__` which is used after + class creation for lookup of existing members. Due to the way Enums are + supposed to behave, there is no way to customize Enum's `__new__`. + + +UniqueEnum +^^^^^^^^^^ + +Raises an error if a duplicate member name is found instead of creating an +alias:: + + >>> class UniqueEnum(Enum): + ... def __init__(self, *args): + ... cls = self.__class__ + ... if any(self.value == e.value for e in cls): + ... a = self.name + ... e = cls(self.value).name + ... raise ValueError( + ... "aliases not allowed in UniqueEnum: %r --> %r" + ... % (a, e)) + ... + >>> class Color(UniqueEnum): + ... red = 1 + ... green = 2 + ... blue = 3 + ... grene = 2 + Traceback (most recent call last): + ... + ValueError: aliases not allowed in UniqueEnum: 'grene' --> 'green' + + +OrderedEnum +^^^^^^^^^^^ + +An ordered enumeration that is not based on ``IntEnum`` and so maintains +the normal ``Enum`` invariants (such as not being comparable to other +enumerations):: + + >>> class OrderedEnum(Enum): + ... def __ge__(self, other): + ... if self.__class__ is other.__class__: + ... return self._value_ >= other._value_ + ... return NotImplemented + ... def __gt__(self, other): + ... if self.__class__ is other.__class__: + ... return self._value_ > other._value_ + ... return NotImplemented + ... def __le__(self, other): + ... if self.__class__ is other.__class__: + ... return self._value_ <= other._value_ + ... return NotImplemented + ... def __lt__(self, other): + ... if self.__class__ is other.__class__: + ... return self._value_ < other._value_ + ... return NotImplemented + ... + >>> class Grade(OrderedEnum): + ... __ordered__ = 'A B C D F' + ... A = 5 + ... B = 4 + ... C = 3 + ... D = 2 + ... F = 1 + ... + >>> Grade.C < Grade.A + True + + +Planet +^^^^^^ + +If ``__new__`` or ``__init__`` is defined the value of the enum member +will be passed to those methods:: + + >>> class Planet(Enum): + ... MERCURY = (3.303e+23, 2.4397e6) + ... VENUS = (4.869e+24, 6.0518e6) + ... EARTH = (5.976e+24, 6.37814e6) + ... MARS = (6.421e+23, 3.3972e6) + ... JUPITER = (1.9e+27, 7.1492e7) + ... SATURN = (5.688e+26, 6.0268e7) + ... URANUS = (8.686e+25, 2.5559e7) + ... NEPTUNE = (1.024e+26, 2.4746e7) + ... def __init__(self, mass, radius): + ... self.mass = mass # in kilograms + ... self.radius = radius # in meters + ... @property + ... def surface_gravity(self): + ... # universal gravitational constant (m3 kg-1 s-2) + ... G = 6.67300E-11 + ... return G * self.mass / (self.radius * self.radius) + ... + >>> Planet.EARTH.value + (5.976e+24, 6378140.0) + >>> Planet.EARTH.surface_gravity + 9.802652743337129 + + +How are Enums different? +------------------------ + +Enums have a custom metaclass that affects many aspects of both derived Enum +classes and their instances (members). + + +Enum Classes +^^^^^^^^^^^^ + +The ``EnumMeta`` metaclass is responsible for providing the +``__contains__``, ``__dir__``, ``__iter__`` and other methods that +allow one to do things with an ``Enum`` class that fail on a typical +class, such as ``list(Color)`` or ``some_var in Color``. ``EnumMeta`` is +responsible for ensuring that various other methods on the final ``Enum`` +class are correct (such as ``__new__``, ``__getnewargs__``, +``__str__`` and ``__repr__``) + + +Enum Members (aka instances) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The most interesting thing about Enum members is that they are singletons. +``EnumMeta`` creates them all while it is creating the ``Enum`` +class itself, and then puts a custom ``__new__`` in place to ensure +that no new ones are ever instantiated by returning only the existing +member instances. + + +Finer Points +^^^^^^^^^^^^ + +Enum members are instances of an Enum class, and even though they are +accessible as ``EnumClass.member``, they are not accessible directly from +the member:: + + >>> Color.red + <Color.red: 1> + >>> Color.red.blue + Traceback (most recent call last): + ... + AttributeError: 'Color' object has no attribute 'blue' + +Likewise, ``__members__`` is only available on the class. + +In Python 3.x ``__members__`` is always an ``OrderedDict``, with the order being +the definition order. In Python 2.7 ``__members__`` is an ``OrderedDict`` if +``__order__`` was specified, and a plain ``dict`` otherwise. In all other Python +2.x versions ``__members__`` is a plain ``dict`` even if ``__order__`` was specified +as the ``OrderedDict`` type didn't exist yet. + +If you give your ``Enum`` subclass extra methods, like the `Planet`_ +class above, those methods will show up in a `dir` of the member, +but not of the class:: + + >>> dir(Planet) + ['EARTH', 'JUPITER', 'MARS', 'MERCURY', 'NEPTUNE', 'SATURN', 'URANUS', + 'VENUS', '__class__', '__doc__', '__members__', '__module__'] + >>> dir(Planet.EARTH) + ['__class__', '__doc__', '__module__', 'name', 'surface_gravity', 'value'] + +A ``__new__`` method will only be used for the creation of the +``Enum`` members -- after that it is replaced. This means if you wish to +change how ``Enum`` members are looked up you either have to write a +helper function or a ``classmethod``. diff --git a/lib/enum34/enum.py b/lib/enum34/enum.py new file mode 100644 index 0000000000000000000000000000000000000000..6a327a8a8f9b20c6062d65e5008457a007baff13 --- /dev/null +++ b/lib/enum34/enum.py @@ -0,0 +1,790 @@ +"""Python Enumerations""" + +import sys as _sys + +__all__ = ['Enum', 'IntEnum', 'unique'] + +version = 1, 0, 4 + +pyver = float('%s.%s' % _sys.version_info[:2]) + +try: + any +except NameError: + def any(iterable): + for element in iterable: + if element: + return True + return False + +try: + from collections import OrderedDict +except ImportError: + OrderedDict = None + +try: + basestring +except NameError: + # In Python 2 basestring is the ancestor of both str and unicode + # in Python 3 it's just str, but was missing in 3.1 + basestring = str + +try: + unicode +except NameError: + # In Python 3 unicode no longer exists (it's just str) + unicode = str + +class _RouteClassAttributeToGetattr(object): + """Route attribute access on a class to __getattr__. + + This is a descriptor, used to define attributes that act differently when + accessed through an instance and through a class. Instance access remains + normal, but access to an attribute through a class will be routed to the + class's __getattr__ method; this is done by raising AttributeError. + + """ + def __init__(self, fget=None): + self.fget = fget + + def __get__(self, instance, ownerclass=None): + if instance is None: + raise AttributeError() + return self.fget(instance) + + def __set__(self, instance, value): + raise AttributeError("can't set attribute") + + def __delete__(self, instance): + raise AttributeError("can't delete attribute") + + +def _is_descriptor(obj): + """Returns True if obj is a descriptor, False otherwise.""" + return ( + hasattr(obj, '__get__') or + hasattr(obj, '__set__') or + hasattr(obj, '__delete__')) + + +def _is_dunder(name): + """Returns True if a __dunder__ name, False otherwise.""" + return (name[:2] == name[-2:] == '__' and + name[2:3] != '_' and + name[-3:-2] != '_' and + len(name) > 4) + + +def _is_sunder(name): + """Returns True if a _sunder_ name, False otherwise.""" + return (name[0] == name[-1] == '_' and + name[1:2] != '_' and + name[-2:-1] != '_' and + len(name) > 2) + + +def _make_class_unpicklable(cls): + """Make the given class un-picklable.""" + def _break_on_call_reduce(self, protocol=None): + raise TypeError('%r cannot be pickled' % self) + cls.__reduce_ex__ = _break_on_call_reduce + cls.__module__ = '<unknown>' + + +class _EnumDict(dict): + """Track enum member order and ensure member names are not reused. + + EnumMeta will use the names found in self._member_names as the + enumeration member names. + + """ + def __init__(self): + super(_EnumDict, self).__init__() + self._member_names = [] + + def __setitem__(self, key, value): + """Changes anything not dundered or not a descriptor. + + If a descriptor is added with the same name as an enum member, the name + is removed from _member_names (this may leave a hole in the numerical + sequence of values). + + If an enum member name is used twice, an error is raised; duplicate + values are not checked for. + + Single underscore (sunder) names are reserved. + + Note: in 3.x __order__ is simply discarded as a not necessary piece + leftover from 2.x + + """ + if pyver >= 3.0 and key == '__order__': + return + if _is_sunder(key): + raise ValueError('_names_ are reserved for future Enum use') + elif _is_dunder(key): + pass + elif key in self._member_names: + # descriptor overwriting an enum? + raise TypeError('Attempted to reuse key: %r' % key) + elif not _is_descriptor(value): + if key in self: + # enum overwriting a descriptor? + raise TypeError('Key already defined as: %r' % self[key]) + self._member_names.append(key) + super(_EnumDict, self).__setitem__(key, value) + + +# Dummy value for Enum as EnumMeta explicity checks for it, but of course until +# EnumMeta finishes running the first time the Enum class doesn't exist. This +# is also why there are checks in EnumMeta like `if Enum is not None` +Enum = None + + +class EnumMeta(type): + """Metaclass for Enum""" + @classmethod + def __prepare__(metacls, cls, bases): + return _EnumDict() + + def __new__(metacls, cls, bases, classdict): + # an Enum class is final once enumeration items have been defined; it + # cannot be mixed with other types (int, float, etc.) if it has an + # inherited __new__ unless a new __new__ is defined (or the resulting + # class will fail). + if type(classdict) is dict: + original_dict = classdict + classdict = _EnumDict() + for k, v in original_dict.items(): + classdict[k] = v + + member_type, first_enum = metacls._get_mixins_(bases) + __new__, save_new, use_args = metacls._find_new_(classdict, member_type, + first_enum) + # save enum items into separate mapping so they don't get baked into + # the new class + members = dict((k, classdict[k]) for k in classdict._member_names) + for name in classdict._member_names: + del classdict[name] + + # py2 support for definition order + __order__ = classdict.get('__order__') + if __order__ is None: + if pyver < 3.0: + try: + __order__ = [name for (name, value) in sorted(members.items(), key=lambda item: item[1])] + except TypeError: + __order__ = [name for name in sorted(members.keys())] + else: + __order__ = classdict._member_names + else: + del classdict['__order__'] + if pyver < 3.0: + __order__ = __order__.replace(',', ' ').split() + aliases = [name for name in members if name not in __order__] + __order__ += aliases + + # check for illegal enum names (any others?) + invalid_names = set(members) & set(['mro']) + if invalid_names: + raise ValueError('Invalid enum member name(s): %s' % ( + ', '.join(invalid_names), )) + + # create our new Enum type + enum_class = super(EnumMeta, metacls).__new__(metacls, cls, bases, classdict) + enum_class._member_names_ = [] # names in random order + if OrderedDict is not None: + enum_class._member_map_ = OrderedDict() + else: + enum_class._member_map_ = {} # name->value map + enum_class._member_type_ = member_type + + # Reverse value->name map for hashable values. + enum_class._value2member_map_ = {} + + # instantiate them, checking for duplicates as we go + # we instantiate first instead of checking for duplicates first in case + # a custom __new__ is doing something funky with the values -- such as + # auto-numbering ;) + if __new__ is None: + __new__ = enum_class.__new__ + for member_name in __order__: + value = members[member_name] + if not isinstance(value, tuple): + args = (value, ) + else: + args = value + if member_type is tuple: # special case for tuple enums + args = (args, ) # wrap it one more time + if not use_args or not args: + enum_member = __new__(enum_class) + if not hasattr(enum_member, '_value_'): + enum_member._value_ = value + else: + enum_member = __new__(enum_class, *args) + if not hasattr(enum_member, '_value_'): + enum_member._value_ = member_type(*args) + value = enum_member._value_ + enum_member._name_ = member_name + enum_member.__objclass__ = enum_class + enum_member.__init__(*args) + # If another member with the same value was already defined, the + # new member becomes an alias to the existing one. + for name, canonical_member in enum_class._member_map_.items(): + if canonical_member.value == enum_member._value_: + enum_member = canonical_member + break + else: + # Aliases don't appear in member names (only in __members__). + enum_class._member_names_.append(member_name) + enum_class._member_map_[member_name] = enum_member + try: + # This may fail if value is not hashable. We can't add the value + # to the map, and by-value lookups for this value will be + # linear. + enum_class._value2member_map_[value] = enum_member + except TypeError: + pass + + + # If a custom type is mixed into the Enum, and it does not know how + # to pickle itself, pickle.dumps will succeed but pickle.loads will + # fail. Rather than have the error show up later and possibly far + # from the source, sabotage the pickle protocol for this class so + # that pickle.dumps also fails. + # + # However, if the new class implements its own __reduce_ex__, do not + # sabotage -- it's on them to make sure it works correctly. We use + # __reduce_ex__ instead of any of the others as it is preferred by + # pickle over __reduce__, and it handles all pickle protocols. + unpicklable = False + if '__reduce_ex__' not in classdict: + if member_type is not object: + methods = ('__getnewargs_ex__', '__getnewargs__', + '__reduce_ex__', '__reduce__') + if not any(m in member_type.__dict__ for m in methods): + _make_class_unpicklable(enum_class) + unpicklable = True + + + # double check that repr and friends are not the mixin's or various + # things break (such as pickle) + for name in ('__repr__', '__str__', '__format__', '__reduce_ex__'): + class_method = getattr(enum_class, name) + obj_method = getattr(member_type, name, None) + enum_method = getattr(first_enum, name, None) + if name not in classdict and class_method is not enum_method: + if name == '__reduce_ex__' and unpicklable: + continue + setattr(enum_class, name, enum_method) + + # method resolution and int's are not playing nice + # Python's less than 2.6 use __cmp__ + + if pyver < 2.6: + + if issubclass(enum_class, int): + setattr(enum_class, '__cmp__', getattr(int, '__cmp__')) + + elif pyver < 3.0: + + if issubclass(enum_class, int): + for method in ( + '__le__', + '__lt__', + '__gt__', + '__ge__', + '__eq__', + '__ne__', + '__hash__', + ): + setattr(enum_class, method, getattr(int, method)) + + # replace any other __new__ with our own (as long as Enum is not None, + # anyway) -- again, this is to support pickle + if Enum is not None: + # if the user defined their own __new__, save it before it gets + # clobbered in case they subclass later + if save_new: + setattr(enum_class, '__member_new__', enum_class.__dict__['__new__']) + setattr(enum_class, '__new__', Enum.__dict__['__new__']) + return enum_class + + def __call__(cls, value, names=None, module=None, type=None): + """Either returns an existing member, or creates a new enum class. + + This method is used both when an enum class is given a value to match + to an enumeration member (i.e. Color(3)) and for the functional API + (i.e. Color = Enum('Color', names='red green blue')). + + When used for the functional API: `module`, if set, will be stored in + the new class' __module__ attribute; `type`, if set, will be mixed in + as the first base class. + + Note: if `module` is not set this routine will attempt to discover the + calling module by walking the frame stack; if this is unsuccessful + the resulting class will not be pickleable. + + """ + if names is None: # simple value lookup + return cls.__new__(cls, value) + # otherwise, functional API: we're creating a new Enum type + return cls._create_(value, names, module=module, type=type) + + def __contains__(cls, member): + return isinstance(member, cls) and member.name in cls._member_map_ + + def __delattr__(cls, attr): + # nicer error message when someone tries to delete an attribute + # (see issue19025). + if attr in cls._member_map_: + raise AttributeError( + "%s: cannot delete Enum member." % cls.__name__) + super(EnumMeta, cls).__delattr__(attr) + + def __dir__(self): + return (['__class__', '__doc__', '__members__', '__module__'] + + self._member_names_) + + @property + def __members__(cls): + """Returns a mapping of member name->value. + + This mapping lists all enum members, including aliases. Note that this + is a copy of the internal mapping. + + """ + return cls._member_map_.copy() + + def __getattr__(cls, name): + """Return the enum member matching `name` + + We use __getattr__ instead of descriptors or inserting into the enum + class' __dict__ in order to support `name` and `value` being both + properties for enum members (which live in the class' __dict__) and + enum members themselves. + + """ + if _is_dunder(name): + raise AttributeError(name) + try: + return cls._member_map_[name] + except KeyError: + raise AttributeError(name) + + def __getitem__(cls, name): + return cls._member_map_[name] + + def __iter__(cls): + return (cls._member_map_[name] for name in cls._member_names_) + + def __reversed__(cls): + return (cls._member_map_[name] for name in reversed(cls._member_names_)) + + def __len__(cls): + return len(cls._member_names_) + + def __repr__(cls): + return "<enum %r>" % cls.__name__ + + def __setattr__(cls, name, value): + """Block attempts to reassign Enum members. + + A simple assignment to the class namespace only changes one of the + several possible ways to get an Enum member from the Enum class, + resulting in an inconsistent Enumeration. + + """ + member_map = cls.__dict__.get('_member_map_', {}) + if name in member_map: + raise AttributeError('Cannot reassign members.') + super(EnumMeta, cls).__setattr__(name, value) + + def _create_(cls, class_name, names=None, module=None, type=None): + """Convenience method to create a new Enum class. + + `names` can be: + + * A string containing member names, separated either with spaces or + commas. Values are auto-numbered from 1. + * An iterable of member names. Values are auto-numbered from 1. + * An iterable of (member name, value) pairs. + * A mapping of member name -> value. + + """ + if pyver < 3.0: + # if class_name is unicode, attempt a conversion to ASCII + if isinstance(class_name, unicode): + try: + class_name = class_name.encode('ascii') + except UnicodeEncodeError: + raise TypeError('%r is not representable in ASCII' % class_name) + metacls = cls.__class__ + if type is None: + bases = (cls, ) + else: + bases = (type, cls) + classdict = metacls.__prepare__(class_name, bases) + __order__ = [] + + # special processing needed for names? + if isinstance(names, basestring): + names = names.replace(',', ' ').split() + if isinstance(names, (tuple, list)) and isinstance(names[0], basestring): + names = [(e, i+1) for (i, e) in enumerate(names)] + + # Here, names is either an iterable of (name, value) or a mapping. + for item in names: + if isinstance(item, basestring): + member_name, member_value = item, names[item] + else: + member_name, member_value = item + classdict[member_name] = member_value + __order__.append(member_name) + # only set __order__ in classdict if name/value was not from a mapping + if not isinstance(item, basestring): + classdict['__order__'] = ' '.join(__order__) + enum_class = metacls.__new__(metacls, class_name, bases, classdict) + + # TODO: replace the frame hack if a blessed way to know the calling + # module is ever developed + if module is None: + try: + module = _sys._getframe(2).f_globals['__name__'] + except (AttributeError, ValueError): + pass + if module is None: + _make_class_unpicklable(enum_class) + else: + enum_class.__module__ = module + + return enum_class + + @staticmethod + def _get_mixins_(bases): + """Returns the type for creating enum members, and the first inherited + enum class. + + bases: the tuple of bases that was given to __new__ + + """ + if not bases or Enum is None: + return object, Enum + + + # double check that we are not subclassing a class with existing + # enumeration members; while we're at it, see if any other data + # type has been mixed in so we can use the correct __new__ + member_type = first_enum = None + for base in bases: + if (base is not Enum and + issubclass(base, Enum) and + base._member_names_): + raise TypeError("Cannot extend enumerations") + # base is now the last base in bases + if not issubclass(base, Enum): + raise TypeError("new enumerations must be created as " + "`ClassName([mixin_type,] enum_type)`") + + # get correct mix-in type (either mix-in type of Enum subclass, or + # first base if last base is Enum) + if not issubclass(bases[0], Enum): + member_type = bases[0] # first data type + first_enum = bases[-1] # enum type + else: + for base in bases[0].__mro__: + # most common: (IntEnum, int, Enum, object) + # possible: (<Enum 'AutoIntEnum'>, <Enum 'IntEnum'>, + # <class 'int'>, <Enum 'Enum'>, + # <class 'object'>) + if issubclass(base, Enum): + if first_enum is None: + first_enum = base + else: + if member_type is None: + member_type = base + + return member_type, first_enum + + if pyver < 3.0: + @staticmethod + def _find_new_(classdict, member_type, first_enum): + """Returns the __new__ to be used for creating the enum members. + + classdict: the class dictionary given to __new__ + member_type: the data type whose __new__ will be used by default + first_enum: enumeration to check for an overriding __new__ + + """ + # now find the correct __new__, checking to see of one was defined + # by the user; also check earlier enum classes in case a __new__ was + # saved as __member_new__ + __new__ = classdict.get('__new__', None) + if __new__: + return None, True, True # __new__, save_new, use_args + + N__new__ = getattr(None, '__new__') + O__new__ = getattr(object, '__new__') + if Enum is None: + E__new__ = N__new__ + else: + E__new__ = Enum.__dict__['__new__'] + # check all possibles for __member_new__ before falling back to + # __new__ + for method in ('__member_new__', '__new__'): + for possible in (member_type, first_enum): + try: + target = possible.__dict__[method] + except (AttributeError, KeyError): + target = getattr(possible, method, None) + if target not in [ + None, + N__new__, + O__new__, + E__new__, + ]: + if method == '__member_new__': + classdict['__new__'] = target + return None, False, True + if isinstance(target, staticmethod): + target = target.__get__(member_type) + __new__ = target + break + if __new__ is not None: + break + else: + __new__ = object.__new__ + + # if a non-object.__new__ is used then whatever value/tuple was + # assigned to the enum member name will be passed to __new__ and to the + # new enum member's __init__ + if __new__ is object.__new__: + use_args = False + else: + use_args = True + + return __new__, False, use_args + else: + @staticmethod + def _find_new_(classdict, member_type, first_enum): + """Returns the __new__ to be used for creating the enum members. + + classdict: the class dictionary given to __new__ + member_type: the data type whose __new__ will be used by default + first_enum: enumeration to check for an overriding __new__ + + """ + # now find the correct __new__, checking to see of one was defined + # by the user; also check earlier enum classes in case a __new__ was + # saved as __member_new__ + __new__ = classdict.get('__new__', None) + + # should __new__ be saved as __member_new__ later? + save_new = __new__ is not None + + if __new__ is None: + # check all possibles for __member_new__ before falling back to + # __new__ + for method in ('__member_new__', '__new__'): + for possible in (member_type, first_enum): + target = getattr(possible, method, None) + if target not in ( + None, + None.__new__, + object.__new__, + Enum.__new__, + ): + __new__ = target + break + if __new__ is not None: + break + else: + __new__ = object.__new__ + + # if a non-object.__new__ is used then whatever value/tuple was + # assigned to the enum member name will be passed to __new__ and to the + # new enum member's __init__ + if __new__ is object.__new__: + use_args = False + else: + use_args = True + + return __new__, save_new, use_args + + +######################################################## +# In order to support Python 2 and 3 with a single +# codebase we have to create the Enum methods separately +# and then use the `type(name, bases, dict)` method to +# create the class. +######################################################## +temp_enum_dict = {} +temp_enum_dict['__doc__'] = "Generic enumeration.\n\n Derive from this class to define new enumerations.\n\n" + +def __new__(cls, value): + # all enum instances are actually created during class construction + # without calling this method; this method is called by the metaclass' + # __call__ (i.e. Color(3) ), and by pickle + if type(value) is cls: + # For lookups like Color(Color.red) + value = value.value + #return value + # by-value search for a matching enum member + # see if it's in the reverse mapping (for hashable values) + try: + if value in cls._value2member_map_: + return cls._value2member_map_[value] + except TypeError: + # not there, now do long search -- O(n) behavior + for member in cls._member_map_.values(): + if member.value == value: + return member + raise ValueError("%s is not a valid %s" % (value, cls.__name__)) +temp_enum_dict['__new__'] = __new__ +del __new__ + +def __repr__(self): + return "<%s.%s: %r>" % ( + self.__class__.__name__, self._name_, self._value_) +temp_enum_dict['__repr__'] = __repr__ +del __repr__ + +def __str__(self): + return "%s.%s" % (self.__class__.__name__, self._name_) +temp_enum_dict['__str__'] = __str__ +del __str__ + +def __dir__(self): + added_behavior = [ + m + for cls in self.__class__.mro() + for m in cls.__dict__ + if m[0] != '_' + ] + return (['__class__', '__doc__', '__module__', ] + added_behavior) +temp_enum_dict['__dir__'] = __dir__ +del __dir__ + +def __format__(self, format_spec): + # mixed-in Enums should use the mixed-in type's __format__, otherwise + # we can get strange results with the Enum name showing up instead of + # the value + + # pure Enum branch + if self._member_type_ is object: + cls = str + val = str(self) + # mix-in branch + else: + cls = self._member_type_ + val = self.value + return cls.__format__(val, format_spec) +temp_enum_dict['__format__'] = __format__ +del __format__ + + +#################################### +# Python's less than 2.6 use __cmp__ + +if pyver < 2.6: + + def __cmp__(self, other): + if type(other) is self.__class__: + if self is other: + return 0 + return -1 + return NotImplemented + raise TypeError("unorderable types: %s() and %s()" % (self.__class__.__name__, other.__class__.__name__)) + temp_enum_dict['__cmp__'] = __cmp__ + del __cmp__ + +else: + + def __le__(self, other): + raise TypeError("unorderable types: %s() <= %s()" % (self.__class__.__name__, other.__class__.__name__)) + temp_enum_dict['__le__'] = __le__ + del __le__ + + def __lt__(self, other): + raise TypeError("unorderable types: %s() < %s()" % (self.__class__.__name__, other.__class__.__name__)) + temp_enum_dict['__lt__'] = __lt__ + del __lt__ + + def __ge__(self, other): + raise TypeError("unorderable types: %s() >= %s()" % (self.__class__.__name__, other.__class__.__name__)) + temp_enum_dict['__ge__'] = __ge__ + del __ge__ + + def __gt__(self, other): + raise TypeError("unorderable types: %s() > %s()" % (self.__class__.__name__, other.__class__.__name__)) + temp_enum_dict['__gt__'] = __gt__ + del __gt__ + + +def __eq__(self, other): + if type(other) is self.__class__: + return self is other + return NotImplemented +temp_enum_dict['__eq__'] = __eq__ +del __eq__ + +def __ne__(self, other): + if type(other) is self.__class__: + return self is not other + return NotImplemented +temp_enum_dict['__ne__'] = __ne__ +del __ne__ + +def __hash__(self): + return hash(self._name_) +temp_enum_dict['__hash__'] = __hash__ +del __hash__ + +def __reduce_ex__(self, proto): + return self.__class__, (self._value_, ) +temp_enum_dict['__reduce_ex__'] = __reduce_ex__ +del __reduce_ex__ + +# _RouteClassAttributeToGetattr is used to provide access to the `name` +# and `value` properties of enum members while keeping some measure of +# protection from modification, while still allowing for an enumeration +# to have members named `name` and `value`. This works because enumeration +# members are not set directly on the enum class -- __getattr__ is +# used to look them up. + +@_RouteClassAttributeToGetattr +def name(self): + return self._name_ +temp_enum_dict['name'] = name +del name + +@_RouteClassAttributeToGetattr +def value(self): + return self._value_ +temp_enum_dict['value'] = value +del value + +Enum = EnumMeta('Enum', (object, ), temp_enum_dict) +del temp_enum_dict + +# Enum has now been created +########################### + +class IntEnum(int, Enum): + """Enum where members are also (and must be) ints""" + + +def unique(enumeration): + """Class decorator that ensures only unique members exist in an enumeration.""" + duplicates = [] + for name, member in enumeration.__members__.items(): + if name != member.name: + duplicates.append((name, member.name)) + if duplicates: + duplicate_names = ', '.join( + ["%s -> %s" % (alias, name) for (alias, name) in duplicates] + ) + raise ValueError('duplicate names found in %r: %s' % + (enumeration, duplicate_names) + ) + return enumeration diff --git a/lib/enum34/test_enum.py b/lib/enum34/test_enum.py new file mode 100644 index 0000000000000000000000000000000000000000..d7a97942151a7895278b526f3d1195a23c26ed9c --- /dev/null +++ b/lib/enum34/test_enum.py @@ -0,0 +1,1690 @@ +import enum +import sys +import unittest +from enum import Enum, IntEnum, unique, EnumMeta +from pickle import dumps, loads, PicklingError, HIGHEST_PROTOCOL + +pyver = float('%s.%s' % sys.version_info[:2]) + +try: + any +except NameError: + def any(iterable): + for element in iterable: + if element: + return True + return False + +try: + unicode +except NameError: + unicode = str + +try: + from collections import OrderedDict +except ImportError: + OrderedDict = None + +# for pickle tests +try: + class Stooges(Enum): + LARRY = 1 + CURLY = 2 + MOE = 3 +except Exception: + Stooges = sys.exc_info()[1] + +try: + class IntStooges(int, Enum): + LARRY = 1 + CURLY = 2 + MOE = 3 +except Exception: + IntStooges = sys.exc_info()[1] + +try: + class FloatStooges(float, Enum): + LARRY = 1.39 + CURLY = 2.72 + MOE = 3.142596 +except Exception: + FloatStooges = sys.exc_info()[1] + +# for pickle test and subclass tests +try: + class StrEnum(str, Enum): + 'accepts only string values' + class Name(StrEnum): + BDFL = 'Guido van Rossum' + FLUFL = 'Barry Warsaw' +except Exception: + Name = sys.exc_info()[1] + +try: + Question = Enum('Question', 'who what when where why', module=__name__) +except Exception: + Question = sys.exc_info()[1] + +try: + Answer = Enum('Answer', 'him this then there because') +except Exception: + Answer = sys.exc_info()[1] + +try: + Theory = Enum('Theory', 'rule law supposition', qualname='spanish_inquisition') +except Exception: + Theory = sys.exc_info()[1] + +# for doctests +try: + class Fruit(Enum): + tomato = 1 + banana = 2 + cherry = 3 +except Exception: + pass + +def test_pickle_dump_load(assertion, source, target=None, + protocol=(0, HIGHEST_PROTOCOL)): + start, stop = protocol + failures = [] + for protocol in range(start, stop+1): + try: + if target is None: + assertion(loads(dumps(source, protocol=protocol)) is source) + else: + assertion(loads(dumps(source, protocol=protocol)), target) + except Exception: + exc, tb = sys.exc_info()[1:] + failures.append('%2d: %s' %(protocol, exc)) + if failures: + raise ValueError('Failed with protocols: %s' % ', '.join(failures)) + +def test_pickle_exception(assertion, exception, obj, + protocol=(0, HIGHEST_PROTOCOL)): + start, stop = protocol + failures = [] + for protocol in range(start, stop+1): + try: + assertion(exception, dumps, obj, protocol=protocol) + except Exception: + exc = sys.exc_info()[1] + failures.append('%d: %s %s' % (protocol, exc.__class__.__name__, exc)) + if failures: + raise ValueError('Failed with protocols: %s' % ', '.join(failures)) + + +class TestHelpers(unittest.TestCase): + # _is_descriptor, _is_sunder, _is_dunder + + def test_is_descriptor(self): + class foo: + pass + for attr in ('__get__','__set__','__delete__'): + obj = foo() + self.assertFalse(enum._is_descriptor(obj)) + setattr(obj, attr, 1) + self.assertTrue(enum._is_descriptor(obj)) + + def test_is_sunder(self): + for s in ('_a_', '_aa_'): + self.assertTrue(enum._is_sunder(s)) + + for s in ('a', 'a_', '_a', '__a', 'a__', '__a__', '_a__', '__a_', '_', + '__', '___', '____', '_____',): + self.assertFalse(enum._is_sunder(s)) + + def test_is_dunder(self): + for s in ('__a__', '__aa__'): + self.assertTrue(enum._is_dunder(s)) + for s in ('a', 'a_', '_a', '__a', 'a__', '_a_', '_a__', '__a_', '_', + '__', '___', '____', '_____',): + self.assertFalse(enum._is_dunder(s)) + + +class TestEnum(unittest.TestCase): + def setUp(self): + class Season(Enum): + SPRING = 1 + SUMMER = 2 + AUTUMN = 3 + WINTER = 4 + self.Season = Season + + class Konstants(float, Enum): + E = 2.7182818 + PI = 3.1415926 + TAU = 2 * PI + self.Konstants = Konstants + + class Grades(IntEnum): + A = 5 + B = 4 + C = 3 + D = 2 + F = 0 + self.Grades = Grades + + class Directional(str, Enum): + EAST = 'east' + WEST = 'west' + NORTH = 'north' + SOUTH = 'south' + self.Directional = Directional + + from datetime import date + class Holiday(date, Enum): + NEW_YEAR = 2013, 1, 1 + IDES_OF_MARCH = 2013, 3, 15 + self.Holiday = Holiday + + if pyver >= 2.6: # cannot specify custom `dir` on previous versions + def test_dir_on_class(self): + Season = self.Season + self.assertEqual( + set(dir(Season)), + set(['__class__', '__doc__', '__members__', '__module__', + 'SPRING', 'SUMMER', 'AUTUMN', 'WINTER']), + ) + + def test_dir_on_item(self): + Season = self.Season + self.assertEqual( + set(dir(Season.WINTER)), + set(['__class__', '__doc__', '__module__', 'name', 'value']), + ) + + def test_dir_on_sub_with_behavior_on_super(self): + # see issue22506 + class SuperEnum(Enum): + def invisible(self): + return "did you see me?" + class SubEnum(SuperEnum): + sample = 5 + self.assertEqual( + set(dir(SubEnum.sample)), + set(['__class__', '__doc__', '__module__', 'name', 'value', 'invisible']), + ) + + if pyver >= 2.7: # OrderedDict first available here + def test_members_is_ordereddict_if_ordered(self): + class Ordered(Enum): + __order__ = 'first second third' + first = 'bippity' + second = 'boppity' + third = 'boo' + self.assertTrue(type(Ordered.__members__) is OrderedDict) + + def test_members_is_ordereddict_if_not_ordered(self): + class Unordered(Enum): + this = 'that' + these = 'those' + self.assertTrue(type(Unordered.__members__) is OrderedDict) + + if pyver >= 3.0: # all objects are ordered in Python 2.x + def test_members_is_always_ordered(self): + class AlwaysOrdered(Enum): + first = 1 + second = 2 + third = 3 + self.assertTrue(type(AlwaysOrdered.__members__) is OrderedDict) + + def test_comparisons(self): + def bad_compare(): + Season.SPRING > 4 + Season = self.Season + self.assertNotEqual(Season.SPRING, 1) + self.assertRaises(TypeError, bad_compare) + + class Part(Enum): + SPRING = 1 + CLIP = 2 + BARREL = 3 + + self.assertNotEqual(Season.SPRING, Part.SPRING) + def bad_compare(): + Season.SPRING < Part.CLIP + self.assertRaises(TypeError, bad_compare) + + def test_enum_in_enum_out(self): + Season = self.Season + self.assertTrue(Season(Season.WINTER) is Season.WINTER) + + def test_enum_value(self): + Season = self.Season + self.assertEqual(Season.SPRING.value, 1) + + def test_intenum_value(self): + self.assertEqual(IntStooges.CURLY.value, 2) + + def test_enum(self): + Season = self.Season + lst = list(Season) + self.assertEqual(len(lst), len(Season)) + self.assertEqual(len(Season), 4, Season) + self.assertEqual( + [Season.SPRING, Season.SUMMER, Season.AUTUMN, Season.WINTER], lst) + + for i, season in enumerate('SPRING SUMMER AUTUMN WINTER'.split()): + i += 1 + e = Season(i) + self.assertEqual(e, getattr(Season, season)) + self.assertEqual(e.value, i) + self.assertNotEqual(e, i) + self.assertEqual(e.name, season) + self.assertTrue(e in Season) + self.assertTrue(type(e) is Season) + self.assertTrue(isinstance(e, Season)) + self.assertEqual(str(e), 'Season.' + season) + self.assertEqual( + repr(e), + '<Season.%s: %s>' % (season, i), + ) + + def test_value_name(self): + Season = self.Season + self.assertEqual(Season.SPRING.name, 'SPRING') + self.assertEqual(Season.SPRING.value, 1) + def set_name(obj, new_value): + obj.name = new_value + def set_value(obj, new_value): + obj.value = new_value + self.assertRaises(AttributeError, set_name, Season.SPRING, 'invierno', ) + self.assertRaises(AttributeError, set_value, Season.SPRING, 2) + + def test_attribute_deletion(self): + class Season(Enum): + SPRING = 1 + SUMMER = 2 + AUTUMN = 3 + WINTER = 4 + + def spam(cls): + pass + + self.assertTrue(hasattr(Season, 'spam')) + del Season.spam + self.assertFalse(hasattr(Season, 'spam')) + + self.assertRaises(AttributeError, delattr, Season, 'SPRING') + self.assertRaises(AttributeError, delattr, Season, 'DRY') + self.assertRaises(AttributeError, delattr, Season.SPRING, 'name') + + def test_invalid_names(self): + def create_bad_class_1(): + class Wrong(Enum): + mro = 9 + def create_bad_class_2(): + class Wrong(Enum): + _reserved_ = 3 + self.assertRaises(ValueError, create_bad_class_1) + self.assertRaises(ValueError, create_bad_class_2) + + def test_contains(self): + Season = self.Season + self.assertTrue(Season.AUTUMN in Season) + self.assertTrue(3 not in Season) + + val = Season(3) + self.assertTrue(val in Season) + + class OtherEnum(Enum): + one = 1; two = 2 + self.assertTrue(OtherEnum.two not in Season) + + if pyver >= 2.6: # when `format` came into being + + def test_format_enum(self): + Season = self.Season + self.assertEqual('{0}'.format(Season.SPRING), + '{0}'.format(str(Season.SPRING))) + self.assertEqual( '{0:}'.format(Season.SPRING), + '{0:}'.format(str(Season.SPRING))) + self.assertEqual('{0:20}'.format(Season.SPRING), + '{0:20}'.format(str(Season.SPRING))) + self.assertEqual('{0:^20}'.format(Season.SPRING), + '{0:^20}'.format(str(Season.SPRING))) + self.assertEqual('{0:>20}'.format(Season.SPRING), + '{0:>20}'.format(str(Season.SPRING))) + self.assertEqual('{0:<20}'.format(Season.SPRING), + '{0:<20}'.format(str(Season.SPRING))) + + def test_format_enum_custom(self): + class TestFloat(float, Enum): + one = 1.0 + two = 2.0 + def __format__(self, spec): + return 'TestFloat success!' + self.assertEqual('{0}'.format(TestFloat.one), 'TestFloat success!') + + def assertFormatIsValue(self, spec, member): + self.assertEqual(spec.format(member), spec.format(member.value)) + + def test_format_enum_date(self): + Holiday = self.Holiday + self.assertFormatIsValue('{0}', Holiday.IDES_OF_MARCH) + self.assertFormatIsValue('{0:}', Holiday.IDES_OF_MARCH) + self.assertFormatIsValue('{0:20}', Holiday.IDES_OF_MARCH) + self.assertFormatIsValue('{0:^20}', Holiday.IDES_OF_MARCH) + self.assertFormatIsValue('{0:>20}', Holiday.IDES_OF_MARCH) + self.assertFormatIsValue('{0:<20}', Holiday.IDES_OF_MARCH) + self.assertFormatIsValue('{0:%Y %m}', Holiday.IDES_OF_MARCH) + self.assertFormatIsValue('{0:%Y %m %M:00}', Holiday.IDES_OF_MARCH) + + def test_format_enum_float(self): + Konstants = self.Konstants + self.assertFormatIsValue('{0}', Konstants.TAU) + self.assertFormatIsValue('{0:}', Konstants.TAU) + self.assertFormatIsValue('{0:20}', Konstants.TAU) + self.assertFormatIsValue('{0:^20}', Konstants.TAU) + self.assertFormatIsValue('{0:>20}', Konstants.TAU) + self.assertFormatIsValue('{0:<20}', Konstants.TAU) + self.assertFormatIsValue('{0:n}', Konstants.TAU) + self.assertFormatIsValue('{0:5.2}', Konstants.TAU) + self.assertFormatIsValue('{0:f}', Konstants.TAU) + + def test_format_enum_int(self): + Grades = self.Grades + self.assertFormatIsValue('{0}', Grades.C) + self.assertFormatIsValue('{0:}', Grades.C) + self.assertFormatIsValue('{0:20}', Grades.C) + self.assertFormatIsValue('{0:^20}', Grades.C) + self.assertFormatIsValue('{0:>20}', Grades.C) + self.assertFormatIsValue('{0:<20}', Grades.C) + self.assertFormatIsValue('{0:+}', Grades.C) + self.assertFormatIsValue('{0:08X}', Grades.C) + self.assertFormatIsValue('{0:b}', Grades.C) + + def test_format_enum_str(self): + Directional = self.Directional + self.assertFormatIsValue('{0}', Directional.WEST) + self.assertFormatIsValue('{0:}', Directional.WEST) + self.assertFormatIsValue('{0:20}', Directional.WEST) + self.assertFormatIsValue('{0:^20}', Directional.WEST) + self.assertFormatIsValue('{0:>20}', Directional.WEST) + self.assertFormatIsValue('{0:<20}', Directional.WEST) + + def test_hash(self): + Season = self.Season + dates = {} + dates[Season.WINTER] = '1225' + dates[Season.SPRING] = '0315' + dates[Season.SUMMER] = '0704' + dates[Season.AUTUMN] = '1031' + self.assertEqual(dates[Season.AUTUMN], '1031') + + def test_enum_duplicates(self): + __order__ = "SPRING SUMMER AUTUMN WINTER" + class Season(Enum): + SPRING = 1 + SUMMER = 2 + AUTUMN = FALL = 3 + WINTER = 4 + ANOTHER_SPRING = 1 + lst = list(Season) + self.assertEqual( + lst, + [Season.SPRING, Season.SUMMER, + Season.AUTUMN, Season.WINTER, + ]) + self.assertTrue(Season.FALL is Season.AUTUMN) + self.assertEqual(Season.FALL.value, 3) + self.assertEqual(Season.AUTUMN.value, 3) + self.assertTrue(Season(3) is Season.AUTUMN) + self.assertTrue(Season(1) is Season.SPRING) + self.assertEqual(Season.FALL.name, 'AUTUMN') + self.assertEqual( + set([k for k,v in Season.__members__.items() if v.name != k]), + set(['FALL', 'ANOTHER_SPRING']), + ) + + if pyver >= 3.0: + cls = vars() + result = {'Enum':Enum} + exec("""def test_duplicate_name(self): + with self.assertRaises(TypeError): + class Color(Enum): + red = 1 + green = 2 + blue = 3 + red = 4 + + with self.assertRaises(TypeError): + class Color(Enum): + red = 1 + green = 2 + blue = 3 + def red(self): + return 'red' + + with self.assertRaises(TypeError): + class Color(Enum): + @property + + def red(self): + return 'redder' + red = 1 + green = 2 + blue = 3""", + result) + cls['test_duplicate_name'] = result['test_duplicate_name'] + + def test_enum_with_value_name(self): + class Huh(Enum): + name = 1 + value = 2 + self.assertEqual( + list(Huh), + [Huh.name, Huh.value], + ) + self.assertTrue(type(Huh.name) is Huh) + self.assertEqual(Huh.name.name, 'name') + self.assertEqual(Huh.name.value, 1) + + def test_intenum_from_scratch(self): + class phy(int, Enum): + pi = 3 + tau = 2 * pi + self.assertTrue(phy.pi < phy.tau) + + def test_intenum_inherited(self): + class IntEnum(int, Enum): + pass + class phy(IntEnum): + pi = 3 + tau = 2 * pi + self.assertTrue(phy.pi < phy.tau) + + def test_floatenum_from_scratch(self): + class phy(float, Enum): + pi = 3.1415926 + tau = 2 * pi + self.assertTrue(phy.pi < phy.tau) + + def test_floatenum_inherited(self): + class FloatEnum(float, Enum): + pass + class phy(FloatEnum): + pi = 3.1415926 + tau = 2 * pi + self.assertTrue(phy.pi < phy.tau) + + def test_strenum_from_scratch(self): + class phy(str, Enum): + pi = 'Pi' + tau = 'Tau' + self.assertTrue(phy.pi < phy.tau) + + def test_strenum_inherited(self): + class StrEnum(str, Enum): + pass + class phy(StrEnum): + pi = 'Pi' + tau = 'Tau' + self.assertTrue(phy.pi < phy.tau) + + def test_intenum(self): + class WeekDay(IntEnum): + SUNDAY = 1 + MONDAY = 2 + TUESDAY = 3 + WEDNESDAY = 4 + THURSDAY = 5 + FRIDAY = 6 + SATURDAY = 7 + + self.assertEqual(['a', 'b', 'c'][WeekDay.MONDAY], 'c') + self.assertEqual([i for i in range(WeekDay.TUESDAY)], [0, 1, 2]) + + lst = list(WeekDay) + self.assertEqual(len(lst), len(WeekDay)) + self.assertEqual(len(WeekDay), 7) + target = 'SUNDAY MONDAY TUESDAY WEDNESDAY THURSDAY FRIDAY SATURDAY' + target = target.split() + for i, weekday in enumerate(target): + i += 1 + e = WeekDay(i) + self.assertEqual(e, i) + self.assertEqual(int(e), i) + self.assertEqual(e.name, weekday) + self.assertTrue(e in WeekDay) + self.assertEqual(lst.index(e)+1, i) + self.assertTrue(0 < e < 8) + self.assertTrue(type(e) is WeekDay) + self.assertTrue(isinstance(e, int)) + self.assertTrue(isinstance(e, Enum)) + + def test_intenum_duplicates(self): + class WeekDay(IntEnum): + __order__ = 'SUNDAY MONDAY TUESDAY WEDNESDAY THURSDAY FRIDAY SATURDAY' + SUNDAY = 1 + MONDAY = 2 + TUESDAY = TEUSDAY = 3 + WEDNESDAY = 4 + THURSDAY = 5 + FRIDAY = 6 + SATURDAY = 7 + self.assertTrue(WeekDay.TEUSDAY is WeekDay.TUESDAY) + self.assertEqual(WeekDay(3).name, 'TUESDAY') + self.assertEqual([k for k,v in WeekDay.__members__.items() + if v.name != k], ['TEUSDAY', ]) + + def test_pickle_enum(self): + if isinstance(Stooges, Exception): + raise Stooges + test_pickle_dump_load(self.assertTrue, Stooges.CURLY) + test_pickle_dump_load(self.assertTrue, Stooges) + + def test_pickle_int(self): + if isinstance(IntStooges, Exception): + raise IntStooges + test_pickle_dump_load(self.assertTrue, IntStooges.CURLY) + test_pickle_dump_load(self.assertTrue, IntStooges) + + def test_pickle_float(self): + if isinstance(FloatStooges, Exception): + raise FloatStooges + test_pickle_dump_load(self.assertTrue, FloatStooges.CURLY) + test_pickle_dump_load(self.assertTrue, FloatStooges) + + def test_pickle_enum_function(self): + if isinstance(Answer, Exception): + raise Answer + test_pickle_dump_load(self.assertTrue, Answer.him) + test_pickle_dump_load(self.assertTrue, Answer) + + def test_pickle_enum_function_with_module(self): + if isinstance(Question, Exception): + raise Question + test_pickle_dump_load(self.assertTrue, Question.who) + test_pickle_dump_load(self.assertTrue, Question) + + if pyver >= 3.4: + def test_class_nested_enum_and_pickle_protocol_four(self): + # would normally just have this directly in the class namespace + class NestedEnum(Enum): + twigs = 'common' + shiny = 'rare' + + self.__class__.NestedEnum = NestedEnum + self.NestedEnum.__qualname__ = '%s.NestedEnum' % self.__class__.__name__ + test_pickle_exception( + self.assertRaises, PicklingError, self.NestedEnum.twigs, + protocol=(0, 3)) + test_pickle_dump_load(self.assertTrue, self.NestedEnum.twigs, + protocol=(4, HIGHEST_PROTOCOL)) + + def test_exploding_pickle(self): + BadPickle = Enum('BadPickle', 'dill sweet bread-n-butter') + enum._make_class_unpicklable(BadPickle) + globals()['BadPickle'] = BadPickle + test_pickle_exception(self.assertRaises, TypeError, BadPickle.dill) + test_pickle_exception(self.assertRaises, PicklingError, BadPickle) + + def test_string_enum(self): + class SkillLevel(str, Enum): + master = 'what is the sound of one hand clapping?' + journeyman = 'why did the chicken cross the road?' + apprentice = 'knock, knock!' + self.assertEqual(SkillLevel.apprentice, 'knock, knock!') + + def test_getattr_getitem(self): + class Period(Enum): + morning = 1 + noon = 2 + evening = 3 + night = 4 + self.assertTrue(Period(2) is Period.noon) + self.assertTrue(getattr(Period, 'night') is Period.night) + self.assertTrue(Period['morning'] is Period.morning) + + def test_getattr_dunder(self): + Season = self.Season + self.assertTrue(getattr(Season, '__hash__')) + + def test_iteration_order(self): + class Season(Enum): + __order__ = 'SUMMER WINTER AUTUMN SPRING' + SUMMER = 2 + WINTER = 4 + AUTUMN = 3 + SPRING = 1 + self.assertEqual( + list(Season), + [Season.SUMMER, Season.WINTER, Season.AUTUMN, Season.SPRING], + ) + + def test_iteration_order_with_unorderable_values(self): + class Complex(Enum): + a = complex(7, 9) + b = complex(3.14, 2) + c = complex(1, -1) + d = complex(-77, 32) + self.assertEqual( + list(Complex), + [Complex.a, Complex.b, Complex.c, Complex.d], + ) + + def test_programatic_function_string(self): + SummerMonth = Enum('SummerMonth', 'june july august') + lst = list(SummerMonth) + self.assertEqual(len(lst), len(SummerMonth)) + self.assertEqual(len(SummerMonth), 3, SummerMonth) + self.assertEqual( + [SummerMonth.june, SummerMonth.july, SummerMonth.august], + lst, + ) + for i, month in enumerate('june july august'.split()): + i += 1 + e = SummerMonth(i) + self.assertEqual(int(e.value), i) + self.assertNotEqual(e, i) + self.assertEqual(e.name, month) + self.assertTrue(e in SummerMonth) + self.assertTrue(type(e) is SummerMonth) + + def test_programatic_function_string_list(self): + SummerMonth = Enum('SummerMonth', ['june', 'july', 'august']) + lst = list(SummerMonth) + self.assertEqual(len(lst), len(SummerMonth)) + self.assertEqual(len(SummerMonth), 3, SummerMonth) + self.assertEqual( + [SummerMonth.june, SummerMonth.july, SummerMonth.august], + lst, + ) + for i, month in enumerate('june july august'.split()): + i += 1 + e = SummerMonth(i) + self.assertEqual(int(e.value), i) + self.assertNotEqual(e, i) + self.assertEqual(e.name, month) + self.assertTrue(e in SummerMonth) + self.assertTrue(type(e) is SummerMonth) + + def test_programatic_function_iterable(self): + SummerMonth = Enum( + 'SummerMonth', + (('june', 1), ('july', 2), ('august', 3)) + ) + lst = list(SummerMonth) + self.assertEqual(len(lst), len(SummerMonth)) + self.assertEqual(len(SummerMonth), 3, SummerMonth) + self.assertEqual( + [SummerMonth.june, SummerMonth.july, SummerMonth.august], + lst, + ) + for i, month in enumerate('june july august'.split()): + i += 1 + e = SummerMonth(i) + self.assertEqual(int(e.value), i) + self.assertNotEqual(e, i) + self.assertEqual(e.name, month) + self.assertTrue(e in SummerMonth) + self.assertTrue(type(e) is SummerMonth) + + def test_programatic_function_from_dict(self): + SummerMonth = Enum( + 'SummerMonth', + dict((('june', 1), ('july', 2), ('august', 3))) + ) + lst = list(SummerMonth) + self.assertEqual(len(lst), len(SummerMonth)) + self.assertEqual(len(SummerMonth), 3, SummerMonth) + if pyver < 3.0: + self.assertEqual( + [SummerMonth.june, SummerMonth.july, SummerMonth.august], + lst, + ) + for i, month in enumerate('june july august'.split()): + i += 1 + e = SummerMonth(i) + self.assertEqual(int(e.value), i) + self.assertNotEqual(e, i) + self.assertEqual(e.name, month) + self.assertTrue(e in SummerMonth) + self.assertTrue(type(e) is SummerMonth) + + def test_programatic_function_type(self): + SummerMonth = Enum('SummerMonth', 'june july august', type=int) + lst = list(SummerMonth) + self.assertEqual(len(lst), len(SummerMonth)) + self.assertEqual(len(SummerMonth), 3, SummerMonth) + self.assertEqual( + [SummerMonth.june, SummerMonth.july, SummerMonth.august], + lst, + ) + for i, month in enumerate('june july august'.split()): + i += 1 + e = SummerMonth(i) + self.assertEqual(e, i) + self.assertEqual(e.name, month) + self.assertTrue(e in SummerMonth) + self.assertTrue(type(e) is SummerMonth) + + def test_programatic_function_type_from_subclass(self): + SummerMonth = IntEnum('SummerMonth', 'june july august') + lst = list(SummerMonth) + self.assertEqual(len(lst), len(SummerMonth)) + self.assertEqual(len(SummerMonth), 3, SummerMonth) + self.assertEqual( + [SummerMonth.june, SummerMonth.july, SummerMonth.august], + lst, + ) + for i, month in enumerate('june july august'.split()): + i += 1 + e = SummerMonth(i) + self.assertEqual(e, i) + self.assertEqual(e.name, month) + self.assertTrue(e in SummerMonth) + self.assertTrue(type(e) is SummerMonth) + + def test_programatic_function_unicode(self): + SummerMonth = Enum('SummerMonth', unicode('june july august')) + lst = list(SummerMonth) + self.assertEqual(len(lst), len(SummerMonth)) + self.assertEqual(len(SummerMonth), 3, SummerMonth) + self.assertEqual( + [SummerMonth.june, SummerMonth.july, SummerMonth.august], + lst, + ) + for i, month in enumerate(unicode('june july august').split()): + i += 1 + e = SummerMonth(i) + self.assertEqual(int(e.value), i) + self.assertNotEqual(e, i) + self.assertEqual(e.name, month) + self.assertTrue(e in SummerMonth) + self.assertTrue(type(e) is SummerMonth) + + def test_programatic_function_unicode_list(self): + SummerMonth = Enum('SummerMonth', [unicode('june'), unicode('july'), unicode('august')]) + lst = list(SummerMonth) + self.assertEqual(len(lst), len(SummerMonth)) + self.assertEqual(len(SummerMonth), 3, SummerMonth) + self.assertEqual( + [SummerMonth.june, SummerMonth.july, SummerMonth.august], + lst, + ) + for i, month in enumerate(unicode('june july august').split()): + i += 1 + e = SummerMonth(i) + self.assertEqual(int(e.value), i) + self.assertNotEqual(e, i) + self.assertEqual(e.name, month) + self.assertTrue(e in SummerMonth) + self.assertTrue(type(e) is SummerMonth) + + def test_programatic_function_unicode_iterable(self): + SummerMonth = Enum( + 'SummerMonth', + ((unicode('june'), 1), (unicode('july'), 2), (unicode('august'), 3)) + ) + lst = list(SummerMonth) + self.assertEqual(len(lst), len(SummerMonth)) + self.assertEqual(len(SummerMonth), 3, SummerMonth) + self.assertEqual( + [SummerMonth.june, SummerMonth.july, SummerMonth.august], + lst, + ) + for i, month in enumerate(unicode('june july august').split()): + i += 1 + e = SummerMonth(i) + self.assertEqual(int(e.value), i) + self.assertNotEqual(e, i) + self.assertEqual(e.name, month) + self.assertTrue(e in SummerMonth) + self.assertTrue(type(e) is SummerMonth) + + def test_programatic_function_from_unicode_dict(self): + SummerMonth = Enum( + 'SummerMonth', + dict(((unicode('june'), 1), (unicode('july'), 2), (unicode('august'), 3))) + ) + lst = list(SummerMonth) + self.assertEqual(len(lst), len(SummerMonth)) + self.assertEqual(len(SummerMonth), 3, SummerMonth) + if pyver < 3.0: + self.assertEqual( + [SummerMonth.june, SummerMonth.july, SummerMonth.august], + lst, + ) + for i, month in enumerate(unicode('june july august').split()): + i += 1 + e = SummerMonth(i) + self.assertEqual(int(e.value), i) + self.assertNotEqual(e, i) + self.assertEqual(e.name, month) + self.assertTrue(e in SummerMonth) + self.assertTrue(type(e) is SummerMonth) + + def test_programatic_function_unicode_type(self): + SummerMonth = Enum('SummerMonth', unicode('june july august'), type=int) + lst = list(SummerMonth) + self.assertEqual(len(lst), len(SummerMonth)) + self.assertEqual(len(SummerMonth), 3, SummerMonth) + self.assertEqual( + [SummerMonth.june, SummerMonth.july, SummerMonth.august], + lst, + ) + for i, month in enumerate(unicode('june july august').split()): + i += 1 + e = SummerMonth(i) + self.assertEqual(e, i) + self.assertEqual(e.name, month) + self.assertTrue(e in SummerMonth) + self.assertTrue(type(e) is SummerMonth) + + def test_programatic_function_unicode_type_from_subclass(self): + SummerMonth = IntEnum('SummerMonth', unicode('june july august')) + lst = list(SummerMonth) + self.assertEqual(len(lst), len(SummerMonth)) + self.assertEqual(len(SummerMonth), 3, SummerMonth) + self.assertEqual( + [SummerMonth.june, SummerMonth.july, SummerMonth.august], + lst, + ) + for i, month in enumerate(unicode('june july august').split()): + i += 1 + e = SummerMonth(i) + self.assertEqual(e, i) + self.assertEqual(e.name, month) + self.assertTrue(e in SummerMonth) + self.assertTrue(type(e) is SummerMonth) + + def test_programmatic_function_unicode_class(self): + if pyver < 3.0: + class_names = unicode('SummerMonth'), 'S\xfcmm\xe9rM\xf6nth'.decode('latin1') + else: + class_names = 'SummerMonth', 'S\xfcmm\xe9rM\xf6nth' + for i, class_name in enumerate(class_names): + if pyver < 3.0 and i == 1: + self.assertRaises(TypeError, Enum, class_name, unicode('june july august')) + else: + SummerMonth = Enum(class_name, unicode('june july august')) + lst = list(SummerMonth) + self.assertEqual(len(lst), len(SummerMonth)) + self.assertEqual(len(SummerMonth), 3, SummerMonth) + self.assertEqual( + [SummerMonth.june, SummerMonth.july, SummerMonth.august], + lst, + ) + for i, month in enumerate(unicode('june july august').split()): + i += 1 + e = SummerMonth(i) + self.assertEqual(e.value, i) + self.assertEqual(e.name, month) + self.assertTrue(e in SummerMonth) + self.assertTrue(type(e) is SummerMonth) + + def test_subclassing(self): + if isinstance(Name, Exception): + raise Name + self.assertEqual(Name.BDFL, 'Guido van Rossum') + self.assertTrue(Name.BDFL, Name('Guido van Rossum')) + self.assertTrue(Name.BDFL is getattr(Name, 'BDFL')) + test_pickle_dump_load(self.assertTrue, Name.BDFL) + + def test_extending(self): + def bad_extension(): + class Color(Enum): + red = 1 + green = 2 + blue = 3 + class MoreColor(Color): + cyan = 4 + magenta = 5 + yellow = 6 + self.assertRaises(TypeError, bad_extension) + + def test_exclude_methods(self): + class whatever(Enum): + this = 'that' + these = 'those' + def really(self): + return 'no, not %s' % self.value + self.assertFalse(type(whatever.really) is whatever) + self.assertEqual(whatever.this.really(), 'no, not that') + + def test_wrong_inheritance_order(self): + def wrong_inherit(): + class Wrong(Enum, str): + NotHere = 'error before this point' + self.assertRaises(TypeError, wrong_inherit) + + def test_intenum_transitivity(self): + class number(IntEnum): + one = 1 + two = 2 + three = 3 + class numero(IntEnum): + uno = 1 + dos = 2 + tres = 3 + self.assertEqual(number.one, numero.uno) + self.assertEqual(number.two, numero.dos) + self.assertEqual(number.three, numero.tres) + + def test_introspection(self): + class Number(IntEnum): + one = 100 + two = 200 + self.assertTrue(Number.one._member_type_ is int) + self.assertTrue(Number._member_type_ is int) + class String(str, Enum): + yarn = 'soft' + rope = 'rough' + wire = 'hard' + self.assertTrue(String.yarn._member_type_ is str) + self.assertTrue(String._member_type_ is str) + class Plain(Enum): + vanilla = 'white' + one = 1 + self.assertTrue(Plain.vanilla._member_type_ is object) + self.assertTrue(Plain._member_type_ is object) + + def test_wrong_enum_in_call(self): + class Monochrome(Enum): + black = 0 + white = 1 + class Gender(Enum): + male = 0 + female = 1 + self.assertRaises(ValueError, Monochrome, Gender.male) + + def test_wrong_enum_in_mixed_call(self): + class Monochrome(IntEnum): + black = 0 + white = 1 + class Gender(Enum): + male = 0 + female = 1 + self.assertRaises(ValueError, Monochrome, Gender.male) + + def test_mixed_enum_in_call_1(self): + class Monochrome(IntEnum): + black = 0 + white = 1 + class Gender(IntEnum): + male = 0 + female = 1 + self.assertTrue(Monochrome(Gender.female) is Monochrome.white) + + def test_mixed_enum_in_call_2(self): + class Monochrome(Enum): + black = 0 + white = 1 + class Gender(IntEnum): + male = 0 + female = 1 + self.assertTrue(Monochrome(Gender.male) is Monochrome.black) + + def test_flufl_enum(self): + class Fluflnum(Enum): + def __int__(self): + return int(self.value) + class MailManOptions(Fluflnum): + option1 = 1 + option2 = 2 + option3 = 3 + self.assertEqual(int(MailManOptions.option1), 1) + + def test_no_such_enum_member(self): + class Color(Enum): + red = 1 + green = 2 + blue = 3 + self.assertRaises(ValueError, Color, 4) + self.assertRaises(KeyError, Color.__getitem__, 'chartreuse') + + def test_new_repr(self): + class Color(Enum): + red = 1 + green = 2 + blue = 3 + def __repr__(self): + return "don't you just love shades of %s?" % self.name + self.assertEqual( + repr(Color.blue), + "don't you just love shades of blue?", + ) + + def test_inherited_repr(self): + class MyEnum(Enum): + def __repr__(self): + return "My name is %s." % self.name + class MyIntEnum(int, MyEnum): + this = 1 + that = 2 + theother = 3 + self.assertEqual(repr(MyIntEnum.that), "My name is that.") + + def test_multiple_mixin_mro(self): + class auto_enum(EnumMeta): + def __new__(metacls, cls, bases, classdict): + original_dict = classdict + classdict = enum._EnumDict() + for k, v in original_dict.items(): + classdict[k] = v + temp = type(classdict)() + names = set(classdict._member_names) + i = 0 + for k in classdict._member_names: + v = classdict[k] + if v == (): + v = i + else: + i = v + i += 1 + temp[k] = v + for k, v in classdict.items(): + if k not in names: + temp[k] = v + return super(auto_enum, metacls).__new__( + metacls, cls, bases, temp) + + AutoNumberedEnum = auto_enum('AutoNumberedEnum', (Enum,), {}) + + AutoIntEnum = auto_enum('AutoIntEnum', (IntEnum,), {}) + + class TestAutoNumber(AutoNumberedEnum): + a = () + b = 3 + c = () + + class TestAutoInt(AutoIntEnum): + a = () + b = 3 + c = () + + def test_subclasses_with_getnewargs(self): + class NamedInt(int): + __qualname__ = 'NamedInt' # needed for pickle protocol 4 + def __new__(cls, *args): + _args = args + if len(args) < 1: + raise TypeError("name and value must be specified") + name, args = args[0], args[1:] + self = int.__new__(cls, *args) + self._intname = name + self._args = _args + return self + def __getnewargs__(self): + return self._args + @property + def __name__(self): + return self._intname + def __repr__(self): + # repr() is updated to include the name and type info + return "%s(%r, %s)" % (type(self).__name__, + self.__name__, + int.__repr__(self)) + def __str__(self): + # str() is unchanged, even if it relies on the repr() fallback + base = int + base_str = base.__str__ + if base_str.__objclass__ is object: + return base.__repr__(self) + return base_str(self) + # for simplicity, we only define one operator that + # propagates expressions + def __add__(self, other): + temp = int(self) + int( other) + if isinstance(self, NamedInt) and isinstance(other, NamedInt): + return NamedInt( + '(%s + %s)' % (self.__name__, other.__name__), + temp ) + else: + return temp + + class NEI(NamedInt, Enum): + __qualname__ = 'NEI' # needed for pickle protocol 4 + x = ('the-x', 1) + y = ('the-y', 2) + + self.assertTrue(NEI.__new__ is Enum.__new__) + self.assertEqual(repr(NEI.x + NEI.y), "NamedInt('(the-x + the-y)', 3)") + globals()['NamedInt'] = NamedInt + globals()['NEI'] = NEI + NI5 = NamedInt('test', 5) + self.assertEqual(NI5, 5) + test_pickle_dump_load(self.assertTrue, NI5, 5) + self.assertEqual(NEI.y.value, 2) + test_pickle_dump_load(self.assertTrue, NEI.y) + + if pyver >= 3.4: + def test_subclasses_with_getnewargs_ex(self): + class NamedInt(int): + __qualname__ = 'NamedInt' # needed for pickle protocol 4 + def __new__(cls, *args): + _args = args + if len(args) < 2: + raise TypeError("name and value must be specified") + name, args = args[0], args[1:] + self = int.__new__(cls, *args) + self._intname = name + self._args = _args + return self + def __getnewargs_ex__(self): + return self._args, {} + @property + def __name__(self): + return self._intname + def __repr__(self): + # repr() is updated to include the name and type info + return "{}({!r}, {})".format(type(self).__name__, + self.__name__, + int.__repr__(self)) + def __str__(self): + # str() is unchanged, even if it relies on the repr() fallback + base = int + base_str = base.__str__ + if base_str.__objclass__ is object: + return base.__repr__(self) + return base_str(self) + # for simplicity, we only define one operator that + # propagates expressions + def __add__(self, other): + temp = int(self) + int( other) + if isinstance(self, NamedInt) and isinstance(other, NamedInt): + return NamedInt( + '({0} + {1})'.format(self.__name__, other.__name__), + temp ) + else: + return temp + + class NEI(NamedInt, Enum): + __qualname__ = 'NEI' # needed for pickle protocol 4 + x = ('the-x', 1) + y = ('the-y', 2) + + + self.assertIs(NEI.__new__, Enum.__new__) + self.assertEqual(repr(NEI.x + NEI.y), "NamedInt('(the-x + the-y)', 3)") + globals()['NamedInt'] = NamedInt + globals()['NEI'] = NEI + NI5 = NamedInt('test', 5) + self.assertEqual(NI5, 5) + test_pickle_dump_load(self.assertEqual, NI5, 5, protocol=(4, HIGHEST_PROTOCOL)) + self.assertEqual(NEI.y.value, 2) + test_pickle_dump_load(self.assertTrue, NEI.y, protocol=(4, HIGHEST_PROTOCOL)) + + def test_subclasses_with_reduce(self): + class NamedInt(int): + __qualname__ = 'NamedInt' # needed for pickle protocol 4 + def __new__(cls, *args): + _args = args + if len(args) < 1: + raise TypeError("name and value must be specified") + name, args = args[0], args[1:] + self = int.__new__(cls, *args) + self._intname = name + self._args = _args + return self + def __reduce__(self): + return self.__class__, self._args + @property + def __name__(self): + return self._intname + def __repr__(self): + # repr() is updated to include the name and type info + return "%s(%r, %s)" % (type(self).__name__, + self.__name__, + int.__repr__(self)) + def __str__(self): + # str() is unchanged, even if it relies on the repr() fallback + base = int + base_str = base.__str__ + if base_str.__objclass__ is object: + return base.__repr__(self) + return base_str(self) + # for simplicity, we only define one operator that + # propagates expressions + def __add__(self, other): + temp = int(self) + int( other) + if isinstance(self, NamedInt) and isinstance(other, NamedInt): + return NamedInt( + '(%s + %s)' % (self.__name__, other.__name__), + temp ) + else: + return temp + + class NEI(NamedInt, Enum): + __qualname__ = 'NEI' # needed for pickle protocol 4 + x = ('the-x', 1) + y = ('the-y', 2) + + + self.assertTrue(NEI.__new__ is Enum.__new__) + self.assertEqual(repr(NEI.x + NEI.y), "NamedInt('(the-x + the-y)', 3)") + globals()['NamedInt'] = NamedInt + globals()['NEI'] = NEI + NI5 = NamedInt('test', 5) + self.assertEqual(NI5, 5) + test_pickle_dump_load(self.assertEqual, NI5, 5) + self.assertEqual(NEI.y.value, 2) + test_pickle_dump_load(self.assertTrue, NEI.y) + + def test_subclasses_with_reduce_ex(self): + class NamedInt(int): + __qualname__ = 'NamedInt' # needed for pickle protocol 4 + def __new__(cls, *args): + _args = args + if len(args) < 1: + raise TypeError("name and value must be specified") + name, args = args[0], args[1:] + self = int.__new__(cls, *args) + self._intname = name + self._args = _args + return self + def __reduce_ex__(self, proto): + return self.__class__, self._args + @property + def __name__(self): + return self._intname + def __repr__(self): + # repr() is updated to include the name and type info + return "%s(%r, %s)" % (type(self).__name__, + self.__name__, + int.__repr__(self)) + def __str__(self): + # str() is unchanged, even if it relies on the repr() fallback + base = int + base_str = base.__str__ + if base_str.__objclass__ is object: + return base.__repr__(self) + return base_str(self) + # for simplicity, we only define one operator that + # propagates expressions + def __add__(self, other): + temp = int(self) + int( other) + if isinstance(self, NamedInt) and isinstance(other, NamedInt): + return NamedInt( + '(%s + %s)' % (self.__name__, other.__name__), + temp ) + else: + return temp + + class NEI(NamedInt, Enum): + __qualname__ = 'NEI' # needed for pickle protocol 4 + x = ('the-x', 1) + y = ('the-y', 2) + + + self.assertTrue(NEI.__new__ is Enum.__new__) + self.assertEqual(repr(NEI.x + NEI.y), "NamedInt('(the-x + the-y)', 3)") + globals()['NamedInt'] = NamedInt + globals()['NEI'] = NEI + NI5 = NamedInt('test', 5) + self.assertEqual(NI5, 5) + test_pickle_dump_load(self.assertEqual, NI5, 5) + self.assertEqual(NEI.y.value, 2) + test_pickle_dump_load(self.assertTrue, NEI.y) + + def test_subclasses_without_direct_pickle_support(self): + class NamedInt(int): + __qualname__ = 'NamedInt' + def __new__(cls, *args): + _args = args + name, args = args[0], args[1:] + if len(args) == 0: + raise TypeError("name and value must be specified") + self = int.__new__(cls, *args) + self._intname = name + self._args = _args + return self + @property + def __name__(self): + return self._intname + def __repr__(self): + # repr() is updated to include the name and type info + return "%s(%r, %s)" % (type(self).__name__, + self.__name__, + int.__repr__(self)) + def __str__(self): + # str() is unchanged, even if it relies on the repr() fallback + base = int + base_str = base.__str__ + if base_str.__objclass__ is object: + return base.__repr__(self) + return base_str(self) + # for simplicity, we only define one operator that + # propagates expressions + def __add__(self, other): + temp = int(self) + int( other) + if isinstance(self, NamedInt) and isinstance(other, NamedInt): + return NamedInt( + '(%s + %s)' % (self.__name__, other.__name__), + temp ) + else: + return temp + + class NEI(NamedInt, Enum): + __qualname__ = 'NEI' + x = ('the-x', 1) + y = ('the-y', 2) + + self.assertTrue(NEI.__new__ is Enum.__new__) + self.assertEqual(repr(NEI.x + NEI.y), "NamedInt('(the-x + the-y)', 3)") + globals()['NamedInt'] = NamedInt + globals()['NEI'] = NEI + NI5 = NamedInt('test', 5) + self.assertEqual(NI5, 5) + self.assertEqual(NEI.y.value, 2) + test_pickle_exception(self.assertRaises, TypeError, NEI.x) + test_pickle_exception(self.assertRaises, PicklingError, NEI) + + def test_subclasses_without_direct_pickle_support_using_name(self): + class NamedInt(int): + __qualname__ = 'NamedInt' + def __new__(cls, *args): + _args = args + name, args = args[0], args[1:] + if len(args) == 0: + raise TypeError("name and value must be specified") + self = int.__new__(cls, *args) + self._intname = name + self._args = _args + return self + @property + def __name__(self): + return self._intname + def __repr__(self): + # repr() is updated to include the name and type info + return "%s(%r, %s)" % (type(self).__name__, + self.__name__, + int.__repr__(self)) + def __str__(self): + # str() is unchanged, even if it relies on the repr() fallback + base = int + base_str = base.__str__ + if base_str.__objclass__ is object: + return base.__repr__(self) + return base_str(self) + # for simplicity, we only define one operator that + # propagates expressions + def __add__(self, other): + temp = int(self) + int( other) + if isinstance(self, NamedInt) and isinstance(other, NamedInt): + return NamedInt( + '(%s + %s)' % (self.__name__, other.__name__), + temp ) + else: + return temp + + class NEI(NamedInt, Enum): + __qualname__ = 'NEI' + x = ('the-x', 1) + y = ('the-y', 2) + def __reduce_ex__(self, proto): + return getattr, (self.__class__, self._name_) + + self.assertTrue(NEI.__new__ is Enum.__new__) + self.assertEqual(repr(NEI.x + NEI.y), "NamedInt('(the-x + the-y)', 3)") + globals()['NamedInt'] = NamedInt + globals()['NEI'] = NEI + NI5 = NamedInt('test', 5) + self.assertEqual(NI5, 5) + self.assertEqual(NEI.y.value, 2) + test_pickle_dump_load(self.assertTrue, NEI.y) + test_pickle_dump_load(self.assertTrue, NEI) + + def test_tuple_subclass(self): + class SomeTuple(tuple, Enum): + __qualname__ = 'SomeTuple' + first = (1, 'for the money') + second = (2, 'for the show') + third = (3, 'for the music') + self.assertTrue(type(SomeTuple.first) is SomeTuple) + self.assertTrue(isinstance(SomeTuple.second, tuple)) + self.assertEqual(SomeTuple.third, (3, 'for the music')) + globals()['SomeTuple'] = SomeTuple + test_pickle_dump_load(self.assertTrue, SomeTuple.first) + + def test_duplicate_values_give_unique_enum_items(self): + class AutoNumber(Enum): + __order__ = 'enum_m enum_d enum_y' + enum_m = () + enum_d = () + enum_y = () + def __new__(cls): + value = len(cls.__members__) + 1 + obj = object.__new__(cls) + obj._value_ = value + return obj + def __int__(self): + return int(self._value_) + self.assertEqual(int(AutoNumber.enum_d), 2) + self.assertEqual(AutoNumber.enum_y.value, 3) + self.assertTrue(AutoNumber(1) is AutoNumber.enum_m) + self.assertEqual( + list(AutoNumber), + [AutoNumber.enum_m, AutoNumber.enum_d, AutoNumber.enum_y], + ) + + def test_inherited_new_from_enhanced_enum(self): + class AutoNumber2(Enum): + def __new__(cls): + value = len(cls.__members__) + 1 + obj = object.__new__(cls) + obj._value_ = value + return obj + def __int__(self): + return int(self._value_) + class Color(AutoNumber2): + __order__ = 'red green blue' + red = () + green = () + blue = () + self.assertEqual(len(Color), 3, "wrong number of elements: %d (should be %d)" % (len(Color), 3)) + self.assertEqual(list(Color), [Color.red, Color.green, Color.blue]) + if pyver >= 3.0: + self.assertEqual(list(map(int, Color)), [1, 2, 3]) + + def test_inherited_new_from_mixed_enum(self): + class AutoNumber3(IntEnum): + def __new__(cls): + value = len(cls.__members__) + 1 + obj = int.__new__(cls, value) + obj._value_ = value + return obj + class Color(AutoNumber3): + red = () + green = () + blue = () + self.assertEqual(len(Color), 3, "wrong number of elements: %d (should be %d)" % (len(Color), 3)) + Color.red + Color.green + Color.blue + + def test_ordered_mixin(self): + class OrderedEnum(Enum): + def __ge__(self, other): + if self.__class__ is other.__class__: + return self._value_ >= other._value_ + return NotImplemented + def __gt__(self, other): + if self.__class__ is other.__class__: + return self._value_ > other._value_ + return NotImplemented + def __le__(self, other): + if self.__class__ is other.__class__: + return self._value_ <= other._value_ + return NotImplemented + def __lt__(self, other): + if self.__class__ is other.__class__: + return self._value_ < other._value_ + return NotImplemented + class Grade(OrderedEnum): + __order__ = 'A B C D F' + A = 5 + B = 4 + C = 3 + D = 2 + F = 1 + self.assertEqual(list(Grade), [Grade.A, Grade.B, Grade.C, Grade.D, Grade.F]) + self.assertTrue(Grade.A > Grade.B) + self.assertTrue(Grade.F <= Grade.C) + self.assertTrue(Grade.D < Grade.A) + self.assertTrue(Grade.B >= Grade.B) + + def test_extending2(self): + def bad_extension(): + class Shade(Enum): + def shade(self): + print(self.name) + class Color(Shade): + red = 1 + green = 2 + blue = 3 + class MoreColor(Color): + cyan = 4 + magenta = 5 + yellow = 6 + self.assertRaises(TypeError, bad_extension) + + def test_extending3(self): + class Shade(Enum): + def shade(self): + return self.name + class Color(Shade): + def hex(self): + return '%s hexlified!' % self.value + class MoreColor(Color): + cyan = 4 + magenta = 5 + yellow = 6 + self.assertEqual(MoreColor.magenta.hex(), '5 hexlified!') + + def test_no_duplicates(self): + def bad_duplicates(): + class UniqueEnum(Enum): + def __init__(self, *args): + cls = self.__class__ + if any(self.value == e.value for e in cls): + a = self.name + e = cls(self.value).name + raise ValueError( + "aliases not allowed in UniqueEnum: %r --> %r" + % (a, e) + ) + class Color(UniqueEnum): + red = 1 + green = 2 + blue = 3 + class Color(UniqueEnum): + red = 1 + green = 2 + blue = 3 + grene = 2 + self.assertRaises(ValueError, bad_duplicates) + + def test_reversed(self): + self.assertEqual( + list(reversed(self.Season)), + [self.Season.WINTER, self.Season.AUTUMN, self.Season.SUMMER, + self.Season.SPRING] + ) + + def test_init(self): + class Planet(Enum): + MERCURY = (3.303e+23, 2.4397e6) + VENUS = (4.869e+24, 6.0518e6) + EARTH = (5.976e+24, 6.37814e6) + MARS = (6.421e+23, 3.3972e6) + JUPITER = (1.9e+27, 7.1492e7) + SATURN = (5.688e+26, 6.0268e7) + URANUS = (8.686e+25, 2.5559e7) + NEPTUNE = (1.024e+26, 2.4746e7) + def __init__(self, mass, radius): + self.mass = mass # in kilograms + self.radius = radius # in meters + @property + def surface_gravity(self): + # universal gravitational constant (m3 kg-1 s-2) + G = 6.67300E-11 + return G * self.mass / (self.radius * self.radius) + self.assertEqual(round(Planet.EARTH.surface_gravity, 2), 9.80) + self.assertEqual(Planet.EARTH.value, (5.976e+24, 6.37814e6)) + + def test_nonhash_value(self): + class AutoNumberInAList(Enum): + def __new__(cls): + value = [len(cls.__members__) + 1] + obj = object.__new__(cls) + obj._value_ = value + return obj + class ColorInAList(AutoNumberInAList): + __order__ = 'red green blue' + red = () + green = () + blue = () + self.assertEqual(list(ColorInAList), [ColorInAList.red, ColorInAList.green, ColorInAList.blue]) + self.assertEqual(ColorInAList.red.value, [1]) + self.assertEqual(ColorInAList([1]), ColorInAList.red) + + def test_conflicting_types_resolved_in_new(self): + class LabelledIntEnum(int, Enum): + def __new__(cls, *args): + value, label = args + obj = int.__new__(cls, value) + obj.label = label + obj._value_ = value + return obj + + class LabelledList(LabelledIntEnum): + unprocessed = (1, "Unprocessed") + payment_complete = (2, "Payment Complete") + + self.assertEqual(list(LabelledList), [LabelledList.unprocessed, LabelledList.payment_complete]) + self.assertEqual(LabelledList.unprocessed, 1) + self.assertEqual(LabelledList(1), LabelledList.unprocessed) + +class TestUnique(unittest.TestCase): + """2.4 doesn't allow class decorators, use function syntax.""" + + def test_unique_clean(self): + class Clean(Enum): + one = 1 + two = 'dos' + tres = 4.0 + unique(Clean) + class Cleaner(IntEnum): + single = 1 + double = 2 + triple = 3 + unique(Cleaner) + + def test_unique_dirty(self): + try: + class Dirty(Enum): + __order__ = 'one two tres' + one = 1 + two = 'dos' + tres = 1 + unique(Dirty) + except ValueError: + exc = sys.exc_info()[1] + message = exc.args[0] + self.assertTrue('tres -> one' in message) + + try: + class Dirtier(IntEnum): + __order__ = 'single double triple turkey' + single = 1 + double = 1 + triple = 3 + turkey = 3 + unique(Dirtier) + except ValueError: + exc = sys.exc_info()[1] + message = exc.args[0] + self.assertTrue('double -> single' in message) + self.assertTrue('turkey -> triple' in message) + + +class TestMe(unittest.TestCase): + + pass + +if __name__ == '__main__': + unittest.main() diff --git a/lib/guessit/test/__main__.py b/lib/guessit/test/__main__.py new file mode 100644 index 0000000000000000000000000000000000000000..32b8dd1001bbbf692d94434545b1fe6825e4a549 --- /dev/null +++ b/lib/guessit/test/__main__.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com> +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +from __future__ import absolute_import, division, print_function, unicode_literals +from guessit.test import (test_api, test_autodetect, test_autodetect_all, test_doctests, + test_episode, test_hashes, test_language, test_main, + test_matchtree, test_movie, test_quality, test_utils) +from unittest import TextTestRunner + + +import logging + +def main(): + for suite in [test_api.suite, test_autodetect.suite, + test_autodetect_all.suite, test_doctests.suite, + test_episode.suite, test_hashes.suite, test_language.suite, + test_main.suite, test_matchtree.suite, test_movie.suite, + test_quality.suite, test_utils.suite]: + TextTestRunner(verbosity=2).run(suite) + + +if __name__ == '__main__': + main() diff --git a/lib/guessit/test/test_doctests.py b/lib/guessit/test/test_doctests.py new file mode 100644 index 0000000000000000000000000000000000000000..9fedeb0f3f9809cfbfe4eb2eb610c11639e87c3d --- /dev/null +++ b/lib/guessit/test/test_doctests.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2014 Nicolas Wack <wackou@gmail.com> +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +from __future__ import absolute_import, division, print_function, unicode_literals + +from guessit.test.guessittest import * +import guessit +import guessit.hash_ed2k +import unittest +import doctest + + +def load_tests(loader, tests, ignore): + tests.addTests(doctest.DocTestSuite(guessit)) + tests.addTests(doctest.DocTestSuite(guessit.date)) + tests.addTests(doctest.DocTestSuite(guessit.fileutils)) + tests.addTests(doctest.DocTestSuite(guessit.guess)) + tests.addTests(doctest.DocTestSuite(guessit.hash_ed2k)) + tests.addTests(doctest.DocTestSuite(guessit.language)) + tests.addTests(doctest.DocTestSuite(guessit.matchtree)) + tests.addTests(doctest.DocTestSuite(guessit.textutils)) + return tests + +suite = unittest.TestSuite() +load_tests(None, suite, None) + +if __name__ == '__main__': + TextTestRunner(verbosity=2).run(suite) diff --git a/lib/trakt/trakt.py b/lib/trakt/trakt.py index 08808bf2a786cb5009b7f9eadec1838e6241544e..f8f048026395647835460ded7781b05217c30383 100644 --- a/lib/trakt/trakt.py +++ b/lib/trakt/trakt.py @@ -8,9 +8,9 @@ from sickbeard import logger from exceptions import traktException, traktAuthException, traktServerBusy class TraktAPI(): - def __init__(self, disable_ssl_verify=False, timeout=30): + def __init__(self, ssl_verify=True, timeout=30): self.session = requests.Session() - self.verify = certifi.where() if not disable_ssl_verify else False + self.verify = certifi.where() if ssl_verify else None self.timeout = timeout if timeout else None self.auth_url = sickbeard.TRAKT_OAUTH_URL self.api_url = sickbeard.TRAKT_API_URL diff --git a/sickbeard/__init__.py b/sickbeard/__init__.py index fd5d46f1964e3d057bc6fcedf47386ba3799088f..5a05113cd313dd7a17221dee7ba00c51cb92cdd6 100644 --- a/sickbeard/__init__.py +++ b/sickbeard/__init__.py @@ -156,6 +156,7 @@ DOWNLOAD_URL = None HANDLE_REVERSE_PROXY = False PROXY_SETTING = None PROXY_INDEXERS = True +SSL_VERIFY = True LOCALHOST_IP = None @@ -433,7 +434,6 @@ TRAKT_USE_RECOMMENDED = False TRAKT_SYNC = False TRAKT_SYNC_REMOVE = False TRAKT_DEFAULT_INDEXER = None -TRAKT_DISABLE_SSL_VERIFY = False TRAKT_TIMEOUT = None TRAKT_BLACKLIST_NAME = None TRAKT_USE_ROLLING_DOWNLOAD = None @@ -552,7 +552,7 @@ def initialize(consoleLogging=True): TORRENT_USERNAME, TORRENT_PASSWORD, TORRENT_HOST, TORRENT_PATH, TORRENT_SEED_TIME, TORRENT_PAUSED, TORRENT_HIGH_BANDWIDTH, TORRENT_LABEL, TORRENT_LABEL_ANIME, TORRENT_VERIFY_CERT, TORRENT_RPCURL, TORRENT_AUTH_TYPE, \ USE_KODI, KODI_ALWAYS_ON, KODI_NOTIFY_ONSNATCH, KODI_NOTIFY_ONDOWNLOAD, KODI_NOTIFY_ONSUBTITLEDOWNLOAD, KODI_UPDATE_FULL, KODI_UPDATE_ONLYFIRST, \ KODI_UPDATE_LIBRARY, KODI_HOST, KODI_USERNAME, KODI_PASSWORD, BACKLOG_FREQUENCY, \ - USE_TRAKT, TRAKT_USERNAME, TRAKT_ACCESS_TOKEN, TRAKT_REFRESH_TOKEN, TRAKT_REMOVE_WATCHLIST, TRAKT_SYNC_WATCHLIST, TRAKT_REMOVE_SHOW_FROM_SICKRAGE, TRAKT_METHOD_ADD, TRAKT_START_PAUSED, traktCheckerScheduler, traktRollingScheduler, TRAKT_USE_RECOMMENDED, TRAKT_SYNC, TRAKT_SYNC_REMOVE, TRAKT_DEFAULT_INDEXER, TRAKT_REMOVE_SERIESLIST, TRAKT_DISABLE_SSL_VERIFY, TRAKT_TIMEOUT, TRAKT_BLACKLIST_NAME, TRAKT_USE_ROLLING_DOWNLOAD, TRAKT_ROLLING_NUM_EP, TRAKT_ROLLING_ADD_PAUSED, TRAKT_ROLLING_FREQUENCY, \ + USE_TRAKT, TRAKT_USERNAME, TRAKT_ACCESS_TOKEN, TRAKT_REFRESH_TOKEN, TRAKT_REMOVE_WATCHLIST, TRAKT_SYNC_WATCHLIST, TRAKT_REMOVE_SHOW_FROM_SICKRAGE, TRAKT_METHOD_ADD, TRAKT_START_PAUSED, traktCheckerScheduler, traktRollingScheduler, TRAKT_USE_RECOMMENDED, TRAKT_SYNC, TRAKT_SYNC_REMOVE, TRAKT_DEFAULT_INDEXER, TRAKT_REMOVE_SERIESLIST, TRAKT_TIMEOUT, TRAKT_BLACKLIST_NAME, TRAKT_USE_ROLLING_DOWNLOAD, TRAKT_ROLLING_NUM_EP, TRAKT_ROLLING_ADD_PAUSED, TRAKT_ROLLING_FREQUENCY, \ USE_PLEX, PLEX_NOTIFY_ONSNATCH, PLEX_NOTIFY_ONDOWNLOAD, PLEX_NOTIFY_ONSUBTITLEDOWNLOAD, PLEX_UPDATE_LIBRARY, USE_PLEX_CLIENT, PLEX_CLIENT_USERNAME, PLEX_CLIENT_PASSWORD, \ PLEX_SERVER_HOST, PLEX_SERVER_TOKEN, PLEX_HOST, PLEX_USERNAME, PLEX_PASSWORD, DEFAULT_BACKLOG_FREQUENCY, MIN_BACKLOG_FREQUENCY, BACKLOG_STARTUP, SKIP_REMOVED_FILES, \ showUpdateScheduler, __INITIALIZED__, INDEXER_DEFAULT_LANGUAGE, EP_DEFAULT_DELETED_STATUS, LAUNCH_BROWSER, UPDATE_SHOWS_ON_START, UPDATE_SHOWS_ON_SNATCH, TRASH_REMOVE_SHOW, TRASH_ROTATE_LOGS, SORT_ARTICLE, showList, loadingShowList, \ @@ -587,7 +587,7 @@ def initialize(consoleLogging=True): AUTOPOSTPROCESSER_FREQUENCY, SHOWUPDATE_HOUR, DEFAULT_AUTOPOSTPROCESSER_FREQUENCY, MIN_AUTOPOSTPROCESSER_FREQUENCY, \ ANIME_DEFAULT, NAMING_ANIME, ANIMESUPPORT, USE_ANIDB, ANIDB_USERNAME, ANIDB_PASSWORD, ANIDB_USE_MYLIST, \ ANIME_SPLIT_HOME, SCENE_DEFAULT, DOWNLOAD_URL, BACKLOG_DAYS, GIT_ORG, GIT_REPO, GIT_USERNAME, GIT_PASSWORD, \ - GIT_AUTOISSUES, DEVELOPER, gh, DISPLAY_ALL_SEASONS + GIT_AUTOISSUES, DEVELOPER, gh, DISPLAY_ALL_SEASONS, SSL_VERIFY if __INITIALIZED__: return False @@ -613,6 +613,7 @@ def initialize(consoleLogging=True): CheckSection(CFG, 'Pushalot') CheckSection(CFG, 'Pushbullet') CheckSection(CFG, 'Subtitles') + CheckSection(CFG, 'pyTivo') # Need to be before any passwords ENCRYPTION_VERSION = check_setting_int(CFG, 'General', 'encryption_version', 0) @@ -739,6 +740,8 @@ def initialize(consoleLogging=True): WEB_USE_GZIP = bool(check_setting_int(CFG, 'General', 'web_use_gzip', 1)) + SSL_VERIFY = bool(check_setting_int(CFG, 'General', 'ssl_verify', 1)) + INDEXER_DEFAULT_LANGUAGE = check_setting_str(CFG, 'General', 'indexerDefaultLang', 'en') EP_DEFAULT_DELETED_STATUS = check_setting_int(CFG, 'General', 'ep_default_deleted_status', 6) @@ -1029,7 +1032,6 @@ def initialize(consoleLogging=True): TRAKT_SYNC = bool(check_setting_int(CFG, 'Trakt', 'trakt_sync', 0)) TRAKT_SYNC_REMOVE = bool(check_setting_int(CFG, 'Trakt', 'trakt_sync_remove', 0)) TRAKT_DEFAULT_INDEXER = check_setting_int(CFG, 'Trakt', 'trakt_default_indexer', 1) - TRAKT_DISABLE_SSL_VERIFY = bool(check_setting_int(CFG, 'Trakt', 'trakt_disable_ssl_verify', 0)) TRAKT_TIMEOUT = check_setting_int(CFG, 'Trakt', 'trakt_timeout', 30) TRAKT_BLACKLIST_NAME = check_setting_str(CFG, 'Trakt', 'trakt_blacklist_name', '') TRAKT_USE_ROLLING_DOWNLOAD = bool(check_setting_int(CFG, 'Trakt', 'trakt_use_rolling_download', 0)) @@ -1039,7 +1041,6 @@ def initialize(consoleLogging=True): if TRAKT_ROLLING_FREQUENCY < 4: TRAKT_ROLLING_FREQUENCY = 4 - CheckSection(CFG, 'pyTivo') USE_PYTIVO = bool(check_setting_int(CFG, 'pyTivo', 'use_pytivo', 0)) PYTIVO_NOTIFY_ONSNATCH = bool(check_setting_int(CFG, 'pyTivo', 'pytivo_notify_onsnatch', 0)) PYTIVO_NOTIFY_ONDOWNLOAD = bool(check_setting_int(CFG, 'pyTivo', 'pytivo_notify_ondownload', 0)) @@ -1636,6 +1637,7 @@ def save_config(): new_config['General']['web_password'] = helpers.encrypt(WEB_PASSWORD, ENCRYPTION_VERSION) new_config['General']['web_cookie_secret'] = WEB_COOKIE_SECRET new_config['General']['web_use_gzip'] = int(WEB_USE_GZIP) + new_config['General']['ssl_verify'] = int(SSL_VERIFY) new_config['General']['download_url'] = DOWNLOAD_URL new_config['General']['localhost_ip'] = LOCALHOST_IP new_config['General']['cpu_preset'] = CPU_PRESET @@ -2000,7 +2002,6 @@ def save_config(): new_config['Trakt']['trakt_sync'] = int(TRAKT_SYNC) new_config['Trakt']['trakt_sync_remove'] = int(TRAKT_SYNC_REMOVE) new_config['Trakt']['trakt_default_indexer'] = int(TRAKT_DEFAULT_INDEXER) - new_config['Trakt']['trakt_disable_ssl_verify'] = int(TRAKT_DISABLE_SSL_VERIFY) new_config['Trakt']['trakt_timeout'] = int(TRAKT_TIMEOUT) new_config['Trakt']['trakt_blacklist_name'] = TRAKT_BLACKLIST_NAME new_config['Trakt']['trakt_use_rolling_download'] = int(TRAKT_USE_ROLLING_DOWNLOAD) diff --git a/sickbeard/dailysearcher.py b/sickbeard/dailysearcher.py index bd72ab250f5fc0360b3137154496ce01bc5f68fc..feedc5115df1d271a917d900b8bd2a6feae3303d 100644 --- a/sickbeard/dailysearcher.py +++ b/sickbeard/dailysearcher.py @@ -92,7 +92,7 @@ class DailySearcher(): ep = show.getEpisode(int(sqlEp["season"]), int(sqlEp["episode"])) with ep.lock: if ep.show.paused: - ep.status = common.SKIPPED + ep.status = ep.show.default_ep_status elif ep.season == 0: logger.log(u"New episode " + ep.prettyName() + " airs today, setting status to SKIPPED because is a special season") ep.status = common.SKIPPED @@ -104,12 +104,12 @@ class DailySearcher(): ep.status = ep.show.default_ep_status sql_l.append(ep.get_sql()) - else: - logger.log(u"No new released episodes found ...") if len(sql_l) > 0: myDB = db.DBConnection() myDB.mass_action(sql_l) + else: + logger.log(u"No new released episodes found ...") sickbeard.traktRollingScheduler.action.updateWantedList() diff --git a/sickbeard/db.py b/sickbeard/db.py index 346b3725953292f48946ef76ef904e5d2f74e2fd..e1045711e7f09f2380978610b9f325579df46675 100644 --- a/sickbeard/db.py +++ b/sickbeard/db.py @@ -230,19 +230,20 @@ class DBConnection(object): def _unicode_text_factory(self, x): try: x = unicode(x) - except UnicodeDecodeError: + except Exception: try: - x = unicode(x, chardet.detect(x).get('encoding')) - except UnicodeDecodeError: + x = unicode(x, sickbeard.SYS_ENCODING) + except Exception: try: - x = unicode(x, sickbeard.SYS_ENCODING) - except UnicodeDecodeError: + x = unicode(x, 'utf-8') + except Exception: try: - x = unicode(x, 'utf-8') - except UnicodeDecodeError: + x = unicode(x, 'latin-1') + except Exception: try: - x = unicode(x, 'latin-1') - except UnicodeDecodeError: + # Chardet can be wrong, so try it before ignoring + x = unicode(x, chardet.detect(x).get('encoding')) + except Exception: x = unicode(x, sickbeard.SYS_ENCODING, errors="ignore") return x diff --git a/sickbeard/encodingKludge.py b/sickbeard/encodingKludge.py index 4cb8ac9832d4fe88518b13f178e381b7cfeade7f..04a2ec27d5dc990febda5d48730784d2600ef4bc 100644 --- a/sickbeard/encodingKludge.py +++ b/sickbeard/encodingKludge.py @@ -24,14 +24,18 @@ def _toUnicode(x): if isinstance(x, str): try: x = unicode(x) - except UnicodeDecodeError: + except Exception: try: - x = unicode(x, chardet.detect(x).get('encoding')) - except UnicodeDecodeError: + x = unicode(x, sickbeard.SYS_ENCODING) + except Exception: try: - x = unicode(x, sickbeard.SYS_ENCODING) - except UnicodeDecodeError: - pass + x = unicode(x, 'utf-8') + except Exception: + try: + x = unicode(x, 'latin-1') + except Exception: + # Chardet can be wrong, so try it last + x = unicode(x, chardet.detect(x).get('encoding')) return x def ss(x): @@ -39,16 +43,15 @@ def ss(x): try: x = x.encode(sickbeard.SYS_ENCODING) - except UnicodeDecodeError, UnicodeEncodeError: + except Exception: try: x = x.encode('utf-8') - except UnicodeDecodeError, UnicodeEncodeError: + except Exception: try: x = x.encode(sickbeard.SYS_ENCODING, 'replace') - except UnicodeDecodeError, UnicodeEncodeError: + except Exception: x = x.encode('utf-8', 'ignore') - finally: - return x + return x def fixListEncodings(x): if not isinstance(x, (list, tuple)): diff --git a/sickbeard/helpers.py b/sickbeard/helpers.py index e8fde7c714da8a639911be11c20f2f11416dc0af..8cf78bcfd942475dc2af74e97874f873e6df8f49 100644 --- a/sickbeard/helpers.py +++ b/sickbeard/helpers.py @@ -58,7 +58,7 @@ from sickbeard import encodingKludge as ek from sickbeard import notifiers from sickbeard import clients from sickbeard.subtitles import isValidLanguage -from lib.cachecontrol import CacheControl, caches +from cachecontrol import CacheControl, caches from itertools import izip, cycle @@ -134,7 +134,11 @@ def remove_non_release_groups(name): '-NZBGEEK$': 'searchre', '-RP$': 'searchre', '-20-40$': 'searchre', + '\.\[www\.usabit\.com\]$': 'searchre', '\[NO-RAR\] - \[ www\.torrentday\.com \]$': 'searchre', + '- \[ www\.torrentday\.com \]$': 'searchre', + '- \{ www\.SceneTime\.com \}$': 'searchre', + '^\{ www\.SceneTime\.com \} - ': 'searchre', '^\[ www\.TorrentDay\.com \] - ': 'searchre', '^\[ www\.Cpasbien\.pw \] ': 'searchre', } @@ -150,7 +154,7 @@ def remove_non_release_groups(name): def replaceExtension(filename, newExt): - ''' + """ >>> replaceExtension('foo.avi', 'mkv') 'foo.mkv' >>> replaceExtension('.vimrc', 'arglebargle') @@ -161,7 +165,7 @@ def replaceExtension(filename, newExt): '' >>> replaceExtension('foo.bar', '') 'foo.' - ''' + """ sepFile = filename.rpartition(".") if sepFile[0] == "": return filename @@ -221,7 +225,7 @@ def isBeingWritten(filepath): def sanitizeFileName(name): - ''' + """ >>> sanitizeFileName('a/b/c') 'a-b-c' >>> sanitizeFileName('abc') @@ -230,7 +234,7 @@ def sanitizeFileName(name): 'ab' >>> sanitizeFileName('.a.b..') 'a.b' - ''' + """ # remove bad chars from the filename name = re.sub(r'[\\/\*]', '-', name) @@ -356,7 +360,7 @@ def searchIndexerForShowID(regShowName, indexer=None, indexer_id=None, ui=None): def sizeof_fmt(num): - ''' + """ >>> sizeof_fmt(2) '2.0 bytes' >>> sizeof_fmt(1024) @@ -367,7 +371,7 @@ def sizeof_fmt(num): '1.0 MB' >>> sizeof_fmt(1234567) '1.2 MB' - ''' + """ for x in ['bytes', 'KB', 'MB', 'GB', 'TB']: if num < 1024.0: return "%3.1f %s" % (num, x) @@ -618,7 +622,8 @@ def chmodAsParent(childPath): logger.log(u"Setting permissions for %s to %o as parent directory has %o" % (childPath, childMode, parentMode), logger.DEBUG) except OSError: - logger.log(u"Failed to set permission for %s to %o" % (childPath, childMode), logger.ERROR) + logger.log(u"Failed to set permission for %s to %o" % (childPath, childMode), logger.DEBUG) + pass def fixSetGroupID(childPath): @@ -1306,7 +1311,7 @@ def _setUpSession(session, headers): session.headers.update(headers) # request session ssl verify - session.verify = certifi.where() + session.verify = certifi.where() if sickbeard.SSL_VERIFY else None # request session proxies if not 'Referer' in session.headers and sickbeard.PROXY_SETTING: @@ -1589,15 +1594,19 @@ def pretty_time_delta(seconds): days, seconds = divmod(seconds, 86400) hours, seconds = divmod(seconds, 3600) minutes, seconds = divmod(seconds, 60) + time_delta = sign_string + if days > 0: - return '%s%dd%02dh%02dm%02ds' % (sign_string, days, hours, minutes, seconds) - elif hours > 0: - return '%s%02dh%02dm%02ds' % (sign_string, hours, minutes, seconds) - elif minutes > 0: - return '%s%02dm%02ds' % (sign_string, minutes, seconds) - else: - return '%s%02ds' % (sign_string, seconds) - + time_delta += ' %dd' % days + if hours > 0: + time_delta += ' %dh' % hours + if minutes > 0: + time_delta += ' %dm' % minutes + if seconds > 0: + time_delta += ' %ds' % seconds + + return time_delta + def isFileLocked(file, writeLockCheck=False): ''' Checks to see if a file is locked. Performs three checks diff --git a/sickbeard/history.py b/sickbeard/history.py index 3f4f042cbbdbb88dd477ac57eefeec10444eb47d..1889af2fdcbf16e2d242440a986ecd6af7c03d07 100644 --- a/sickbeard/history.py +++ b/sickbeard/history.py @@ -77,7 +77,7 @@ def logDownload(episode, filename, new_ep_quality, release_group=None, version=- def logSubtitle(showid, season, episode, status, subtitleResult): - resource = subtitleResult.language.alpha3 + resource = subtitleResult.language.opensubtitles provider = subtitleResult.provider_name status, quality = Quality.splitCompositeStatus(status) diff --git a/sickbeard/name_parser/parser.py b/sickbeard/name_parser/parser.py index e0761992e1fab0a8ea3457b6b2031847b47cb87d..301bf5ebee53b44c9b6e4627f09ff26634f45e78 100644 --- a/sickbeard/name_parser/parser.py +++ b/sickbeard/name_parser/parser.py @@ -549,16 +549,16 @@ class ParseResult(object): else: to_return = u'' if self.season_number != None: - to_return += 'S' + str(self.season_number) + to_return += 'S' + str(self.season_number).zfill(2) if self.episode_numbers and len(self.episode_numbers): for e in self.episode_numbers: - to_return += 'E' + str(e) + to_return += 'E' + str(e).zfill(2) if self.is_air_by_date: to_return += str(self.air_date) if self.ab_episode_numbers: to_return += ' [ABS: ' + str(self.ab_episode_numbers) + ']' - if self.version: + if self.version and self.is_anime is True: to_return += ' [ANIME VER: ' + str(self.version) + ']' if self.release_group: diff --git a/sickbeard/notifiers/freemobile.py b/sickbeard/notifiers/freemobile.py index bcf10afd4d90929db3b19947c52bb6879f3ceae6..102da870f534ed1795fcde8db690f89923fa32e9 100644 --- a/sickbeard/notifiers/freemobile.py +++ b/sickbeard/notifiers/freemobile.py @@ -50,7 +50,7 @@ class FreeMobileNotifier: # build up the URL and parameters msg = msg.strip() - msg_quoted = urllib2.quote(title + ": " + msg) + msg_quoted = urllib2.quote(title.encode('utf-8') + ": " + msg.encode('utf-8')) URL = "https://smsapi.free-mobile.fr/sendmsg?user=" + id + "&pass=" + apiKey + "&msg=" + msg_quoted req = urllib2.Request(URL) @@ -75,7 +75,11 @@ class FreeMobileNotifier: message = "Server error. Please retry in few moment." logger.log(message, logger.ERROR) return False, message - + except Exception, e: + message = u"Error while sending SMS: {0}".format(e) + logger.log(message, logger.ERROR) + return False, message + message = "Free Mobile SMS successful." logger.log(message, logger.INFO) return True, message diff --git a/sickbeard/notifiers/plex.py b/sickbeard/notifiers/plex.py index de496dfef0211a8b4e526c6f2f835b8903f663e1..fccaa1db4f25482878cd0bea095531271f858304 100644 --- a/sickbeard/notifiers/plex.py +++ b/sickbeard/notifiers/plex.py @@ -151,7 +151,8 @@ class PLEXNotifier: if sickbeard.USE_PLEX: update_text = common.notifyStrings[common.NOTIFY_GIT_UPDATE_TEXT] title = common.notifyStrings[common.NOTIFY_GIT_UPDATE] - self._notify_pmc(update_text + new_version, title) + if update_text and title and new_version: + self._notify_pmc(update_text + new_version, title) def test_notify_pmc(self, host, username, password): return self._notify_pmc('This is a test notification from SickRage', 'Test Notification', host, username, password, force=True) diff --git a/sickbeard/notifiers/trakt.py b/sickbeard/notifiers/trakt.py index 5bf08bc4697068179ffcc070701d29db125ea10e..c276b657d97726be7943eba573d212e68260cacc 100644 --- a/sickbeard/notifiers/trakt.py +++ b/sickbeard/notifiers/trakt.py @@ -47,7 +47,7 @@ class TraktNotifier: """ trakt_id = sickbeard.indexerApi(ep_obj.show.indexer).config['trakt_id'] - trakt_api = TraktAPI(sickbeard.TRAKT_DISABLE_SSL_VERIFY, sickbeard.TRAKT_TIMEOUT) + trakt_api = TraktAPI(sickbeard.SSL_VERIFY, sickbeard.TRAKT_TIMEOUT) if sickbeard.USE_TRAKT: try: @@ -100,7 +100,7 @@ class TraktNotifier: update: type o action add or remove """ - trakt_api = TraktAPI(sickbeard.TRAKT_DISABLE_SSL_VERIFY, sickbeard.TRAKT_TIMEOUT) + trakt_api = TraktAPI(sickbeard.SSL_VERIFY, sickbeard.TRAKT_TIMEOUT) if sickbeard.USE_TRAKT: diff --git a/sickbeard/providers/generic.py b/sickbeard/providers/generic.py index a4e702533e8f62d74ec449edaf736c75accdd00d..b97a86dd936eb6022a0b138acaee8d5636e9cdc0 100644 --- a/sickbeard/providers/generic.py +++ b/sickbeard/providers/generic.py @@ -156,7 +156,11 @@ class GenericProvider: if result.url.startswith('magnet'): try: torrent_hash = re.findall('urn:btih:([\w]{32,40})', result.url)[0].upper() - torrent_name = re.findall('dn=([^&]+)', result.url)[0] + + try: + torrent_name = re.findall('dn=([^&]+)', result.url)[0] + except: + torrent_name = 'NO_DOWNLOAD_NAME' if len(torrent_hash) == 32: torrent_hash = b16encode(b32decode(torrent_hash)).upper() @@ -203,6 +207,8 @@ class GenericProvider: self.proxyGlypeProxySSLwarning = None for url in urls: + if 'NO_DOWNLOAD_NAME' in url: + continue if helpers.headURL(self.proxy._buildURL(url), session=self.session, headers=self.headers, proxyGlypeProxySSLwarning=self.proxyGlypeProxySSLwarning): return url @@ -226,6 +232,9 @@ class GenericProvider: self.headers.pop('Referer') for url in urls: + if 'NO_DOWNLOAD_NAME' in url: + continue + logger.log(u"Downloading a result from " + self.name + " at " + url) if helpers.download_file(self.proxy._buildURL(url), filename, session=self.session, headers=self.headers): if self._verify_download(filename): diff --git a/sickbeard/providers/tntvillage.py b/sickbeard/providers/tntvillage.py index 84d07f15510bc8df422173d8473946f07293456f..d29fbfbae950079d56367ba24f9bfc3203129900 100644 --- a/sickbeard/providers/tntvillage.py +++ b/sickbeard/providers/tntvillage.py @@ -116,10 +116,7 @@ class TNTVillageProvider(generic.TorrentProvider): 'download' : 'http://forum.tntvillage.scambioetico.org/index.php?act=Attach&type=post&id=%s', } - self.sub_string = [ - 'sub', - 'softsub', - ] + self.sub_string = ['sub', 'softsub'] self.url = self.urls['base_url'] @@ -294,39 +291,29 @@ class TNTVillageProvider(generic.TorrentProvider): else: return Quality.UNKNOWN - def _is_italian(self,torrent_rows): + def _is_italian(self, torrent_rows): - is_italian = 0 - - span_tag = (torrent_rows.find_all('td'))[1].find('b').find('span') - - name = str(span_tag) - - if not name: - return 0 - - subFound=0 + name = str(torrent_rows.find_all('td')[1].find('b').find('span')) + if not name or name is 'None': + return False + subFound = italian = False for sub in self.sub_string: - - if not re.search(sub, name, re.I): - continue + if re.search(sub, name, re.I): + subFound = True else: - subFound = 1 - - name = name.split(sub) + continue - if re.search("ita", name[0], re.I): + if re.search("ita", name.split(sub)[0], re.I): logger.log(u"Found Italian release", logger.DEBUG) - is_italian=1 + italian = True break - if not subFound: - if re.search("ita", name, re.I): - logger.log(u"Found Italian release", logger.DEBUG) - is_italian=1 + if not subFound and re.search("ita", name, re.I): + logger.log(u"Found Italian release", logger.DEBUG) + italian = True - return is_italian + return italian def _is_season_pack(self, name): diff --git a/sickbeard/subtitles.py b/sickbeard/subtitles.py index 183af22ca50cb6082ea60514a4b295f8762d50f5..8532b57b967adae3c7dbdeb7ad34806f17d5cc5c 100644 --- a/sickbeard/subtitles.py +++ b/sickbeard/subtitles.py @@ -67,7 +67,7 @@ def getEnabledServiceList(): #Hack around this for now. def fromietf(language): - return babelfish.Language.fromietf(language if language not in 'pb' else 'pt-BR') + return babelfish.Language.fromopensubtitles(language) def isValidLanguage(language): try: @@ -81,7 +81,7 @@ def getLanguageName(language): # TODO: Filter here for non-languages in sickbeard.SUBTITLES_LANGUAGES def wantedLanguages(sqlLike = False): - wantedLanguages = sorted(sickbeard.SUBTITLES_LANGUAGES) + wantedLanguages = [x for x in sorted(sickbeard.SUBTITLES_LANGUAGES) if x in babelfish.language_converters['opensubtitles'].codes] if sqlLike: return '%' + ','.join(wantedLanguages) + '%' return wantedLanguages @@ -92,8 +92,10 @@ def subtitlesLanguages(video_path): languages = subliminal.video.scan_subtitle_languages(video_path) for language in languages: - if hasattr(language, 'alpha3') and language.alpha3: - resultList.append(language.alpha3) + if hasattr(language, 'opensubtitles') and language.opensubtitles: + resultList.append(language.opensubtitles) + elif hasattr(language, 'alpha3') and language.alpha3: + resultList.append(language.alpha3) elif hasattr(language, 'alpha2') and language.alpha2: resultList.append(language.alpha2) @@ -101,11 +103,14 @@ def subtitlesLanguages(video_path): if len(resultList) is 1 and len(defaultLang) is 1: return defaultLang + if ('pob' in defaultLang or 'pb' in defaultLang) and ('pt' not in defaultLang and 'por' not in defaultLang): + resultList = [x if not x in ['por', 'pt'] else u'pob' for x in resultList] + return sorted(resultList) # TODO: Return only languages our providers allow def subtitleLanguageFilter(): - return [language for language in babelfish.LANGUAGE_MATRIX if hasattr(language, 'alpha2') and language.alpha2] + return [babelfish.Language.fromopensubtitles(language) for language in babelfish.language_converters['opensubtitles'].codes if len(language) == 3] class SubtitlesFinder(): """ diff --git a/sickbeard/traktChecker.py b/sickbeard/traktChecker.py index 80d2fff963ff427c61e267aab310169d7cb3593a..eb9ca3b576735565b4715dfbd65487b386f9feda 100644 --- a/sickbeard/traktChecker.py +++ b/sickbeard/traktChecker.py @@ -62,7 +62,7 @@ def setEpisodeToWanted(show, s, e): class TraktChecker(): def __init__(self): - self.trakt_api = TraktAPI(sickbeard.TRAKT_DISABLE_SSL_VERIFY, sickbeard.TRAKT_TIMEOUT) + self.trakt_api = TraktAPI(sickbeard.SSL_VERIFY, sickbeard.TRAKT_TIMEOUT) self.todoBacklog = [] self.todoWanted = [] self.ShowWatchlist = {} @@ -644,7 +644,7 @@ class TraktChecker(): class TraktRolling(): def __init__(self): - self.trakt_api = TraktAPI(sickbeard.TRAKT_DISABLE_SSL_VERIFY, sickbeard.TRAKT_TIMEOUT) + self.trakt_api = TraktAPI(sickbeard.SSL_VERIFY, sickbeard.TRAKT_TIMEOUT) self.EpisodeWatched = [] def run(self, force=False): diff --git a/sickbeard/tv.py b/sickbeard/tv.py index 7e1b1c103ed1a4d0d81497d460cc98e281cc8c32..58ea13f7714933647daaf5d7f4183e8ba88c0df3 100644 --- a/sickbeard/tv.py +++ b/sickbeard/tv.py @@ -174,6 +174,10 @@ class TVShow(object): else: return False + @property + def network_logo_name(self): + return self.network.replace(u'\u00C9', 'e').replace(u'\u00E9', 'e').lower() + def _getLocation(self): # no dir check needed if missing show dirs are created during post-processing if sickbeard.CREATE_MISSING_SHOW_DIRS: @@ -1228,7 +1232,7 @@ class TVShow(object): def wantEpisode(self, season, episode, quality, manualSearch=False, downCurQuality=False): - logger.log(u"Checking if found episode " + str(season) + "x" + str(episode) + " is wanted at quality " + + logger.log(u"Checking if found episode " + "S" + str(season).zfill(2) + "E" + str(episode).zfill(2) + " is wanted at quality " + Quality.qualityStrings[quality], logger.DEBUG) # if the quality isn't one we want under any circumstances then just say no @@ -1471,21 +1475,7 @@ class TVEpisode(object): logger.log(u'%s: No subtitles found for S%02dE%02d on any provider' % (self.show.indexerid, self.season, self.episode), logger.DEBUG) return - if len(sickbeard.SUBTITLES_DIR): - # absolute path (GUI 'Browse' button, or typed absolute path) - sillyness? - if ek.ek(os.path.exists, sickbeard.SUBTITLES_DIR): - subs_new_path = ek.ek(os.path.join, sickbeard.SUBTITLES_DIR, self.show.name) - else: - # relative to the folder the episode is in - sillyness? - subs_new_path = ek.ek(os.path.join, ek.ek(os.path.dirname, self.location), sickbeard.SUBTITLES_DIR) - dir_exists = helpers.makeDir(subs_new_path) - - if not dir_exists: - logger.log(u'Unable to create subtitles folder ' + subs_new_path, logger.ERROR) - else: - helpers.chmodAsParent(subs_new_path) - else: - subs_new_path = None + subs_new_path = sickbeard.SUBTITLES_DIR if sickbeard.SUBTITLES_DIR and ek.ek(os.path.exists, sickbeard.SUBTITLES_DIR) else None subliminal.save_subtitles(foundSubs, directory=subs_new_path, single=not sickbeard.SUBTITLES_MULTI) @@ -1513,7 +1503,7 @@ class TVEpisode(object): if sickbeard.SUBTITLES_HISTORY: for video, subs in foundSubs.iteritems(): for sub in subs: - logger.log(u'history.logSubtitle %s, %s' % (sub.provider_name, sub.language.alpha3), logger.DEBUG) + logger.log(u'history.logSubtitle %s, %s' % (sub.provider_name, sub.language.opensubtitles), logger.DEBUG) history.logSubtitle(self.show.indexerid, self.season, self.episode, self.status, sub) return self.subtitles diff --git a/sickbeard/tvcache.py b/sickbeard/tvcache.py index 03b2312868ad894265b015695a2d5126d3910eff..ddfea9e4b010e5594a34bfa34b2607e730e9477c 100644 --- a/sickbeard/tvcache.py +++ b/sickbeard/tvcache.py @@ -231,8 +231,7 @@ class TVCache(): def shouldUpdate(self): # if we've updated recently then skip the update if datetime.datetime.today() - self.lastUpdate < datetime.timedelta(minutes=self.minTime): - logger.log(u"Last update was too soon, using old cache: today()-" + str(self.lastUpdate) + "<" + str( - datetime.timedelta(minutes=self.minTime)), logger.DEBUG) + logger.log(u"Last update was too soon, using old cache: " + str(self.lastUpdate) + ". Updated less then " + str(self.minTime) + " minutes ago", logger.DEBUG) return False return True diff --git a/sickbeard/webapi.py b/sickbeard/webapi.py index 65bbb7d33327fc404bd08543c3508c77a3e83cdb..086520f44377974d7cd093cd467f3d71e56ec988 100644 --- a/sickbeard/webapi.py +++ b/sickbeard/webapi.py @@ -74,7 +74,7 @@ result_type_map = {RESULT_SUCCESS: "success", class ApiHandler(RequestHandler): """ api class that returns json results """ - version = 5 # use an int since float-point is unpredictible + version = 5 # use an int since float-point is unpredictable intent = 4 def __init__(self, *args, **kwargs): @@ -254,6 +254,8 @@ class ApiHandler(RequestHandler): # Redirect initial poster/banner thumb to default images if which[0:6] == 'poster': default_image_name = 'poster.png' + elif which[0:6] == 'fanart': + default_image_name = 'fanart.png' else: default_image_name = 'banner.png' @@ -271,6 +273,10 @@ class ApiHandler(RequestHandler): image_file_name = cache_obj.banner_path(show) if which == 'banner_thumb': image_file_name = cache_obj.banner_thumb_path(show) + if which == 'fanart': + if not cache_obj.has_fanart(show): + cache_obj.fill_cache(sickbeard.helpers.findCertainShow(sickbeard.showList, int(show))) + image_file_name = cache_obj.fanart_path(show) if ek.ek(os.path.isfile, image_file_name): static_image_path = os.path.normpath(image_file_name.replace(sickbeard.CACHE_DIR, '/cache')) @@ -278,6 +284,18 @@ class ApiHandler(RequestHandler): static_image_path = sickbeard.WEB_ROOT + static_image_path.replace('\\', '/') return self.redirect(static_image_path) + def showNetworkLogo(self, show=None): + show = sickbeard.helpers.findCertainShow(sickbeard.showList, int(show)) + + if show: + image_file_name = show.network_logo_name + else: + image_file_name = 'nonetwork' + + static_image_path = '%s/images/network/%s.png' % (sickbeard.WEB_ROOT, image_file_name) + + return self.redirect(static_image_path) + class ApiCall(ApiHandler): _help = {"desc": "No help message available. Please tell the devs that a help msg is missing for this cmd"} @@ -2410,6 +2428,39 @@ class CMD_ShowGetBanner(ApiCall): """ get the banner for a show in sickrage """ return {'outputType': 'image', 'image': self.rh.showPoster(self.indexerid, 'banner')} +class CMD_ShowGetNetworkLogo(ApiCall): + _help = { + "desc": "Get the network logo stored for a show in SickRage", + "requiredParameters": { + "indexerid": { + "desc": "Unique id of a show", + }, + }, + "optionalParameters": { + "tvdbid": { + "desc": "TheTVDB.com unique id of a show", + }, + "tvrageid": { + "desc": "TVRage.con unique id of a show", + }, + }, + } + + def __init__(self, args, kwargs): + # required + self.indexerid, args = self.check_params(args, kwargs, "indexerid", None, True, "int", []) + # optional + # super, missing, help + ApiCall.__init__(self, args, kwargs) + + def run(self): + """ + :return: The network logo for a show in SickRage + """ + return { + 'outputType': 'image', + 'image': self.rh.showNetworkLogo(self.indexerid) + } class CMD_ShowPause(ApiCall): _help = {"desc": "set a show's paused state in sickrage", @@ -2941,6 +2992,7 @@ _functionMaper = {"help": CMD_Help, "show.getquality": CMD_ShowGetQuality, "show.getposter": CMD_ShowGetPoster, "show.getbanner": CMD_ShowGetBanner, + "show.getnetworklogo": CMD_ShowGetNetworkLogo, "show.pause": CMD_ShowPause, "show.refresh": CMD_ShowRefresh, "show.seasonlist": CMD_ShowSeasonList, diff --git a/sickbeard/webserve.py b/sickbeard/webserve.py index 144dcf24dc67a413402ab645fe134caad6390169..078b862cddf16cffda4ef35896c727b6a8900f14 100644 --- a/sickbeard/webserve.py +++ b/sickbeard/webserve.py @@ -91,23 +91,24 @@ class html_entities(CheetahFilter): filtered = '' elif isinstance(val, str): try: - filtered = unicode(val).encode('ascii', 'xmlcharrefreplace') - except UnicodeDecodeError, UnicodeEncodeError: + filtered = unicode(val) + except Exception: try: - filtered = unicode(val, chardet.detect(val).get('encoding')).encode('ascii', 'xmlcharrefreplace') - except (UnicodeDecodeError, UnicodeEncodeError) as e: + filtered = unicode(val, sickbeard.SYS_ENCODING) + except Exception: try: - filtered = unicode(val, sickbeard.SYS_ENCODING).encode('ascii', 'xmlcharrefreplace') - except (UnicodeDecodeError, UnicodeEncodeError) as e: - logger.log(u'Unable to decode using {0}, trying utf-8. Error is: {1}'.format(sickbeard.SYS_ENCODING, ex(e)), logger.DEBUG) + filtered = unicode(val, 'utf-8') + except Exception: try: - filtered = unicode(val, 'utf-8').encode('ascii', 'xmlcharrefreplace') - except (UnicodeDecodeError, UnicodeEncodeError) as e: - try: - logger.log(u'Unable to decode using utf-8, trying latin-1. Error is: {1}'.format(ex(e)), logger.DEBUG) - filtered = unicode(val, 'latin-1').encode('ascii', 'xmlcharrefreplace') - except UnicodeDecodeError, UnicodeEncodeError: - logger.log(u'Unable to decode using latin-1, Error is {0}.'.format(ex(e)),logger.ERROR) + filtered = unicode(val, 'latin-1') + except Exception: + logger.log(u'Unable to decode using %s, utf-8, or latin-1. Falling back to chardet!' % + sickbeard.SYS_ENCODING, logger.ERROR) + filtered = unicode(val, chardet.detect(val).get('encoding')) + try: + filtered = filtered.encode('ascii', 'xmlcharrefreplace') + except Exception: + logger.log(u'Unable to encode to ascii using xmlcharrefreplace.', logger.ERROR) else: filtered = self.filter(str(val)) @@ -410,6 +411,18 @@ class WebRoot(WebHandler): static_image_path = static_image_path.replace('\\', '/') return self.redirect(static_image_path) + def showNetworkLogo(self, show=None): + show = sickbeard.helpers.findCertainShow(sickbeard.showList, int(show)) + + if show: + image_file_name = show.network_logo_name + else: + image_file_name = 'nonetwork' + + static_image_path = '%s/images/network/%s.png' % (sickbeard.WEB_ROOT, image_file_name) + + return self.redirect(static_image_path) + def setHomeLayout(self, layout): if layout not in ('poster', 'small', 'banner', 'simple', 'coverflow'): @@ -1005,7 +1018,7 @@ class Home(WebRoot): def getTraktToken(self, trakt_pin=None): - trakt_api = TraktAPI(sickbeard.TRAKT_DISABLE_SSL_VERIFY, sickbeard.TRAKT_TIMEOUT) + trakt_api = TraktAPI(sickbeard.SSL_VERIFY, sickbeard.TRAKT_TIMEOUT) response = trakt_api.traktToken(trakt_pin) if response: return "Trakt Authorized" @@ -2388,7 +2401,7 @@ class HomeAddShows(Home): t.trending_shows = [] - trakt_api = TraktAPI(sickbeard.TRAKT_DISABLE_SSL_VERIFY, sickbeard.TRAKT_TIMEOUT) + trakt_api = TraktAPI(sickbeard.SSL_VERIFY, sickbeard.TRAKT_TIMEOUT) try: not_liked_show = "" @@ -2449,7 +2462,7 @@ class HomeAddShows(Home): t.trending_shows = [] - trakt_api = TraktAPI(sickbeard.TRAKT_DISABLE_SSL_VERIFY, sickbeard.TRAKT_TIMEOUT) + trakt_api = TraktAPI(sickbeard.SSL_VERIFY, sickbeard.TRAKT_TIMEOUT) try: not_liked_show = "" @@ -2501,7 +2514,7 @@ class HomeAddShows(Home): ] } - trakt_api = TraktAPI(sickbeard.TRAKT_DISABLE_SSL_VERIFY, sickbeard.TRAKT_TIMEOUT) + trakt_api = TraktAPI(sickbeard.SSL_VERIFY, sickbeard.TRAKT_TIMEOUT) result=trakt_api.traktRequest("users/" + sickbeard.TRAKT_USERNAME + "/lists/" + sickbeard.TRAKT_BLACKLIST_NAME + "/items", data, method='POST') @@ -3691,7 +3704,7 @@ class ConfigGeneral(Config): web_password=None, version_notify=None, enable_https=None, https_cert=None, https_key=None, handle_reverse_proxy=None, sort_article=None, auto_update=None, notify_on_update=None, proxy_setting=None, proxy_indexers=None, anon_redirect=None, git_path=None, git_remote=None, - calendar_unprotected=None, debug=None, no_restart=None, coming_eps_missed_range=None, + calendar_unprotected=None, debug=None, ssl_verify=None, no_restart=None, coming_eps_missed_range=None, filter_row=None, fuzzy_dating=None, trim_zero=None, date_preset=None, date_preset_na=None, time_preset=None, indexer_timeout=None, download_url=None, rootDir=None, theme_name=None, git_reset=None, git_username=None, git_password=None, git_autoissues=None, display_all_seasons=None): @@ -3730,6 +3743,7 @@ class ConfigGeneral(Config): sickbeard.CALENDAR_UNPROTECTED = config.checkbox_to_value(calendar_unprotected) sickbeard.NO_RESTART = config.checkbox_to_value(no_restart) sickbeard.DEBUG = config.checkbox_to_value(debug) + sickbeard.SSL_VERIFY = config.checkbox_to_value(ssl_verify) # sickbeard.LOG_DIR is set in config.change_LOG_DIR() sickbeard.COMING_EPS_MISSED_RANGE = config.to_int(coming_eps_missed_range,default=7) sickbeard.DISPLAY_ALL_SEASONS = config.checkbox_to_value(display_all_seasons) @@ -4664,7 +4678,7 @@ class ConfigNotifications(Config): use_trakt=None, trakt_username=None, trakt_pin=None, trakt_remove_watchlist=None, trakt_sync_watchlist=None, trakt_remove_show_from_sickrage=None, trakt_method_add=None, trakt_start_paused=None, trakt_use_recommended=None, trakt_sync=None, trakt_sync_remove=None, - trakt_default_indexer=None, trakt_remove_serieslist=None, trakt_disable_ssl_verify=None, trakt_timeout=None, trakt_blacklist_name=None, + trakt_default_indexer=None, trakt_remove_serieslist=None, trakt_timeout=None, trakt_blacklist_name=None, trakt_use_rolling_download=None, trakt_rolling_num_ep=None, trakt_rolling_add_paused=None, trakt_rolling_frequency=None, use_synologynotifier=None, synologynotifier_notify_onsnatch=None, synologynotifier_notify_ondownload=None, synologynotifier_notify_onsubtitledownload=None, @@ -4792,7 +4806,6 @@ class ConfigNotifications(Config): sickbeard.TRAKT_SYNC = config.checkbox_to_value(trakt_sync) sickbeard.TRAKT_SYNC_REMOVE = config.checkbox_to_value(trakt_sync_remove) sickbeard.TRAKT_DEFAULT_INDEXER = int(trakt_default_indexer) - sickbeard.TRAKT_DISABLE_SSL_VERIFY = config.checkbox_to_value(trakt_disable_ssl_verify) sickbeard.TRAKT_TIMEOUT = int(trakt_timeout) sickbeard.TRAKT_BLACKLIST_NAME = trakt_blacklist_name config.change_TRAKT_USE_ROLLING_DOWNLOAD(trakt_use_rolling_download) diff --git a/sickbeard/webserveInit.py b/sickbeard/webserveInit.py index 8f2bd31e762dd73b2890b4094eb5223fc8cb6e8c..2c44c9fd5d82612e53d7e3ebba1ffe00f9eff56b 100644 --- a/sickbeard/webserveInit.py +++ b/sickbeard/webserveInit.py @@ -138,11 +138,11 @@ class SRWebServer(threading.Thread): try: self.server.listen(self.options['port'], self.options['host']) except: - etype, evalue, etb = sys.exc_info() - logger.log( - "Could not start webserver on %s. Excpeption: %s, Error: %s" % (self.options['port'], etype, evalue), - logger.ERROR) - return + if sickbeard.LAUNCH_BROWSER and not self.daemon: + sickbeard.launchBrowser('https' if sickbeard.ENABLE_HTTPS else 'http', self.options['port'], sickbeard.WEB_ROOT) + logger.log(u"Launching browser and exiting") + logger.log(u"Could not start webserver on port %s, already in use!" % self.options['port']) + os._exit(1) try: self.io_loop.start()