diff --git a/SickBeard.py b/SickBeard.py index f8a623f0e5d5b29af87462a43862fe4728b4bee2..04a481eab33af6fddbb43c09a1d32cb5bdbabb6d 100755 --- a/SickBeard.py +++ b/SickBeard.py @@ -34,6 +34,8 @@ except: print "The Python module Cheetah is required" sys.exit(1) +import os +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), 'lib'))) # We only need this for compiling an EXE and I will just always do that on 2.6+ if sys.hexversion >= 0x020600F0: from multiprocessing import freeze_support diff --git a/data/css/style.css b/data/css/style.css index 0187128b0237d8a88dcb214e82341b3ba365b77e..a651a90598a1fc1006842b3a2acde30313d4bfe1 100644 --- a/data/css/style.css +++ b/data/css/style.css @@ -827,24 +827,24 @@ config_providers.tmpl #config-components > h2 { border-bottom: 4px solid #ddd; } -#providerOrderList { +#providerOrderList, #service_order_list { width: 250px; padding-left: 20px; list-style-type: none; } -#providerOrderList li { +#providerOrderList li, #service_order_list li { padding: 5px; margin: 5px 0; font-size: 14px; } -#providerOrderList input { +#providerOrderList input, #service_order_list input { margin: 0 2px; } .imgLink img { padding: 0 2px 2px; } /* fix drop target height */ -#providerOrderList .ui-state-highlight { +#providerOrderList.ui-state-highlight, #service_order_list .ui-state-highlight { height: 20px; line-height: 18px; } diff --git a/data/css/token-input.css b/data/css/token-input.css new file mode 100644 index 0000000000000000000000000000000000000000..fb9a84374e5593d754523e122e0ebb45fb3b6be5 --- /dev/null +++ b/data/css/token-input.css @@ -0,0 +1,136 @@ +/* Example tokeninput style #1: Token vertical list*/ +ul.token-input-list { + overflow: hidden; + height: auto !important; + height: 1%; + width: 273px; + border: 1px solid #999; + cursor: text; + font-size: 10px; + font-family: Verdana; + z-index: 999; + margin: 0; + padding: 0 0 1px 0; + background-color: #fff; + list-style-type: none; +/* clear: left; */ + 
border-top-left-radius: 3px; + border-top-right-radius: 3px; + border-bottom-left-radius: 3px; + border-bottom-right-radius: 3px; +} + +ul.token-input-list li { + list-style-type: none; +} + +ul.token-input-list li input { + border: 0; + padding: 3px 4px; + background-color: white; +/* -webkit-appearance: caret; */ +} + +li.token-input-token { + overflow: hidden; + height: auto !important; + height: 1%; + margin: 3px; + padding: 3px 5px 0 5px; + background-color: #d0efa0; + color: #000; + font-weight: bold; + cursor: default; + display: block; +} + +li.token-input-token img { + padding-top: 7px; + padding-right: 4px; + float: left; +} + +li.token-input-token input { + padding-top: 2px !important; + padding-right: 4px !important; + float: left; +} + +li.token-input-token p { + float: left; + padding: 0; + margin: 0; + line-height: 2.0 !important; +} + +li.token-input-token span { + float: right; + color: #777; + cursor: pointer; +} + +li.token-input-selected-token { + background-color: #08844e; + color: #fff; +} + +li.token-input-selected-token span { + color: #bbb; +} + +li.token-input-input-token input { + margin: 3px 3px 3px 3px !important; +} + +div.token-input-dropdown { + position: absolute; + width: 273px; + background-color: #fff; + overflow: hidden; + border-left: 1px solid #ccc; + border-right: 1px solid #ccc; + border-bottom: 1px solid #ccc; + cursor: default; + font-size: 11px; + font-family: Verdana; + z-index: 1; +} + +div.token-input-dropdown p { + margin: 0; + padding: 3px; + font-weight: bold; + color: #777; +} + +div.token-input-dropdown ul { + margin: 0; + padding: 0; +} + +div.token-input-dropdown ul li { + background-color: #fff; + padding: 3px; + list-style-type: none; +} + +div.token-input-dropdown ul li.token-input-dropdown-item { + background-color: #fafafa; +} + +div.token-input-dropdown ul li.token-input-dropdown-item2 { + background-color: #fff; +} + +div.token-input-dropdown ul li em { + font-weight: bold; + font-style: normal; +} + 
+div.token-input-dropdown ul li.token-input-selected-dropdown-item { + background-color: #d0efa0; +} + +span.token-input-delete-token { + margin: 0 1px; +} \ No newline at end of file diff --git a/data/images/closed_captioning.png b/data/images/closed_captioning.png new file mode 100644 index 0000000000000000000000000000000000000000..34424e9e20aa89b663d1af3b1929712aa6c0b140 Binary files /dev/null and b/data/images/closed_captioning.png differ diff --git a/data/images/ee.png b/data/images/ee.png new file mode 100644 index 0000000000000000000000000000000000000000..9040ce266512b016240bcd3424e3c6cc92e7894a Binary files /dev/null and b/data/images/ee.png differ diff --git a/data/images/subtitles/addic7ed.png b/data/images/subtitles/addic7ed.png new file mode 100644 index 0000000000000000000000000000000000000000..0bfcedaacae3c83ed58742d25fd76c4d969f728a Binary files /dev/null and b/data/images/subtitles/addic7ed.png differ diff --git a/data/images/subtitles/bierdopje.png b/data/images/subtitles/bierdopje.png new file mode 100644 index 0000000000000000000000000000000000000000..349eb3bc88c197c8b916f94781291284f637fb34 Binary files /dev/null and b/data/images/subtitles/bierdopje.png differ diff --git a/data/images/subtitles/opensubtitles.png b/data/images/subtitles/opensubtitles.png new file mode 100644 index 0000000000000000000000000000000000000000..14e264828dafb6c5555774c1e63b3220afdb4d0e Binary files /dev/null and b/data/images/subtitles/opensubtitles.png differ diff --git a/data/images/subtitles/podnapisiweb.png b/data/images/subtitles/podnapisiweb.png new file mode 100644 index 0000000000000000000000000000000000000000..c640d0db289a978589e5e97355f1319e0a07e237 Binary files /dev/null and b/data/images/subtitles/podnapisiweb.png differ diff --git a/data/images/subtitles/subscene.png b/data/images/subtitles/subscene.png new file mode 100644 index 0000000000000000000000000000000000000000..5dde8e2bbfc9988143c87264da7864db7f62dfa1 Binary files /dev/null and 
b/data/images/subtitles/subscene.png differ diff --git a/data/images/subtitles/subswiki.png b/data/images/subtitles/subswiki.png new file mode 100644 index 0000000000000000000000000000000000000000..8b0e0834f1c5be9be91117f13e6302cf0b637319 Binary files /dev/null and b/data/images/subtitles/subswiki.png differ diff --git a/data/images/subtitles/subtitlesource.png b/data/images/subtitles/subtitlesource.png new file mode 100644 index 0000000000000000000000000000000000000000..827a48a73c5a7858d8fe86ef6bae0de04dae92fb Binary files /dev/null and b/data/images/subtitles/subtitlesource.png differ diff --git a/data/images/subtitles/subtitulos.png b/data/images/subtitles/subtitulos.png new file mode 100644 index 0000000000000000000000000000000000000000..6bbd40d0fdf9bfabb8a2433c59132583a89b9da0 Binary files /dev/null and b/data/images/subtitles/subtitulos.png differ diff --git a/data/images/subtitles/thesubdb.png b/data/images/subtitles/thesubdb.png new file mode 100644 index 0000000000000000000000000000000000000000..69409549684a7f58a1bccf7eeab7cf0333058add Binary files /dev/null and b/data/images/subtitles/thesubdb.png differ diff --git a/data/images/subtitles/tvsubtitles.png b/data/images/subtitles/tvsubtitles.png new file mode 100644 index 0000000000000000000000000000000000000000..248fa11e9359b9d68e469aa471431158b365ec8d Binary files /dev/null and b/data/images/subtitles/tvsubtitles.png differ diff --git a/data/interfaces/default/config_notifications.tmpl b/data/interfaces/default/config_notifications.tmpl index 2144a0fdd2f717654d27c525322b8ab31a668c5f..3790b29db753d7527b110cc29069e9e27756d313 100755 --- a/data/interfaces/default/config_notifications.tmpl +++ b/data/interfaces/default/config_notifications.tmpl @@ -52,6 +52,13 @@ <span class="component-desc">Send notification when we finish a download?</span> </label> </div> + <div class="field-pair"> + <input type="checkbox" name="xbmc_notify_onsubtitledownload" id="xbmc_notify_onsubtitledownload" #if 
$sickbeard.XBMC_NOTIFY_ONSUBTITLEDOWNLOAD then "checked=\"checked\"" else ""# /> + <label class="clearfix" for="xbmc_notify_onsubtitledownload"> + <span class="component-title">Notify on Subtitle Download</span> + <span class="component-desc">Send notification when we finish a subtitle download?</span> + </label> + </div> <div class="field-pair"> <input type="checkbox" name="xbmc_update_library" id="xbmc_update_library" #if $sickbeard.XBMC_UPDATE_LIBRARY then "checked=\"checked\"" else ""# /> <label class="clearfix" for="xbmc_update_library"> @@ -146,6 +153,13 @@ <span class="component-desc">Send notification when we finish a download?</span> </label> </div> + <div class="field-pair"> + <input type="checkbox" name="plex_notify_onsubtitledownload" id="plex_notify_onsubtitledownload" #if $sickbeard.PLEX_NOTIFY_ONSUBTITLEDOWNLOAD then "checked=\"checked\"" else ""# /> + <label class="clearfix" for="plex_notify_onsubtitledownload"> + <span class="component-title">Notify on Subtitle Download</span> + <span class="component-desc">Send notification when we finish a subtitle download?</span> + </label> + </div> <div class="field-pair"> <input type="checkbox" name="plex_update_library" id="plex_update_library" #if $sickbeard.PLEX_UPDATE_LIBRARY then "checked=\"checked\"" else ""# /> <label class="clearfix" for="plex_update_library"> @@ -480,6 +494,13 @@ <span class="component-desc">Send notification when we finish a download?</span> </label> </div> + <div class="field-pair"> + <input type="checkbox" name="growl_notify_onsubtitledownload" id="growl_notify_onsubtitledownload" #if $sickbeard.GROWL_NOTIFY_ONSUBTITLEDOWNLOAD then "checked=\"checked\"" else ""# /> + <label class="clearfix" for="growl_notify_onsubtitledownload"> + <span class="component-title">Notify on Subtitle Download</span> + <span class="component-desc">Send notification when we finish a subtitle download?</span> + </label> + </div> <div class="field-pair"> <label class="nocheck clearfix"> <span 
class="component-title">Growl IP:Port</span> @@ -543,6 +564,13 @@ <span class="component-desc">Send notification when we finish a download?</span> </label> </div> + <div class="field-pair"> + <input type="checkbox" name="prowl_notify_onsubtitledownload" id="prowl_notify_onsubtitledownload" #if $sickbeard.PROWL_NOTIFY_ONSUBTITLEDOWNLOAD then "checked=\"checked\"" else ""# /> + <label class="clearfix" for="prowl_notify_onsubtitledownload"> + <span class="component-title">Notify on Subtitle Download</span> + <span class="component-desc">Send notification when we finish a subtitle download?</span> + </label> + </div> <div class="field-pair"> <label class="nocheck clearfix"> <span class="component-title">Prowl API key:</span> @@ -608,6 +636,13 @@ <span class="component-desc">Send notification when we finish a download?</span> </label> </div> + <div class="field-pair"> + <input type="checkbox" name="notifo_notify_onsubtitledownload" id="notifo_notify_onsubtitledownload" #if $sickbeard.NOTIFO_NOTIFY_ONSUBTITLEDOWNLOAD then "checked=\"checked\"" else ""# /> + <label class="clearfix" for="notifo_notify_onsubtitledownload"> + <span class="component-title">Notify on Subtitle Download</span> + <span class="component-desc">Send notification when we finish a subtitle download?</span> + </label> + </div> <div class="field-pair"> <label class="nocheck clearfix"> <span class="component-title">Notifo Username</span> @@ -667,6 +702,13 @@ <span class="component-desc">Send notification when we finish a download?</span> </label> </div> + <div class="field-pair"> + <input type="checkbox" name="libnotify_notify_onsubtitledownload" id="libnotify_notify_onsubtitledownload" #if $sickbeard.LIBNOTIFY_NOTIFY_ONSUBTITLEDOWNLOAD then "checked=\"checked\"" else ""# /> + <label class="clearfix" for="libnotify_notify_onsubtitledownload"> + <span class="component-title">Notify on Subtitle Download</span> + <span class="component-desc">Send notification when we finish a subtitle download?</span> + 
</label> + </div> <div class="testNotification" id="testLibnotify-result">Click below to test.</div> <input type="button" class="btn" value="Test Libnotify" id="testLibnotify" /> <input type="submit" class="btn config_submitter" value="Save Changes" /> @@ -706,6 +748,13 @@ <span class="component-desc">Send notification when we finish a download?</span> </label> </div> + <div class="field-pair"> + <input type="checkbox" name="pushover_notify_onsubtitledownload" id="pushover_notify_onsubtitledownload" #if $sickbeard.PUSHOVER_NOTIFY_ONSUBTITLEDOWNLOAD then "checked=\"checked\"" else ""# /> + <label class="clearfix" for="pushover_notify_onsubtitledownload"> + <span class="component-title">Notify on Subtitle Download</span> + <span class="component-desc">Send notification when we finish a subtitle download?</span> + </label> + </div> <div class="field-pair"> <label class="nocheck clearfix"> <span class="component-title">Pushover User Key</span> @@ -754,6 +803,13 @@ <span class="component-desc">Send notification when we finish a download?</span> </label> </div> + <div class="field-pair"> + <input type="checkbox" name="boxcar_notify_onsubtitledownload" id="boxcar_notify_onsubtitledownload" #if $sickbeard.BOXCAR_NOTIFY_ONSUBTITLEDOWNLOAD then "checked=\"checked\"" else ""# /> + <label class="clearfix" for="boxcar_notify_onsubtitledownload"> + <span class="component-title">Notify on Subtitle Download</span> + <span class="component-desc">Send notification when we finish a subtitle download?</span> + </label> + </div> <div class="field-pair"> <label class="nocheck clearfix"> <span class="component-title">Boxcar Username</span> @@ -803,6 +859,13 @@ <span class="component-desc">Send notification when we finish a download?</span> </label> </div> + <div class="field-pair"> + <input type="checkbox" name="nma_notify_onsubtitledownload" id="nma_notify_onsubtitledownload" #if $sickbeard.NMA_NOTIFY_ONSUBTITLEDOWNLOAD then "checked=\"checked\"" else ""# /> + <label class="clearfix" 
for="nma_notify_onsubtitledownload"> + <span class="component-title">Notify on Subtitle Download</span> + <span class="component-desc">Send notification when we finish a subtitle download?</span> + </label> + </div> <div class="field-pair"> <label class="nocheck clearfix"> <span class="component-title">NMA API key:</span> @@ -880,6 +943,13 @@ <span class="component-desc">Send notification when we finish a download?</span> </label> </div> + <div class="field-pair"> + <input type="checkbox" name="twitter_notify_onsubtitledownload" id="twitter_notify_onsubtitledownload" #if $sickbeard.TWITTER_NOTIFY_ONSUBTITLEDOWNLOAD then "checked=\"checked\"" else ""# /> + <label class="clearfix" for="twitter_notify_onsubtitledownload"> + <span class="component-title">Notify on Subtitle Download</span> + <span class="component-desc">Send notification when we finish a subtitle download?</span> + </label> + </div> <div class="field-pair"> <label class="clearfix"> <span class="component-title">Step One</span> diff --git a/data/interfaces/default/config_subtitles.tmpl b/data/interfaces/default/config_subtitles.tmpl new file mode 100644 index 0000000000000000000000000000000000000000..d21926a14aea3264c12804c8baf56085060b6c7a --- /dev/null +++ b/data/interfaces/default/config_subtitles.tmpl @@ -0,0 +1,139 @@ +#from sickbeard import subtitles +#import sickbeard + +#set global $title="Config - Subtitles" +#set global $header="Subtitles" + +#set global $sbPath="../.." 
+ +#set global $topmenu="config" +#import os.path +#include $os.path.join($sickbeard.PROG_DIR, "data/interfaces/default/inc_top.tmpl") + +<script type="text/javascript" src="$sbRoot/js/configSubtitles.js?$sbPID"></script> +<script type="text/javascript" src="$sbRoot/js/config.js"></script> +<script type="text/javascript" src="$sbRoot/js/lib/jquery.tokeninput.js"></script> +<link rel="stylesheet" type="text/css" href="$sbRoot/css/token-input.css" /> + +<script type="text/javascript"> + \$(document).ready(function() { + \$("#subtitles_languages").tokenInput( + [ + <%=",\r\n".join("{id: \"" + lang[2] + "\", name: \"" + lang[3] + "\"}" for lang in subtitles.subtitleLanguageFilter())%> + ], + { + method: "POST", + hintText: "Write to search a language and select it", + preventDuplicates: true, + prePopulate: + + [ + <%= + ",\r\n".join("{id: \"" + lang + "\", name: \"" + subtitles.getLanguageName(lang) + "\"}" for lang in sickbeard.SUBTITLES_LANGUAGES) if sickbeard.SUBTITLES_LANGUAGES != '' else '' + %> + ] + } + ); + }); +</script> + + + +<div id="config"> +<div id="config-content"> + +<form id="configForm" action="saveSubtitles" method="post"> + + <div id="config-components"> + + <div id="core-component-group4" class="component-group clearfix"> + + <div class="component-group-desc"> + <h3>Subtitles Search</h3> + <p>Settings that dictate how Sick Beard handles subtitles search results.</p> + </div> + + <fieldset class="component-group-list"> + <div class="field-pair"> + <input type="checkbox" class="enabler" #if $sickbeard.USE_SUBTITLES then " checked=\"checked\"" else ""# id="use_subtitles" name="use_subtitles"> + <label for="use_subtitles" class="clearfix"> + <span class="component-title">Search Subtitles</span> + </label> + </div> + <div id="content_use_subtitles"> + <div class="field-pair"> + <label class="nocheck clearfix"> + <span class="component-title">Subtitle Languages</span> + <input type="text" id="subtitles_languages" name="subtitles_languages" 
style="border: 0px none"/> + </label> + </div> + <div class="field-pair"> + <label class="nocheck clearfix"> + <span class="component-title">Subtitle Directory</span> + <input type="text" size="35" value="$sickbeard.SUBTITLES_DIR" id="subtitles_dir" name="subtitles_dir"> + </label> + <label class="nocheck clearfix"> + <span class="component-title"> </span> + <span class="component-desc">The directory where Sick Beard should store your <i>Subtitles</i> files.</span> + </label> + <label class="nocheck clearfix"> + <span class="component-title"> </span> + <span class="component-desc"><b>NOTE:</b> Leave empty if you want store subtitle in episode path.</span> + </label> + </div> + <div class="field-pair"> + <input type="checkbox" name="subtitles_history" id="subtitles_history" #if $sickbeard.SUBTITLES_HISTORY then " checked=\"checked\"" else ""#/> + <label class="clearfix" for="subtitles_history"> + <span class="component-title">Subtitles History</span> + <span class="component-desc">Log downloaded Subtitle on History page?</span> + </label> + </div> + <br/><input type="submit" class="btn config_submitter" value="Save Changes" /><br/> + </div> + </fieldset> + </div><!-- /component-group1 //--> + + <div id="core-component-group2" class="component-group clearfix"> + + <div class="component-group-desc"> + <h3>Subtitle Plugins</h3> + <p>Check off and drag the plugins into the order you want them to be used.</p> + <p class="note">At least one plugin is required.</p> + <p class="note"><span style="color: #654B24; font-size: 16px;">*</span> Web-scraping plugin</p> + </div> + + <fieldset class="component-group-list"> + <ul id="service_order_list"> + #for $curService in $sickbeard.subtitles.sortedServiceList(): + #set $curName = $curService.id + <li class="ui-state-default" id="$curName"> + <input type="checkbox" id="enable_$curName" class="service_enabler" #if $curService.enabled then "checked=\"checked\"" else ""#/> + <a href="$curService.url" class="imgLink" target="_new"> + 
<img src="$sbRoot/images/subtitles/$curService.image" alt="$curService.name" title="$curService.name" width="16" height="16" /> + </a> + $curService.name.capitalize() + #if not $curService.api_based then "*" else ""# + <span class="ui-icon ui-icon-arrowthick-2-n-s pull-right"></span> + </li> + #end for + </ul> + <input type="hidden" name="service_order" id="service_order" value="<%=" ".join([x.get('id')+':'+str(int(x.get('enabled'))) for x in sickbeard.subtitles.sortedServiceList()])%>"/> + + <br/><input type="submit" class="btn config_submitter" value="Save Changes" /><br/> + </fieldset> + + </div><!-- /component-group2 //--> + + <br/><input type="submit" class="btn config_submitter" value="Save Changes" /><br/> + + </div><!-- /config-components //--> + +</form> +</div></div> +<script type="text/javascript" charset="utf-8"> +<!-- + + jQuery('#subtitles_dir').fileBrowser({ title: 'Select Subtitles Download Directory' }); +//--> +</script> +#include $os.path.join($sickbeard.PROG_DIR, "data/interfaces/default/inc_bottom.tmpl") diff --git a/data/interfaces/default/displayShow.tmpl b/data/interfaces/default/displayShow.tmpl index 56be429540f96f774fe44aa63f883d536540553f..aab4105dcd1cbfa6b2b22d6eab605a43c3d6e33c 100644 --- a/data/interfaces/default/displayShow.tmpl +++ b/data/interfaces/default/displayShow.tmpl @@ -1,6 +1,8 @@ #import sickbeard +#from sickbeard import subtitles #import sickbeard.helpers #from sickbeard.common import * +#import subliminal #import os.path, os #import datetime #set global $title=$show.name @@ -46,6 +48,7 @@ <script type="text/javascript" src="$sbRoot/js/displayShow.js?$sbPID"></script> <script type="text/javascript" src="$sbRoot/js/plotTooltip.js?$sbPID"></script> <script type="text/javascript" src="$sbRoot/js/ajaxEpSearch.js?$sbPID"></script> +<script type="text/javascript" src="$sbRoot/js/ajaxEpSubtitles.js?$sbPID"></script> <div class="align-left"><b>Change Show:</b> <div class="navShow"><img id="prevShow" width="16" height="18" 
src="$sbRoot/images/prev.gif" alt="<<" title="Prev Show" /></div> @@ -95,6 +98,9 @@ replace with: <b><%=", ".join([Quality.qualityStrings[x] for x in sorted(bestQua </td> </tr> <tr><td class="showLegend">Custom Search Names:</td><td>$show.custom_search_names</td></tr> + #if $sickbeard.USE_SUBTITLES + <tr><td class="showLegend">Subtitles: </td><td><img src="$sbRoot/images/#if int($show.subtitles) == 1 then "yes16.png\" alt=\"Y" else "no16.png\" alt=\"N"#" width="16" height="16" /></td></tr> +#end if <tr><td class="showLegend">Flatten Folders: </td><td><img src="$sbRoot/images/#if $show.flatten_folders == 1 or $sickbeard.NAMING_FORCE_FOLDERS then "yes16.png\" alt=\"Y" else "no16.png\" alt=\"N"#" width="16" height="16" /></td></tr> <tr><td class="showLegend">Paused: </td><td><img src="$sbRoot/images/#if int($show.paused) == 1 then "yes16.png\" alt=\"Y" else "no16.png\" alt=\"N"#" width="16" height="16" /></td></tr> <tr><td class="showLegend">Air-by-Date: </td><td><img src="$sbRoot/images/#if int($show.air_by_date) == 1 then "yes16.png\" alt=\"Y" else "no16.png\" alt=\"N"#" width="16" height="16" /></td></tr> @@ -148,13 +154,13 @@ Change Audio of selected episodes to #for $epResult in $sqlResults: #if int($epResult["season"]) != $curSeason: - <tr><td colspan="9"><a name="season-$epResult["season"]"></a></td></tr> + <tr><td colspan="10"><a name="season-$epResult["season"]"></a></td></tr> <tr class="seasonheader" id="season-$epResult["season"]"> <td colspan="9"> <h2>#if int($epResult["season"]) == 0 then "Specials" else "Season "+str($epResult["season"])#</h2> </td> </tr> - <tr id="season-$epResult["season"]-cols"><th width="1%"><input type="checkbox" class="seasonCheck" id="$epResult["season"]" /></th><th>NFO</th><th>TBN</th><th>Episode</th><th>Name</th><th class="nowrap">Airdate</th><th>Filename</th><th>Audio</th><th>Status</th><th>Search</th></tr> + <tr id="season-$epResult["season"]-cols"><th width="1%"><input type="checkbox" class="seasonCheck" 
id="$epResult["season"]" /></th><th>NFO</th><th>TBN</th><th>Episode</th><th>Name</th><th class="nowrap">Airdate</th><th>Filename</th><th>Audio</th>#if $sickbeard.USE_SUBTITLES and $show.subtitles then "<th>Subs</th>" else ""#<th>Status</th><th>Search</th></tr> #set $curSeason = int($epResult["season"]) #end if @@ -187,15 +193,31 @@ $epLoc <td align="center" class="audio_langs_column"> <img src="$sbRoot/images/flags/${epResult["audio_langs"]}.png" alt="$epResult["audio_langs"]" width="16" /> </td> +</small> + </td> +#if $sickbeard.USE_SUBTITLES and $show.subtitles: + <td id="subtitles_column" class="subtitles_column" align="center"> + #if $epResult["subtitles"]: + #for $sub_lang in subliminal.language.language_list($epResult["subtitles"].split(',')): + #if sub_lang.alpha2 != "" + <img src="$sbRoot/images/flags/${sub_lang.alpha2}.png" width="16" height="11" alt="${sub_lang}" /> + #end if + #end for + #end if + </td> +#end if #set $curStatus, $curQuality = $Quality.splitCompositeStatus(int($epResult["status"])) #if $curQuality != Quality.NONE: - <td class="status_column">$statusStrings[$curStatus] <span class="quality $Quality.qualityStrings[$curQuality].replace("720p","HD720p").replace("1080p","HD1080p").replace("RawHD TV", "RawHD").replace("HD TV", "HD720p")">$Quality.qualityStrings[$curQuality]</span></td> + <td class="status_column" align="center">$statusStrings[$curStatus] <span class="quality $Quality.qualityStrings[$curQuality].replace("720p","HD720p").replace("1080p","HD1080p").replace("RawHD TV", "RawHD").replace("HD TV", "HD720p")">$Quality.qualityStrings[$curQuality]</span></td> #else: - <td class="status_column">$statusStrings[$curStatus]</td> + <td class="status_column" align="center">$statusStrings[$curStatus]</td> #end if <td align="center"> #if int($epResult["season"]) != 0: <a class="epSearch" href="searchEpisode?show=$show.tvdbid&season=$epResult["season"]&episode=$epResult["episode"]"><img src="$sbRoot/images/search16.png" height="16" alt="search" 
title="Manual Search" /></a> + #end if + #if $sickbeard.USE_SUBTITLES and $show.subtitles and len(set(str($epResult["subtitles"]).split(',')).intersection(set($subtitles.wantedLanguages()))) < len($subtitles.wantedLanguages()) and $epResult["location"] + <a class="epSubtitlesSearch" href="searchEpisodeSubtitles?show=$show.tvdbid&season=$epResult["season"]&episode=$epResult["episode"]"><img src="$sbRoot/images/closed_captioning.png" height="16" alt="search subtitles" title="Search Subtitles" /></a> #end if </td> </tr> diff --git a/data/interfaces/default/editShow.tmpl b/data/interfaces/default/editShow.tmpl index be816155fc1a9499982de9cba6e57fb20234d3f4..70384f46c2474215e3d45a883e31ab107a69f730 100644 --- a/data/interfaces/default/editShow.tmpl +++ b/data/interfaces/default/editShow.tmpl @@ -79,7 +79,7 @@ Custom Search Names: <input type="text" name="custom_search_names" id="custom_se <br /> Flatten files (no folders): <input type="checkbox" name="flatten_folders" #if $show.flatten_folders == 1 and not $sickbeard.NAMING_FORCE_FOLDERS then "checked=\"checked\"" else ""# #if $sickbeard.NAMING_FORCE_FOLDERS then "disabled=\"disabled\"" else ""#/><br /><br /> Paused: <input type="checkbox" name="paused" #if $show.paused == 1 then "checked=\"checked\"" else ""# /><br /><br /> - +Download subtitles: <input type="checkbox" name="subtitles"#if $show.subtitles == 1 and $sickbeard.USE_SUBTITLES then " checked=\"checked\"" else ""##if not $sickbeard.USE_SUBTITLES then " disabled=\"disabled\"" else ""#/><br /><br /> Air by date: <input type="checkbox" name="air_by_date" #if $show.air_by_date == 1 then "checked=\"checked\"" else ""# /><br /> (check this if the show is released as Show.03.02.2010 rather than Show.S02E03) diff --git a/data/interfaces/default/history.tmpl b/data/interfaces/default/history.tmpl index 5da3b876c716a43f120678065a84508df9a75db1..06f4a1083dd0ec158a5cf238de4316de175eb21f 100644 --- a/data/interfaces/default/history.tmpl +++ 
b/data/interfaces/default/history.tmpl @@ -48,9 +48,13 @@ #set $curStatus, $curQuality = $Quality.splitCompositeStatus(int($hItem["action"])) <tr> <td class="nowrap">$datetime.datetime.strptime(str($hItem["date"]), $history.dateFormat)</td> + <td><a href="$sbRoot/home/displayShow?show=$hItem["showid"]#season-$hItem["season"]">$hItem["show_name"] - <%=str(hItem["season"]) +"x"+ "%02i" % int(hItem["episode"]) %></a></td> - <td align="center">$statusStrings[$curStatus]</td> - <td align="center"> + <td align="center" #if $curStatus == SUBTITLED then 'class="subtitles_column"' else ''#><span style="cursor: help;" title="$os.path.basename($hItem["resource"])">$statusStrings[$curStatus]</span> + #if $curStatus == SUBTITLED: + <img width="16" height="11" src="/images/flags/<%= hItem["resource"][len(hItem["resource"])-6:len(hItem["resource"])-4]+'.png'%>"> + #end if + </td><td align="center"> #if $curStatus == DOWNLOADED and $str($hItem["provider"]) == '-1': #set $match = $re.search("\-(\w+)\.\w{3}\Z", $os.path.basename($hItem["resource"])) #if $match: @@ -67,11 +71,15 @@ $hItem["provider"] #else #if $len($hItem["provider"]) > 0 + #if $curStatus == SNATCHED: #set $provider = $providers.getProviderClass($generic.GenericProvider.makeID($hItem["provider"])) #if $provider != None: <img src="$sbRoot/images/providers/<%=provider.imageName()%>" width="16" height="16" alt="$provider.name" title="$provider.name"/> #else: <img src="$sbRoot/images/providers/missing.png" width="16" height="16" alt="missing provider" title="missing provider"/> + #end if + #else: + <img src="$sbRoot/images/subtitles/<%=hItem["provider"]+'.png' %>" width="16" height="16" alt="$hItem["provider"]" title="<%=hItem["provider"].capitalize()%>"/> #end if #end if #end if diff --git a/data/interfaces/default/inc_addShowOptions.tmpl b/data/interfaces/default/inc_addShowOptions.tmpl index 784d5d2d6a1911261bf23a139fc7755b50973272..93e5dd35e58bf775a58fe1e0562c264dddfdc21b 100644 --- 
a/data/interfaces/default/inc_addShowOptions.tmpl +++ b/data/interfaces/default/inc_addShowOptions.tmpl @@ -1,6 +1,18 @@ #import sickbeard #from sickbeard import common #from sickbeard.common import * +#from sickbeard import subtitles + + #if $sickbeard.USE_SUBTITLES: + <div class="field-pair alt"> + <input type="checkbox" name="subtitles" id="subtitles" #if $sickbeard.SUBTITLES_DEFAULT then "checked=\"checked\"" else ""# /> + <label for="subtitles" class="clearfix"> + <span class="component-title">Subtitles</span> + <span class="component-desc">Download subtitles for this show?</span> + </label> + </div> + #end if + <div class="field-pair"> <label for="statusSelect" class="nocheck clearfix"> <span class="component-title"> diff --git a/data/interfaces/default/inc_top.tmpl b/data/interfaces/default/inc_top.tmpl index 7cf7fcf85a15efc30f5dca2efca21712a7da9621..288f2ef9ba87364d7350110dc49165824a8bfe11 100644 --- a/data/interfaces/default/inc_top.tmpl +++ b/data/interfaces/default/inc_top.tmpl @@ -3,7 +3,7 @@ <html> <head> <meta charset="utf-8"> - <title>Sick Beard - alpha Lang Version $sickbeard.version.SICKBEARD_VERSION - $title</title> + <title>Sick Beard - alpha $sickbeard.version.SICKBEARD_VERSION - $title</title> <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1"> <meta name="robots" content="noindex"> <!--[if lt IE 9]> @@ -119,7 +119,7 @@ <div id="header"> <a name="top"></a> <span id="logo"><a href="$sbRoot/home/" title="Sick Beard homepage"><img alt="Sick Beard" src="$sbRoot/images/sickbeard.png" width="150" height="72" /></a></span> - <span id="versiontext">alpha Lang Version <a href="https://github.com/midgetspy/Sick-Beard/wiki/ChangeLog" onclick="window.open(this.href, '_blank'); return false;">$sickbeard.version.SICKBEARD_VERSION</a></span> + <span id="versiontext">alpha <a href="https://github.com/midgetspy/Sick-Beard/wiki/ChangeLog" onclick="window.open(this.href, '_blank'); return false;">$sickbeard.version.SICKBEARD_VERSION</a></span> </div> 
<div class="navbar"> @@ -178,6 +178,9 @@ <li><a href="$sbRoot/manage/manageSearches/"><i class="icon-search"></i> Manage Searches</a></li> <li class="divider"></li> <li><a href="$sbRoot/manage/episodeStatuses/"><i class="icon-list-alt"></i> Episode Status Management</a></li> + <li class="divider"></li> + <li><a href="$sbRoot/manage/subtitleMissed/"><i class="icon-list-alt"></i> Manage Missed Subtitles</a></li> + <li class="divider"></li> </ul> </li> <li class="divider-vertical"></li> @@ -194,6 +197,8 @@ <li class="divider"></li> <li><a href="$sbRoot/config/postProcessing/"><i class="icon-cog"></i> Post Processing</a></li> <li class="divider"></li> + <li><a href="$sbRoot/config/subtitles/"><i class="icon-cog"></i> Subtitles Settings</a></li> + <li class="divider"></li> <li><a href="$sbRoot/config/notifications/"><i class="icon-cog"></i> Notifications</a></li> </ul> </li> diff --git a/data/interfaces/default/manage_subtitleMissed.tmpl b/data/interfaces/default/manage_subtitleMissed.tmpl new file mode 100644 index 0000000000000000000000000000000000000000..eacc851ad0272019491e7ca02940755813877ab8 --- /dev/null +++ b/data/interfaces/default/manage_subtitleMissed.tmpl @@ -0,0 +1,65 @@ +#import sickbeard +#import subliminal +#import datetime +#from sickbeard import common +#set global $title="Episode Overview" +#set global $header="Episode Overview" + +#set global $sbPath=".." 
+ +#set global $topmenu="manage"# +#import os.path +#include $os.path.join($sickbeard.PROG_DIR, "data/interfaces/default/inc_top.tmpl") +#if $varExists('header') + <h1 class="header">$header</h1> +#else + <h1 class="title">$title</h1> +#end if +#if $whichSubs: +#set subsLanguage = $subliminal.language.Language($whichSubs) if not $whichSubs == 'all' else 'All' +#end if +#if not $whichSubs or ($whichSubs and not $ep_counts): + +#if $whichSubs: +<h2>All of your episodes have $subsLanguage subtitles.</h2> +<br /> +#end if + +<form action="$sbRoot/manage/subtitleMissed" method="get"> +Manage episodes without <select name="whichSubs"> +<option value="all">All</option> +#for $sub_lang in $subliminal.language.language_list($sickbeard.SUBTITLES_LANGUAGES): +<option value="$sub_lang.alpha2">$sub_lang</option> +#end for +</select> +subtitles +<input class="btn" type="submit" value="Manage" /> +</form> + +#else + +<script type="text/javascript" src="$sbRoot/js/manageSubtitleMissed.js?$sbPID"></script> +<input type="hidden" id="selectSubLang" name="selectSubLang" value="$whichSubs" /> + +<form action="$sbRoot/manage/downloadSubtitleMissed" method="post"> +<h2>Episodes without $subsLanguage subtitles.</h2> +<br /> +Download missed subtitles for selected episodes <input class="btn" type="submit" value="Go" /> +<div class="pull-left"> + <button type="button" class="btn btn-mini selectAllShows" style="line-height: 8px;"><a>Select all</a></button> + <button type="button" class="btn btn-mini unselectAllShows" style="line-height: 8px;"><a>Clear all</a></button> +</div> +<br /><br /> +<table class="sickbeardTable" cellspacing="1" border="0" cellpadding="0"> +#for $cur_tvdb_id in $sorted_show_ids: + <tr id="$cur_tvdb_id"> + <th><input type="checkbox" class="allCheck" id="allCheck-$cur_tvdb_id" name="$cur_tvdb_id-all" checked="checked" /></th> + <th colspan="3" style="width: 100%; text-align: left;"><a class="whitelink" 
href="$sbRoot/home/displayShow?show=$cur_tvdb_id">$show_names[$cur_tvdb_id]</a> ($ep_counts[$cur_tvdb_id]) <input type="button" class="get_more_eps btn" id="$cur_tvdb_id" value="Expand" /></th> + </tr> +#end for +</table> +</form> + +#end if + +#include $os.path.join($sickbeard.PROG_DIR,"data/interfaces/default/inc_bottom.tmpl") diff --git a/data/js/addShowOptions.js b/data/js/addShowOptions.js index 72ba3b7c00978e26d4ea1a8538a6a8bb0d9f7ac8..21ab36b3f65dc2b9dc4b6cdcbcde0589b71dfd1f 100644 --- a/data/js/addShowOptions.js +++ b/data/js/addShowOptions.js @@ -10,6 +10,7 @@ $(document).ready(function () { anyQualities: anyQualArray.join(','), bestQualities: bestQualArray.join(','), audio_lang: $('#showLangSelect').val(), + subtitles: $('#subtitles').prop('checked'), defaultFlattenFolders: $('#flatten_folders').prop('checked')}); $(this).attr('disabled', true); $.pnotify({ @@ -19,7 +20,7 @@ $(document).ready(function () { }); }); - $('#statusSelect, #qualityPreset, #flatten_folders, #anyQualities, #bestQualities ,#showLangSelect').change(function () { + $('#statusSelect, #qualityPreset, #flatten_folders, #anyQualities, #bestQualities ,#showLangSelect, #subtitles').change(function () { $('#saveDefaultsButton').attr('disabled', false); }); diff --git a/data/js/ajaxEpSubtitles.js b/data/js/ajaxEpSubtitles.js new file mode 100644 index 0000000000000000000000000000000000000000..99094ef66aef345f0e3a1ee908e434700b6489ed --- /dev/null +++ b/data/js/ajaxEpSubtitles.js @@ -0,0 +1,49 @@ +(function(){ + $.fn.ajaxEpSubtitlesSearch = function(){ + $('.epSubtitlesSearch').click(function(){ + var subtitles_td = $(this).parent().siblings('.subtitles_column'); + var subtitles_search_link = $(this); + // fill with the ajax loading gif + subtitles_search_link.empty(); + subtitles_search_link.append($("<img/>").attr({"src": sbRoot+"/images/loading16_dddddd.gif", "alt": "", "title": "loading"})); + $.getJSON($(this).attr('href'), function(data){ + if (data.result != "failure" && data.result 
!= "No subtitles downloaded") { + // clear and update the subtitles column with new informations + var subtitles = data.subtitles.split(','); + subtitles_td.empty() + $.each(subtitles,function(index, language){ + if (language != "" && language != "und") { + if (index != subtitles.length - 1) { + subtitles_td.append($("<img/>").attr({"src": sbRoot+"/images/flags/"+language+".png", "alt": language, "width": 16, "height": 11}).css({'padding-right' : '6px','padding-bottom' : '4px'})); + } else { + subtitles_td.append($("<img/>").attr({"src": sbRoot+"/images/flags/"+language+".png", "alt": language, "width": 16, "height": 11}).css({'padding-bottom' : '4px'})); + } + } + }); + // don't allow other searches + subtitles_search_link.remove(); + } else { + subtitles_search_link.remove(); + } + }); + + // don't follow the link + return false; + }); + }; + + $.fn.ajaxEpMergeSubtitles = function(){ + $('.epMergeSubtitles').click(function(){ + var subtitles_merge_link = $(this); + // fill with the ajax loading gif + subtitles_merge_link.empty(); + subtitles_merge_link.append($("<img/>").attr({"src": sbRoot+"/images/loading16_dddddd.gif", "alt": "", "title": "loading"})); + $.getJSON($(this).attr('href'), function(data){ + // don't allow other merges + subtitles_merge_link.remove(); + }); + // don't follow the link + return false; + }); + } +})(); diff --git a/data/js/configSubtitles.js b/data/js/configSubtitles.js new file mode 100644 index 0000000000000000000000000000000000000000..57100e347b782b4cbf25d4747b9d6ec510fea17a --- /dev/null +++ b/data/js/configSubtitles.js @@ -0,0 +1,69 @@ +$(document).ready(function(){ + + $.fn.showHideServices = function() { + $('.serviceDiv').each(function(){ + var serviceName = $(this).attr('id'); + var selectedService = $('#editAService :selected').val(); + + if (selectedService+'Div' == serviceName) + $(this).show(); + else + $(this).hide(); + + }); + } + + $.fn.addService = function (id, name, url, key, isDefault, showService) { + + if 
(url.match('/$') == null) + url = url + '/' + + var newData = [isDefault, [name, url, key]]; + + if ($('#service_order_list > #'+id).length == 0 && showService != false) { + var toAdd = '<li class="ui-state-default" id="'+id+'"> <input type="checkbox" id="enable_'+id+'" class="service_enabler" CHECKED> <a href="'+url+'" class="imgLink" target="_new"><img src="'+sbRoot+'/images/services/newznab.gif" alt="'+name+'" width="16" height="16"></a> '+name+'</li>' + + $('#service_order_list').append(toAdd); + $('#service_order_list').sortable("refresh"); + } + + } + + $.fn.deleteService = function (id) { + $('#service_order_list > #'+id).remove(); + } + + $.fn.refreshServiceList = function() { + var idArr = $("#service_order_list").sortable('toArray'); + var finalArr = new Array(); + $.each(idArr, function(key, val) { + var checked = + $('#enable_'+val).prop('checked') ? '1' : '0'; + finalArr.push(val + ':' + checked); + }); + + $("#service_order").val(finalArr.join(' ')); + } + + $('#editAService').change(function(){ + $(this).showHideServices(); + }); + + $('.service_enabler').live('click', function(){ + $(this).refreshServiceList(); + }); + + + // initialization stuff + + $(this).showHideServices(); + + $("#service_order_list").sortable({ + placeholder: 'ui-state-highlight', + update: function (event, ui) { + $(this).refreshServiceList(); + } + }); + + $("#service_order_list").disableSelection(); + +}); \ No newline at end of file diff --git a/data/js/displayShow.js b/data/js/displayShow.js index 9c0160f0934909de495314237f82047fcd579791..7dc7a937515dd3fe6948bf3b5b73d0445e96845a 100644 --- a/data/js/displayShow.js +++ b/data/js/displayShow.js @@ -1,6 +1,7 @@ $(document).ready(function(){ $('#sbRoot').ajaxEpSearch({'colorRow': true}); + $('#sbRoot').ajaxEpSubtitlesSearch({'colorRow': true}); $('#seasonJump').change(function() { var id = $(this).val(); diff --git a/data/js/lib/jquery.tokeninput.js b/data/js/lib/jquery.tokeninput.js new file mode 100644 index 
0000000000000000000000000000000000000000..672be5221329f73fd771e6e99d2a2c0ca2ec31cb --- /dev/null +++ b/data/js/lib/jquery.tokeninput.js @@ -0,0 +1,861 @@ +/* + * jQuery Plugin: Tokenizing Autocomplete Text Entry + * Version 1.6.0 + * + * Copyright (c) 2009 James Smith (http://loopj.com) + * Licensed jointly under the GPL and MIT licenses, + * choose which one suits your project best! + * + */ + +(function ($) { +// Default settings +var DEFAULT_SETTINGS = { + // Search settings + method: "GET", + contentType: "json", + queryParam: "q", + searchDelay: 300, + minChars: 1, + propertyToSearch: "name", + jsonContainer: null, + + // Display settings + hintText: "Type in a search term", + noResultsText: "No results", + searchingText: "Searching...", + deleteText: "×", + animateDropdown: true, + + // Tokenization settings + tokenLimit: null, + tokenDelimiter: ",", + preventDuplicates: false, + + // Output settings + tokenValue: "id", + + // Prepopulation settings + prePopulate: null, + processPrePopulate: true, + + // Manipulation settings + idPrefix: "token-input-", + + // Formatters + resultsFormatter: function(item){ return "<li><img src='/images/flags/"+item["id"]+".png' /> " + item[this.propertyToSearch]+ "</li>" }, + tokenFormatter: function(item) { return "<li><img src='/images/flags/"+item["id"]+".png' /> <p>" + item[this.propertyToSearch] + "</p></li>" }, + flag: "flag", + + // Callbacks + onResult: null, + onAdd: null, + onDelete: null, + onReady: null +}; + +// Default classes to use when theming +var DEFAULT_CLASSES = { + tokenList: "token-input-list", + token: "token-input-token", + tokenDelete: "token-input-delete-token", + selectedToken: "token-input-selected-token", + highlightedToken: "token-input-highlighted-token", + dropdown: "token-input-dropdown", + dropdownItem: "token-input-dropdown-item", + dropdownItem2: "token-input-dropdown-item2", + selectedDropdownItem: "token-input-selected-dropdown-item", + inputToken: "token-input-input-token" +}; + +// 
Input box position "enum" +var POSITION = { + BEFORE: 0, + AFTER: 1, + END: 2 +}; + +// Keys "enum" +var KEY = { + BACKSPACE: 8, + TAB: 9, + ENTER: 13, + ESCAPE: 27, + SPACE: 32, + PAGE_UP: 33, + PAGE_DOWN: 34, + END: 35, + HOME: 36, + LEFT: 37, + UP: 38, + RIGHT: 39, + DOWN: 40, + NUMPAD_ENTER: 108, + COMMA: 188 +}; + +// Additional public (exposed) methods +var methods = { + init: function(url_or_data_or_function, options) { + var settings = $.extend({}, DEFAULT_SETTINGS, options || {}); + + return this.each(function () { + $(this).data("tokenInputObject", new $.TokenList(this, url_or_data_or_function, settings)); + }); + }, + clear: function() { + this.data("tokenInputObject").clear(); + return this; + }, + add: function(item) { + this.data("tokenInputObject").add(item); + return this; + }, + remove: function(item) { + this.data("tokenInputObject").remove(item); + return this; + }, + get: function() { + return this.data("tokenInputObject").getTokens(); + } +} + +// Expose the .tokenInput function to jQuery as a plugin +$.fn.tokenInput = function (method) { + // Method calling and initialization logic + if(methods[method]) { + return methods[method].apply(this, Array.prototype.slice.call(arguments, 1)); + } else { + return methods.init.apply(this, arguments); + } +}; + +// TokenList class for each input +$.TokenList = function (input, url_or_data, settings) { + // + // Initialization + // + + // Configure the data source + if($.type(url_or_data) === "string" || $.type(url_or_data) === "function") { + // Set the url to query against + settings.url = url_or_data; + + // If the URL is a function, evaluate it here to do our initalization work + var url = computeURL(); + + // Make a smart guess about cross-domain if it wasn't explicitly specified + if(settings.crossDomain === undefined) { + if(url.indexOf("://") === -1) { + settings.crossDomain = false; + } else { + settings.crossDomain = (location.href.split(/\/+/g)[1] !== url.split(/\/+/g)[1]); + } + } + } else 
if(typeof(url_or_data) === "object") { + // Set the local data to search through + settings.local_data = url_or_data; + } + + // Build class names + if(settings.classes) { + // Use custom class names + settings.classes = $.extend({}, DEFAULT_CLASSES, settings.classes); + } else if(settings.theme) { + // Use theme-suffixed default class names + settings.classes = {}; + $.each(DEFAULT_CLASSES, function(key, value) { + settings.classes[key] = value + "-" + settings.theme; + }); + } else { + settings.classes = DEFAULT_CLASSES; + } + + + // Save the tokens + var saved_tokens = []; + + // Keep track of the number of tokens in the list + var token_count = 0; + + // Basic cache to save on db hits + var cache = new $.TokenList.Cache(); + + // Keep track of the timeout, old vals + var timeout; + var input_val; + + // Create a new text input an attach keyup events + var input_box = $("<input type=\"text\" autocomplete=\"off\" >") + .css({ + outline: "none" + }) + .attr("id", settings.idPrefix + input.id) + .focus(function () { + if (settings.tokenLimit === null || settings.tokenLimit !== token_count) { + show_dropdown_hint(); + } + }) + .blur(function () { + hide_dropdown(); + $(this).val(""); + }) + .bind("keyup keydown blur update", resize_input) + .keydown(function (event) { + var previous_token; + var next_token; + + switch(event.keyCode) { + case KEY.LEFT: + case KEY.RIGHT: + case KEY.UP: + case KEY.DOWN: + if(!$(this).val()) { + previous_token = input_token.prev(); + next_token = input_token.next(); + + if((previous_token.length && previous_token.get(0) === selected_token) || (next_token.length && next_token.get(0) === selected_token)) { + // Check if there is a previous/next token and it is selected + if(event.keyCode === KEY.LEFT || event.keyCode === KEY.UP) { + deselect_token($(selected_token), POSITION.BEFORE); + } else { + deselect_token($(selected_token), POSITION.AFTER); + } + } else if((event.keyCode === KEY.LEFT || event.keyCode === KEY.UP) && 
previous_token.length) { + // We are moving left, select the previous token if it exists + select_token($(previous_token.get(0))); + } else if((event.keyCode === KEY.RIGHT || event.keyCode === KEY.DOWN) && next_token.length) { + // We are moving right, select the next token if it exists + select_token($(next_token.get(0))); + } + } else { + var dropdown_item = null; + + if(event.keyCode === KEY.DOWN || event.keyCode === KEY.RIGHT) { + dropdown_item = $(selected_dropdown_item).next(); + } else { + dropdown_item = $(selected_dropdown_item).prev(); + } + + if(dropdown_item.length) { + select_dropdown_item(dropdown_item); + } + return false; + } + break; + + case KEY.BACKSPACE: + previous_token = input_token.prev(); + + if(!$(this).val().length) { + if(selected_token) { + delete_token($(selected_token)); + hidden_input.change(); + } else if(previous_token.length) { + select_token($(previous_token.get(0))); + } + + return false; + } else if($(this).val().length === 1) { + hide_dropdown(); + } else { + // set a timeout just long enough to let this function finish. + setTimeout(function(){do_search();}, 5); + } + break; + + case KEY.TAB: + case KEY.ENTER: + case KEY.NUMPAD_ENTER: + case KEY.COMMA: + if(selected_dropdown_item) { + add_token($(selected_dropdown_item).data("tokeninput")); + hidden_input.change(); + return false; + } + break; + + case KEY.ESCAPE: + hide_dropdown(); + return true; + + default: + if(String.fromCharCode(event.which)) { + // set a timeout just long enough to let this function finish. 
+ setTimeout(function(){do_search();}, 5); + } + break; + } + }); + + // Keep a reference to the original input box + var hidden_input = $(input) + .hide() + .val("") + .focus(function () { + input_box.focus(); + }) + .blur(function () { + input_box.blur(); + }); + + // Keep a reference to the selected token and dropdown item + var selected_token = null; + var selected_token_index = 0; + var selected_dropdown_item = null; + + // The list to store the token items in + var token_list = $("<ul />") + .addClass(settings.classes.tokenList) + .click(function (event) { + var li = $(event.target).closest("li"); + if(li && li.get(0) && $.data(li.get(0), "tokeninput")) { + toggle_select_token(li); + } else { + // Deselect selected token + if(selected_token) { + deselect_token($(selected_token), POSITION.END); + } + + // Focus input box + input_box.focus(); + } + }) + .mouseover(function (event) { + var li = $(event.target).closest("li"); + if(li && selected_token !== this) { + li.addClass(settings.classes.highlightedToken); + } + }) + .mouseout(function (event) { + var li = $(event.target).closest("li"); + if(li && selected_token !== this) { + li.removeClass(settings.classes.highlightedToken); + } + }) + .insertBefore(hidden_input); + + // The token holding the input box + var input_token = $("<li />") + .addClass(settings.classes.inputToken) + .appendTo(token_list) + .append(input_box); + + // The list to store the dropdown items in + var dropdown = $("<div>") + .addClass(settings.classes.dropdown) + .appendTo("body") + .hide(); + + // Magic element to help us resize the text input + var input_resizer = $("<tester/>") + .insertAfter(input_box) + .css({ + position: "absolute", + top: -9999, + left: -9999, + width: "auto", + fontSize: input_box.css("fontSize"), + fontFamily: input_box.css("fontFamily"), + fontWeight: input_box.css("fontWeight"), + letterSpacing: input_box.css("letterSpacing"), + whiteSpace: "nowrap" + }); + + // Pre-populate list if items exist + 
hidden_input.val(""); + var li_data = settings.prePopulate || hidden_input.data("pre"); + if(settings.processPrePopulate && $.isFunction(settings.onResult)) { + li_data = settings.onResult.call(hidden_input, li_data); + } + if(li_data && li_data.length) { + $.each(li_data, function (index, value) { + insert_token(value); + checkTokenLimit(); + }); + } + + // Initialization is done + if($.isFunction(settings.onReady)) { + settings.onReady.call(); + } + + // + // Public functions + // + + this.clear = function() { + token_list.children("li").each(function() { + if ($(this).children("input").length === 0) { + delete_token($(this)); + } + }); + } + + this.add = function(item) { + add_token(item); + } + + this.remove = function(item) { + token_list.children("li").each(function() { + if ($(this).children("input").length === 0) { + var currToken = $(this).data("tokeninput"); + var match = true; + for (var prop in item) { + if (item[prop] !== currToken[prop]) { + match = false; + break; + } + } + if (match) { + delete_token($(this)); + } + } + }); + } + + this.getTokens = function() { + return saved_tokens; + } + + // + // Private functions + // + + function checkTokenLimit() { + if(settings.tokenLimit !== null && token_count >= settings.tokenLimit) { + input_box.hide(); + hide_dropdown(); + return; + } + } + + function resize_input() { + if(input_val === (input_val = input_box.val())) {return;} + + // Enter new content into resizer and resize input accordingly + var escaped = input_val.replace(/&/g, '&amp;').replace(/\s/g,'&nbsp;').replace(/</g, '&lt;').replace(/>/g, '&gt;'); + input_resizer.html(escaped); + input_box.width(input_resizer.width() + 30); + } + + function is_printable_character(keycode) { + return ((keycode >= 48 && keycode <= 90) || // 0-1a-z + (keycode >= 96 && keycode <= 111) || // numpad 0-9 + - / * . + (keycode >= 186 && keycode <= 192) || // ; = , - . 
/ ^ + (keycode >= 219 && keycode <= 222)); // ( \ ) ' + } + + // Inner function to a token to the list + function insert_token(item) { + var this_token = settings.tokenFormatter(item); + this_token = $(this_token) + .addClass(settings.classes.token) + .insertBefore(input_token); + + // The 'delete token' button + $("<span>" + settings.deleteText + "</span>") + .addClass(settings.classes.tokenDelete) + .appendTo(this_token) + .click(function () { + delete_token($(this).parent()); + hidden_input.change(); + return false; + }); + + // Store data on the token + var token_data = {"id": item.id}; + token_data[settings.propertyToSearch] = item[settings.propertyToSearch]; + $.data(this_token.get(0), "tokeninput", item); + + // Save this token for duplicate checking + saved_tokens = saved_tokens.slice(0,selected_token_index).concat([token_data]).concat(saved_tokens.slice(selected_token_index)); + selected_token_index++; + + // Update the hidden input + update_hidden_input(saved_tokens, hidden_input); + + token_count += 1; + + // Check the token limit + if(settings.tokenLimit !== null && token_count >= settings.tokenLimit) { + input_box.hide(); + hide_dropdown(); + } + + return this_token; + } + + // Add a token to the token list based on user input + function add_token (item) { + var callback = settings.onAdd; + + // See if the token already exists and select it if we don't want duplicates + if(token_count > 0 && settings.preventDuplicates) { + var found_existing_token = null; + token_list.children().each(function () { + var existing_token = $(this); + var existing_data = $.data(existing_token.get(0), "tokeninput"); + if(existing_data && existing_data.id === item.id) { + found_existing_token = existing_token; + return false; + } + }); + + if(found_existing_token) { + select_token(found_existing_token); + input_token.insertAfter(found_existing_token); + input_box.focus(); + return; + } + } + + // Insert the new tokens + if(settings.tokenLimit == null || token_count < 
settings.tokenLimit) { + insert_token(item); + checkTokenLimit(); + } + + // Clear input box + input_box.val(""); + + // Don't show the help dropdown, they've got the idea + hide_dropdown(); + + // Execute the onAdd callback if defined + if($.isFunction(callback)) { + callback.call(hidden_input,item); + } + } + + // Select a token in the token list + function select_token (token) { + token.addClass(settings.classes.selectedToken); + selected_token = token.get(0); + + // Hide input box + input_box.val(""); + + // Hide dropdown if it is visible (eg if we clicked to select token) + hide_dropdown(); + } + + // Deselect a token in the token list + function deselect_token (token, position) { + token.removeClass(settings.classes.selectedToken); + selected_token = null; + + if(position === POSITION.BEFORE) { + input_token.insertBefore(token); + selected_token_index--; + } else if(position === POSITION.AFTER) { + input_token.insertAfter(token); + selected_token_index++; + } else { + input_token.appendTo(token_list); + selected_token_index = token_count; + } + + // Show the input box and give it focus again + input_box.focus(); + } + + // Toggle selection of a token in the token list + function toggle_select_token(token) { + var previous_selected_token = selected_token; + + if(selected_token) { + deselect_token($(selected_token), POSITION.END); + } + + if(previous_selected_token === token.get(0)) { + deselect_token(token, POSITION.END); + } else { + select_token(token); + } + } + + // Delete a token from the token list + function delete_token (token) { + // Remove the id from the saved list + var token_data = $.data(token.get(0), "tokeninput"); + var callback = settings.onDelete; + + var index = token.prevAll().length; + if(index > selected_token_index) index--; + + // Delete the token + token.remove(); + selected_token = null; + + // Show the input box and give it focus again + input_box.focus(); + + // Remove this token from the saved list + saved_tokens = 
saved_tokens.slice(0,index).concat(saved_tokens.slice(index+1)); + if(index < selected_token_index) selected_token_index--; + + // Update the hidden input + update_hidden_input(saved_tokens, hidden_input); + + token_count -= 1; + + if(settings.tokenLimit !== null) { + input_box + .show() + .val("") + .focus(); + } + + // Execute the onDelete callback if defined + if($.isFunction(callback)) { + callback.call(hidden_input,token_data); + } + } + + // Update the hidden input box value + function update_hidden_input(saved_tokens, hidden_input) { + var token_values = $.map(saved_tokens, function (el) { + return el[settings.tokenValue]; + }); + hidden_input.val(token_values.join(settings.tokenDelimiter)); + + } + + // Hide and clear the results dropdown + function hide_dropdown () { + dropdown.hide().empty(); + selected_dropdown_item = null; + } + + function show_dropdown() { + dropdown + .css({ + position: "absolute", + top: $(token_list).offset().top + $(token_list).outerHeight(), + left: $(token_list).offset().left, + zindex: 999 + }) + .show(); + } + + function show_dropdown_searching () { + if(settings.searchingText) { + dropdown.html("<p>"+settings.searchingText+"</p>"); + show_dropdown(); + } + } + + function show_dropdown_hint () { + if(settings.hintText) { + dropdown.html("<p>"+settings.hintText+"</p>"); + show_dropdown(); + } + } + + // Highlight the query part of the search term + function highlight_term(value, term) { + return value.replace(new RegExp("(?![^&;]+;)(?!<[^<>]*)(" + term + ")(?![^<>]*>)(?![^&;]+;)", "gi"), "<b>$1</b>"); + } + + function find_value_and_highlight_term(template, value, term) { + return template.replace(new RegExp("(?![^&;]+;)(?!<[^<>]*)(" + value + ")(?![^<>]*>)(?![^&;]+;)", "g"), highlight_term(value, term)); + } + + // Populate the results dropdown with some results + function populate_dropdown (query, results) { + if(results && results.length) { + dropdown.empty(); + var dropdown_ul = $("<ul>") + .appendTo(dropdown) + 
.mouseover(function (event) { + select_dropdown_item($(event.target).closest("li")); + }) + .mousedown(function (event) { + add_token($(event.target).closest("li").data("tokeninput")); + hidden_input.change(); + return false; + }) + .hide(); + + $.each(results, function(index, value) { + var this_li = settings.resultsFormatter(value); + + this_li = find_value_and_highlight_term(this_li ,value[settings.propertyToSearch], query); + + this_li = $(this_li).appendTo(dropdown_ul); + + if(index % 2) { + this_li.addClass(settings.classes.dropdownItem); + } else { + this_li.addClass(settings.classes.dropdownItem2); + } + + if(index === 0) { + select_dropdown_item(this_li); + } + + $.data(this_li.get(0), "tokeninput", value); + }); + + show_dropdown(); + + if(settings.animateDropdown) { + dropdown_ul.slideDown("fast"); + } else { + dropdown_ul.show(); + } + } else { + if(settings.noResultsText) { + dropdown.html("<p>"+settings.noResultsText+"</p>"); + show_dropdown(); + } + } + } + + // Highlight an item in the results dropdown + function select_dropdown_item (item) { + if(item) { + if(selected_dropdown_item) { + deselect_dropdown_item($(selected_dropdown_item)); + } + + item.addClass(settings.classes.selectedDropdownItem); + selected_dropdown_item = item.get(0); + } + } + + // Remove highlighting from an item in the results dropdown + function deselect_dropdown_item (item) { + item.removeClass(settings.classes.selectedDropdownItem); + selected_dropdown_item = null; + } + + // Do a search and show the "searching" dropdown if the input is longer + // than settings.minChars + function do_search() { + var query = input_box.val().toLowerCase(); + + if(query && query.length) { + if(selected_token) { + deselect_token($(selected_token), POSITION.AFTER); + } + + if(query.length >= settings.minChars) { + show_dropdown_searching(); + clearTimeout(timeout); + + timeout = setTimeout(function(){ + run_search(query); + }, settings.searchDelay); + } else { + hide_dropdown(); + } + } + } + 
+ // Do the actual search + function run_search(query) { + var cache_key = query + computeURL(); + var cached_results = cache.get(cache_key); + if(cached_results) { + populate_dropdown(query, cached_results); + } else { + // Are we doing an ajax search or local data search? + if(settings.url) { + var url = computeURL(); + // Extract exisiting get params + var ajax_params = {}; + ajax_params.data = {}; + if(url.indexOf("?") > -1) { + var parts = url.split("?"); + ajax_params.url = parts[0]; + + var param_array = parts[1].split("&"); + $.each(param_array, function (index, value) { + var kv = value.split("="); + ajax_params.data[kv[0]] = kv[1]; + }); + } else { + ajax_params.url = url; + } + + // Prepare the request + ajax_params.data[settings.queryParam] = query; + ajax_params.type = settings.method; + ajax_params.dataType = settings.contentType; + if(settings.crossDomain) { + ajax_params.dataType = "jsonp"; + } + + // Attach the success callback + ajax_params.success = function(results) { + if($.isFunction(settings.onResult)) { + results = settings.onResult.call(hidden_input, results); + } + cache.add(cache_key, settings.jsonContainer ? results[settings.jsonContainer] : results); + + // only populate the dropdown if the results are associated with the active search query + if(input_box.val().toLowerCase() === query) { + populate_dropdown(query, settings.jsonContainer ? 
results[settings.jsonContainer] : results); + } + }; + + // Make the request + $.ajax(ajax_params); + } else if(settings.local_data) { + // Do the search through local data + var results = $.grep(settings.local_data, function (row) { + return row[settings.propertyToSearch].toLowerCase().indexOf(query.toLowerCase()) > -1; + }); + + if($.isFunction(settings.onResult)) { + results = settings.onResult.call(hidden_input, results); + } + cache.add(cache_key, results); + populate_dropdown(query, results); + } + } + } + + // compute the dynamic URL + function computeURL() { + var url = settings.url; + if(typeof settings.url == 'function') { + url = settings.url.call(); + } + return url; + } +}; + +// Really basic cache for the results +$.TokenList.Cache = function (options) { + var settings = $.extend({ + max_size: 500 + }, options); + + var data = {}; + var size = 0; + + var flush = function () { + data = {}; + size = 0; + }; + + this.add = function (query, results) { + if(size > settings.max_size) { + flush(); + } + + if(!data[query]) { + size += 1; + } + + data[query] = results; + }; + + this.get = function (query) { + return data[query]; + }; +}; +}(jQuery)); diff --git a/data/js/manageSubtitleMissed.js b/data/js/manageSubtitleMissed.js new file mode 100644 index 0000000000000000000000000000000000000000..4202055b0d86bac97b7464137130cd128bddcc74 --- /dev/null +++ b/data/js/manageSubtitleMissed.js @@ -0,0 +1,72 @@ +$(document).ready(function() { + + function make_row(tvdb_id, season, episode, name, subtitles, checked) { + if (checked) + var checked = ' checked'; + else + var checked = ''; + + var row = ''; + row += ' <tr class="good">'; + row += ' <td><input type="checkbox" class="'+tvdb_id+'-epcheck" name="'+tvdb_id+'-'+season+'x'+episode+'"'+checked+'></td>'; + row += ' <td style="width: 1%;">'+season+'x'+episode+'</td>'; + row += ' <td>'+name+'</td>'; + row += ' <td style="float: right;">'; + subtitles = subtitles.split(',') + for (i in subtitles) + { + row += ' <img 
src="/images/flags/'+subtitles[i]+'.png" width="16" height="11" alt="'+subtitles[i]+'" /> '; + } + row += ' </td>'; + row += ' </tr>' + + return row; + } + + $('.allCheck').click(function(){ + var tvdb_id = $(this).attr('id').split('-')[1]; + $('.'+tvdb_id+'-epcheck').prop('checked', $(this).prop('checked')); + }); + + $('.get_more_eps').click(function(){ + var cur_tvdb_id = $(this).attr('id'); + var checked = $('#allCheck-'+cur_tvdb_id).prop('checked'); + var last_row = $('tr#'+cur_tvdb_id); + + $.getJSON(sbRoot+'/manage/showSubtitleMissed', + { + tvdb_id: cur_tvdb_id, + whichSubs: $('#selectSubLang').val() + }, + function (data) { + $.each(data, function(season,eps){ + $.each(eps, function(episode, data) { + //alert(season+'x'+episode+': '+name); + last_row.after(make_row(cur_tvdb_id, season, episode, data.name, data.subtitles, checked)); + }); + }); + }); + $(this).hide(); + }); + + // selects all visible episode checkboxes. + $('.selectAllShows').click(function(){ + $('.allCheck').each(function(){ + this.checked = true; + }); + $('input[class*="-epcheck"]').each(function(){ + this.checked = true; + }); + }); + + // clears all visible episode checkboxes and the season selectors + $('.unselectAllShows').click(function(){ + $('.allCheck').each(function(){ + this.checked = false; + }); + $('input[class*="-epcheck"]').each(function(){ + this.checked = false; + }); + }); + +}); \ No newline at end of file diff --git a/data/js/script.js b/data/js/script.js new file mode 100644 index 0000000000000000000000000000000000000000..e9576f9d2f1400e5be79978ed522f56efd90367e --- /dev/null +++ b/data/js/script.js @@ -0,0 +1,131 @@ +function initHeader() { + //settings + var header = $("#header"); + var fadeSpeed = 100, fadeTo = 0.8, topDistance = 20; + var topbarME = function() { $(header).fadeTo(fadeSpeed,1); }, topbarML = function() { $(header).fadeTo(fadeSpeed,fadeTo); }; + var inside = false; + //do + $(window).scroll(function() { + position = $(window).scrollTop(); + 
// Show a transient feedback message in the #ajaxMsg area.
// msg:     HTML/text to display
// loader:  when truthy, prepend a loading spinner and pad the box
// timeout: when truthy, fade the message out after `ms` milliseconds
function showMsg(msg, loader, timeout, ms) {
    var feedback = $("#ajaxMsg");
    // `var` added: the original assigned `update` without declaring it,
    // leaking it as an implicit global.
    var update = $("#updatebar");
    if (update.is(":visible")) {
        // Keep the message clear of the visible update bar.
        var height = update.height() + 35;
        feedback.css("bottom", height + "px");
    } else {
        feedback.removeAttr("style");
    }
    feedback.fadeIn();
    // Build the message element once; the original declared `message`
    // twice with `var` in the same scope.
    var message;
    if (loader) {
        message = $("<div class='msg'><img src='interfaces/default/images/loader_black.gif' alt='loading' class='loader' style='position: relative;top:10px;margin-top:-15px; margin-left:-10px;'/>" + msg + "</div>");
        feedback.css("padding", "14px 10px");
    } else {
        message = $("<div class='msg'>" + msg + "</div>");
    }
    feedback.prepend(message);
    if (timeout) {
        // Remove the message, then hide the (now empty) feedback box.
        setTimeout(function() {
            message.fadeOut(function() {
                $(this).remove();
                feedback.fadeOut();
            });
        }, ms);
    }
}
"panel").find('.ui-tabs-panel').eq('0'); + } + + if (!$(this).data("topPositionTab")) { + $(this).data("topPositionTab", $(ui.panel).position().top) + } + + //Dont use the builtin fx effects. This will fade in/out both tabs, we dont want that + //Fadein the new tab yourself + $(ui.panel).hide().fadeIn(400); + + if (lastOpenedPanel) { + + // 1. Show the previous opened tab by removing the jQuery UI class + // 2. Make the tab temporary position:absolute so the two tabs will overlap + // 3. Set topposition so they will overlap if you go from tab 1 to tab 0 + // 4. Remove position:absolute after animation + lastOpenedPanel + .toggleClass("ui-tabs-hide") + .css("position", "absolute") + .css("top", $(this).data("topPositionTab") + "px") + .fadeOut(400, function() { + $(this) + .css("position", ""); + }); + + } + + //Saving the last tab has been opened + $(this).data("lastOpenedPanel", $(ui.panel)); + + } + + }); +} + +function init() { + initHeader(); + preventDefault(); + initFancybox(); + initTabs(); +} + +$(document).ready(function(){ + init(); +}); diff --git a/lib/enzyme/__init__.py b/lib/enzyme/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c283dd5941a3af428ac6412e3f30c62648029a56 --- /dev/null +++ b/lib/enzyme/__init__.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- +# enzyme - Video metadata parser +# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com> +# Copyright 2003-2006 Thomas Schueppel <stain@acm.org> +# Copyright 2003-2006 Dirk Meyer <dischi@freevo.org> +# +# This file is part of enzyme. +# +# enzyme is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# enzyme is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
def parse(path):
    """Parse metadata of the given video

    :param string path: path to the video file to parse
    :return: a parser corresponding to the video's mimetype or extension
    :rtype: :class:`~enzyme.core.AVContainer`

    """
    if not os.path.isfile(path):
        raise ValueError('Invalid path')
    extension = os.path.splitext(path)[1][1:]
    mimetype = mimetypes.guess_type(path)[0]
    # A mimetype match takes precedence over an extension match.
    by_mime = None
    by_ext = None
    for name, mimes, exts in PARSERS:
        if mimetype in mimes:
            by_mime = name
        if extension in exts:
            by_ext = name
    chosen = by_mime or by_ext
    if not chosen:
        raise NoParserError()
    # Relative import of the sibling parser module (Python 2 semantics,
    # level=-1); each parser module exposes a Parser(fileobj) factory.
    mod = __import__(chosen, globals=globals(), locals=locals(), fromlist=[], level=-1)
    with open(path, 'rb') as f:
        return mod.Parser(f)
Copyright 2003-2006 Dirk Meyer <dischi@freevo.org> +# +# This file is part of enzyme. +# +# enzyme is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# enzyme is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with enzyme. If not, see <http://www.gnu.org/licenses/>. +from exceptions import ParseError +import core +import logging +import string +import struct + + +__all__ = ['Parser'] + + +# get logging object +log = logging.getLogger(__name__) + +def _guid(input): + # Remove any '-' + s = string.join(string.split(input, '-'), '') + r = '' + if len(s) != 32: + return '' + for i in range(0, 16): + r += chr(int(s[2 * i:2 * i + 2], 16)) + guid = struct.unpack('>IHHBB6s', r) + return guid + +GUIDS = { + 'ASF_Header_Object' : _guid('75B22630-668E-11CF-A6D9-00AA0062CE6C'), + 'ASF_Data_Object' : _guid('75B22636-668E-11CF-A6D9-00AA0062CE6C'), + 'ASF_Simple_Index_Object' : _guid('33000890-E5B1-11CF-89F4-00A0C90349CB'), + 'ASF_Index_Object' : _guid('D6E229D3-35DA-11D1-9034-00A0C90349BE'), + 'ASF_Media_Object_Index_Object' : _guid('FEB103F8-12AD-4C64-840F-2A1D2F7AD48C'), + 'ASF_Timecode_Index_Object' : _guid('3CB73FD0-0C4A-4803-953D-EDF7B6228F0C'), + + 'ASF_File_Properties_Object' : _guid('8CABDCA1-A947-11CF-8EE4-00C00C205365'), + 'ASF_Stream_Properties_Object' : _guid('B7DC0791-A9B7-11CF-8EE6-00C00C205365'), + 'ASF_Header_Extension_Object' : _guid('5FBF03B5-A92E-11CF-8EE3-00C00C205365'), + 'ASF_Codec_List_Object' : _guid('86D15240-311D-11D0-A3A4-00A0C90348F6'), + 'ASF_Script_Command_Object' : 
_guid('1EFB1A30-0B62-11D0-A39B-00A0C90348F6'), + 'ASF_Marker_Object' : _guid('F487CD01-A951-11CF-8EE6-00C00C205365'), + 'ASF_Bitrate_Mutual_Exclusion_Object' : _guid('D6E229DC-35DA-11D1-9034-00A0C90349BE'), + 'ASF_Error_Correction_Object' : _guid('75B22635-668E-11CF-A6D9-00AA0062CE6C'), + 'ASF_Content_Description_Object' : _guid('75B22633-668E-11CF-A6D9-00AA0062CE6C'), + 'ASF_Extended_Content_Description_Object' : _guid('D2D0A440-E307-11D2-97F0-00A0C95EA850'), + 'ASF_Content_Branding_Object' : _guid('2211B3FA-BD23-11D2-B4B7-00A0C955FC6E'), + 'ASF_Stream_Bitrate_Properties_Object' : _guid('7BF875CE-468D-11D1-8D82-006097C9A2B2'), + 'ASF_Content_Encryption_Object' : _guid('2211B3FB-BD23-11D2-B4B7-00A0C955FC6E'), + 'ASF_Extended_Content_Encryption_Object' : _guid('298AE614-2622-4C17-B935-DAE07EE9289C'), + 'ASF_Alt_Extended_Content_Encryption_Obj' : _guid('FF889EF1-ADEE-40DA-9E71-98704BB928CE'), + 'ASF_Digital_Signature_Object' : _guid('2211B3FC-BD23-11D2-B4B7-00A0C955FC6E'), + 'ASF_Padding_Object' : _guid('1806D474-CADF-4509-A4BA-9AABCB96AAE8'), + + 'ASF_Extended_Stream_Properties_Object' : _guid('14E6A5CB-C672-4332-8399-A96952065B5A'), + 'ASF_Advanced_Mutual_Exclusion_Object' : _guid('A08649CF-4775-4670-8A16-6E35357566CD'), + 'ASF_Group_Mutual_Exclusion_Object' : _guid('D1465A40-5A79-4338-B71B-E36B8FD6C249'), + 'ASF_Stream_Prioritization_Object' : _guid('D4FED15B-88D3-454F-81F0-ED5C45999E24'), + 'ASF_Bandwidth_Sharing_Object' : _guid('A69609E6-517B-11D2-B6AF-00C04FD908E9'), + 'ASF_Language_List_Object' : _guid('7C4346A9-EFE0-4BFC-B229-393EDE415C85'), + 'ASF_Metadata_Object' : _guid('C5F8CBEA-5BAF-4877-8467-AA8C44FA4CCA'), + 'ASF_Metadata_Library_Object' : _guid('44231C94-9498-49D1-A141-1D134E457054'), + 'ASF_Index_Parameters_Object' : _guid('D6E229DF-35DA-11D1-9034-00A0C90349BE'), + 'ASF_Media_Object_Index_Parameters_Obj' : _guid('6B203BAD-3F11-4E84-ACA8-D7613DE2CFA7'), + 'ASF_Timecode_Index_Parameters_Object' : _guid('F55E496D-9797-4B5D-8C8B-604DFE9BFB24'), + + 
'ASF_Audio_Media' : _guid('F8699E40-5B4D-11CF-A8FD-00805F5C442B'), + 'ASF_Video_Media' : _guid('BC19EFC0-5B4D-11CF-A8FD-00805F5C442B'), + 'ASF_Command_Media' : _guid('59DACFC0-59E6-11D0-A3AC-00A0C90348F6'), + 'ASF_JFIF_Media' : _guid('B61BE100-5B4E-11CF-A8FD-00805F5C442B'), + 'ASF_Degradable_JPEG_Media' : _guid('35907DE0-E415-11CF-A917-00805F5C442B'), + 'ASF_File_Transfer_Media' : _guid('91BD222C-F21C-497A-8B6D-5AA86BFC0185'), + 'ASF_Binary_Media' : _guid('3AFB65E2-47EF-40F2-AC2C-70A90D71D343'), + + 'ASF_Web_Stream_Media_Subtype' : _guid('776257D4-C627-41CB-8F81-7AC7FF1C40CC'), + 'ASF_Web_Stream_Format' : _guid('DA1E6B13-8359-4050-B398-388E965BF00C'), + + 'ASF_No_Error_Correction' : _guid('20FB5700-5B55-11CF-A8FD-00805F5C442B'), + 'ASF_Audio_Spread' : _guid('BFC3CD50-618F-11CF-8BB2-00AA00B4E220')} + + +class Asf(core.AVContainer): + """ + ASF video parser. The ASF format is also used for Microsft Windows + Media files like wmv. + """ + def __init__(self, file): + core.AVContainer.__init__(self) + self.mime = 'video/x-ms-asf' + self.type = 'asf format' + self._languages = [] + self._extinfo = {} + + h = file.read(30) + if len(h) < 30: + raise ParseError() + + (guidstr, objsize, objnum, reserved1, \ + reserved2) = struct.unpack('<16sQIBB', h) + guid = self._parseguid(guidstr) + + if (guid != GUIDS['ASF_Header_Object']): + raise ParseError() + if reserved1 != 0x01 or reserved2 != 0x02: + raise ParseError() + + log.debug(u'Header size: %d / %d objects' % (objsize, objnum)) + header = file.read(objsize - 30) + for _ in range(0, objnum): + h = self._getnextheader(header) + header = header[h[1]:] + + del self._languages + del self._extinfo + + + def _findstream(self, id): + for stream in self.video + self.audio: + if stream.id == id: + return stream + + def _apply_extinfo(self, streamid): + stream = self._findstream(streamid) + if not stream or streamid not in self._extinfo: + return + stream.bitrate, stream.fps, langid, metadata = self._extinfo[streamid] + if langid is 
not None and langid >= 0 and langid < len(self._languages): + stream.language = self._languages[langid] + if metadata: + stream._appendtable('ASFMETADATA', metadata) + + + def _parseguid(self, string): + return struct.unpack('<IHHBB6s', string[:16]) + + + def _parsekv(self, s): + pos = 0 + (descriptorlen,) = struct.unpack('<H', s[pos:pos + 2]) + pos += 2 + descriptorname = s[pos:pos + descriptorlen] + pos += descriptorlen + descriptortype, valuelen = struct.unpack('<HH', s[pos:pos + 4]) + pos += 4 + descriptorvalue = s[pos:pos + valuelen] + pos += valuelen + value = None + if descriptortype == 0x0000: + # Unicode string + value = descriptorvalue + elif descriptortype == 0x0001: + # Byte Array + value = descriptorvalue + elif descriptortype == 0x0002: + # Bool (?) + value = struct.unpack('<I', descriptorvalue)[0] != 0 + elif descriptortype == 0x0003: + # DWORD + value = struct.unpack('<I', descriptorvalue)[0] + elif descriptortype == 0x0004: + # QWORD + value = struct.unpack('<Q', descriptorvalue)[0] + elif descriptortype == 0x0005: + # WORD + value = struct.unpack('<H', descriptorvalue)[0] + else: + log.debug(u'Unknown Descriptor Type %d' % descriptortype) + return (pos, descriptorname, value) + + + def _parsekv2(self, s): + pos = 0 + strno, descriptorlen, descriptortype, valuelen = struct.unpack('<2xHHHI', s[pos:pos + 12]) + pos += 12 + descriptorname = s[pos:pos + descriptorlen] + pos += descriptorlen + descriptorvalue = s[pos:pos + valuelen] + pos += valuelen + value = None + + if descriptortype == 0x0000: + # Unicode string + value = descriptorvalue + elif descriptortype == 0x0001: + # Byte Array + value = descriptorvalue + elif descriptortype == 0x0002: + # Bool + value = struct.unpack('<H', descriptorvalue)[0] != 0 + pass + elif descriptortype == 0x0003: + # DWORD + value = struct.unpack('<I', descriptorvalue)[0] + elif descriptortype == 0x0004: + # QWORD + value = struct.unpack('<Q', descriptorvalue)[0] + elif descriptortype == 0x0005: + # WORD + value = 
struct.unpack('<H', descriptorvalue)[0] + else: + log.debug(u'Unknown Descriptor Type %d' % descriptortype) + return (pos, descriptorname, value, strno) + + + def _getnextheader(self, s): + r = struct.unpack('<16sQ', s[:24]) + (guidstr, objsize) = r + guid = self._parseguid(guidstr) + if guid == GUIDS['ASF_File_Properties_Object']: + log.debug(u'File Properties Object') + val = struct.unpack('<16s6Q4I', s[24:24 + 80]) + (fileid, size, date, packetcount, duration, \ + senddur, preroll, flags, minpack, maxpack, maxbr) = \ + val + # FIXME: parse date to timestamp + self.length = duration / 10000000.0 + + elif guid == GUIDS['ASF_Stream_Properties_Object']: + log.debug(u'Stream Properties Object [%d]' % objsize) + streamtype = self._parseguid(s[24:40]) + errortype = self._parseguid(s[40:56]) + offset, typelen, errorlen, flags = struct.unpack('<QIIH', s[56:74]) + strno = flags & 0x7f + encrypted = flags >> 15 + if encrypted: + self._set('encrypted', True) + if streamtype == GUIDS['ASF_Video_Media']: + vi = core.VideoStream() + vi.width, vi.height, depth, codec, = struct.unpack('<4xII2xH4s', s[89:89 + 20]) + vi.codec = codec + vi.id = strno + self.video.append(vi) + elif streamtype == GUIDS['ASF_Audio_Media']: + ai = core.AudioStream() + twocc, ai.channels, ai.samplerate, bitrate, block, \ + ai.samplebits, = struct.unpack('<HHIIHH', s[78:78 + 16]) + ai.bitrate = 8 * bitrate + ai.codec = twocc + ai.id = strno + self.audio.append(ai) + + self._apply_extinfo(strno) + + elif guid == GUIDS['ASF_Extended_Stream_Properties_Object']: + streamid, langid, frametime = struct.unpack('<HHQ', s[72:84]) + (bitrate,) = struct.unpack('<I', s[40:40 + 4]) + if streamid not in self._extinfo: + self._extinfo[streamid] = [None, None, None, {}] + if frametime == 0: + # Problaby VFR, report as 1000fps (which is what MPlayer does) + frametime = 10000.0 + self._extinfo[streamid][:3] = [bitrate, 10000000.0 / frametime, langid] + self._apply_extinfo(streamid) + + elif guid == 
GUIDS['ASF_Header_Extension_Object']: + log.debug(u'ASF_Header_Extension_Object %d' % objsize) + size = struct.unpack('<I', s[42:46])[0] + data = s[46:46 + size] + while len(data): + log.debug(u'Sub:') + h = self._getnextheader(data) + data = data[h[1]:] + + elif guid == GUIDS['ASF_Codec_List_Object']: + log.debug(u'List Object') + pass + + elif guid == GUIDS['ASF_Error_Correction_Object']: + log.debug(u'Error Correction') + pass + + elif guid == GUIDS['ASF_Content_Description_Object']: + log.debug(u'Content Description Object') + val = struct.unpack('<5H', s[24:24 + 10]) + pos = 34 + strings = [] + for i in val: + ss = s[pos:pos + i].replace('\0', '').lstrip().rstrip() + strings.append(ss) + pos += i + + # Set empty strings to None + strings = [x or None for x in strings] + self.title, self.artist, self.copyright, self.caption, rating = strings + + elif guid == GUIDS['ASF_Extended_Content_Description_Object']: + (count,) = struct.unpack('<H', s[24:26]) + pos = 26 + descriptor = {} + for i in range(0, count): + # Read additional content descriptors + d = self._parsekv(s[pos:]) + pos += d[0] + descriptor[d[1]] = d[2] + self._appendtable('ASFDESCRIPTOR', descriptor) + + elif guid == GUIDS['ASF_Metadata_Object']: + (count,) = struct.unpack('<H', s[24:26]) + pos = 26 + streams = {} + for i in range(0, count): + # Read additional content descriptors + size, key, value, strno = self._parsekv2(s[pos:]) + if strno not in streams: + streams[strno] = {} + streams[strno][key] = value + pos += size + + for strno, metadata in streams.items(): + if strno not in self._extinfo: + self._extinfo[strno] = [None, None, None, {}] + self._extinfo[strno][3].update(metadata) + self._apply_extinfo(strno) + + elif guid == GUIDS['ASF_Language_List_Object']: + count = struct.unpack('<H', s[24:26])[0] + pos = 26 + for i in range(0, count): + idlen = struct.unpack('<B', s[pos:pos + 1])[0] + idstring = s[pos + 1:pos + 1 + idlen] + idstring = unicode(idstring, 'utf-16').replace('\0', '') + 
def Parser(file):
    """
    Wrapper around audio and av content.

    Returns the full Asf container when the file has video streams (or no
    audio at all); otherwise copies the shared metadata keys onto an
    AsfAudio instance and returns that instead.
    """
    container = Asf(file)
    audio_only = len(container.audio) and not len(container.video)
    if not audio_only:
        # AV container
        return container
    # No video but audio streams. Handle has audio core
    audio = AsfAudio()
    for key in [k for k in audio._keys if k in container._keys]:
        if not getattr(audio, key, None):
            setattr(audio, key, getattr(container, key))
    return audio
+# +# enzyme is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# enzyme is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with enzyme. If not, see <http://www.gnu.org/licenses/>. +import re +import logging +import fourcc +import language +from strutils import str_to_unicode, unicode_to_str + +UNPRINTABLE_KEYS = ['thumbnail', 'url', 'codec_private'] +MEDIACORE = ['title', 'caption', 'comment', 'size', 'type', 'subtype', 'timestamp', + 'keywords', 'country', 'language', 'langcode', 'url', 'artist', + 'mime', 'datetime', 'tags', 'hash'] +AUDIOCORE = ['channels', 'samplerate', 'length', 'encoder', 'codec', 'format', + 'samplebits', 'bitrate', 'fourcc', 'trackno', 'id', 'userdate', + 'enabled', 'default', 'codec_private'] +MUSICCORE = ['trackof', 'album', 'genre', 'discs', 'thumbnail'] +VIDEOCORE = ['length', 'encoder', 'bitrate', 'samplerate', 'codec', 'format', + 'samplebits', 'width', 'height', 'fps', 'aspect', 'trackno', + 'fourcc', 'id', 'enabled', 'default', 'codec_private'] +AVCORE = ['length', 'encoder', 'trackno', 'trackof', 'copyright', 'product', + 'genre', 'writer', 'producer', 'studio', 'rating', 'actors', 'thumbnail', + 'delay', 'image', 'video', 'audio', 'subtitles', 'chapters', 'software', + 'summary', 'synopsis', 'season', 'episode', 'series'] + +# get logging object +log = logging.getLogger(__name__) + + +class Media(object): + """ + Media is the base class to all Media Metadata Containers. It defines + the basic structures that handle metadata. 
Media and its derivates + contain a common set of metadata attributes that is listed in keys. + Specific derivates contain additional keys to the dublin core set that is + defined in Media. + """ + media = None + _keys = MEDIACORE + table_mapping = {} + + def __init__(self, hash=None): + if hash is not None: + # create Media based on dict + for key, value in hash.items(): + if isinstance(value, list) and value and isinstance(value[0], dict): + value = [Media(x) for x in value] + self._set(key, value) + return + + self._keys = self._keys[:] + self.tables = {} + # Tags, unlike tables, are more well-defined dicts whose values are + # either Tag objects, other dicts (for nested tags), or lists of either + # (for multiple instances of the tag, e.g. actor). Where possible, + # parsers should transform tag names to conform to the Official + # Matroska tags defined at http://www.matroska.org/technical/specs/tagging/index.html + # All tag names will be lower-cased. + self.tags = Tags() + for key in set(self._keys) - set(['media', 'tags']): + setattr(self, key, None) + + # + # unicode and string convertion for debugging + # + #TODO: Fix that mess + def __unicode__(self): + result = u'' + + # print normal attributes + lists = [] + for key in self._keys: + value = getattr(self, key, None) + if value == None or key == 'url': + continue + if isinstance(value, list): + if not value: + continue + elif isinstance(value[0], basestring): + # Just a list of strings (keywords?), so don't treat it specially. + value = u', '.join(value) + else: + lists.append((key, value)) + continue + elif isinstance(value, dict): + # Tables or tags treated separately. + continue + if key in UNPRINTABLE_KEYS: + value = '<unprintable data, size=%d>' % len(value) + result += u'| %10s: %s\n' % (unicode(key), unicode(value)) + + # print tags (recursively, to support nested tags). 
+ def print_tags(tags, suffix, show_label): + result = '' + for n, (name, tag) in enumerate(tags.items()): + result += u'| %12s%s%s = ' % (u'tags: ' if n == 0 and show_label else '', suffix, name) + if isinstance(tag, list): + # TODO: doesn't support lists/dicts within lists. + result += u'%s\n' % ', '.join(subtag.value for subtag in tag) + else: + result += u'%s\n' % (tag.value or '') + if isinstance(tag, dict): + result += print_tags(tag, ' ', False) + return result + result += print_tags(self.tags, '', True) + + # print lists + for key, l in lists: + for n, item in enumerate(l): + label = '+-- ' + key.rstrip('s').capitalize() + if key not in ['tracks', 'subtitles', 'chapters']: + label += ' Track' + result += u'%s #%d\n' % (label, n + 1) + result += '| ' + re.sub(r'\n(.)', r'\n| \1', unicode(item)) + + # print tables + #FIXME: WTH? +# if log.level >= 10: +# for name, table in self.tables.items(): +# result += '+-- Table %s\n' % str(name) +# for key, value in table.items(): +# try: +# value = unicode(value) +# if len(value) > 50: +# value = u'<unprintable data, size=%d>' % len(value) +# except (UnicodeDecodeError, TypeError): +# try: +# value = u'<unprintable data, size=%d>' % len(value) +# except AttributeError: +# value = u'<unprintable data>' +# result += u'| | %s: %s\n' % (unicode(key), value) + return result + + def __str__(self): + return unicode(self).encode() + + def __repr__(self): + if hasattr(self, 'url'): + return '<%s %s>' % (str(self.__class__)[8:-2], self.url) + else: + return '<%s>' % (str(self.__class__)[8:-2]) + + # + # internal functions + # + def _appendtable(self, name, hashmap): + """ + Appends a tables of additional metadata to the Object. + If such a table already exists, the given tables items are + added to the existing one. 
+ """ + if name not in self.tables: + self.tables[name] = hashmap + else: + # Append to the already existing table + for k in hashmap.keys(): + self.tables[name][k] = hashmap[k] + + def _set(self, key, value): + """ + Set key to value and add the key to the internal keys list if + missing. + """ + if value is None and getattr(self, key, None) is None: + return + if isinstance(value, str): + value = str_to_unicode(value) + setattr(self, key, value) + if not key in self._keys: + self._keys.append(key) + + def _set_url(self, url): + """ + Set the URL of the source + """ + self.url = url + + def _finalize(self): + """ + Correct same data based on specific rules + """ + # make sure all strings are unicode + for key in self._keys: + if key in UNPRINTABLE_KEYS: + continue + value = getattr(self, key) + if value is None: + continue + if key == 'image': + if isinstance(value, unicode): + setattr(self, key, unicode_to_str(value)) + continue + if isinstance(value, str): + setattr(self, key, str_to_unicode(value)) + if isinstance(value, unicode): + setattr(self, key, value.strip().rstrip().replace(u'\0', u'')) + if isinstance(value, list) and value and isinstance(value[0], Media): + for submenu in value: + submenu._finalize() + + # copy needed tags from tables + for name, table in self.tables.items(): + mapping = self.table_mapping.get(name, {}) + for tag, attr in mapping.items(): + if self.get(attr): + continue + value = table.get(tag, None) + if value is not None: + if not isinstance(value, (str, unicode)): + value = str_to_unicode(str(value)) + elif isinstance(value, str): + value = str_to_unicode(value) + value = value.strip().rstrip().replace(u'\0', u'') + setattr(self, attr, value) + + if 'fourcc' in self._keys and 'codec' in self._keys and self.codec is not None: + # Codec may be a fourcc, in which case we resolve it to its actual + # name and set the fourcc attribute. 
class Tag(object):
    """
    An individual tag, which will be a value stored in a Tags object.

    Tag values are strings (for binary data), unicode objects, or datetime
    objects for tags that represent dates or times.
    """
    def __init__(self, value=None, langcode='und', binary=False):
        # NOTE: assigning ``langcode`` here goes through the property
        # setter below, which also derives ``self.language`` via
        # language.resolve().
        super(Tag, self).__init__()
        self.value = value
        self.langcode = langcode
        self.binary = binary

    def __unicode__(self):
        # Python 2 unicode conversion of the raw value.
        return unicode(self.value)

    def __str__(self):
        return str(self.value)

    def __repr__(self):
        # Binary payloads are summarised by size so reprs stay printable.
        if not self.binary:
            return '<Tag object: %s>' % repr(self.value)
        else:
            return '<Binary Tag object: size=%d>' % len(self.value)

    @property
    def langcode(self):
        # Normalised code as produced by language.resolve() in the setter.
        return self._langcode

    @langcode.setter
    def langcode(self, code):
        # Keep the human-readable ``language`` attribute in sync with the
        # normalised code.
        self._langcode, self.language = language.resolve(code)
+ """ + _keys = Media._keys + VIDEOCORE + + +class Chapter(Media): + """ + Chapter in a Multiplexed Container. + """ + _keys = ['enabled', 'name', 'pos', 'id'] + + def __init__(self, name=None, pos=0): + Media.__init__(self) + self.name = name + self.pos = pos + self.enabled = True + + +class Subtitle(Media): + """ + Subtitle Tracks in a Multiplexed Container. + """ + _keys = ['enabled', 'default', 'langcode', 'language', 'trackno', 'title', + 'id', 'codec'] + + def __init__(self, language=None): + Media.__init__(self) + self.language = language + + +class AVContainer(Media): + """ + Container for Audio and Video streams. This is the Container Type for + all media, that contain more than one stream. + """ + _keys = Media._keys + AVCORE + + def __init__(self): + Media.__init__(self) + self.audio = [] + self.video = [] + self.subtitles = [] + self.chapters = [] + + def _finalize(self): + """ + Correct same data based on specific rules + """ + Media._finalize(self) + if not self.length and len(self.video) and self.video[0].length: + self.length = 0 + # Length not specified for container, so use the largest length + # of its tracks as container length. + for track in self.video + self.audio: + if track.length: + self.length = max(self.length, track.length) diff --git a/lib/enzyme/exceptions.py b/lib/enzyme/exceptions.py new file mode 100644 index 0000000000000000000000000000000000000000..5bc4f7acccb30722e5313c6e3fae29bc8b458db9 --- /dev/null +++ b/lib/enzyme/exceptions.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- +# enzyme - Video metadata parser +# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com> +# +# This file is part of enzyme. +# +# enzyme is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. 
class Error(Exception):
    """Root of the enzyme exception hierarchy."""


class NoParserError(Error):
    """Raised when no parser capable of handling a file can be found."""


class ParseError(Error):
    """Raised when a parser fails while extracting metadata from a file."""
from exceptions import ParseError
import core
import logging
import struct

__all__ = ['Parser']


# get logging object
log = logging.getLogger(__name__)

# FLV tag types -- one byte at the start of every tag header
FLV_TAG_TYPE_AUDIO = 0x08
FLV_TAG_TYPE_VIDEO = 0x09
FLV_TAG_TYPE_META = 0x12

# audio flags (packed into the first byte of an audio tag body)
FLV_AUDIO_CHANNEL_MASK = 0x01
FLV_AUDIO_SAMPLERATE_MASK = 0x0c
FLV_AUDIO_CODECID_MASK = 0xf0

FLV_AUDIO_SAMPLERATE_OFFSET = 2
FLV_AUDIO_CODECID_OFFSET = 4
# maps the 4-bit FLV audio codec id to a TWOCC-style codec code
FLV_AUDIO_CODECID = (0x0001, 0x0002, 0x0055, 0x0001)

# video flags (packed into the first byte of a video tag body)
FLV_VIDEO_CODECID_MASK = 0x0f
FLV_VIDEO_CODECID = ('FLV1', 'MSS1', 'VP60') # wild guess

# AMF0 value type markers used inside the onMetaData script tag
FLV_DATA_TYPE_NUMBER = 0x00
FLV_DATA_TYPE_BOOL = 0x01
FLV_DATA_TYPE_STRING = 0x02
FLV_DATA_TYPE_OBJECT = 0x03
FLC_DATA_TYPE_CLIP = 0x04
FLV_DATA_TYPE_REFERENCE = 0x07
FLV_DATA_TYPE_ECMARRAY = 0x08
FLV_DATA_TYPE_ENDOBJECT = 0x09
FLV_DATA_TYPE_ARRAY = 0x0a
FLV_DATA_TYPE_DATE = 0x0b
FLV_DATA_TYPE_LONGSTRING = 0x0c

# metadata keys copied onto the container via _appendtable
FLVINFO = {
    'creator': 'copyright',
}

class FlashVideo(core.AVContainer):
    """
    Experimental parser for Flash videos. It requires certain flags to
    be set to report video resolutions and in most cases it does not
    provide that information.
    """
    table_mapping = { 'FLVINFO' : FLVINFO }

    def __init__(self, file):
        """
        Parse the FLV header of ``file`` and scan its first tags for one
        audio stream, one video stream and an onMetaData script tag.

        :param file: file-like object positioned at the start of the data
        :raises ParseError: if the leading 'FLV' signature is missing
        """
        core.AVContainer.__init__(self)
        self.mime = 'video/flv'
        self.type = 'Flash Video'
        # 13-byte prefix: 'FLV' signature, version, flags, header size
        # (>3sBBII) plus the first 4-byte PreviousTagSize field.
        data = file.read(13)
        if len(data) < 13 or struct.unpack('>3sBBII', data)[0] != 'FLV':
            raise ParseError()

        # Inspect at most the first 10 tags; stop early once both an
        # audio and a video stream have been seen.
        for _ in range(10):
            if self.audio and self.video:
                break
            # 11-byte tag header: type, 24-bit payload size, timestamp, id
            data = file.read(11)
            if len(data) < 11:
                break
            chunk = struct.unpack('>BH4BI', data)
            # payload size is 24-bit big-endian: high 16 bits + next byte
            size = (chunk[1] << 8) + chunk[2]

            if chunk[0] == FLV_TAG_TYPE_AUDIO:
                flags = ord(file.read(1))
                if not self.audio:
                    a = core.AudioStream()
                    a.channels = (flags & FLV_AUDIO_CHANNEL_MASK) + 1
                    srate = (flags & FLV_AUDIO_SAMPLERATE_MASK)
                    # sample rate index 0..3 -> 5512/11025/22050/44100 Hz
                    a.samplerate = (44100 << (srate >> FLV_AUDIO_SAMPLERATE_OFFSET) >> 3)
                    codec = (flags & FLV_AUDIO_CODECID_MASK) >> FLV_AUDIO_CODECID_OFFSET
                    if codec < len(FLV_AUDIO_CODECID):
                        a.codec = FLV_AUDIO_CODECID[codec]
                    self.audio.append(a)

                # skip the rest of the tag body (flag byte already consumed)
                file.seek(size - 1, 1)

            elif chunk[0] == FLV_TAG_TYPE_VIDEO:
                flags = ord(file.read(1))
                if not self.video:
                    v = core.VideoStream()
                    # FLV video codec ids start at 2 in FLV_VIDEO_CODECID
                    codec = (flags & FLV_VIDEO_CODECID_MASK) - 2
                    if codec < len(FLV_VIDEO_CODECID):
                        v.codec = FLV_VIDEO_CODECID[codec]
                    # width and height are in the meta packet, but I have
                    # no file with such a packet inside. So maybe we have
                    # to decode some parts of the video.
                    self.video.append(v)

                # skip the rest of the tag body (flag byte already consumed)
                file.seek(size - 1, 1)

            elif chunk[0] == FLV_TAG_TYPE_META:
                log.info(u'metadata %r', str(chunk))
                metadata = file.read(size)
                try:
                    # walk the AMF0 values; dicts carry the onMetaData keys
                    while metadata:
                        length, value = self._parse_value(metadata)
                        if isinstance(value, dict):
                            log.info(u'metadata: %r', value)
                            if value.get('creator'):
                                self.copyright = value.get('creator')
                            if value.get('width'):
                                self.width = value.get('width')
                            if value.get('height'):
                                self.height = value.get('height')
                            if value.get('duration'):
                                self.length = value.get('duration')
                            self._appendtable('FLVINFO', value)
                        if not length:
                            # parse error
                            break
                        metadata = metadata[length:]
                except (IndexError, struct.error, TypeError):
                    # best-effort metadata parsing; ignore malformed data
                    pass
            else:
                log.info(u'unkown %r', str(chunk))
                file.seek(size, 1)

            # skip the trailing 4-byte PreviousTagSize field
            file.seek(4, 1)

    def _parse_value(self, data):
        """
        Parse the next metadata value.

        Returns a ``(length, value)`` tuple where ``length`` is the number
        of bytes consumed from ``data``; a length of 0 signals a parse
        error (with ``value`` holding whatever was recovered, or None).
        """
        if ord(data[0]) == FLV_DATA_TYPE_NUMBER:
            # 8-byte big-endian IEEE double
            value = struct.unpack('>d', data[1:9])[0]
            return 9, value

        if ord(data[0]) == FLV_DATA_TYPE_BOOL:
            return 2, bool(data[1])

        if ord(data[0]) == FLV_DATA_TYPE_STRING:
            # 16-bit big-endian length followed by the raw string bytes
            length = (ord(data[1]) << 8) + ord(data[2])
            return length + 3, data[3:length + 3]

        if ord(data[0]) == FLV_DATA_TYPE_ECMARRAY:
            # 32-bit entry count followed by (length-prefixed key, value) pairs
            init_length = len(data)
            num = struct.unpack('>I', data[1:5])[0]
            data = data[5:]
            result = {}
            for _ in range(num):
                length = (ord(data[0]) << 8) + ord(data[1])
                key = data[2:length + 2]
                data = data[length + 2:]
                length, value = self._parse_value(data)
                if not length:
                    # nested parse error -- return what was recovered so far
                    return 0, result
                result[key] = value
                data = data[length:]
            # bytes consumed = original length minus what is left
            return init_length - len(data), result

        log.info(u'unknown code: %x. Stop metadata parser', ord(data[0]))
        return 0, None


Parser = FlashVideo
+ """ + if isinstance(code, basestring): + codec = u'Unknown' + # Check for twocc + if re.match(r'^0x[\da-f]{1,4}$', code, re.I): + # Twocc in hex form + return code, TWOCC.get(int(code, 16), codec) + elif code.isdigit() and 0 <= int(code) <= 0xff: + # Twocc in decimal form + return hex(int(code)), TWOCC.get(int(code), codec) + elif len(code) == 2: + code = struct.unpack('H', code)[0] + return hex(code), TWOCC.get(code, codec) + elif len(code) != 4 and len([x for x in code if x not in string.printable]) == 0: + # Code is a printable string. + codec = unicode(code) + + if code[:2] == 'MS' and code[2:].upper() in FOURCC: + code = code[2:] + + if code.upper() in FOURCC: + return code.upper(), unicode(FOURCC[code.upper()]) + return None, codec + elif isinstance(code, (int, long)): + return hex(code), TWOCC.get(code, u'Unknown') + + return None, u'Unknown' + + +TWOCC = { + 0x0000: 'Unknown Wave Format', + 0x0001: 'PCM', + 0x0002: 'Microsoft ADPCM', + 0x0003: 'IEEE Float', + 0x0004: 'Compaq Computer VSELP', + 0x0005: 'IBM CVSD', + 0x0006: 'A-Law', + 0x0007: 'mu-Law', + 0x0008: 'Microsoft DTS', + 0x0009: 'Microsoft DRM', + 0x0010: 'OKI ADPCM', + 0x0011: 'Intel DVI/IMA ADPCM', + 0x0012: 'Videologic MediaSpace ADPCM', + 0x0013: 'Sierra Semiconductor ADPCM', + 0x0014: 'Antex Electronics G.723 ADPCM', + 0x0015: 'DSP Solutions DigiSTD', + 0x0016: 'DSP Solutions DigiFIX', + 0x0017: 'Dialogic OKI ADPCM', + 0x0018: 'MediaVision ADPCM', + 0x0019: 'Hewlett-Packard CU', + 0x0020: 'Yamaha ADPCM', + 0x0021: 'Speech Compression Sonarc', + 0x0022: 'DSP Group TrueSpeech', + 0x0023: 'Echo Speech EchoSC1', + 0x0024: 'Audiofile AF36', + 0x0025: 'Audio Processing Technology APTX', + 0x0026: 'AudioFile AF10', + 0x0027: 'Prosody 1612', + 0x0028: 'LRC', + 0x0030: 'Dolby AC2', + 0x0031: 'Microsoft GSM 6.10', + 0x0032: 'MSNAudio', + 0x0033: 'Antex Electronics ADPCME', + 0x0034: 'Control Resources VQLPC', + 0x0035: 'DSP Solutions DigiREAL', + 0x0036: 'DSP Solutions DigiADPCM', + 0x0037: 'Control 
Resources CR10', + 0x0038: 'Natural MicroSystems VBXADPCM', + 0x0039: 'Crystal Semiconductor IMA ADPCM', + 0x003A: 'EchoSC3', + 0x003B: 'Rockwell ADPCM', + 0x003C: 'Rockwell Digit LK', + 0x003D: 'Xebec', + 0x0040: 'Antex Electronics G.721 ADPCM', + 0x0041: 'G.728 CELP', + 0x0042: 'MSG723', + 0x0043: 'IBM AVC ADPCM', + 0x0045: 'ITU-T G.726 ADPCM', + 0x0050: 'MPEG 1, Layer 1,2', + 0x0052: 'RT24', + 0x0053: 'PAC', + 0x0055: 'MPEG Layer 3', + 0x0059: 'Lucent G.723', + 0x0060: 'Cirrus', + 0x0061: 'ESPCM', + 0x0062: 'Voxware', + 0x0063: 'Canopus Atrac', + 0x0064: 'G.726 ADPCM', + 0x0065: 'G.722 ADPCM', + 0x0066: 'DSAT', + 0x0067: 'DSAT Display', + 0x0069: 'Voxware Byte Aligned', + 0x0070: 'Voxware AC8', + 0x0071: 'Voxware AC10', + 0x0072: 'Voxware AC16', + 0x0073: 'Voxware AC20', + 0x0074: 'Voxware MetaVoice', + 0x0075: 'Voxware MetaSound', + 0x0076: 'Voxware RT29HW', + 0x0077: 'Voxware VR12', + 0x0078: 'Voxware VR18', + 0x0079: 'Voxware TQ40', + 0x0080: 'Softsound', + 0x0081: 'Voxware TQ60', + 0x0082: 'MSRT24', + 0x0083: 'G.729A', + 0x0084: 'MVI MV12', + 0x0085: 'DF G.726', + 0x0086: 'DF GSM610', + 0x0088: 'ISIAudio', + 0x0089: 'Onlive', + 0x0091: 'SBC24', + 0x0092: 'Dolby AC3 SPDIF', + 0x0093: 'MediaSonic G.723', + 0x0094: 'Aculab PLC Prosody 8KBPS', + 0x0097: 'ZyXEL ADPCM', + 0x0098: 'Philips LPCBB', + 0x0099: 'Packed', + 0x00A0: 'Malden Electronics PHONYTALK', + 0x00FF: 'AAC', + 0x0100: 'Rhetorex ADPCM', + 0x0101: 'IBM mu-law', + 0x0102: 'IBM A-law', + 0x0103: 'IBM AVC Adaptive Differential Pulse Code Modulation', + 0x0111: 'Vivo G.723', + 0x0112: 'Vivo Siren', + 0x0123: 'Digital G.723', + 0x0125: 'Sanyo LD ADPCM', + 0x0130: 'Sipro Lab Telecom ACELP.net', + 0x0131: 'Sipro Lab Telecom ACELP.4800', + 0x0132: 'Sipro Lab Telecom ACELP.8V3', + 0x0133: 'Sipro Lab Telecom ACELP.G.729', + 0x0134: 'Sipro Lab Telecom ACELP.G.729A', + 0x0135: 'Sipro Lab Telecom ACELP.KELVIN', + 0x0140: 'Windows Media Video V8', + 0x0150: 'Qualcomm PureVoice', + 0x0151: 'Qualcomm HalfRate', + 
0x0155: 'Ring Zero Systems TUB GSM', + 0x0160: 'Windows Media Audio V1 / DivX audio (WMA)', + 0x0161: 'Windows Media Audio V7 / V8 / V9', + 0x0162: 'Windows Media Audio Professional V9', + 0x0163: 'Windows Media Audio Lossless V9', + 0x0170: 'UNISYS NAP ADPCM', + 0x0171: 'UNISYS NAP ULAW', + 0x0172: 'UNISYS NAP ALAW', + 0x0173: 'UNISYS NAP 16K', + 0x0200: 'Creative Labs ADPCM', + 0x0202: 'Creative Labs Fastspeech8', + 0x0203: 'Creative Labs Fastspeech10', + 0x0210: 'UHER Informatic ADPCM', + 0x0215: 'Ulead DV ACM', + 0x0216: 'Ulead DV ACM', + 0x0220: 'Quarterdeck', + 0x0230: 'I-link Worldwide ILINK VC', + 0x0240: 'Aureal Semiconductor RAW SPORT', + 0x0241: 'ESST AC3', + 0x0250: 'Interactive Products HSX', + 0x0251: 'Interactive Products RPELP', + 0x0260: 'Consistent Software CS2', + 0x0270: 'Sony ATRAC3 (SCX, same as MiniDisk LP2)', + 0x0300: 'Fujitsu FM Towns Snd', + 0x0400: 'BTV Digital', + 0x0401: 'Intel Music Coder (IMC)', + 0x0402: 'Ligos Indeo Audio', + 0x0450: 'QDesign Music', + 0x0680: 'VME VMPCM', + 0x0681: 'AT&T Labs TPC', + 0x0700: 'YMPEG Alpha', + 0x08AE: 'ClearJump LiteWave', + 0x1000: 'Olivetti GSM', + 0x1001: 'Olivetti ADPCM', + 0x1002: 'Olivetti CELP', + 0x1003: 'Olivetti SBC', + 0x1004: 'Olivetti OPR', + 0x1100: 'Lernout & Hauspie LH Codec', + 0x1101: 'Lernout & Hauspie CELP codec', + 0x1102: 'Lernout & Hauspie SBC codec', + 0x1103: 'Lernout & Hauspie SBC codec', + 0x1104: 'Lernout & Hauspie SBC codec', + 0x1400: 'Norris', + 0x1401: 'AT&T ISIAudio', + 0x1500: 'Soundspace Music Compression', + 0x181C: 'VoxWare RT24 speech codec', + 0x181E: 'Lucent elemedia AX24000P Music codec', + 0x1C07: 'Lucent SX8300P speech codec', + 0x1C0C: 'Lucent SX5363S G.723 compliant codec', + 0x1F03: 'CUseeMe DigiTalk (ex-Rocwell)', + 0x1FC4: 'NCT Soft ALF2CD ACM', + 0x2000: 'AC3', + 0x2001: 'Dolby DTS (Digital Theater System)', + 0x2002: 'RealAudio 1 / 2 14.4', + 0x2003: 'RealAudio 1 / 2 28.8', + 0x2004: 'RealAudio G2 / 8 Cook (low bitrate)', + 0x2005: 'RealAudio 3 / 4 / 
5 Music (DNET)', + 0x2006: 'RealAudio 10 AAC (RAAC)', + 0x2007: 'RealAudio 10 AAC+ (RACP)', + 0x3313: 'makeAVIS', + 0x4143: 'Divio MPEG-4 AAC audio', + 0x434C: 'LEAD Speech', + 0x564C: 'LEAD Vorbis', + 0x674F: 'Ogg Vorbis (mode 1)', + 0x6750: 'Ogg Vorbis (mode 2)', + 0x6751: 'Ogg Vorbis (mode 3)', + 0x676F: 'Ogg Vorbis (mode 1+)', + 0x6770: 'Ogg Vorbis (mode 2+)', + 0x6771: 'Ogg Vorbis (mode 3+)', + 0x7A21: 'GSM-AMR (CBR, no SID)', + 0x7A22: 'GSM-AMR (VBR, including SID)', + 0xDFAC: 'DebugMode SonicFoundry Vegas FrameServer ACM Codec', + 0xF1AC: 'Free Lossless Audio Codec FLAC', + 0xFFFE: 'Extensible wave format', + 0xFFFF: 'development' +} + + +FOURCC = { + '1978': 'A.M.Paredes predictor (LossLess)', + '2VUY': 'Optibase VideoPump 8-bit 4:2:2 Component YCbCr', + '3IV0': 'MPEG4-based codec 3ivx', + '3IV1': '3ivx v1', + '3IV2': '3ivx v2', + '3IVD': 'FFmpeg DivX ;-) (MS MPEG-4 v3)', + '3IVX': 'MPEG4-based codec 3ivx', + '8BPS': 'Apple QuickTime Planar RGB with Alpha-channel', + 'AAS4': 'Autodesk Animator codec (RLE)', + 'AASC': 'Autodesk Animator', + 'ABYR': 'Kensington ABYR', + 'ACTL': 'Streambox ACT-L2', + 'ADV1': 'Loronix WaveCodec', + 'ADVJ': 'Avid M-JPEG Avid Technology Also known as AVRn', + 'AEIK': 'Intel Indeo Video 3.2', + 'AEMI': 'Array VideoONE MPEG1-I Capture', + 'AFLC': 'Autodesk Animator FLC', + 'AFLI': 'Autodesk Animator FLI', + 'AHDV': 'CineForm 10-bit Visually Perfect HD', + 'AJPG': '22fps JPEG-based codec for digital cameras', + 'AMPG': 'Array VideoONE MPEG', + 'ANIM': 'Intel RDX (ANIM)', + 'AP41': 'AngelPotion Definitive', + 'AP42': 'AngelPotion Definitive', + 'ASLC': 'AlparySoft Lossless Codec', + 'ASV1': 'Asus Video v1', + 'ASV2': 'Asus Video v2', + 'ASVX': 'Asus Video 2.0 (audio)', + 'ATM4': 'Ahead Nero Digital MPEG-4 Codec', + 'AUR2': 'Aura 2 Codec - YUV 4:2:2', + 'AURA': 'Aura 1 Codec - YUV 4:1:1', + 'AV1X': 'Avid 1:1x (Quick Time)', + 'AVC1': 'H.264 AVC', + 'AVD1': 'Avid DV (Quick Time)', + 'AVDJ': 'Avid Meridien JFIF with Alpha-channel', + 
'AVDN': 'Avid DNxHD (Quick Time)', + 'AVDV': 'Avid DV', + 'AVI1': 'MainConcept Motion JPEG Codec', + 'AVI2': 'MainConcept Motion JPEG Codec', + 'AVID': 'Avid Motion JPEG', + 'AVIS': 'Wrapper for AviSynth', + 'AVMP': 'Avid IMX (Quick Time)', + 'AVR ': 'Avid ABVB/NuVista MJPEG with Alpha-channel', + 'AVRN': 'Avid Motion JPEG', + 'AVUI': 'Avid Meridien Uncompressed with Alpha-channel', + 'AVUP': 'Avid 10bit Packed (Quick Time)', + 'AYUV': '4:4:4 YUV (AYUV)', + 'AZPR': 'Quicktime Apple Video', + 'AZRP': 'Quicktime Apple Video', + 'BGR ': 'Uncompressed BGR32 8:8:8:8', + 'BGR(15)': 'Uncompressed BGR15 5:5:5', + 'BGR(16)': 'Uncompressed BGR16 5:6:5', + 'BGR(24)': 'Uncompressed BGR24 8:8:8', + 'BHIV': 'BeHere iVideo', + 'BINK': 'RAD Game Tools Bink Video', + 'BIT ': 'BI_BITFIELDS (Raw RGB)', + 'BITM': 'Microsoft H.261', + 'BLOX': 'Jan Jezabek BLOX MPEG Codec', + 'BLZ0': 'DivX for Blizzard Decoder Filter', + 'BT20': 'Conexant Prosumer Video', + 'BTCV': 'Conexant Composite Video Codec', + 'BTVC': 'Conexant Composite Video', + 'BW00': 'BergWave (Wavelet)', + 'BW10': 'Data Translation Broadway MPEG Capture', + 'BXBG': 'BOXX BGR', + 'BXRG': 'BOXX RGB', + 'BXY2': 'BOXX 10-bit YUV', + 'BXYV': 'BOXX YUV', + 'CC12': 'Intel YUV12', + 'CDV5': 'Canopus SD50/DVHD', + 'CDVC': 'Canopus DV', + 'CDVH': 'Canopus SD50/DVHD', + 'CFCC': 'Digital Processing Systems DPS Perception', + 'CFHD': 'CineForm 10-bit Visually Perfect HD', + 'CGDI': 'Microsoft Office 97 Camcorder Video', + 'CHAM': 'Winnov Caviara Champagne', + 'CJPG': 'Creative WebCam JPEG', + 'CLJR': 'Cirrus Logic YUV 4 pixels', + 'CLLC': 'Canopus LossLess', + 'CLPL': 'YV12', + 'CMYK': 'Common Data Format in Printing', + 'COL0': 'FFmpeg DivX ;-) (MS MPEG-4 v3)', + 'COL1': 'FFmpeg DivX ;-) (MS MPEG-4 v3)', + 'CPLA': 'Weitek 4:2:0 YUV Planar', + 'CRAM': 'Microsoft Video 1 (CRAM)', + 'CSCD': 'RenderSoft CamStudio lossless Codec', + 'CTRX': 'Citrix Scalable Video Codec', + 'CUVC': 'Canopus HQ', + 'CVID': 'Radius Cinepak', + 'CWLT': 
'Microsoft Color WLT DIB', + 'CYUV': 'Creative Labs YUV', + 'CYUY': 'ATI YUV', + 'D261': 'H.261', + 'D263': 'H.263', + 'DAVC': 'Dicas MPEGable H.264/MPEG-4 AVC base profile codec', + 'DC25': 'MainConcept ProDV Codec', + 'DCAP': 'Pinnacle DV25 Codec', + 'DCL1': 'Data Connection Conferencing Codec', + 'DCT0': 'WniWni Codec', + 'DFSC': 'DebugMode FrameServer VFW Codec', + 'DIB ': 'Full Frames (Uncompressed)', + 'DIV1': 'FFmpeg-4 V1 (hacked MS MPEG-4 V1)', + 'DIV2': 'MS MPEG-4 V2', + 'DIV3': 'DivX v3 MPEG-4 Low-Motion', + 'DIV4': 'DivX v3 MPEG-4 Fast-Motion', + 'DIV5': 'DIV5', + 'DIV6': 'DivX MPEG-4', + 'DIVX': 'DivX', + 'DM4V': 'Dicas MPEGable MPEG-4', + 'DMB1': 'Matrox Rainbow Runner hardware MJPEG', + 'DMB2': 'Paradigm MJPEG', + 'DMK2': 'ViewSonic V36 PDA Video', + 'DP02': 'DynaPel MPEG-4', + 'DPS0': 'DPS Reality Motion JPEG', + 'DPSC': 'DPS PAR Motion JPEG', + 'DRWX': 'Pinnacle DV25 Codec', + 'DSVD': 'DSVD', + 'DTMT': 'Media-100 Codec', + 'DTNT': 'Media-100 Codec', + 'DUCK': 'Duck True Motion 1.0', + 'DV10': 'BlueFish444 (lossless RGBA, YUV 10-bit)', + 'DV25': 'Matrox DVCPRO codec', + 'DV50': 'Matrox DVCPRO50 codec', + 'DVAN': 'DVAN', + 'DVC ': 'Apple QuickTime DV (DVCPRO NTSC)', + 'DVCP': 'Apple QuickTime DV (DVCPRO PAL)', + 'DVCS': 'MainConcept DV Codec', + 'DVE2': 'InSoft DVE-2 Videoconferencing', + 'DVH1': 'Pinnacle DVHD100', + 'DVHD': 'DV 1125 lines at 30.00 Hz or 1250 lines at 25.00 Hz', + 'DVIS': 'VSYNC DualMoon Iris DV codec', + 'DVL ': 'Radius SoftDV 16:9 NTSC', + 'DVLP': 'Radius SoftDV 16:9 PAL', + 'DVMA': 'Darim Vision DVMPEG', + 'DVOR': 'BlueFish444 (lossless RGBA, YUV 10-bit)', + 'DVPN': 'Apple QuickTime DV (DV NTSC)', + 'DVPP': 'Apple QuickTime DV (DV PAL)', + 'DVR1': 'TARGA2000 Codec', + 'DVRS': 'VSYNC DualMoon Iris DV codec', + 'DVSD': 'DV', + 'DVSL': 'DV compressed in SD (SDL)', + 'DVX1': 'DVX1000SP Video Decoder', + 'DVX2': 'DVX2000S Video Decoder', + 'DVX3': 'DVX3000S Video Decoder', + 'DX50': 'DivX v5', + 'DXGM': 'Electronic Arts Game Video 
codec', + 'DXSB': 'DivX Subtitles Codec', + 'DXT1': 'Microsoft DirectX Compressed Texture (DXT1)', + 'DXT2': 'Microsoft DirectX Compressed Texture (DXT2)', + 'DXT3': 'Microsoft DirectX Compressed Texture (DXT3)', + 'DXT4': 'Microsoft DirectX Compressed Texture (DXT4)', + 'DXT5': 'Microsoft DirectX Compressed Texture (DXT5)', + 'DXTC': 'Microsoft DirectX Compressed Texture (DXTC)', + 'DXTN': 'Microsoft DirectX Compressed Texture (DXTn)', + 'EKQ0': 'Elsa EKQ0', + 'ELK0': 'Elsa ELK0', + 'EM2V': 'Etymonix MPEG-2 I-frame', + 'EQK0': 'Elsa graphics card quick codec', + 'ESCP': 'Eidos Escape', + 'ETV1': 'eTreppid Video ETV1', + 'ETV2': 'eTreppid Video ETV2', + 'ETVC': 'eTreppid Video ETVC', + 'FFDS': 'FFDShow supported', + 'FFV1': 'FFDShow supported', + 'FFVH': 'FFVH codec', + 'FLIC': 'Autodesk FLI/FLC Animation', + 'FLJP': 'D-Vision Field Encoded Motion JPEG', + 'FLV1': 'FLV1 codec', + 'FMJP': 'D-Vision fieldbased ISO MJPEG', + 'FRLE': 'SoftLab-NSK Y16 + Alpha RLE', + 'FRWA': 'SoftLab-Nsk Forward Motion JPEG w/ alpha channel', + 'FRWD': 'SoftLab-Nsk Forward Motion JPEG', + 'FRWT': 'SoftLab-NSK Vision Forward Motion JPEG with Alpha-channel', + 'FRWU': 'SoftLab-NSK Vision Forward Uncompressed', + 'FVF1': 'Iterated Systems Fractal Video Frame', + 'FVFW': 'ff MPEG-4 based on XviD codec', + 'GEPJ': 'White Pine (ex Paradigm Matrix) Motion JPEG Codec', + 'GJPG': 'Grand Tech GT891x Codec', + 'GLCC': 'GigaLink AV Capture codec', + 'GLZW': 'Motion LZW', + 'GPEG': 'Motion JPEG', + 'GPJM': 'Pinnacle ReelTime MJPEG Codec', + 'GREY': 'Apparently a duplicate of Y800', + 'GWLT': 'Microsoft Greyscale WLT DIB', + 'H260': 'H.260', + 'H261': 'H.261', + 'H262': 'H.262', + 'H263': 'H.263', + 'H264': 'H.264 AVC', + 'H265': 'H.265', + 'H266': 'H.266', + 'H267': 'H.267', + 'H268': 'H.268', + 'H269': 'H.269', + 'HD10': 'BlueFish444 (lossless RGBA, YUV 10-bit)', + 'HDX4': 'Jomigo HDX4', + 'HFYU': 'Huffman Lossless Codec', + 'HMCR': 'Rendition Motion Compensation Format (HMCR)', + 'HMRR': 
'Rendition Motion Compensation Format (HMRR)', + 'I263': 'Intel ITU H.263 Videoconferencing (i263)', + 'I420': 'Intel Indeo 4', + 'IAN ': 'Intel RDX', + 'ICLB': 'InSoft CellB Videoconferencing', + 'IDM0': 'IDM Motion Wavelets 2.0', + 'IF09': 'Microsoft H.261', + 'IGOR': 'Power DVD', + 'IJPG': 'Intergraph JPEG', + 'ILVC': 'Intel Layered Video', + 'ILVR': 'ITU-T H.263+', + 'IMC1': 'IMC1', + 'IMC2': 'IMC2', + 'IMC3': 'IMC3', + 'IMC4': 'IMC4', + 'IMJG': 'Accom SphereOUS MJPEG with Alpha-channel', + 'IPDV': 'I-O Data Device Giga AVI DV Codec', + 'IPJ2': 'Image Power JPEG2000', + 'IR21': 'Intel Indeo 2.1', + 'IRAW': 'Intel YUV Uncompressed', + 'IUYV': 'Interlaced version of UYVY (line order 0,2,4 then 1,3,5 etc)', + 'IV30': 'Ligos Indeo 3.0', + 'IV31': 'Ligos Indeo 3.1', + 'IV32': 'Ligos Indeo 3.2', + 'IV33': 'Ligos Indeo 3.3', + 'IV34': 'Ligos Indeo 3.4', + 'IV35': 'Ligos Indeo 3.5', + 'IV36': 'Ligos Indeo 3.6', + 'IV37': 'Ligos Indeo 3.7', + 'IV38': 'Ligos Indeo 3.8', + 'IV39': 'Ligos Indeo 3.9', + 'IV40': 'Ligos Indeo Interactive 4.0', + 'IV41': 'Ligos Indeo Interactive 4.1', + 'IV42': 'Ligos Indeo Interactive 4.2', + 'IV43': 'Ligos Indeo Interactive 4.3', + 'IV44': 'Ligos Indeo Interactive 4.4', + 'IV45': 'Ligos Indeo Interactive 4.5', + 'IV46': 'Ligos Indeo Interactive 4.6', + 'IV47': 'Ligos Indeo Interactive 4.7', + 'IV48': 'Ligos Indeo Interactive 4.8', + 'IV49': 'Ligos Indeo Interactive 4.9', + 'IV50': 'Ligos Indeo Interactive 5.0', + 'IY41': 'Interlaced version of Y41P (line order 0,2,4,...,1,3,5...)', + 'IYU1': '12 bit format used in mode 2 of the IEEE 1394 Digital Camera 1.04 spec', + 'IYU2': '24 bit format used in mode 2 of the IEEE 1394 Digital Camera 1.04 spec', + 'IYUV': 'Intel Indeo iYUV 4:2:0', + 'JBYR': 'Kensington JBYR', + 'JFIF': 'Motion JPEG (FFmpeg)', + 'JPEG': 'Still Image JPEG DIB', + 'JPG ': 'JPEG compressed', + 'JPGL': 'Webcam JPEG Light', + 'KMVC': 'Karl Morton\'s Video Codec', + 'KPCD': 'Kodak Photo CD', + 'L261': 'Lead Technologies H.261', + 
'L263': 'Lead Technologies H.263', + 'LAGS': 'Lagarith LossLess', + 'LBYR': 'Creative WebCam codec', + 'LCMW': 'Lead Technologies Motion CMW Codec', + 'LCW2': 'LEADTools MCMW 9Motion Wavelet)', + 'LEAD': 'LEAD Video Codec', + 'LGRY': 'Lead Technologies Grayscale Image', + 'LJ2K': 'LEADTools JPEG2000', + 'LJPG': 'LEAD MJPEG Codec', + 'LMP2': 'LEADTools MPEG2', + 'LOCO': 'LOCO Lossless Codec', + 'LSCR': 'LEAD Screen Capture', + 'LSVM': 'Vianet Lighting Strike Vmail (Streaming)', + 'LZO1': 'LZO compressed (lossless codec)', + 'M261': 'Microsoft H.261', + 'M263': 'Microsoft H.263', + 'M4CC': 'ESS MPEG4 Divio codec', + 'M4S2': 'Microsoft MPEG-4 (M4S2)', + 'MC12': 'ATI Motion Compensation Format (MC12)', + 'MC24': 'MainConcept Motion JPEG Codec', + 'MCAM': 'ATI Motion Compensation Format (MCAM)', + 'MCZM': 'Theory MicroCosm Lossless 64bit RGB with Alpha-channel', + 'MDVD': 'Alex MicroDVD Video (hacked MS MPEG-4)', + 'MDVF': 'Pinnacle DV/DV50/DVHD100', + 'MHFY': 'A.M.Paredes mhuffyYUV (LossLess)', + 'MJ2C': 'Morgan Multimedia Motion JPEG2000', + 'MJPA': 'Pinnacle ReelTime MJPG hardware codec', + 'MJPB': 'Motion JPEG codec', + 'MJPG': 'Motion JPEG DIB', + 'MJPX': 'Pegasus PICVideo Motion JPEG', + 'MMES': 'Matrox MPEG-2 I-frame', + 'MNVD': 'MindBend MindVid LossLess', + 'MP2A': 'MPEG-2 Audio', + 'MP2T': 'MPEG-2 Transport Stream', + 'MP2V': 'MPEG-2 Video', + 'MP41': 'Microsoft MPEG-4 V1 (enhansed H263)', + 'MP42': 'Microsoft MPEG-4 (low-motion)', + 'MP43': 'Microsoft MPEG-4 (fast-motion)', + 'MP4A': 'MPEG-4 Audio', + 'MP4S': 'Microsoft MPEG-4 (MP4S)', + 'MP4T': 'MPEG-4 Transport Stream', + 'MP4V': 'Apple QuickTime MPEG-4 native', + 'MPEG': 'MPEG-1', + 'MPG1': 'FFmpeg-1', + 'MPG2': 'FFmpeg-1', + 'MPG3': 'Same as Low motion DivX MPEG-4', + 'MPG4': 'Microsoft MPEG-4 Video High Speed Compressor', + 'MPGI': 'Sigma Designs MPEG', + 'MPNG': 'Motion PNG codec', + 'MRCA': 'Martin Regen Codec', + 'MRLE': 'Run Length Encoding', + 'MSS1': 'Windows Screen Video', + 'MSS2': 'Windows Media 
9', + 'MSUC': 'MSU LossLess', + 'MSVC': 'Microsoft Video 1', + 'MSZH': 'Lossless codec (ZIP compression)', + 'MTGA': 'Motion TGA images (24, 32 bpp)', + 'MTX1': 'Matrox MTX1', + 'MTX2': 'Matrox MTX2', + 'MTX3': 'Matrox MTX3', + 'MTX4': 'Matrox MTX4', + 'MTX5': 'Matrox MTX5', + 'MTX6': 'Matrox MTX6', + 'MTX7': 'Matrox MTX7', + 'MTX8': 'Matrox MTX8', + 'MTX9': 'Matrox MTX9', + 'MV12': 'MV12', + 'MVI1': 'Motion Pixels MVI', + 'MVI2': 'Motion Pixels MVI', + 'MWV1': 'Aware Motion Wavelets', + 'MYUV': 'Media-100 844/X Uncompressed', + 'NAVI': 'nAVI', + 'NDIG': 'Ahead Nero Digital MPEG-4 Codec', + 'NHVU': 'NVidia Texture Format (GEForce 3)', + 'NO16': 'Theory None16 64bit uncompressed RAW', + 'NT00': 'NewTek LigtWave HDTV YUV with Alpha-channel', + 'NTN1': 'Nogatech Video Compression 1', + 'NTN2': 'Nogatech Video Compression 2 (GrabBee hardware coder)', + 'NUV1': 'NuppelVideo', + 'NV12': '8-bit Y plane followed by an interleaved U/V plane with 2x2 subsampling', + 'NV21': 'As NV12 with U and V reversed in the interleaved plane', + 'NVDS': 'nVidia Texture Format', + 'NVHS': 'NVidia Texture Format (GEForce 3)', + 'NVS0': 'nVidia GeForce Texture', + 'NVS1': 'nVidia GeForce Texture', + 'NVS2': 'nVidia GeForce Texture', + 'NVS3': 'nVidia GeForce Texture', + 'NVS4': 'nVidia GeForce Texture', + 'NVS5': 'nVidia GeForce Texture', + 'NVT0': 'nVidia GeForce Texture', + 'NVT1': 'nVidia GeForce Texture', + 'NVT2': 'nVidia GeForce Texture', + 'NVT3': 'nVidia GeForce Texture', + 'NVT4': 'nVidia GeForce Texture', + 'NVT5': 'nVidia GeForce Texture', + 'PDVC': 'I-O Data Device Digital Video Capture DV codec', + 'PGVV': 'Radius Video Vision', + 'PHMO': 'IBM Photomotion', + 'PIM1': 'Pegasus Imaging', + 'PIM2': 'Pegasus Imaging', + 'PIMJ': 'Pegasus Imaging Lossless JPEG', + 'PIXL': 'MiroVideo XL (Motion JPEG)', + 'PNG ': 'Apple PNG', + 'PNG1': 'Corecodec.org CorePNG Codec', + 'PVEZ': 'Horizons Technology PowerEZ', + 'PVMM': 'PacketVideo Corporation MPEG-4', + 'PVW2': 'Pegasus Imaging Wavelet 
Compression', + 'PVWV': 'Pegasus Imaging Wavelet 2000', + 'PXLT': 'Apple Pixlet (Wavelet)', + 'Q1.0': 'Q-Team QPEG 1.0 (www.q-team.de)', + 'Q1.1': 'Q-Team QPEG 1.1 (www.q-team.de)', + 'QDGX': 'Apple QuickDraw GX', + 'QPEG': 'Q-Team QPEG 1.0', + 'QPEQ': 'Q-Team QPEG 1.1', + 'R210': 'BlackMagic YUV (Quick Time)', + 'R411': 'Radius DV NTSC YUV', + 'R420': 'Radius DV PAL YUV', + 'RAVI': 'GroupTRON ReferenceAVI codec (dummy for MPEG compressor)', + 'RAV_': 'GroupTRON ReferenceAVI codec (dummy for MPEG compressor)', + 'RAW ': 'Full Frames (Uncompressed)', + 'RGB ': 'Full Frames (Uncompressed)', + 'RGB(15)': 'Uncompressed RGB15 5:5:5', + 'RGB(16)': 'Uncompressed RGB16 5:6:5', + 'RGB(24)': 'Uncompressed RGB24 8:8:8', + 'RGB1': 'Uncompressed RGB332 3:3:2', + 'RGBA': 'Raw RGB with alpha', + 'RGBO': 'Uncompressed RGB555 5:5:5', + 'RGBP': 'Uncompressed RGB565 5:6:5', + 'RGBQ': 'Uncompressed RGB555X 5:5:5 BE', + 'RGBR': 'Uncompressed RGB565X 5:6:5 BE', + 'RGBT': 'Computer Concepts 32-bit support', + 'RL4 ': 'RLE 4bpp RGB', + 'RL8 ': 'RLE 8bpp RGB', + 'RLE ': 'Microsoft Run Length Encoder', + 'RLE4': 'Run Length Encoded 4', + 'RLE8': 'Run Length Encoded 8', + 'RMP4': 'REALmagic MPEG-4 Video Codec', + 'ROQV': 'Id RoQ File Video Decoder', + 'RPZA': 'Apple Video 16 bit "road pizza"', + 'RT21': 'Intel Real Time Video 2.1', + 'RTV0': 'NewTek VideoToaster', + 'RUD0': 'Rududu video codec', + 'RV10': 'RealVideo codec', + 'RV13': 'RealVideo codec', + 'RV20': 'RealVideo G2', + 'RV30': 'RealVideo 8', + 'RV40': 'RealVideo 9', + 'RVX ': 'Intel RDX (RVX )', + 'S263': 'Sorenson Vision H.263', + 'S422': 'Tekram VideoCap C210 YUV 4:2:2', + 'SAMR': 'Adaptive Multi-Rate (AMR) audio codec', + 'SAN3': 'MPEG-4 codec (direct copy of DivX 3.11a)', + 'SDCC': 'Sun Communication Digital Camera Codec', + 'SEDG': 'Samsung MPEG-4 codec', + 'SFMC': 'CrystalNet Surface Fitting Method', + 'SHR0': 'BitJazz SheerVideo', + 'SHR1': 'BitJazz SheerVideo', + 'SHR2': 'BitJazz SheerVideo', + 'SHR3': 'BitJazz 
SheerVideo', + 'SHR4': 'BitJazz SheerVideo', + 'SHR5': 'BitJazz SheerVideo', + 'SHR6': 'BitJazz SheerVideo', + 'SHR7': 'BitJazz SheerVideo', + 'SJPG': 'CUseeMe Networks Codec', + 'SL25': 'SoftLab-NSK DVCPRO', + 'SL50': 'SoftLab-NSK DVCPRO50', + 'SLDV': 'SoftLab-NSK Forward DV Draw codec', + 'SLIF': 'SoftLab-NSK MPEG2 I-frames', + 'SLMJ': 'SoftLab-NSK Forward MJPEG', + 'SMC ': 'Apple Graphics (SMC) codec (256 color)', + 'SMSC': 'Radius SMSC', + 'SMSD': 'Radius SMSD', + 'SMSV': 'WorldConnect Wavelet Video', + 'SNOW': 'SNOW codec', + 'SP40': 'SunPlus YUV', + 'SP44': 'SunPlus Aiptek MegaCam Codec', + 'SP53': 'SunPlus Aiptek MegaCam Codec', + 'SP54': 'SunPlus Aiptek MegaCam Codec', + 'SP55': 'SunPlus Aiptek MegaCam Codec', + 'SP56': 'SunPlus Aiptek MegaCam Codec', + 'SP57': 'SunPlus Aiptek MegaCam Codec', + 'SP58': 'SunPlus Aiptek MegaCam Codec', + 'SPIG': 'Radius Spigot', + 'SPLC': 'Splash Studios ACM Audio Codec', + 'SPRK': 'Sorenson Spark', + 'SQZ2': 'Microsoft VXTreme Video Codec V2', + 'STVA': 'ST CMOS Imager Data (Bayer)', + 'STVB': 'ST CMOS Imager Data (Nudged Bayer)', + 'STVC': 'ST CMOS Imager Data (Bunched)', + 'STVX': 'ST CMOS Imager Data (Extended CODEC Data Format)', + 'STVY': 'ST CMOS Imager Data (Extended CODEC Data Format with Correction Data)', + 'SV10': 'Sorenson Video R1', + 'SVQ1': 'Sorenson Video R3', + 'SVQ3': 'Sorenson Video 3 (Apple Quicktime 5)', + 'SWC1': 'MainConcept Motion JPEG Codec', + 'T420': 'Toshiba YUV 4:2:0', + 'TGA ': 'Apple TGA (with Alpha-channel)', + 'THEO': 'FFVFW Supported Codec', + 'TIFF': 'Apple TIFF (with Alpha-channel)', + 'TIM2': 'Pinnacle RAL DVI', + 'TLMS': 'TeraLogic Motion Intraframe Codec (TLMS)', + 'TLST': 'TeraLogic Motion Intraframe Codec (TLST)', + 'TM20': 'Duck TrueMotion 2.0', + 'TM2A': 'Duck TrueMotion Archiver 2.0', + 'TM2X': 'Duck TrueMotion 2X', + 'TMIC': 'TeraLogic Motion Intraframe Codec (TMIC)', + 'TMOT': 'Horizons Technology TrueMotion S', + 'TR20': 'Duck TrueMotion RealTime 2.0', + 'TRLE': 'Akula Alpha Pro 
Custom AVI (LossLess)', + 'TSCC': 'TechSmith Screen Capture Codec', + 'TV10': 'Tecomac Low-Bit Rate Codec', + 'TVJP': 'TrueVision Field Encoded Motion JPEG', + 'TVMJ': 'Truevision TARGA MJPEG Hardware Codec', + 'TY0N': 'Trident TY0N', + 'TY2C': 'Trident TY2C', + 'TY2N': 'Trident TY2N', + 'U263': 'UB Video StreamForce H.263', + 'U<Y ': 'Discreet UC YUV 4:2:2:4 10 bit', + 'U<YA': 'Discreet UC YUV 4:2:2:4 10 bit (with Alpha-channel)', + 'UCOD': 'eMajix.com ClearVideo', + 'ULTI': 'IBM Ultimotion', + 'UMP4': 'UB Video MPEG 4', + 'UYNV': 'UYVY', + 'UYVP': 'YCbCr 4:2:2', + 'UYVU': 'SoftLab-NSK Forward YUV codec', + 'UYVY': 'UYVY 4:2:2 byte ordering', + 'V210': 'Optibase VideoPump 10-bit 4:2:2 Component YCbCr', + 'V261': 'Lucent VX2000S', + 'V422': '24 bit YUV 4:2:2 Format', + 'V655': '16 bit YUV 4:2:2 Format', + 'VBLE': 'MarcFD VBLE Lossless Codec', + 'VCR1': 'ATI VCR 1.0', + 'VCR2': 'ATI VCR 2.0', + 'VCR3': 'ATI VCR 3.0', + 'VCR4': 'ATI VCR 4.0', + 'VCR5': 'ATI VCR 5.0', + 'VCR6': 'ATI VCR 6.0', + 'VCR7': 'ATI VCR 7.0', + 'VCR8': 'ATI VCR 8.0', + 'VCR9': 'ATI VCR 9.0', + 'VDCT': 'Video Maker Pro DIB', + 'VDOM': 'VDOnet VDOWave', + 'VDOW': 'VDOnet VDOLive (H.263)', + 'VDST': 'VirtualDub remote frameclient ICM driver', + 'VDTZ': 'Darim Vison VideoTizer YUV', + 'VGPX': 'VGPixel Codec', + 'VIDM': 'DivX 5.0 Pro Supported Codec', + 'VIDS': 'YUV 4:2:2 CCIR 601 for V422', + 'VIFP': 'VIFP', + 'VIV1': 'Vivo H.263', + 'VIV2': 'Vivo H.263', + 'VIVO': 'Vivo H.263 v2.00', + 'VIXL': 'Miro Video XL', + 'VLV1': 'Videologic VLCAP.DRV', + 'VP30': 'On2 VP3.0', + 'VP31': 'On2 VP3.1', + 'VP40': 'On2 TrueCast VP4', + 'VP50': 'On2 TrueCast VP5', + 'VP60': 'On2 TrueCast VP6', + 'VP61': 'On2 TrueCast VP6.1', + 'VP62': 'On2 TrueCast VP6.2', + 'VP70': 'On2 TrueMotion VP7', + 'VQC1': 'Vector-quantised codec 1', + 'VQC2': 'Vector-quantised codec 2', + 'VR21': 'BlackMagic YUV (Quick Time)', + 'VSSH': 'Vanguard VSS H.264', + 'VSSV': 'Vanguard Software Solutions Video Codec', + 'VSSW': 'Vanguard VSS 
H.264', + 'VTLP': 'Alaris VideoGramPixel Codec', + 'VX1K': 'VX1000S Video Codec', + 'VX2K': 'VX2000S Video Codec', + 'VXSP': 'VX1000SP Video Codec', + 'VYU9': 'ATI Technologies YUV', + 'VYUY': 'ATI Packed YUV Data', + 'WBVC': 'Winbond W9960', + 'WHAM': 'Microsoft Video 1 (WHAM)', + 'WINX': 'Winnov Software Compression', + 'WJPG': 'AverMedia Winbond JPEG', + 'WMV1': 'Windows Media Video V7', + 'WMV2': 'Windows Media Video V8', + 'WMV3': 'Windows Media Video V9', + 'WMVA': 'WMVA codec', + 'WMVP': 'Windows Media Video V9', + 'WNIX': 'WniWni Codec', + 'WNV1': 'Winnov Hardware Compression', + 'WNVA': 'Winnov hw compress', + 'WRLE': 'Apple QuickTime BMP Codec', + 'WRPR': 'VideoTools VideoServer Client Codec', + 'WV1F': 'WV1F codec', + 'WVLT': 'IllusionHope Wavelet 9/7', + 'WVP2': 'WVP2 codec', + 'X263': 'Xirlink H.263', + 'X264': 'XiWave GNU GPL x264 MPEG-4 Codec', + 'XLV0': 'NetXL Video Decoder', + 'XMPG': 'Xing MPEG (I-Frame only)', + 'XVID': 'XviD MPEG-4', + 'XVIX': 'Based on XviD MPEG-4 codec', + 'XWV0': 'XiWave Video Codec', + 'XWV1': 'XiWave Video Codec', + 'XWV2': 'XiWave Video Codec', + 'XWV3': 'XiWave Video Codec (Xi-3 Video)', + 'XWV4': 'XiWave Video Codec', + 'XWV5': 'XiWave Video Codec', + 'XWV6': 'XiWave Video Codec', + 'XWV7': 'XiWave Video Codec', + 'XWV8': 'XiWave Video Codec', + 'XWV9': 'XiWave Video Codec', + 'XXAN': 'XXAN', + 'XYZP': 'Extended PAL format XYZ palette', + 'Y211': 'YUV 2:1:1 Packed', + 'Y216': 'Pinnacle TARGA CineWave YUV (Quick Time)', + 'Y411': 'YUV 4:1:1 Packed', + 'Y41B': 'YUV 4:1:1 Planar', + 'Y41P': 'PC1 4:1:1', + 'Y41T': 'PC1 4:1:1 with transparency', + 'Y422': 'Y422', + 'Y42B': 'YUV 4:2:2 Planar', + 'Y42T': 'PCI 4:2:2 with transparency', + 'Y444': 'IYU2', + 'Y8 ': 'Grayscale video', + 'Y800': 'Simple grayscale video', + 'YC12': 'Intel YUV12 Codec', + 'YMPG': 'YMPEG Alpha', + 'YU12': 'ATI YV12 4:2:0 Planar', + 'YU92': 'Intel - YUV', + 'YUNV': 'YUNV', + 'YUV2': 'Apple Component Video (YUV 4:2:2)', + 'YUV8': 'Winnov Caviar YUV8', + 
'YUV9': 'Intel YUV9', + 'YUVP': 'YCbCr 4:2:2', + 'YUY2': 'Uncompressed YUV 4:2:2', + 'YUYV': 'Canopus YUV', + 'YV12': 'YVU12 Planar', + 'YV16': 'Elecard YUV 4:2:2 Planar', + 'YV92': 'Intel Smart Video Recorder YVU9', + 'YVU9': 'Intel YVU9 Planar', + 'YVYU': 'YVYU 4:2:2 byte ordering', + 'ZLIB': 'ZLIB', + 'ZPEG': 'Metheus Video Zipper', + 'ZYGO': 'ZyGo Video Codec' +} + +# make it foolproof +for code, value in FOURCC.items(): + if not code.upper() in FOURCC: + FOURCC[code.upper()] = value + if code.endswith(' '): + FOURCC[code.strip().upper()] = value diff --git a/lib/enzyme/infos.py b/lib/enzyme/infos.py new file mode 100644 index 0000000000000000000000000000000000000000..a6f0bcc5913e44975aeeca9e1a80568a007ca055 --- /dev/null +++ b/lib/enzyme/infos.py @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- +# enzyme - Video metadata parser +# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com> +# +# This file is part of enzyme. +# +# enzyme is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# enzyme is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with enzyme. If not, see <http://www.gnu.org/licenses/>. 
+__version__ = '0.2' diff --git a/lib/enzyme/language.py b/lib/enzyme/language.py new file mode 100644 index 0000000000000000000000000000000000000000..3957f9d9f62a351357bf92fc2ee5f983e94ea14e --- /dev/null +++ b/lib/enzyme/language.py @@ -0,0 +1,535 @@ +# -*- coding: utf-8 -*- +# enzyme - Video metadata parser +# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com> +# Copyright 2003-2006 Dirk Meyer <dischi@freevo.org> +# +# This file is part of enzyme. +# +# enzyme is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# enzyme is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with enzyme. If not, see <http://www.gnu.org/licenses/>. +import re + +__all__ = ['resolve'] + + +def resolve(code): + """ + Transform the given (2- or 3-letter) language code to a human readable + language name. The return value is a 2-tuple containing the given + language code and the language name. If the language code cannot be + resolved, name will be 'Unknown (<code>)'. + """ + if not code: + return None, None + if not isinstance(code, basestring): + raise ValueError('Invalid language code specified by parser') + + # Take up to 3 letters from the code. 
+ code = re.split(r'[^a-z]', code.lower())[0][:3] + + for spec in codes: + if code in spec[:-1]: + return code, spec[-1] + + return code, u'Unknown (%r)' % code + + +# Parsed from http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt +codes = ( + ('aar', 'aa', u'Afar'), + ('abk', 'ab', u'Abkhazian'), + ('ace', u'Achinese'), + ('ach', u'Acoli'), + ('ada', u'Adangme'), + ('ady', u'Adyghe'), + ('afa', u'Afro-Asiatic '), + ('afh', u'Afrihili'), + ('afr', 'af', u'Afrikaans'), + ('ain', u'Ainu'), + ('aka', 'ak', u'Akan'), + ('akk', u'Akkadian'), + ('alb', 'sq', u'Albanian'), + ('ale', u'Aleut'), + ('alg', u'Algonquian languages'), + ('alt', u'Southern Altai'), + ('amh', 'am', u'Amharic'), + ('ang', u'English, Old '), + ('anp', u'Angika'), + ('apa', u'Apache languages'), + ('ara', 'ar', u'Arabic'), + ('arc', u'Official Aramaic '), + ('arg', 'an', u'Aragonese'), + ('arm', 'hy', u'Armenian'), + ('arn', u'Mapudungun'), + ('arp', u'Arapaho'), + ('art', u'Artificial '), + ('arw', u'Arawak'), + ('asm', 'as', u'Assamese'), + ('ast', u'Asturian'), + ('ath', u'Athapascan languages'), + ('aus', u'Australian languages'), + ('ava', 'av', u'Avaric'), + ('ave', 'ae', u'Avestan'), + ('awa', u'Awadhi'), + ('aym', 'ay', u'Aymara'), + ('aze', 'az', u'Azerbaijani'), + ('bad', u'Banda languages'), + ('bai', u'Bamileke languages'), + ('bak', 'ba', u'Bashkir'), + ('bal', u'Baluchi'), + ('bam', 'bm', u'Bambara'), + ('ban', u'Balinese'), + ('baq', 'eu', u'Basque'), + ('bas', u'Basa'), + ('bat', u'Baltic '), + ('bej', u'Beja'), + ('bel', 'be', u'Belarusian'), + ('bem', u'Bemba'), + ('ben', 'bn', u'Bengali'), + ('ber', u'Berber '), + ('bho', u'Bhojpuri'), + ('bih', 'bh', u'Bihari'), + ('bik', u'Bikol'), + ('bin', u'Bini'), + ('bis', 'bi', u'Bislama'), + ('bla', u'Siksika'), + ('bnt', u'Bantu '), + ('bos', 'bs', u'Bosnian'), + ('bra', u'Braj'), + ('bre', 'br', u'Breton'), + ('btk', u'Batak languages'), + ('bua', u'Buriat'), + ('bug', u'Buginese'), + ('bul', 'bg', u'Bulgarian'), + ('bur', 'my', 
u'Burmese'), + ('byn', u'Blin'), + ('cad', u'Caddo'), + ('cai', u'Central American Indian '), + ('car', u'Galibi Carib'), + ('cat', 'ca', u'Catalan'), + ('cau', u'Caucasian '), + ('ceb', u'Cebuano'), + ('cel', u'Celtic '), + ('cha', 'ch', u'Chamorro'), + ('chb', u'Chibcha'), + ('che', 'ce', u'Chechen'), + ('chg', u'Chagatai'), + ('chi', 'zh', u'Chinese'), + ('chk', u'Chuukese'), + ('chm', u'Mari'), + ('chn', u'Chinook jargon'), + ('cho', u'Choctaw'), + ('chp', u'Chipewyan'), + ('chr', u'Cherokee'), + ('chu', 'cu', u'Church Slavic'), + ('chv', 'cv', u'Chuvash'), + ('chy', u'Cheyenne'), + ('cmc', u'Chamic languages'), + ('cop', u'Coptic'), + ('cor', 'kw', u'Cornish'), + ('cos', 'co', u'Corsican'), + ('cpe', u'Creoles and pidgins, English based '), + ('cpf', u'Creoles and pidgins, French-based '), + ('cpp', u'Creoles and pidgins, Portuguese-based '), + ('cre', 'cr', u'Cree'), + ('crh', u'Crimean Tatar'), + ('crp', u'Creoles and pidgins '), + ('csb', u'Kashubian'), + ('cus', u'Cushitic '), + ('cze', 'cs', u'Czech'), + ('dak', u'Dakota'), + ('dan', 'da', u'Danish'), + ('dar', u'Dargwa'), + ('day', u'Land Dayak languages'), + ('del', u'Delaware'), + ('den', u'Slave '), + ('dgr', u'Dogrib'), + ('din', u'Dinka'), + ('div', 'dv', u'Divehi'), + ('doi', u'Dogri'), + ('dra', u'Dravidian '), + ('dsb', u'Lower Sorbian'), + ('dua', u'Duala'), + ('dum', u'Dutch, Middle '), + ('dut', 'nl', u'Dutch'), + ('dyu', u'Dyula'), + ('dzo', 'dz', u'Dzongkha'), + ('efi', u'Efik'), + ('egy', u'Egyptian '), + ('eka', u'Ekajuk'), + ('elx', u'Elamite'), + ('eng', 'en', u'English'), + ('enm', u'English, Middle '), + ('epo', 'eo', u'Esperanto'), + ('est', 'et', u'Estonian'), + ('ewe', 'ee', u'Ewe'), + ('ewo', u'Ewondo'), + ('fan', u'Fang'), + ('fao', 'fo', u'Faroese'), + ('fat', u'Fanti'), + ('fij', 'fj', u'Fijian'), + ('fil', u'Filipino'), + ('fin', 'fi', u'Finnish'), + ('fiu', u'Finno-Ugrian '), + ('fon', u'Fon'), + ('fre', 'fr', u'French'), + ('frm', u'French, Middle '), + ('fro', u'French, Old 
'), + ('frr', u'Northern Frisian'), + ('frs', u'Eastern Frisian'), + ('fry', 'fy', u'Western Frisian'), + ('ful', 'ff', u'Fulah'), + ('fur', u'Friulian'), + ('gaa', u'Ga'), + ('gay', u'Gayo'), + ('gba', u'Gbaya'), + ('gem', u'Germanic '), + ('geo', 'ka', u'Georgian'), + ('ger', 'de', u'German'), + ('gez', u'Geez'), + ('gil', u'Gilbertese'), + ('gla', 'gd', u'Gaelic'), + ('gle', 'ga', u'Irish'), + ('glg', 'gl', u'Galician'), + ('glv', 'gv', u'Manx'), + ('gmh', u'German, Middle High '), + ('goh', u'German, Old High '), + ('gon', u'Gondi'), + ('gor', u'Gorontalo'), + ('got', u'Gothic'), + ('grb', u'Grebo'), + ('grc', u'Greek, Ancient '), + ('gre', 'el', u'Greek, Modern '), + ('grn', 'gn', u'Guarani'), + ('gsw', u'Swiss German'), + ('guj', 'gu', u'Gujarati'), + ('gwi', u"Gwich'in"), + ('hai', u'Haida'), + ('hat', 'ht', u'Haitian'), + ('hau', 'ha', u'Hausa'), + ('haw', u'Hawaiian'), + ('heb', 'he', u'Hebrew'), + ('her', 'hz', u'Herero'), + ('hil', u'Hiligaynon'), + ('him', u'Himachali'), + ('hin', 'hi', u'Hindi'), + ('hit', u'Hittite'), + ('hmn', u'Hmong'), + ('hmo', 'ho', u'Hiri Motu'), + ('hsb', u'Upper Sorbian'), + ('hun', 'hu', u'Hungarian'), + ('hup', u'Hupa'), + ('iba', u'Iban'), + ('ibo', 'ig', u'Igbo'), + ('ice', 'is', u'Icelandic'), + ('ido', 'io', u'Ido'), + ('iii', 'ii', u'Sichuan Yi'), + ('ijo', u'Ijo languages'), + ('iku', 'iu', u'Inuktitut'), + ('ile', 'ie', u'Interlingue'), + ('ilo', u'Iloko'), + ('ina', 'ia', u'Interlingua '), + ('inc', u'Indic '), + ('ind', 'id', u'Indonesian'), + ('ine', u'Indo-European '), + ('inh', u'Ingush'), + ('ipk', 'ik', u'Inupiaq'), + ('ira', u'Iranian '), + ('iro', u'Iroquoian languages'), + ('ita', 'it', u'Italian'), + ('jav', 'jv', u'Javanese'), + ('jbo', u'Lojban'), + ('jpn', 'ja', u'Japanese'), + ('jpr', u'Judeo-Persian'), + ('jrb', u'Judeo-Arabic'), + ('kaa', u'Kara-Kalpak'), + ('kab', u'Kabyle'), + ('kac', u'Kachin'), + ('kal', 'kl', u'Kalaallisut'), + ('kam', u'Kamba'), + ('kan', 'kn', u'Kannada'), + ('kar', u'Karen 
languages'), + ('kas', 'ks', u'Kashmiri'), + ('kau', 'kr', u'Kanuri'), + ('kaw', u'Kawi'), + ('kaz', 'kk', u'Kazakh'), + ('kbd', u'Kabardian'), + ('kha', u'Khasi'), + ('khi', u'Khoisan '), + ('khm', 'km', u'Central Khmer'), + ('kho', u'Khotanese'), + ('kik', 'ki', u'Kikuyu'), + ('kin', 'rw', u'Kinyarwanda'), + ('kir', 'ky', u'Kirghiz'), + ('kmb', u'Kimbundu'), + ('kok', u'Konkani'), + ('kom', 'kv', u'Komi'), + ('kon', 'kg', u'Kongo'), + ('kor', 'ko', u'Korean'), + ('kos', u'Kosraean'), + ('kpe', u'Kpelle'), + ('krc', u'Karachay-Balkar'), + ('krl', u'Karelian'), + ('kro', u'Kru languages'), + ('kru', u'Kurukh'), + ('kua', 'kj', u'Kuanyama'), + ('kum', u'Kumyk'), + ('kur', 'ku', u'Kurdish'), + ('kut', u'Kutenai'), + ('lad', u'Ladino'), + ('lah', u'Lahnda'), + ('lam', u'Lamba'), + ('lao', 'lo', u'Lao'), + ('lat', 'la', u'Latin'), + ('lav', 'lv', u'Latvian'), + ('lez', u'Lezghian'), + ('lim', 'li', u'Limburgan'), + ('lin', 'ln', u'Lingala'), + ('lit', 'lt', u'Lithuanian'), + ('lol', u'Mongo'), + ('loz', u'Lozi'), + ('ltz', 'lb', u'Luxembourgish'), + ('lua', u'Luba-Lulua'), + ('lub', 'lu', u'Luba-Katanga'), + ('lug', 'lg', u'Ganda'), + ('lui', u'Luiseno'), + ('lun', u'Lunda'), + ('luo', u'Luo '), + ('lus', u'Lushai'), + ('mac', 'mk', u'Macedonian'), + ('mad', u'Madurese'), + ('mag', u'Magahi'), + ('mah', 'mh', u'Marshallese'), + ('mai', u'Maithili'), + ('mak', u'Makasar'), + ('mal', 'ml', u'Malayalam'), + ('man', u'Mandingo'), + ('mao', 'mi', u'Maori'), + ('map', u'Austronesian '), + ('mar', 'mr', u'Marathi'), + ('mas', u'Masai'), + ('may', 'ms', u'Malay'), + ('mdf', u'Moksha'), + ('mdr', u'Mandar'), + ('men', u'Mende'), + ('mga', u'Irish, Middle '), + ('mic', u"Mi'kmaq"), + ('min', u'Minangkabau'), + ('mis', u'Uncoded languages'), + ('mkh', u'Mon-Khmer '), + ('mlg', 'mg', u'Malagasy'), + ('mlt', 'mt', u'Maltese'), + ('mnc', u'Manchu'), + ('mni', u'Manipuri'), + ('mno', u'Manobo languages'), + ('moh', u'Mohawk'), + ('mol', 'mo', u'Moldavian'), + ('mon', 'mn', 
u'Mongolian'), + ('mos', u'Mossi'), + ('mul', u'Multiple languages'), + ('mun', u'Munda languages'), + ('mus', u'Creek'), + ('mwl', u'Mirandese'), + ('mwr', u'Marwari'), + ('myn', u'Mayan languages'), + ('myv', u'Erzya'), + ('nah', u'Nahuatl languages'), + ('nai', u'North American Indian'), + ('nap', u'Neapolitan'), + ('nau', 'na', u'Nauru'), + ('nav', 'nv', u'Navajo'), + ('nbl', 'nr', u'Ndebele, South'), + ('nde', 'nd', u'Ndebele, North'), + ('ndo', 'ng', u'Ndonga'), + ('nds', u'Low German'), + ('nep', 'ne', u'Nepali'), + ('new', u'Nepal Bhasa'), + ('nia', u'Nias'), + ('nic', u'Niger-Kordofanian '), + ('niu', u'Niuean'), + ('nno', 'nn', u'Norwegian Nynorsk'), + ('nob', 'nb', u'Bokm\xe5l, Norwegian'), + ('nog', u'Nogai'), + ('non', u'Norse, Old'), + ('nor', 'no', u'Norwegian'), + ('nqo', u"N'Ko"), + ('nso', u'Pedi'), + ('nub', u'Nubian languages'), + ('nwc', u'Classical Newari'), + ('nya', 'ny', u'Chichewa'), + ('nym', u'Nyamwezi'), + ('nyn', u'Nyankole'), + ('nyo', u'Nyoro'), + ('nzi', u'Nzima'), + ('oci', 'oc', u'Occitan '), + ('oji', 'oj', u'Ojibwa'), + ('ori', 'or', u'Oriya'), + ('orm', 'om', u'Oromo'), + ('osa', u'Osage'), + ('oss', 'os', u'Ossetian'), + ('ota', u'Turkish, Ottoman '), + ('oto', u'Otomian languages'), + ('paa', u'Papuan '), + ('pag', u'Pangasinan'), + ('pal', u'Pahlavi'), + ('pam', u'Pampanga'), + ('pan', 'pa', u'Panjabi'), + ('pap', u'Papiamento'), + ('pau', u'Palauan'), + ('peo', u'Persian, Old '), + ('per', 'fa', u'Persian'), + ('phi', u'Philippine '), + ('phn', u'Phoenician'), + ('pli', 'pi', u'Pali'), + ('pol', 'pl', u'Polish'), + ('pon', u'Pohnpeian'), + ('por', 'pt', u'Portuguese'), + ('pra', u'Prakrit languages'), + ('pro', u'Proven\xe7al, Old '), + ('pus', 'ps', u'Pushto'), + ('qaa-qtz', u'Reserved for local use'), + ('que', 'qu', u'Quechua'), + ('raj', u'Rajasthani'), + ('rap', u'Rapanui'), + ('rar', u'Rarotongan'), + ('roa', u'Romance '), + ('roh', 'rm', u'Romansh'), + ('rom', u'Romany'), + ('rum', 'ro', u'Romanian'), + ('run', 'rn', 
u'Rundi'), + ('rup', u'Aromanian'), + ('rus', 'ru', u'Russian'), + ('sad', u'Sandawe'), + ('sag', 'sg', u'Sango'), + ('sah', u'Yakut'), + ('sai', u'South American Indian '), + ('sal', u'Salishan languages'), + ('sam', u'Samaritan Aramaic'), + ('san', 'sa', u'Sanskrit'), + ('sas', u'Sasak'), + ('sat', u'Santali'), + ('scc', 'sr', u'Serbian'), + ('scn', u'Sicilian'), + ('sco', u'Scots'), + ('scr', 'hr', u'Croatian'), + ('sel', u'Selkup'), + ('sem', u'Semitic '), + ('sga', u'Irish, Old '), + ('sgn', u'Sign Languages'), + ('shn', u'Shan'), + ('sid', u'Sidamo'), + ('sin', 'si', u'Sinhala'), + ('sio', u'Siouan languages'), + ('sit', u'Sino-Tibetan '), + ('sla', u'Slavic '), + ('slo', 'sk', u'Slovak'), + ('slv', 'sl', u'Slovenian'), + ('sma', u'Southern Sami'), + ('sme', 'se', u'Northern Sami'), + ('smi', u'Sami languages '), + ('smj', u'Lule Sami'), + ('smn', u'Inari Sami'), + ('smo', 'sm', u'Samoan'), + ('sms', u'Skolt Sami'), + ('sna', 'sn', u'Shona'), + ('snd', 'sd', u'Sindhi'), + ('snk', u'Soninke'), + ('sog', u'Sogdian'), + ('som', 'so', u'Somali'), + ('son', u'Songhai languages'), + ('sot', 'st', u'Sotho, Southern'), + ('spa', 'es', u'Spanish'), + ('srd', 'sc', u'Sardinian'), + ('srn', u'Sranan Tongo'), + ('srr', u'Serer'), + ('ssa', u'Nilo-Saharan '), + ('ssw', 'ss', u'Swati'), + ('suk', u'Sukuma'), + ('sun', 'su', u'Sundanese'), + ('sus', u'Susu'), + ('sux', u'Sumerian'), + ('swa', 'sw', u'Swahili'), + ('swe', 'sv', u'Swedish'), + ('syc', u'Classical Syriac'), + ('syr', u'Syriac'), + ('tah', 'ty', u'Tahitian'), + ('tai', u'Tai '), + ('tam', 'ta', u'Tamil'), + ('tat', 'tt', u'Tatar'), + ('tel', 'te', u'Telugu'), + ('tem', u'Timne'), + ('ter', u'Tereno'), + ('tet', u'Tetum'), + ('tgk', 'tg', u'Tajik'), + ('tgl', 'tl', u'Tagalog'), + ('tha', 'th', u'Thai'), + ('tib', 'bo', u'Tibetan'), + ('tig', u'Tigre'), + ('tir', 'ti', u'Tigrinya'), + ('tiv', u'Tiv'), + ('tkl', u'Tokelau'), + ('tlh', u'Klingon'), + ('tli', u'Tlingit'), + ('tmh', u'Tamashek'), + ('tog', u'Tonga 
'), + ('ton', 'to', u'Tonga '), + ('tpi', u'Tok Pisin'), + ('tsi', u'Tsimshian'), + ('tsn', 'tn', u'Tswana'), + ('tso', 'ts', u'Tsonga'), + ('tuk', 'tk', u'Turkmen'), + ('tum', u'Tumbuka'), + ('tup', u'Tupi languages'), + ('tur', 'tr', u'Turkish'), + ('tut', u'Altaic '), + ('tvl', u'Tuvalu'), + ('twi', 'tw', u'Twi'), + ('tyv', u'Tuvinian'), + ('udm', u'Udmurt'), + ('uga', u'Ugaritic'), + ('uig', 'ug', u'Uighur'), + ('ukr', 'uk', u'Ukrainian'), + ('umb', u'Umbundu'), + ('und', u'Undetermined'), + ('urd', 'ur', u'Urdu'), + ('uzb', 'uz', u'Uzbek'), + ('vai', u'Vai'), + ('ven', 've', u'Venda'), + ('vie', 'vi', u'Vietnamese'), + ('vol', 'vo', u'Volap\xfck'), + ('vot', u'Votic'), + ('wak', u'Wakashan languages'), + ('wal', u'Walamo'), + ('war', u'Waray'), + ('was', u'Washo'), + ('wel', 'cy', u'Welsh'), + ('wen', u'Sorbian languages'), + ('wln', 'wa', u'Walloon'), + ('wol', 'wo', u'Wolof'), + ('xal', u'Kalmyk'), + ('xho', 'xh', u'Xhosa'), + ('yao', u'Yao'), + ('yap', u'Yapese'), + ('yid', 'yi', u'Yiddish'), + ('yor', 'yo', u'Yoruba'), + ('ypk', u'Yupik languages'), + ('zap', u'Zapotec'), + ('zbl', u'Blissymbols'), + ('zen', u'Zenaga'), + ('zha', 'za', u'Zhuang'), + ('znd', u'Zande languages'), + ('zul', 'zu', u'Zulu'), + ('zun', u'Zuni'), + ('zxx', u'No linguistic content'), + ('zza', u'Zaza'), +) diff --git a/lib/enzyme/mkv.py b/lib/enzyme/mkv.py new file mode 100644 index 0000000000000000000000000000000000000000..aba5325e29501e2637ef7168236ff8d9b0193eab --- /dev/null +++ b/lib/enzyme/mkv.py @@ -0,0 +1,840 @@ +# -*- coding: utf-8 -*- +# enzyme - Video metadata parser +# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com> +# Copyright 2003-2006 Thomas Schueppel <stain@acm.org> +# Copyright 2003-2006 Dirk Meyer <dischi@freevo.org> +# Copyright 2003-2006 Jason Tackaberry <tack@urandom.ca> +# +# This file is part of enzyme. 
+# +# enzyme is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# enzyme is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with enzyme. If not, see <http://www.gnu.org/licenses/>. +from datetime import datetime +from exceptions import ParseError +from struct import unpack +import core +import logging +import re + +__all__ = ['Parser'] + + +# get logging object +log = logging.getLogger(__name__) + +# Main IDs for the Matroska streams +MATROSKA_VIDEO_TRACK = 0x01 +MATROSKA_AUDIO_TRACK = 0x02 +MATROSKA_SUBTITLES_TRACK = 0x11 + +MATROSKA_HEADER_ID = 0x1A45DFA3 +MATROSKA_TRACKS_ID = 0x1654AE6B +MATROSKA_CUES_ID = 0x1C53BB6B +MATROSKA_SEGMENT_ID = 0x18538067 +MATROSKA_SEGMENT_INFO_ID = 0x1549A966 +MATROSKA_CLUSTER_ID = 0x1F43B675 +MATROSKA_VOID_ID = 0xEC +MATROSKA_CRC_ID = 0xBF +MATROSKA_TIMECODESCALE_ID = 0x2AD7B1 +MATROSKA_DURATION_ID = 0x4489 +MATROSKA_CRC32_ID = 0xBF +MATROSKA_TIMECODESCALE_ID = 0x2AD7B1 +MATROSKA_MUXING_APP_ID = 0x4D80 +MATROSKA_WRITING_APP_ID = 0x5741 +MATROSKA_CODEC_ID = 0x86 +MATROSKA_CODEC_PRIVATE_ID = 0x63A2 +MATROSKA_FRAME_DURATION_ID = 0x23E383 +MATROSKA_VIDEO_SETTINGS_ID = 0xE0 +MATROSKA_VIDEO_WIDTH_ID = 0xB0 +MATROSKA_VIDEO_HEIGHT_ID = 0xBA +MATROSKA_VIDEO_INTERLACED_ID = 0x9A +MATROSKA_VIDEO_DISPLAY_WIDTH_ID = 0x54B0 +MATROSKA_VIDEO_DISPLAY_HEIGHT_ID = 0x54BA +MATROSKA_AUDIO_SETTINGS_ID = 0xE1 +MATROSKA_AUDIO_SAMPLERATE_ID = 0xB5 +MATROSKA_AUDIO_CHANNELS_ID = 0x9F +MATROSKA_TRACK_UID_ID = 0x73C5 +MATROSKA_TRACK_NUMBER_ID = 0xD7 +MATROSKA_TRACK_TYPE_ID = 0x83 +MATROSKA_TRACK_LANGUAGE_ID = 0x22B59C 
+MATROSKA_TRACK_OFFSET = 0x537F +MATROSKA_TRACK_FLAG_DEFAULT_ID = 0x88 +MATROSKA_TRACK_FLAG_ENABLED_ID = 0xB9 +MATROSKA_TITLE_ID = 0x7BA9 +MATROSKA_DATE_UTC_ID = 0x4461 +MATROSKA_NAME_ID = 0x536E + +MATROSKA_CHAPTERS_ID = 0x1043A770 +MATROSKA_CHAPTER_UID_ID = 0x73C4 +MATROSKA_EDITION_ENTRY_ID = 0x45B9 +MATROSKA_CHAPTER_ATOM_ID = 0xB6 +MATROSKA_CHAPTER_TIME_START_ID = 0x91 +MATROSKA_CHAPTER_TIME_END_ID = 0x92 +MATROSKA_CHAPTER_FLAG_ENABLED_ID = 0x4598 +MATROSKA_CHAPTER_DISPLAY_ID = 0x80 +MATROSKA_CHAPTER_LANGUAGE_ID = 0x437C +MATROSKA_CHAPTER_STRING_ID = 0x85 + +MATROSKA_ATTACHMENTS_ID = 0x1941A469 +MATROSKA_ATTACHED_FILE_ID = 0x61A7 +MATROSKA_FILE_DESC_ID = 0x467E +MATROSKA_FILE_NAME_ID = 0x466E +MATROSKA_FILE_MIME_TYPE_ID = 0x4660 +MATROSKA_FILE_DATA_ID = 0x465C + +MATROSKA_SEEKHEAD_ID = 0x114D9B74 +MATROSKA_SEEK_ID = 0x4DBB +MATROSKA_SEEKID_ID = 0x53AB +MATROSKA_SEEK_POSITION_ID = 0x53AC + +MATROSKA_TAGS_ID = 0x1254C367 +MATROSKA_TAG_ID = 0x7373 +MATROSKA_TARGETS_ID = 0x63C0 +MATROSKA_TARGET_TYPE_VALUE_ID = 0x68CA +MATROSKA_TARGET_TYPE_ID = 0x63CA +MATRSOKA_TAGS_TRACK_UID_ID = 0x63C5 +MATRSOKA_TAGS_EDITION_UID_ID = 0x63C9 +MATRSOKA_TAGS_CHAPTER_UID_ID = 0x63C4 +MATRSOKA_TAGS_ATTACHMENT_UID_ID = 0x63C6 +MATROSKA_SIMPLE_TAG_ID = 0x67C8 +MATROSKA_TAG_NAME_ID = 0x45A3 +MATROSKA_TAG_LANGUAGE_ID = 0x447A +MATROSKA_TAG_STRING_ID = 0x4487 +MATROSKA_TAG_BINARY_ID = 0x4485 + + +# See mkv spec for details: +# http://www.matroska.org/technical/specs/index.html + +# Map to convert to well known codes +# http://haali.cs.msu.ru/mkv/codecs.pdf +FOURCCMap = { + 'V_THEORA': 'THEO', + 'V_SNOW': 'SNOW', + 'V_MPEG4/ISO/ASP': 'MP4V', + 'V_MPEG4/ISO/AVC': 'AVC1', + 'A_AC3': 0x2000, + 'A_MPEG/L3': 0x0055, + 'A_MPEG/L2': 0x0050, + 'A_MPEG/L1': 0x0050, + 'A_DTS': 0x2001, + 'A_PCM/INT/LIT': 0x0001, + 'A_PCM/FLOAT/IEEE': 0x003, + 'A_TTA1': 0x77a1, + 'A_WAVPACK4': 0x5756, + 'A_VORBIS': 0x6750, + 'A_FLAC': 0xF1AC, + 'A_AAC': 0x00ff, + 'A_AAC/': 0x00ff +} + + +def 
matroska_date_to_datetime(date): + """ + Converts a date in Matroska's date format to a python datetime object. + Returns the given date string if it could not be converted. + """ + # From the specs: + # The fields with dates should have the following format: YYYY-MM-DD + # HH:MM:SS.MSS [...] To store less accuracy, you remove items starting + # from the right. To store only the year, you would use, "2004". To store + # a specific day such as May 1st, 2003, you would use "2003-05-01". + format = re.split(r'([-:. ])', '%Y-%m-%d %H:%M:%S.%f') + while format: + try: + return datetime.strptime(date, ''.join(format)) + except ValueError: + format = format[:-2] + return date + + +def matroska_bps_to_bitrate(bps): + """ + Tries to convert a free-form bps string into a bitrate (bits per second). + """ + m = re.search('([\d.]+)\s*(\D.*)', bps) + if m: + bps, suffix = m.groups() + if 'kbit' in suffix: + return float(bps) * 1024 + elif 'kbyte' in suffix: + return float(bps) * 1024 * 8 + elif 'byte' in suffix: + return float(bps) * 8 + elif 'bps' in suffix or 'bit' in suffix: + return float(bps) + if bps.replace('.', '').isdigit(): + if float(bps) < 30000: + # Assume kilobits and convert to bps + return float(bps) * 1024 + return float(bps) + + +# Used to convert the official matroska tag names (only lower-cased) to core +# attributes. 
tag name -> attr, filter +TAGS_MAP = { + # From Media core + u'title': ('title', None), + u'subtitle': ('caption', None), + u'comment': ('comment', None), + u'url': ('url', None), + u'artist': ('artist', None), + u'keywords': ('keywords', lambda s: [word.strip() for word in s.split(',')]), + u'composer_nationality': ('country', None), + u'date_released': ('datetime', None), + u'date_recorded': ('datetime', None), + u'date_written': ('datetime', None), + + # From Video core + u'encoder': ('encoder', None), + u'bps': ('bitrate', matroska_bps_to_bitrate), + u'part_number': ('trackno', int), + u'total_parts': ('trackof', int), + u'copyright': ('copyright', None), + u'genre': ('genre', None), + u'actor': ('actors', None), + u'written_by': ('writer', None), + u'producer': ('producer', None), + u'production_studio': ('studio', None), + u'law_rating': ('rating', None), + u'summary': ('summary', None), + u'synopsis': ('synopsis', None), +} + + +class EbmlEntity: + """ + This is class that is responsible to handle one Ebml entity as described in + the Matroska/Ebml spec + """ + def __init__(self, inbuf): + # Compute the EBML id + # Set the CRC len to zero + self.crc_len = 0 + # Now loop until we find an entity without CRC + try: + self.build_entity(inbuf) + except IndexError: + raise ParseError() + while self.get_id() == MATROSKA_CRC32_ID: + self.crc_len += self.get_total_len() + inbuf = inbuf[self.get_total_len():] + self.build_entity(inbuf) + + def build_entity(self, inbuf): + self.compute_id(inbuf) + + if self.id_len == 0: + log.error(u'EBML entity not found, bad file format') + raise ParseError() + + self.entity_len, self.len_size = self.compute_len(inbuf[self.id_len:]) + self.entity_data = inbuf[self.get_header_len() : self.get_total_len()] + self.ebml_length = self.entity_len + self.entity_len = min(len(self.entity_data), self.entity_len) + + # if the data size is 8 or less, it could be a numeric value + self.value = 0 + if self.entity_len <= 8: + for pos, shift in 
zip(range(self.entity_len), range((self.entity_len - 1) * 8, -1, -8)): + self.value |= ord(self.entity_data[pos]) << shift + + + def add_data(self, data): + maxlen = self.ebml_length - len(self.entity_data) + if maxlen <= 0: + return + self.entity_data += data[:maxlen] + self.entity_len = len(self.entity_data) + + + def compute_id(self, inbuf): + self.id_len = 0 + if len(inbuf) < 1: + return 0 + first = ord(inbuf[0]) + if first & 0x80: + self.id_len = 1 + self.entity_id = first + elif first & 0x40: + if len(inbuf) < 2: + return 0 + self.id_len = 2 + self.entity_id = ord(inbuf[0]) << 8 | ord(inbuf[1]) + elif first & 0x20: + if len(inbuf) < 3: + return 0 + self.id_len = 3 + self.entity_id = (ord(inbuf[0]) << 16) | (ord(inbuf[1]) << 8) | \ + (ord(inbuf[2])) + elif first & 0x10: + if len(inbuf) < 4: + return 0 + self.id_len = 4 + self.entity_id = (ord(inbuf[0]) << 24) | (ord(inbuf[1]) << 16) | \ + (ord(inbuf[2]) << 8) | (ord(inbuf[3])) + self.entity_str = inbuf[0:self.id_len] + + + def compute_len(self, inbuf): + if not inbuf: + return 0, 0 + i = num_ffs = 0 + len_mask = 0x80 + len = ord(inbuf[0]) + while not len & len_mask: + i += 1 + len_mask >>= 1 + if i >= 8: + return 0, 0 + + len &= len_mask - 1 + if len == len_mask - 1: + num_ffs += 1 + for p in range(i): + len = (len << 8) | ord(inbuf[p + 1]) + if len & 0xff == 0xff: + num_ffs += 1 + if num_ffs == i + 1: + len = 0 + return len, i + 1 + + + def get_crc_len(self): + return self.crc_len + + + def get_value(self): + return self.value + + + def get_float_value(self): + if len(self.entity_data) == 4: + return unpack('!f', self.entity_data)[0] + elif len(self.entity_data) == 8: + return unpack('!d', self.entity_data)[0] + return 0.0 + + + def get_data(self): + return self.entity_data + + + def get_utf8(self): + return unicode(self.entity_data, 'utf-8', 'replace') + + + def get_str(self): + return unicode(self.entity_data, 'ascii', 'replace') + + + def get_id(self): + return self.entity_id + + + def get_str_id(self): + 
return self.entity_str + + + def get_len(self): + return self.entity_len + + + def get_total_len(self): + return self.entity_len + self.id_len + self.len_size + + + def get_header_len(self): + return self.id_len + self.len_size + + + +class Matroska(core.AVContainer): + """ + Matroska video and audio parser. If at least one video stream is + detected it will set the type to MEDIA_AV. + """ + def __init__(self, file): + core.AVContainer.__init__(self) + self.samplerate = 1 + + self.file = file + # Read enough that we're likely to get the full seekhead (FIXME: kludge) + buffer = file.read(2000) + if len(buffer) == 0: + # Regular File end + raise ParseError() + + # Check the Matroska header + header = EbmlEntity(buffer) + if header.get_id() != MATROSKA_HEADER_ID: + raise ParseError() + + log.debug(u'HEADER ID found %08X' % header.get_id()) + self.mime = 'video/x-matroska' + self.type = 'Matroska' + self.has_idx = False + self.objects_by_uid = {} + + # Now get the segment + self.segment = segment = EbmlEntity(buffer[header.get_total_len():]) + # Record file offset of segment data for seekheads + self.segment.offset = header.get_total_len() + segment.get_header_len() + if segment.get_id() != MATROSKA_SEGMENT_ID: + log.debug(u'SEGMENT ID not found %08X' % segment.get_id()) + return + + log.debug(u'SEGMENT ID found %08X' % segment.get_id()) + try: + for elem in self.process_one_level(segment): + if elem.get_id() == MATROSKA_SEEKHEAD_ID: + self.process_elem(elem) + except ParseError: + pass + + if not self.has_idx: + log.warning(u'File has no index') + self._set('corrupt', True) + + def process_elem(self, elem): + elem_id = elem.get_id() + log.debug(u'BEGIN: process element %r' % hex(elem_id)) + if elem_id == MATROSKA_SEGMENT_INFO_ID: + duration = 0 + scalecode = 1000000.0 + + for ielem in self.process_one_level(elem): + ielem_id = ielem.get_id() + if ielem_id == MATROSKA_TIMECODESCALE_ID: + scalecode = ielem.get_value() + elif ielem_id == MATROSKA_DURATION_ID: + duration 
= ielem.get_float_value() + elif ielem_id == MATROSKA_TITLE_ID: + self.title = ielem.get_utf8() + elif ielem_id == MATROSKA_DATE_UTC_ID: + timestamp = unpack('!q', ielem.get_data())[0] / 10.0 ** 9 + # Date is offset 2001-01-01 00:00:00 (timestamp 978307200.0) + self.timestamp = int(timestamp + 978307200) + + self.length = duration * scalecode / 1000000000.0 + + elif elem_id == MATROSKA_TRACKS_ID: + self.process_tracks(elem) + + elif elem_id == MATROSKA_CHAPTERS_ID: + self.process_chapters(elem) + + elif elem_id == MATROSKA_ATTACHMENTS_ID: + self.process_attachments(elem) + + elif elem_id == MATROSKA_SEEKHEAD_ID: + self.process_seekhead(elem) + + elif elem_id == MATROSKA_TAGS_ID: + self.process_tags(elem) + + elif elem_id == MATROSKA_CUES_ID: + self.has_idx = True + + log.debug(u'END: process element %r' % hex(elem_id)) + return True + + + def process_seekhead(self, elem): + for seek_elem in self.process_one_level(elem): + if seek_elem.get_id() != MATROSKA_SEEK_ID: + continue + for sub_elem in self.process_one_level(seek_elem): + if sub_elem.get_id() == MATROSKA_SEEKID_ID: + if sub_elem.get_value() == MATROSKA_CLUSTER_ID: + # Not interested in these. + return + + elif sub_elem.get_id() == MATROSKA_SEEK_POSITION_ID: + self.file.seek(self.segment.offset + sub_elem.get_value()) + buffer = self.file.read(100) + try: + elem = EbmlEntity(buffer) + except ParseError: + continue + + # Fetch all data necessary for this element. 
+ elem.add_data(self.file.read(elem.ebml_length)) + self.process_elem(elem) + + + def process_tracks(self, tracks): + tracksbuf = tracks.get_data() + index = 0 + while index < tracks.get_len(): + trackelem = EbmlEntity(tracksbuf[index:]) + log.debug (u'ELEMENT %X found' % trackelem.get_id()) + self.process_track(trackelem) + index += trackelem.get_total_len() + trackelem.get_crc_len() + + + def process_one_level(self, item): + buf = item.get_data() + index = 0 + while index < item.get_len(): + if len(buf[index:]) == 0: + break + elem = EbmlEntity(buf[index:]) + yield elem + index += elem.get_total_len() + elem.get_crc_len() + + def set_track_defaults(self, track): + track.language = 'eng' + + def process_track(self, track): + # Collapse generator into a list since we need to iterate over it + # twice. + elements = [x for x in self.process_one_level(track)] + track_type = [x.get_value() for x in elements if x.get_id() == MATROSKA_TRACK_TYPE_ID] + if not track_type: + log.debug(u'Bad track: no type id found') + return + + track_type = track_type[0] + track = None + + if track_type == MATROSKA_VIDEO_TRACK: + log.debug(u'Video track found') + track = self.process_video_track(elements) + elif track_type == MATROSKA_AUDIO_TRACK: + log.debug(u'Audio track found') + track = self.process_audio_track(elements) + elif track_type == MATROSKA_SUBTITLES_TRACK: + log.debug(u'Subtitle track found') + track = core.Subtitle() + self.set_track_defaults(track) + track.id = len(self.subtitles) + self.subtitles.append(track) + for elem in elements: + self.process_track_common(elem, track) + + + def process_track_common(self, elem, track): + elem_id = elem.get_id() + if elem_id == MATROSKA_TRACK_LANGUAGE_ID: + track.language = elem.get_str() + log.debug(u'Track language found: %r' % track.language) + elif elem_id == MATROSKA_NAME_ID: + track.title = elem.get_utf8() + elif elem_id == MATROSKA_TRACK_NUMBER_ID: + track.trackno = elem.get_value() + elif elem_id == 
MATROSKA_TRACK_FLAG_ENABLED_ID: + track.enabled = bool(elem.get_value()) + elif elem_id == MATROSKA_TRACK_FLAG_DEFAULT_ID: + track.default = bool(elem.get_value()) + elif elem_id == MATROSKA_CODEC_ID: + track.codec = elem.get_str() + elif elem_id == MATROSKA_CODEC_PRIVATE_ID: + track.codec_private = elem.get_data() + elif elem_id == MATROSKA_TRACK_UID_ID: + self.objects_by_uid[elem.get_value()] = track + + + def process_video_track(self, elements): + track = core.VideoStream() + # Defaults + track.codec = u'Unknown' + track.fps = 0 + self.set_track_defaults(track) + + for elem in elements: + elem_id = elem.get_id() + if elem_id == MATROSKA_CODEC_ID: + track.codec = elem.get_str() + + elif elem_id == MATROSKA_FRAME_DURATION_ID: + try: + track.fps = 1 / (pow(10, -9) * (elem.get_value())) + except ZeroDivisionError: + pass + + elif elem_id == MATROSKA_VIDEO_SETTINGS_ID: + d_width = d_height = None + for settings_elem in self.process_one_level(elem): + settings_elem_id = settings_elem.get_id() + if settings_elem_id == MATROSKA_VIDEO_WIDTH_ID: + track.width = settings_elem.get_value() + elif settings_elem_id == MATROSKA_VIDEO_HEIGHT_ID: + track.height = settings_elem.get_value() + elif settings_elem_id == MATROSKA_VIDEO_DISPLAY_WIDTH_ID: + d_width = settings_elem.get_value() + elif settings_elem_id == MATROSKA_VIDEO_DISPLAY_HEIGHT_ID: + d_height = settings_elem.get_value() + elif settings_elem_id == MATROSKA_VIDEO_INTERLACED_ID: + value = int(settings_elem.get_value()) + self._set('interlaced', value) + + if None not in [d_width, d_height]: + track.aspect = float(d_width) / d_height + + else: + self.process_track_common(elem, track) + + # convert codec information + # http://haali.cs.msu.ru/mkv/codecs.pdf + if track.codec in FOURCCMap: + track.codec = FOURCCMap[track.codec] + elif '/' in track.codec and track.codec.split('/')[0] + '/' in FOURCCMap: + track.codec = FOURCCMap[track.codec.split('/')[0] + '/'] + elif track.codec.endswith('FOURCC') and 
len(track.codec_private or '') == 40: + track.codec = track.codec_private[16:20] + elif track.codec.startswith('V_REAL/'): + track.codec = track.codec[7:] + elif track.codec.startswith('V_'): + # FIXME: add more video codecs here + track.codec = track.codec[2:] + + track.id = len(self.video) + self.video.append(track) + return track + + + def process_audio_track(self, elements): + track = core.AudioStream() + track.codec = u'Unknown' + self.set_track_defaults(track) + + for elem in elements: + elem_id = elem.get_id() + if elem_id == MATROSKA_CODEC_ID: + track.codec = elem.get_str() + elif elem_id == MATROSKA_AUDIO_SETTINGS_ID: + for settings_elem in self.process_one_level(elem): + settings_elem_id = settings_elem.get_id() + if settings_elem_id == MATROSKA_AUDIO_SAMPLERATE_ID: + track.samplerate = settings_elem.get_float_value() + elif settings_elem_id == MATROSKA_AUDIO_CHANNELS_ID: + track.channels = settings_elem.get_value() + else: + self.process_track_common(elem, track) + + + if track.codec in FOURCCMap: + track.codec = FOURCCMap[track.codec] + elif '/' in track.codec and track.codec.split('/')[0] + '/' in FOURCCMap: + track.codec = FOURCCMap[track.codec.split('/')[0] + '/'] + elif track.codec.startswith('A_'): + track.codec = track.codec[2:] + + track.id = len(self.audio) + self.audio.append(track) + return track + + + def process_chapters(self, chapters): + elements = self.process_one_level(chapters) + for elem in elements: + if elem.get_id() == MATROSKA_EDITION_ENTRY_ID: + buf = elem.get_data() + index = 0 + while index < elem.get_len(): + sub_elem = EbmlEntity(buf[index:]) + if sub_elem.get_id() == MATROSKA_CHAPTER_ATOM_ID: + self.process_chapter_atom(sub_elem) + index += sub_elem.get_total_len() + sub_elem.get_crc_len() + + + def process_chapter_atom(self, atom): + elements = self.process_one_level(atom) + chap = core.Chapter() + + for elem in elements: + elem_id = elem.get_id() + if elem_id == MATROSKA_CHAPTER_TIME_START_ID: + # Scale timecode to seconds 
(float) + chap.pos = elem.get_value() / 1000000 / 1000.0 + elif elem_id == MATROSKA_CHAPTER_FLAG_ENABLED_ID: + chap.enabled = elem.get_value() + elif elem_id == MATROSKA_CHAPTER_DISPLAY_ID: + # Matroska supports multiple (chapter name, language) pairs for + # each chapter, so chapter names can be internationalized. This + # logic will only take the last one in the list. + for display_elem in self.process_one_level(elem): + if display_elem.get_id() == MATROSKA_CHAPTER_STRING_ID: + chap.name = display_elem.get_utf8() + elif elem_id == MATROSKA_CHAPTER_UID_ID: + self.objects_by_uid[elem.get_value()] = chap + + log.debug(u'Chapter %r found', chap.name) + chap.id = len(self.chapters) + self.chapters.append(chap) + + + def process_attachments(self, attachments): + buf = attachments.get_data() + index = 0 + while index < attachments.get_len(): + elem = EbmlEntity(buf[index:]) + if elem.get_id() == MATROSKA_ATTACHED_FILE_ID: + self.process_attachment(elem) + index += elem.get_total_len() + elem.get_crc_len() + + + def process_attachment(self, attachment): + elements = self.process_one_level(attachment) + name = desc = mimetype = "" + data = None + + for elem in elements: + elem_id = elem.get_id() + if elem_id == MATROSKA_FILE_NAME_ID: + name = elem.get_utf8() + elif elem_id == MATROSKA_FILE_DESC_ID: + desc = elem.get_utf8() + elif elem_id == MATROSKA_FILE_MIME_TYPE_ID: + mimetype = elem.get_data() + elif elem_id == MATROSKA_FILE_DATA_ID: + data = elem.get_data() + + # Right now we only support attachments that could be cover images. + # Make a guess to see if this attachment is a cover image. + if mimetype.startswith("image/") and u"cover" in (name + desc).lower() and data: + self.thumbnail = data + + log.debug(u'Attachment %r found' % name) + + + def process_tags(self, tags): + # Tags spec: http://www.matroska.org/technical/specs/tagging/index.html + # Iterate over Tags children. 
Tags element children is a + # Tag element (whose children are SimpleTags) and a Targets element + # whose children specific what objects the tags apply to. + for tag_elem in self.process_one_level(tags): + # Start a new dict to hold all SimpleTag elements. + tags_dict = core.Tags() + # A list of target uids this tags dict applies too. If empty, + # tags are global. + targets = [] + for sub_elem in self.process_one_level(tag_elem): + if sub_elem.get_id() == MATROSKA_SIMPLE_TAG_ID: + self.process_simple_tag(sub_elem, tags_dict) + elif sub_elem.get_id() == MATROSKA_TARGETS_ID: + # Targets element: if there is no uid child (track uid, + # chapter uid, etc.) then the tags dict applies to the + # whole file (top-level Media object). + for target_elem in self.process_one_level(sub_elem): + target_elem_id = target_elem.get_id() + if target_elem_id in (MATRSOKA_TAGS_TRACK_UID_ID, MATRSOKA_TAGS_EDITION_UID_ID, + MATRSOKA_TAGS_CHAPTER_UID_ID, MATRSOKA_TAGS_ATTACHMENT_UID_ID): + targets.append(target_elem.get_value()) + elif target_elem_id == MATROSKA_TARGET_TYPE_VALUE_ID: + # Target types not supported for now. (Unclear how this + # would fit with kaa.metadata.) + pass + if targets: + # Assign tags to all listed uids + for target in targets: + try: + self.objects_by_uid[target].tags.update(tags_dict) + self.tags_to_attributes(self.objects_by_uid[target], tags_dict) + except KeyError: + log.warning(u'Tags assigned to unknown/unsupported target uid %d', target) + else: + self.tags.update(tags_dict) + self.tags_to_attributes(self, tags_dict) + + + def process_simple_tag(self, simple_tag_elem, tags_dict): + """ + Returns a dict representing the Tag element. 
+ """ + name = lang = value = children = None + binary = False + for elem in self.process_one_level(simple_tag_elem): + elem_id = elem.get_id() + if elem_id == MATROSKA_TAG_NAME_ID: + name = elem.get_utf8().lower() + elif elem_id == MATROSKA_TAG_STRING_ID: + value = elem.get_utf8() + elif elem_id == MATROSKA_TAG_BINARY_ID: + value = elem.get_data() + binary = True + elif elem_id == MATROSKA_TAG_LANGUAGE_ID: + lang = elem.get_utf8() + elif elem_id == MATROSKA_SIMPLE_TAG_ID: + if children is None: + children = core.Tags() + self.process_simple_tag(elem, children) + + if children: + # Convert ourselves to a Tags object. + children.value = value + children.langcode = lang + value = children + else: + if name.startswith('date_'): + # Try to convert date to a datetime object. + value = matroska_date_to_datetime(value) + value = core.Tag(value, lang, binary) + + if name in tags_dict: + # Multiple items of this tag name. + if not isinstance(tags_dict[name], list): + # Convert to a list + tags_dict[name] = [tags_dict[name]] + # Append to list + tags_dict[name].append(value) + else: + tags_dict[name] = value + + + def tags_to_attributes(self, obj, tags): + # Convert tags to core attributes. + for name, tag in tags.items(): + if isinstance(tag, dict): + # Nested tags dict, recurse. + self.tags_to_attributes(obj, tag) + continue + elif name not in TAGS_MAP: + continue + + attr, filter = TAGS_MAP[name] + if attr not in obj._keys and attr not in self._keys: + # Tag is not in any core attribute for this object or global, + # so skip. + continue + + # Pull value out of Tag object or list of Tag objects. + value = [item.value for item in tag] if isinstance(tag, list) else tag.value + if filter: + try: + value = [filter(item) for item in value] if isinstance(value, list) else filter(value) + except Exception, e: + log.warning(u'Failed to convert tag to core attribute: %r', e) + # Special handling for tv series recordings. 
The 'title' tag + # can be used for both the series and the episode name. The + # same is true for trackno which may refer to the season + # and the episode number. Therefore, if we find these + # attributes already set we try some guessing. + if attr == 'trackno' and getattr(self, attr) is not None: + # delete trackno and save season and episode + self.season = self.trackno + self.episode = value + self.trackno = None + continue + if attr == 'title' and getattr(self, attr) is not None: + # store current value of title as series and use current + # value of title as title + self.series = self.title + if attr in obj._keys: + setattr(obj, attr, value) + else: + setattr(self, attr, value) + + +Parser = Matroska diff --git a/lib/enzyme/mp4.py b/lib/enzyme/mp4.py new file mode 100644 index 0000000000000000000000000000000000000000..c53f30d3a6b6cba50dc0bcc92609a4c3e305f85f --- /dev/null +++ b/lib/enzyme/mp4.py @@ -0,0 +1,474 @@ +# -*- coding: utf-8 -*- +# enzyme - Video metadata parser +# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com> +# Copyright 2003-2007 Thomas Schueppel <stain@acm.org> +# Copyright 2003-2007 Dirk Meyer <dischi@freevo.org> +# +# This file is part of enzyme. +# +# enzyme is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# enzyme is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with enzyme. If not, see <http://www.gnu.org/licenses/>. 
+__all__ = ['Parser'] + +import zlib +import logging +import StringIO +import struct +from exceptions import ParseError +import core + +# get logging object +log = logging.getLogger(__name__) + + +# http://developer.apple.com/documentation/QuickTime/QTFF/index.html +# http://developer.apple.com/documentation/QuickTime/QTFF/QTFFChap4/\ +# chapter_5_section_2.html#//apple_ref/doc/uid/TP40000939-CH206-BBCBIICE +# Note: May need to define custom log level to work like ATOM_DEBUG did here + +QTUDTA = { + 'nam': 'title', + 'aut': 'artist', + 'cpy': 'copyright' +} + +QTLANGUAGES = { + 0: "en", + 1: "fr", + 2: "de", + 3: "it", + 4: "nl", + 5: "sv", + 6: "es", + 7: "da", + 8: "pt", + 9: "no", + 10: "he", + 11: "ja", + 12: "ar", + 13: "fi", + 14: "el", + 15: "is", + 16: "mt", + 17: "tr", + 18: "hr", + 19: "Traditional Chinese", + 20: "ur", + 21: "hi", + 22: "th", + 23: "ko", + 24: "lt", + 25: "pl", + 26: "hu", + 27: "et", + 28: "lv", + 29: "Lappish", + 30: "fo", + 31: "Farsi", + 32: "ru", + 33: "Simplified Chinese", + 34: "Flemish", + 35: "ga", + 36: "sq", + 37: "ro", + 38: "cs", + 39: "sk", + 40: "sl", + 41: "yi", + 42: "sr", + 43: "mk", + 44: "bg", + 45: "uk", + 46: "be", + 47: "uz", + 48: "kk", + 49: "az", + 50: "AzerbaijanAr", + 51: "hy", + 52: "ka", + 53: "mo", + 54: "ky", + 55: "tg", + 56: "tk", + 57: "mn", + 58: "MongolianCyr", + 59: "ps", + 60: "ku", + 61: "ks", + 62: "sd", + 63: "bo", + 64: "ne", + 65: "sa", + 66: "mr", + 67: "bn", + 68: "as", + 69: "gu", + 70: "pa", + 71: "or", + 72: "ml", + 73: "kn", + 74: "ta", + 75: "te", + 76: "si", + 77: "my", + 78: "Khmer", + 79: "lo", + 80: "vi", + 81: "id", + 82: "tl", + 83: "MalayRoman", + 84: "MalayArabic", + 85: "am", + 86: "ti", + 87: "om", + 88: "so", + 89: "sw", + 90: "Ruanda", + 91: "Rundi", + 92: "Chewa", + 93: "mg", + 94: "eo", + 128: "cy", + 129: "eu", + 130: "ca", + 131: "la", + 132: "qu", + 133: "gn", + 134: "ay", + 135: "tt", + 136: "ug", + 137: "Dzongkha", + 138: "JavaneseRom", +} + +class 
MPEG4(core.AVContainer): + """ + Parser for the MP4 container format. This format is mostly + identical to Apple Quicktime and 3GP files. It maps to mp4, mov, + qt and some other extensions. + """ + table_mapping = {'QTUDTA': QTUDTA} + + def __init__(self, file): + core.AVContainer.__init__(self) + self._references = [] + + self.mime = 'video/quicktime' + self.type = 'Quicktime Video' + h = file.read(8) + try: + (size, type) = struct.unpack('>I4s', h) + except struct.error: + # EOF. + raise ParseError() + + if type == 'ftyp': + # file type information + if size >= 12: + # this should always happen + if file.read(4) != 'qt ': + # not a quicktime movie, it is a mpeg4 container + self.mime = 'video/mp4' + self.type = 'MPEG-4 Video' + size -= 4 + file.seek(size - 8, 1) + h = file.read(8) + (size, type) = struct.unpack('>I4s', h) + + while type in ['mdat', 'skip']: + # movie data at the beginning, skip + file.seek(size - 8, 1) + h = file.read(8) + (size, type) = struct.unpack('>I4s', h) + + if not type in ['moov', 'wide', 'free']: + log.debug(u'invalid header: %r' % type) + raise ParseError() + + # Extended size + if size == 1: + size = struct.unpack('>Q', file.read(8)) + + # Back over the atom header we just read, since _readatom expects the + # file position to be at the start of an atom. + file.seek(-8, 1) + while self._readatom(file): + pass + + if self._references: + self._set('references', self._references) + + + def _readatom(self, file): + s = file.read(8) + if len(s) < 8: + return 0 + + atomsize, atomtype = struct.unpack('>I4s', s) + if not str(atomtype).decode('latin1').isalnum(): + # stop at nonsense data + return 0 + + log.debug(u'%r [%X]' % (atomtype, atomsize)) + + if atomtype == 'udta': + # Userdata (Metadata) + pos = 0 + tabl = {} + i18ntabl = {} + atomdata = file.read(atomsize - 8) + while pos < atomsize - 12: + (datasize, datatype) = struct.unpack('>I4s', atomdata[pos:pos + 8]) + if ord(datatype[0]) == 169: + # i18n Metadata... 
+ mypos = 8 + pos + while mypos + 4 < datasize + pos: + # first 4 Bytes are i18n header + (tlen, lang) = struct.unpack('>HH', atomdata[mypos:mypos + 4]) + i18ntabl[lang] = i18ntabl.get(lang, {}) + l = atomdata[mypos + 4:mypos + tlen + 4] + i18ntabl[lang][datatype[1:]] = l + mypos += tlen + 4 + elif datatype == 'WLOC': + # Drop Window Location + pass + else: + if ord(atomdata[pos + 8:pos + datasize][0]) > 1: + tabl[datatype] = atomdata[pos + 8:pos + datasize] + pos += datasize + if len(i18ntabl.keys()) > 0: + for k in i18ntabl.keys(): + if QTLANGUAGES.has_key(k) and QTLANGUAGES[k] == 'en': + self._appendtable('QTUDTA', i18ntabl[k]) + self._appendtable('QTUDTA', tabl) + else: + log.debug(u'NO i18') + self._appendtable('QTUDTA', tabl) + + elif atomtype == 'trak': + atomdata = file.read(atomsize - 8) + pos = 0 + trackinfo = {} + tracktype = None + while pos < atomsize - 8: + (datasize, datatype) = struct.unpack('>I4s', atomdata[pos:pos + 8]) + + if datatype == 'tkhd': + tkhd = struct.unpack('>6I8x4H36xII', atomdata[pos + 8:pos + datasize]) + trackinfo['width'] = tkhd[10] >> 16 + trackinfo['height'] = tkhd[11] >> 16 + trackinfo['id'] = tkhd[3] + + try: + # XXX Timestamp of Seconds is since January 1st 1904! + # XXX 2082844800 is the difference between Unix and + # XXX Apple time. FIXME to work on Apple, too + self.timestamp = int(tkhd[1]) - 2082844800 + except Exception, e: + log.exception(u'There was trouble extracting timestamp') + + elif datatype == 'mdia': + pos += 8 + datasize -= 8 + log.debug(u'--> mdia information') + + while datasize: + mdia = struct.unpack('>I4s', atomdata[pos:pos + 8]) + if mdia[1] == 'mdhd': + # Parse based on version of mdhd header. 
See + # http://wiki.multimedia.cx/index.php?title=QuickTime_container#mdhd + ver = ord(atomdata[pos + 8]) + if ver == 0: + mdhd = struct.unpack('>IIIIIhh', atomdata[pos + 8:pos + 8 + 24]) + elif ver == 1: + mdhd = struct.unpack('>IQQIQhh', atomdata[pos + 8:pos + 8 + 36]) + else: + mdhd = None + + if mdhd: + # duration / time scale + trackinfo['length'] = mdhd[4] / mdhd[3] + if mdhd[5] in QTLANGUAGES: + trackinfo['language'] = QTLANGUAGES[mdhd[5]] + # mdhd[6] == quality + self.length = max(self.length, mdhd[4] / mdhd[3]) + elif mdia[1] == 'minf': + # minf has only atoms inside + pos -= (mdia[0] - 8) + datasize += (mdia[0] - 8) + elif mdia[1] == 'stbl': + # stbl has only atoms inside + pos -= (mdia[0] - 8) + datasize += (mdia[0] - 8) + elif mdia[1] == 'hdlr': + hdlr = struct.unpack('>I4s4s', atomdata[pos + 8:pos + 8 + 12]) + if hdlr[1] == 'mhlr': + if hdlr[2] == 'vide': + tracktype = 'video' + if hdlr[2] == 'soun': + tracktype = 'audio' + elif mdia[1] == 'stsd': + stsd = struct.unpack('>2I', atomdata[pos + 8:pos + 8 + 8]) + if stsd[1] > 0: + codec = atomdata[pos + 16:pos + 16 + 8] + codec = struct.unpack('>I4s', codec) + trackinfo['codec'] = codec[1] + if codec[1] == 'jpeg': + tracktype = 'image' + elif mdia[1] == 'dinf': + dref = struct.unpack('>I4s', atomdata[pos + 8:pos + 8 + 8]) + log.debug(u' --> %r, %r (useless)' % mdia) + if dref[1] == 'dref': + num = struct.unpack('>I', atomdata[pos + 20:pos + 20 + 4])[0] + rpos = pos + 20 + 4 + for ref in range(num): + # FIXME: do somthing if this references + ref = struct.unpack('>I3s', atomdata[rpos:rpos + 7]) + data = atomdata[rpos + 7:rpos + ref[0]] + rpos += ref[0] + else: + if mdia[1].startswith('st'): + log.debug(u' --> %r, %r (sample)' % mdia) + elif mdia[1] == 'vmhd' and not tracktype: + # indicates that this track is video + tracktype = 'video' + elif mdia[1] in ['vmhd', 'smhd'] and not tracktype: + # indicates that this track is audio + tracktype = 'audio' + else: + log.debug(u' --> %r, %r (unknown)' % mdia) + + 
pos += mdia[0] + datasize -= mdia[0] + + elif datatype == 'udta': + log.debug(u'udta: %r' % struct.unpack('>I4s', atomdata[:8])) + else: + if datatype == 'edts': + log.debug(u'--> %r [%d] (edit list)' % \ + (datatype, datasize)) + else: + log.debug(u'--> %r [%d] (unknown)' % \ + (datatype, datasize)) + pos += datasize + + info = None + if tracktype == 'video': + info = core.VideoStream() + self.video.append(info) + if tracktype == 'audio': + info = core.AudioStream() + self.audio.append(info) + if info: + for key, value in trackinfo.items(): + setattr(info, key, value) + + elif atomtype == 'mvhd': + # movie header + mvhd = struct.unpack('>6I2h', file.read(28)) + self.length = max(self.length, mvhd[4] / mvhd[3]) + self.volume = mvhd[6] + file.seek(atomsize - 8 - 28, 1) + + + elif atomtype == 'cmov': + # compressed movie + datasize, atomtype = struct.unpack('>I4s', file.read(8)) + if not atomtype == 'dcom': + return atomsize + + method = struct.unpack('>4s', file.read(datasize - 8))[0] + + datasize, atomtype = struct.unpack('>I4s', file.read(8)) + if not atomtype == 'cmvd': + return atomsize + + if method == 'zlib': + data = file.read(datasize - 8) + try: + decompressed = zlib.decompress(data) + except Exception, e: + try: + decompressed = zlib.decompress(data[4:]) + except Exception, e: + log.exception(u'There was a proble decompressiong atom') + return atomsize + + decompressedIO = StringIO.StringIO(decompressed) + while self._readatom(decompressedIO): + pass + + else: + log.info(u'unknown compression %r' % method) + # unknown compression method + file.seek(datasize - 8, 1) + + elif atomtype == 'moov': + # decompressed movie info + while self._readatom(file): + pass + + elif atomtype == 'mdat': + pos = file.tell() + atomsize - 8 + # maybe there is data inside the mdat + log.info(u'parsing mdat') + while self._readatom(file): + pass + log.info(u'end of mdat') + file.seek(pos, 0) + + + elif atomtype == 'rmra': + # reference list + while self._readatom(file): + pass + 
+ elif atomtype == 'rmda': + # reference + atomdata = file.read(atomsize - 8) + pos = 0 + url = '' + quality = 0 + datarate = 0 + while pos < atomsize - 8: + (datasize, datatype) = struct.unpack('>I4s', atomdata[pos:pos + 8]) + if datatype == 'rdrf': + rflags, rtype, rlen = struct.unpack('>I4sI', atomdata[pos + 8:pos + 20]) + if rtype == 'url ': + url = atomdata[pos + 20:pos + 20 + rlen] + if url.find('\0') > 0: + url = url[:url.find('\0')] + elif datatype == 'rmqu': + quality = struct.unpack('>I', atomdata[pos + 8:pos + 12])[0] + + elif datatype == 'rmdr': + datarate = struct.unpack('>I', atomdata[pos + 12:pos + 16])[0] + + pos += datasize + if url: + self._references.append((url, quality, datarate)) + + else: + if not atomtype in ['wide', 'free']: + log.info(u'unhandled base atom %r' % atomtype) + + # Skip unknown atoms + try: + file.seek(atomsize - 8, 1) + except IOError: + return 0 + + return atomsize + + +Parser = MPEG4 diff --git a/lib/enzyme/mpeg.py b/lib/enzyme/mpeg.py new file mode 100644 index 0000000000000000000000000000000000000000..3d43ba4fd73ab855d87570168b7af3ee1b6b4d3e --- /dev/null +++ b/lib/enzyme/mpeg.py @@ -0,0 +1,913 @@ +# -*- coding: utf-8 -*- +# enzyme - Video metadata parser +# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com> +# Copyright 2003-2006 Thomas Schueppel <stain@acm.org> +# Copyright 2003-2006 Dirk Meyer <dischi@freevo.org> +# +# This file is part of enzyme. +# +# enzyme is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# enzyme is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with enzyme. If not, see <http://www.gnu.org/licenses/>. +__all__ = ['Parser'] + +import os +import struct +import logging +import stat +from exceptions import ParseError +import core + +# get logging object +log = logging.getLogger(__name__) + +##------------------------------------------------------------------------ +## START_CODE +## +## Start Codes, with 'slice' occupying 0x01..0xAF +##------------------------------------------------------------------------ +START_CODE = { + 0x00 : 'picture_start_code', + 0xB0 : 'reserved', + 0xB1 : 'reserved', + 0xB2 : 'user_data_start_code', + 0xB3 : 'sequence_header_code', + 0xB4 : 'sequence_error_code', + 0xB5 : 'extension_start_code', + 0xB6 : 'reserved', + 0xB7 : 'sequence end', + 0xB8 : 'group of pictures', +} +for i in range(0x01, 0xAF): + START_CODE[i] = 'slice_start_code' + +##------------------------------------------------------------------------ +## START CODES +##------------------------------------------------------------------------ +PICTURE = 0x00 +USERDATA = 0xB2 +SEQ_HEAD = 0xB3 +SEQ_ERR = 0xB4 +EXT_START = 0xB5 +SEQ_END = 0xB7 +GOP = 0xB8 + +SEQ_START_CODE = 0xB3 +PACK_PKT = 0xBA +SYS_PKT = 0xBB +PADDING_PKT = 0xBE +AUDIO_PKT = 0xC0 +VIDEO_PKT = 0xE0 +PRIVATE_STREAM1 = 0xBD +PRIVATE_STREAM2 = 0xBf + +TS_PACKET_LENGTH = 188 +TS_SYNC = 0x47 + +##------------------------------------------------------------------------ +## FRAME_RATE +## +## A lookup table of all the standard frame rates. Some rates adhere to +## a particular profile that ensures compatibility with VLSI capabilities +## of the early to mid 1990s. +## +## CPB +## Constrained Parameters Bitstreams, an MPEG-1 set of sampling and +## bitstream parameters designed to normalize decoder computational +## complexity, buffer size, and memory bandwidth while still addressing +## the widest possible range of applications. 
+## +## Main Level +## MPEG-2 Video Main Profile and Main Level is analogous to MPEG-1's +## CPB, with sampling limits at CCIR 601 parameters (720x480x30 Hz or +## 720x576x24 Hz). +## +##------------------------------------------------------------------------ +FRAME_RATE = [ + 0, + 24000.0 / 1001, ## 3-2 pulldown NTSC (CPB/Main Level) + 24, ## Film (CPB/Main Level) + 25, ## PAL/SECAM or 625/60 video + 30000.0 / 1001, ## NTSC (CPB/Main Level) + 30, ## drop-frame NTSC or component 525/60 (CPB/Main Level) + 50, ## double-rate PAL + 60000.0 / 1001, ## double-rate NTSC + 60, ## double-rate, drop-frame NTSC/component 525/60 video + ] + +##------------------------------------------------------------------------ +## ASPECT_RATIO -- INCOMPLETE? +## +## This lookup table maps the header aspect ratio index to a float value. +## These are just the defined ratios for CPB I believe. As I understand +## it, a stream that doesn't adhere to one of these aspect ratios is +## technically considered non-compliant. +##------------------------------------------------------------------------ +ASPECT_RATIO = (None, # Forbidden + 1.0, # 1/1 (VGA) + 4.0 / 3, # 4/3 (TV) + 16.0 / 9, # 16/9 (Widescreen) + 2.21 # (Cinema) + ) + + +class MPEG(core.AVContainer): + """ + Parser for various MPEG files. This includes MPEG-1 and MPEG-2 + program streams, elementary streams and transport streams. The + reported length differs from the length reported by most video + players but the provides length here is correct. An MPEG file has + no additional metadata like title, etc; only codecs, length and + resolution is reported back. 
+ """ + def __init__(self, file): + core.AVContainer.__init__(self) + self.sequence_header_offset = 0 + self.mpeg_version = 2 + + # detect TS (fast scan) + if not self.isTS(file): + # detect system mpeg (many infos) + if not self.isMPEG(file): + # detect PES + if not self.isPES(file): + # Maybe it's MPEG-ES + if self.isES(file): + # If isES() succeeds, we needn't do anything further. + return + if file.name.lower().endswith('mpeg') or \ + file.name.lower().endswith('mpg'): + # This has to be an mpeg file. It could be a bad + # recording from an ivtv based hardware encoder with + # same bytes missing at the beginning. + # Do some more digging... + if not self.isMPEG(file, force=True) or \ + not self.video or not self.audio: + # does not look like an mpeg at all + raise ParseError() + else: + # no mpeg at all + raise ParseError() + + self.mime = 'video/mpeg' + if not self.video: + self.video.append(core.VideoStream()) + + if self.sequence_header_offset <= 0: + return + + self.progressive(file) + + for vi in self.video: + vi.width, vi.height = self.dxy(file) + vi.fps, vi.aspect = self.framerate_aspect(file) + vi.bitrate = self.bitrate(file) + if self.length: + vi.length = self.length + + if not self.type: + self.type = 'MPEG Video' + + # set fourcc codec for video and audio + vc, ac = 'MP2V', 'MP2A' + if self.mpeg_version == 1: + vc, ac = 'MPEG', 0x0050 + for v in self.video: + v.codec = vc + for a in self.audio: + if not a.codec: + a.codec = ac + + + def dxy(self, file): + """ + get width and height of the video + """ + file.seek(self.sequence_header_offset + 4, 0) + v = file.read(4) + x = struct.unpack('>H', v[:2])[0] >> 4 + y = struct.unpack('>H', v[1:3])[0] & 0x0FFF + return (x, y) + + + def framerate_aspect(self, file): + """ + read framerate and aspect ratio + """ + file.seek(self.sequence_header_offset + 7, 0) + v = struct.unpack('>B', file.read(1))[0] + try: + fps = FRAME_RATE[v & 0xf] + except IndexError: + fps = None + if v >> 4 < len(ASPECT_RATIO): + aspect 
            # (tail of MPEG.framerate_aspect -- statement truncated at chunk boundary)
            = ASPECT_RATIO[v >> 4]
        else:
            aspect = None
        return (fps, aspect)


    def progressive(self, file):
        """
        Try to find out with brute force if the mpeg is interlaced or not.
        Search for the Sequence_Extension in the extension header (01B5)
        """
        # NOTE(review): Python 2 bytes-as-str parsing throughout this file
        # (ord() on 1-char slices, `long`, string escapes as raw bytes).
        file.seek(0)
        buffer = ''
        count = 0
        while 1:
            if len(buffer) < 1000:
                count += 1
                if count > 1000:
                    # give up after ~1MB without a usable extension header
                    break
                buffer += file.read(1024)
            if len(buffer) < 1000:
                break
            # 0x000001B5: extension start code
            pos = buffer.find('\x00\x00\x01\xb5')
            if pos == -1 or len(buffer) - pos < 5:
                # keep a small tail so a code split across reads is still found
                buffer = buffer[-10:]
                continue
            # high nibble after the start code selects the extension type
            ext = (ord(buffer[pos + 4]) >> 4)
            if ext == 8:
                pass
            elif ext == 1:
                # Sequence_Extension: progressive_sequence flag
                if (ord(buffer[pos + 5]) >> 3) & 1:
                    self._set('progressive', True)
                else:
                    self._set('interlaced', True)
                return True
            else:
                log.debug(u'ext: %r' % ext)
            buffer = buffer[pos + 4:]
        return False


    ##------------------------------------------------------------------------
    ## bitrate()
    ##
    ## From the MPEG-2.2 spec:
    ##
    ##   bit_rate -- This is a 30-bit integer.  The lower 18 bits of the
    ##   integer are in bit_rate_value and the upper 12 bits are in
    ##   bit_rate_extension.  The 30-bit integer specifies the bitrate of the
    ##   bitstream measured in units of 400 bits/second, rounded upwards.
    ##   The value zero is forbidden.
    ##
    ## So ignoring all the variable bitrate stuff for now, this 30 bit integer
    ## multiplied times 400 bits/sec should give the rate in bits/sec.
    ##
    ## TODO: Variable bitrates?  I need one that implements this.
    ##
    ## Continued from the MPEG-2.2 spec:
    ##
    ##   If the bitstream is a constant bitrate stream, the bitrate specified
    ##   is the actual rate of operation of the VBV specified in annex C.  If
    ##   the bitstream is a variable bitrate stream, the STD specifications in
    ##   ISO/IEC 13818-1 supersede the VBV, and the bitrate specified here is
    ##   used to dimension the transport stream STD (2.4.2 in ITU-T Rec. xxx |
    ##   ISO/IEC 13818-1), or the program stream STD (2.4.5 in ITU-T Rec. xxx |
    ##   ISO/IEC 13818-1).
    ##
    ##   If the bitstream is not a constant rate bitstream the vbv_delay
    ##   field shall have the value FFFF in hexadecimal.
    ##
    ##   Given the value encoded in the bitrate field, the bitstream shall be
    ##   generated so that the video encoding and the worst case multiplex
    ##   jitter do not cause STD buffer overflow or underflow.
    ##
    ##
    ##------------------------------------------------------------------------


    ## Some parts in the code are based on mpgtx (mpgtx.sf.net)

    def bitrate(self, file):
        """
        read the bitrate (most of the time broken)
        """
        # bitrate lives 8 bytes into the sequence header: 18 of the 30 bits
        file.seek(self.sequence_header_offset + 8, 0)
        t, b = struct.unpack('>HB', file.read(3))
        vrate = t << 2 | b >> 6
        # units of 400 bits/second per the spec excerpt above
        return vrate * 400


    def ReadSCRMpeg2(self, buffer):
        """
        read SCR (timestamp) for MPEG2 at the buffer beginning (6 Bytes)

        Returns the timestamp in seconds (90 kHz clock), or None if the
        buffer is too short.
        """
        if len(buffer) < 6:
            return None

        highbit = (ord(buffer[0]) & 0x20) >> 5

        # reassemble the 32 low bits scattered across 5 bytes with marker bits
        low4Bytes = ((long(ord(buffer[0])) & 0x18) >> 3) << 30
        low4Bytes |= (ord(buffer[0]) & 0x03) << 28
        low4Bytes |= ord(buffer[1]) << 20
        low4Bytes |= (ord(buffer[2]) & 0xF8) << 12
        low4Bytes |= (ord(buffer[2]) & 0x03) << 13
        low4Bytes |= ord(buffer[3]) << 5
        low4Bytes |= (ord(buffer[4])) >> 3

        # 9-bit SCR extension (unused in the returned value)
        sys_clock_ref = (ord(buffer[4]) & 0x3) << 7
        sys_clock_ref |= (ord(buffer[5]) >> 1)

        return (long(highbit * (1 << 16) * (1 << 16)) + low4Bytes) / 90000


    def ReadSCRMpeg1(self, buffer):
        """
        read SCR (timestamp) for MPEG1 at the buffer beginning (5 Bytes)

        Returns the timestamp in seconds (90 kHz clock), or None if the
        buffer is too short.
        """
        if len(buffer) < 5:
            return None

        highbit = (ord(buffer[0]) >> 3) & 0x01

        low4Bytes = ((long(ord(buffer[0])) >> 1) & 0x03) << 30
        low4Bytes |= ord(buffer[1]) << 22;
        low4Bytes |= (ord(buffer[2]) >> 1) << 15;
        low4Bytes |= ord(buffer[3]) << 7;
        low4Bytes |= ord(buffer[4]) >> 1;

        return (long(highbit) * (1 << 16) * (1 << 16) + low4Bytes) / 90000;


    def ReadPTS(self, buffer):
        """
        read PTS (PES timestamp) at the buffer beginning (5 Bytes)

        Returns the timestamp in seconds (90 kHz clock).
        """
        high = ((ord(buffer[0]) & 0xF) >> 1)
        med = (ord(buffer[1]) << 7) + (ord(buffer[2]) >> 1)
        low = (ord(buffer[3]) << 7) + (ord(buffer[4]) >> 1)
        return ((long(high) << 30) + (med << 15) + low) / 90000


    def ReadHeader(self, buffer, offset):
        """
        Handle MPEG header in buffer on position offset
        Return None on error, new offset or 0 if the new offset can't be
        scanned
        """
        if buffer[offset:offset + 3] != '\x00\x00\x01':
            return None

        # start-code id byte (stream id / pack / padding / system ...)
        id = ord(buffer[offset + 3])

        if id == PADDING_PKT:
            # skip the padding packet: 16-bit length after the start code
            return offset + (ord(buffer[offset + 4]) << 8) + \
                   ord(buffer[offset + 5]) + 6

        if id == PACK_PKT:
            if ord(buffer[offset + 4]) & 0xF0 == 0x20:
                # '0010' marker: MPEG-1 pack header (fixed 12 bytes)
                self.type = 'MPEG-1 Video'
                self.get_time = self.ReadSCRMpeg1
                self.mpeg_version = 1
                return offset + 12
            elif (ord(buffer[offset + 4]) & 0xC0) == 0x40:
                # '01' marker: MPEG-2 pack header, variable stuffing length
                self.type = 'MPEG-2 Video'
                self.get_time = self.ReadSCRMpeg2
                return offset + (ord(buffer[offset + 13]) & 0x07) + 14
            else:
                # I have no idea what just happened, but for some DVB
                # recordings done with mencoder this points to a
                # PACK_PKT describing something odd. Returning 0 here
                # (let's hope there are no extensions in the header)
                # fixes it.
                return 0

        if 0xC0 <= id <= 0xDF:
            # code for audio stream; register the id once
            for a in self.audio:
                if a.id == id:
                    break
            else:
                self.audio.append(core.AudioStream())
                self.audio[-1]._set('id', id)
            return 0

        if 0xE0 <= id <= 0xEF:
            # code for video stream; register the id once
            for v in self.video:
                if v.id == id:
                    break
            else:
                self.video.append(core.VideoStream())
                self.video[-1]._set('id', id)
            return 0

        if id == SEQ_HEAD:
            # sequence header, remember that position for later use
            self.sequence_header_offset = offset
            return 0

        if id in [PRIVATE_STREAM1, PRIVATE_STREAM2]:
            # private stream. we don't know, but maybe we can guess later
            add = ord(buffer[offset + 8])
            # if (ord(buffer[offset+6]) & 4) or 1:
            # id = ord(buffer[offset+10+add])
            if buffer[offset + 11 + add:offset + 15 + add].find('\x0b\x77') != -1:
                # AC3 stream (0x0B77 is the AC-3 syncword)
                for a in self.audio:
                    if a.id == id:
                        break
                else:
                    self.audio.append(core.AudioStream())
                    self.audio[-1]._set('id', id)
                    self.audio[-1].codec = 0x2000 # AC3
            return 0

        if id == SYS_PKT:
            return 0

        if id == EXT_START:
            return 0

        return 0


    # Normal MPEG (VCD, SVCD) ========================================

    def isMPEG(self, file, force=False):
        """
        This MPEG starts with a sequence of 0x00 followed by a PACK Header
        http://dvd.sourceforge.net/dvdinfo/packhdr.html
        """
        file.seek(0, 0)
        buffer = file.read(10000)
        offset = 0

        # seek until the 0 byte stop
        while offset < len(buffer) - 100 and buffer[offset] == '\0':
            offset += 1
        offset -= 2

        # test for mpeg header 0x00 0x00 0x01
        header = '\x00\x00\x01%s' % chr(PACK_PKT)
        if offset < 0 or not buffer[offset:offset + 4] == header:
            if not force:
                return 0
            # brute force and try to find the pack header in the first
            # 10000 bytes somehow
            offset = buffer.find(header)
            if offset < 0:
                return 0

        # scan the 100000 bytes of data
        buffer += file.read(100000)

        # scan first header, to get basic info about
        # how to read a timestamp
        self.ReadHeader(buffer, offset)

        # store first timestamp
        self.start = self.get_time(buffer[offset + 4:])
        while len(buffer) > offset + 1000 and \
              buffer[offset:offset + 3] == '\x00\x00\x01':
            # read the mpeg header
            new_offset = self.ReadHeader(buffer, offset)

            # header scanning detected error, this is no mpeg
            if new_offset == None:
                return 0

            if new_offset:
                # we have a new offset
                offset = new_offset

                # skip padding 0 before a new header
                while len(buffer) > offset + 10 and \
                      not ord(buffer[offset + 2]):
                    offset += 1

            else:
                # seek to new header by brute force
                # (statement continues in the next chunk)
                offset
                # (continuation of the brute-force seek in isMPEG, truncated
                # at the chunk boundary above)
                += buffer[offset + 4:].find('\x00\x00\x01') + 4

        # fill in values for support functions:
        self.__seek_size__ = 1000000
        self.__sample_size__ = 10000
        self.__search__ = self._find_timer_
        self.filename = file.name

        # get length of the file
        self.length = self.get_length()
        return 1


    def _find_timer_(self, buffer):
        """
        Return position of timer in buffer or None if not found.
        This function is valid for 'normal' mpeg files
        """
        pos = buffer.find('\x00\x00\x01%s' % chr(PACK_PKT))
        if pos == -1:
            return None
        # timestamp starts right after the 4-byte pack start code
        return pos + 4



    # PES ============================================================


    def ReadPESHeader(self, offset, buffer, id=0):
        """
        Parse a PES header.
        Since it starts with 0x00 0x00 0x01 like 'normal' mpegs, this
        function will return (0, None) when it is no PES header or
        (packet length, timestamp position (maybe None))

        http://dvd.sourceforge.net/dvdinfo/pes-hdr.html
        """
        if not buffer[0:3] == '\x00\x00\x01':
            return 0, None

        packet_length = (ord(buffer[4]) << 8) + ord(buffer[5]) + 6
        # data_alignment_indicator bit
        align = ord(buffer[6]) & 4
        header_length = ord(buffer[8])

        # PES ID (starting with 001)
        if ord(buffer[3]) & 0xE0 == 0xC0:
            # audio stream (110x xxxx)
            id = id or ord(buffer[3]) & 0x1F
            for a in self.audio:
                if a.id == id:
                    break
            else:
                self.audio.append(core.AudioStream())
                self.audio[-1]._set('id', id)

        elif ord(buffer[3]) & 0xF0 == 0xE0:
            # video stream (1110 xxxx)
            id = id or ord(buffer[3]) & 0xF
            for v in self.video:
                if v.id == id:
                    break
            else:
                self.video.append(core.VideoStream())
                self.video[-1]._set('id', id)

            # new mpeg starting
            if buffer[header_length + 9:header_length + 13] == \
                   '\x00\x00\x01\xB3' and not self.sequence_header_offset:
                # yes, remember offset for later use
                self.sequence_header_offset = offset + header_length + 9
        elif ord(buffer[3]) == 189 or ord(buffer[3]) == 191:
            # private stream. we don't know, but maybe we can guess later
            id = id or ord(buffer[3]) & 0xF
            if align and \
               buffer[header_length + 9:header_length + 11] == '\x0b\x77':
                # AC3 stream (0x0B77 syncword at payload start)
                for a in self.audio:
                    if a.id == id:
                        break
                else:
                    self.audio.append(core.AudioStream())
                    self.audio[-1]._set('id', id)
                    self.audio[-1].codec = 0x2000 # AC3

        else:
            # unknown content
            pass

        # PTS/DTS flags (2 bits)
        ptsdts = ord(buffer[7]) >> 6

        if ptsdts and ptsdts == ord(buffer[9]) >> 4:
            if ord(buffer[9]) >> 4 != ptsdts:
                # NOTE(review): unreachable given the enclosing condition --
                # kept as in the original source
                log.warning(u'WARNING: bad PTS/DTS, please contact us')
                return packet_length, None

            # timestamp = self.ReadPTS(buffer[9:14])
            high = ((ord(buffer[9]) & 0xF) >> 1)
            med = (ord(buffer[10]) << 7) + (ord(buffer[11]) >> 1)
            low = (ord(buffer[12]) << 7) + (ord(buffer[13]) >> 1)
            # return the *position* of the timestamp, not its value
            return packet_length, 9

        return packet_length, None



    def isPES(self, file):
        log.info(u'trying mpeg-pes scan')
        file.seek(0, 0)
        buffer = file.read(3)

        # header (also valid for all mpegs)
        if not buffer == '\x00\x00\x01':
            return 0

        self.sequence_header_offset = 0
        buffer += file.read(10000)

        offset = 0
        while offset + 1000 < len(buffer):
            pos, timestamp = self.ReadPESHeader(offset, buffer[offset:])
            if not pos:
                return 0
            if timestamp != None and not hasattr(self, 'start'):
                self.get_time = self.ReadPTS
                bpos = buffer[offset + timestamp:offset + timestamp + 5]
                self.start = self.get_time(bpos)
            if self.sequence_header_offset and hasattr(self, 'start'):
                # we have all informations we need
                break

            offset += pos
            # NOTE(review): the `or 1` makes this condition always true --
            # kept as in the original source
            if offset + 1000 < len(buffer) and len(buffer) < 1000000 or 1:
                # looks like a pes, read more
                buffer += file.read(10000)

        if not self.video and not self.audio:
            # no video and no audio?
            return 0

        self.type = 'MPEG-PES'

        # fill in values for support functions:
        self.__seek_size__ = 10000000 # 10 MB
        self.__sample_size__ = 500000 # 500 k scanning
        self.__search__ = self._find_timer_PES_
        self.filename = file.name

        # get length of the file
        self.length = self.get_length()
        return 1


    def _find_timer_PES_(self, buffer):
        """
        Return position of timer in buffer or -1 if not found.
        This function is valid for PES files
        """
        pos = buffer.find('\x00\x00\x01')
        offset = 0
        if pos == -1 or offset + 1000 >= len(buffer):
            return None

        retpos = -1
        ackcount = 0
        while offset + 1000 < len(buffer):
            pos, timestamp = self.ReadPESHeader(offset, buffer[offset:])
            if timestamp != None and retpos == -1:
                retpos = offset + timestamp
            if pos == 0:
                # Oops, that was a mpeg header, no PES header
                offset += buffer[offset:].find('\x00\x00\x01')
                retpos = -1
                ackcount = 0
            else:
                offset += pos
                if retpos != -1:
                    ackcount += 1
            if ackcount > 10:
                # looks ok to me
                return retpos
        return None


    # Elementary Stream ===============================================

    def isES(self, file):
        """Detect an MPEG video elementary stream (starts with 0x000001B3)."""
        file.seek(0, 0)
        try:
            header = struct.unpack('>LL', file.read(8))
        except (struct.error, IOError):
            return False

        if header[0] != 0x1B3:
            return False

        # Is an mpeg video elementary stream

        self.mime = 'video/mpeg'
        video = core.VideoStream()
        # sequence header packs width/height/aspect/fps into the second word
        video.width = header[1] >> 20
        video.height = (header[1] >> 8) & 0xfff
        if header[1] & 0xf < len(FRAME_RATE):
            video.fps = FRAME_RATE[header[1] & 0xf]
        if (header[1] >> 4) & 0xf < len(ASPECT_RATIO):
            # FIXME: Empirically the aspect looks like PAR rather than DAR
            video.aspect = ASPECT_RATIO[(header[1] >> 4) & 0xf]
        self.video.append(video)
        return True


    # Transport Stream ===============================================

    def isTS(self, file):
        """Detect an MPEG transport stream (0x47 sync byte every 188 bytes)."""
        file.seek(0, 0)

        buffer = file.read(TS_PACKET_LENGTH * 2)
        c = 0

        # find two consecutive sync bytes one packet apart
        while c + TS_PACKET_LENGTH < len(buffer):
            if ord(buffer[c]) == ord(buffer[c + TS_PACKET_LENGTH]) == TS_SYNC:
                break
            c += 1
        else:
            return 0

        buffer += file.read(10000)
        self.type = 'MPEG-TS'

        while c + TS_PACKET_LENGTH < len(buffer):
            # payload_unit_start_indicator
            start = ord(buffer[c + 1]) & 0x40
            # maybe load more into the buffer
            if c + 2 * TS_PACKET_LENGTH > len(buffer) and c < 500000:
                buffer += file.read(10000)

            # wait until the ts payload contains a payload header
            if not start:
                c += TS_PACKET_LENGTH
                continue

            # 13-bit PID and 2-bit adaptation field control
            tsid = ((ord(buffer[c + 1]) & 0x3F) << 8) + ord(buffer[c + 2])
            adapt = (ord(buffer[c + 3]) & 0x30) >> 4

            offset = 4
            if adapt & 0x02:
                # meta info present, skip it for now
                adapt_len = ord(buffer[c + offset])
                offset += adapt_len + 1

            if not ord(buffer[c + 1]) & 0x40:
                # no new pes or psi in stream payload starting
                pass
            elif adapt & 0x01:
                # PES
                timestamp = self.ReadPESHeader(c + offset, buffer[c + offset:],
                                               tsid)[1]
                if timestamp != None:
                    if not hasattr(self, 'start'):
                        self.get_time = self.ReadPTS
                        timestamp = c + offset + timestamp
                        self.start = self.get_time(buffer[timestamp:timestamp + 5])
                    elif not hasattr(self, 'audio_ok'):
                        timestamp = c + offset + timestamp
                        start = self.get_time(buffer[timestamp:timestamp + 5])
                        if start is not None and self.start is not None and \
                           abs(start - self.start) < 10:
                            # looks ok
                            self.audio_ok = True
                        else:
                            # timestamp broken
                            del self.start
                            log.warning(u'Timestamp error, correcting')

            if hasattr(self, 'start') and self.start and \
               self.sequence_header_offset and self.video and self.audio:
                break

            c += TS_PACKET_LENGTH


        if not self.sequence_header_offset:
            return 0

        # fill in values for support functions:
        self.__seek_size__ = 10000000 # 10 MB
        self.__sample_size__ = 100000 # 100 k scanning
        self.__search__ = self._find_timer_TS_
        self.filename = file.name

        # get length of the file
        self.length = self.get_length()
        return 1


    def _find_timer_TS_(self, buffer):
        """Return position of a PES timestamp in a TS buffer, or None."""
        c = 0

        # (loop condition continues in the next chunk)
        while c +
              TS_PACKET_LENGTH < len(buffer):
            # (continuation of _find_timer_TS_, truncated at chunk boundary)
            if ord(buffer[c]) == ord(buffer[c + TS_PACKET_LENGTH]) == TS_SYNC:
                break
            c += 1
        else:
            return None

        while c + TS_PACKET_LENGTH < len(buffer):
            start = ord(buffer[c + 1]) & 0x40
            if not start:
                c += TS_PACKET_LENGTH
                continue

            tsid = ((ord(buffer[c + 1]) & 0x3F) << 8) + ord(buffer[c + 2])
            adapt = (ord(buffer[c + 3]) & 0x30) >> 4

            offset = 4
            if adapt & 0x02:
                # meta info present, skip it for now
                offset += ord(buffer[c + offset]) + 1

            if adapt & 0x01:
                timestamp = self.ReadPESHeader(c + offset, buffer[c + offset:], tsid)[1]
                if timestamp is None:
                    # this should not happen
                    log.error(u'bad TS')
                    return None
                return c + offset + timestamp
            c += TS_PACKET_LENGTH
        return None



    # Support functions ==============================================

    def get_endpos(self):
        """
        get the last timestamp of the mpeg, return -1 if this is not possible

        NOTE(review): despite the docstring, the failure paths actually
        return None, not -1.
        """
        if not hasattr(self, 'filename') or not hasattr(self, 'start'):
            return None

        length = os.stat(self.filename)[stat.ST_SIZE]
        if length < self.__sample_size__:
            return

        # scan only the tail of the file for the last timestamp
        file = open(self.filename)
        file.seek(length - self.__sample_size__)
        buffer = file.read(self.__sample_size__)

        end = None
        while 1:
            pos = self.__search__(buffer)
            if pos == None:
                break
            # keep the last timestamp that parses
            end = self.get_time(buffer[pos:]) or end
            buffer = buffer[pos + 100:]

        file.close()
        return end


    def get_length(self):
        """
        get the length in seconds, return -1 if this is not possible

        NOTE(review): despite the docstring, the failure path returns None.
        """
        end = self.get_endpos()
        if end == None or self.start == None:
            return None
        if self.start > end:
            # timestamp counter (33 bits @ 90kHz) wrapped around
            return int(((long(1) << 33) - 1) / 90000) - self.start + end
        return end - self.start


    def seek(self, end_time):
        """
        Return the byte position in the file where the time position
        is 'pos' seconds. Return 0 if this is not possible
        """
        if not hasattr(self, 'filename') or not hasattr(self, 'start'):
            return 0

        file = open(self.filename)
        seek_to = 0

        while 1:
            # jump forward a chunk, then sample and look for a timestamp
            file.seek(self.__seek_size__, 1)
            buffer = file.read(self.__sample_size__)
            if len(buffer) < 10000:
                break
            pos = self.__search__(buffer)
            if pos != None:
                # found something
                nt = self.get_time(buffer[pos:])
                if nt is not None and nt >= end_time:
                    # too much, break
                    break
            # that wasn't enough
            seek_to = file.tell()

        file.close()
        return seek_to


    def __scan__(self):
        """
        scan file for timestamps (may take a long time)
        """
        if not hasattr(self, 'filename') or not hasattr(self, 'start'):
            return 0

        file = open(self.filename)
        log.debug(u'scanning file...')
        while 1:
            file.seek(self.__seek_size__ * 10, 1)
            buffer = file.read(self.__sample_size__)
            if len(buffer) < 10000:
                break
            pos = self.__search__(buffer)
            if pos == None:
                continue
            log.debug(u'buffer position: %r' % self.get_time(buffer[pos:]))

        file.close()
        log.debug(u'done scanning file')


Parser = MPEG
diff --git a/lib/enzyme/ogm.py b/lib/enzyme/ogm.py
new file mode 100644
index 0000000000000000000000000000000000000000..4198be24af34b412886d03eaa2dd0fb5dfbd44c4
--- /dev/null
+++ b/lib/enzyme/ogm.py
@@ -0,0 +1,299 @@
# -*- coding: utf-8 -*-
# enzyme - Video metadata parser
# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com>
# Copyright 2003-2006 Thomas Schueppel <stain@acm.org>
# Copyright 2003-2006 Dirk Meyer <dischi@freevo.org>
#
# This file is part of enzyme.
#
# enzyme is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# enzyme is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with enzyme. If not, see <http://www.gnu.org/licenses/>.
__all__ = ['Parser']

import struct
import re
import stat
import os
import logging
from exceptions import ParseError
import core

# get logging object
log = logging.getLogger(__name__)

PACKET_TYPE_HEADER = 0x01
PACKED_TYPE_METADATA = 0x03
PACKED_TYPE_SETUP = 0x05
PACKET_TYPE_BITS = 0x07
PACKET_IS_SYNCPOINT = 0x08

#VORBIS_VIDEO_PACKET_INFO = 'video'

STREAM_HEADER_VIDEO = '<4sIQQIIHII'
STREAM_HEADER_AUDIO = '<4sIQQIIHHHI'

VORBISCOMMENT = { 'TITLE': 'title',
                  'ALBUM': 'album',
                  'ARTIST': 'artist',
                  'COMMENT': 'comment',
                  'ENCODER': 'encoder',
                  'TRACKNUMBER': 'trackno',
                  'LANGUAGE': 'language',
                  'GENRE': 'genre',
                  }

# FIXME: check VORBISCOMMENT date and convert to timestamp
# Deactived tag: 'DATE': 'date',

MAXITERATIONS = 30

class Ogm(core.AVContainer):
    """
    OGG/OGM container parser: walks OggS pages, extracts stream headers,
    vorbis comments and chapter markers.  Raises ParseError for non-OGM
    files (including pure-audio Ogg, which falls back to another parser).
    """

    table_mapping = { 'VORBISCOMMENT' : VORBISCOMMENT }

    def __init__(self, file):
        core.AVContainer.__init__(self)
        self.samplerate = 1
        self.all_streams = []           # used to add meta data to streams
        self.all_header = []

        # parse pages from the start until the stream has really begun
        for i in range(MAXITERATIONS):
            granule, nextlen = self._parseOGGS(file)
            if granule == None:
                if i == 0:
                    # oops, bad file
                    raise ParseError()
                break
            elif granule > 0:
                # ok, file started
                break

        # seek to the end of the stream, to avoid scanning the whole file
        if (os.stat(file.name)[stat.ST_SIZE] > 50000):
            file.seek(os.stat(file.name)[stat.ST_SIZE] - 49000)

        # read the rest of the file into a buffer
        h = file.read()

        # find last OggS to get length info
        if len(h) > 200:
            idx = h.find('OggS')
            pos = -49000 + idx
            if idx:
                file.seek(os.stat(file.name)[stat.ST_SIZE] + pos)
                while 1:
                    granule, nextlen = self._parseOGGS(file)
                    if not nextlen:
                        break

        # Copy metadata to the streams
        if len(self.all_header) == len(self.all_streams):
            for i in range(len(self.all_header)):

                # get meta info
                for key in self.all_streams[i].keys():
                    if self.all_header[i].has_key(key):
                        self.all_streams[i][key] = self.all_header[i][key]
                        del self.all_header[i][key]
                    if self.all_header[i].has_key(key.upper()):
                        asi = self.all_header[i][key.upper()]
                        self.all_streams[i][key] = asi
                        del self.all_header[i][key.upper()]

                # Chapter parser
                if self.all_header[i].has_key('CHAPTER01') and \
                       not self.chapters:
                    while 1:
                        s = 'CHAPTER%02d' % (len(self.chapters) + 1)
                        if self.all_header[i].has_key(s) and \
                           self.all_header[i].has_key(s + 'NAME'):
                            pos = self.all_header[i][s]
                            try:
                                pos = int(pos)
                            except ValueError:
                                # 'HH:MM:SS.ms' style chapter position
                                new_pos = 0
                                for v in pos.split(':'):
                                    new_pos = new_pos * 60 + float(v)
                                pos = int(new_pos)

                            c = self.all_header[i][s + 'NAME']
                            c = core.Chapter(c, pos)
                            del self.all_header[i][s + 'NAME']
                            del self.all_header[i][s]
                            self.chapters.append(c)
                        else:
                            break

        # If there are no video streams in this ogg container, it
        # must be an audio file. Raise an exception to cause the
        # factory to fall back to audio.ogg.
        if len(self.video) == 0:
            raise ParseError

        # Copy Metadata from tables into the main set of attributes
        for header in self.all_header:
            self._appendtable('VORBISCOMMENT', header)


    def _parseOGGS(self, file):
        """
        Parse one OggS page at the current file position.
        Returns (granulepos, total page length) or (None, None) at EOF /
        on an unsupported page.
        """
        h = file.read(27)
        if len(h) == 0:
            # Regular File end
            return None, None
        elif len(h) < 27:
            log.debug(u'%d Bytes of Garbage found after End.' % len(h))
            return None, None
        if h[:4] != "OggS":
            log.debug(u'Invalid Ogg')
            raise ParseError()

        version = ord(h[4])
        if version != 0:
            log.debug(u'Unsupported OGG/OGM Version %d' % version)
            return None, None

        head = struct.unpack('<BQIIIB', h[5:])
        headertype, granulepos, serial, pageseqno, checksum, \
                    pageSegCount = head

        self.mime = 'application/ogm'
        self.type = 'OGG Media'
        # the segment table gives the payload length of this page
        tab = file.read(pageSegCount)
        nextlen = 0
        for i in range(len(tab)):
            nextlen += ord(tab[i])
        else:
            h = file.read(1)
            packettype = ord(h[0]) & PACKET_TYPE_BITS
            if packettype == PACKET_TYPE_HEADER:
                h += file.read(nextlen - 1)
                self._parseHeader(h, granulepos)
            elif packettype == PACKED_TYPE_METADATA:
                h += file.read(nextlen - 1)
                self._parseMeta(h)
            else:
                file.seek(nextlen - 1, 1)
            if len(self.all_streams) > serial:
                stream = self.all_streams[serial]
                if hasattr(stream, 'samplerate') and \
                       stream.samplerate:
                    stream.length = granulepos / stream.samplerate
                elif hasattr(stream, 'bitrate') and \
                         stream.bitrate:
                    stream.length = granulepos / stream.bitrate

        return granulepos, nextlen + 27 + pageSegCount


    def _parseMeta(self, h):
        """Parse a vorbis comment packet into self.all_header."""
        flags = ord(h[0])
        headerlen = len(h)
        if headerlen >= 7 and h[1:7] == 'vorbis':
            header = {}
            nextlen, self.encoder = self._extractHeaderString(h[7:])
            numItems = struct.unpack('<I', h[7 + nextlen:7 + nextlen + 4])[0]
            start = 7 + 4 + nextlen
            for _ in range(numItems):
                (nextlen, s) = self._extractHeaderString(h[start:])
                start += nextlen
                if s:
                    # 'KEY=value' pairs, keys normalized to upper case
                    a = re.split('=', s)
                    header[(a[0]).upper()] = a[1]
            # Put Header fields into info fields
            self.type = 'OGG Vorbis'
            self.subtype = ''
            self.all_header.append(header)


    def _parseHeader(self, header, granule):
        """Dispatch a stream header packet (vorbis/theora/dshow/text)."""
        headerlen = len(header)
        flags = ord(header[0])

        if headerlen >= 30 and header[1:7] == 'vorbis':
            ai = core.AudioStream()
            ai.version, ai.channels, ai.samplerate, bitrate_max, ai.bitrate, \
                        bitrate_min, blocksize, framing = \
                        struct.unpack('<IBIiiiBB', header[7:7 + 23])
            ai.codec = 'Vorbis'
            #ai.granule = granule
            #ai.length = granule / ai.samplerate
            self.audio.append(ai)
            self.all_streams.append(ai)

        elif headerlen >= 7 and header[1:7] == 'theora':
            # Theora Header
            # XXX Finish Me
            vi = core.VideoStream()
            vi.codec = 'theora'
            self.video.append(vi)
            self.all_streams.append(vi)

        elif headerlen >= 142 and \
                 header[1:36] == 'Direct Show Samples embedded in Ogg':
            # Old Directshow format
            # XXX Finish Me
            vi = core.VideoStream()
            vi.codec = 'dshow'
            self.video.append(vi)
            self.all_streams.append(vi)

        elif flags & PACKET_TYPE_BITS == PACKET_TYPE_HEADER and \
                 headerlen >= struct.calcsize(STREAM_HEADER_VIDEO) + 1:
            # New Directshow Format
            htype = header[1:9]

            if htype[:5] == 'video':
                sh = header[9:struct.calcsize(STREAM_HEADER_VIDEO) + 9]
                streamheader = struct.unpack(STREAM_HEADER_VIDEO, sh)
                vi = core.VideoStream()
                (type, ssize, timeunit, samplerate, vi.length, buffersize, \
                 vi.bitrate, vi.width, vi.height) = streamheader

                # width/height are stored as 16.16 fixed point
                vi.width /= 65536
                vi.height /= 65536
                # XXX length, bitrate are very wrong
                vi.codec = type
                # timeunit is in 100ns ticks per frame
                vi.fps = 10000000 / timeunit
                self.video.append(vi)
                self.all_streams.append(vi)

            elif htype[:5] == 'audio':
                sha = header[9:struct.calcsize(STREAM_HEADER_AUDIO) + 9]
                streamheader = struct.unpack(STREAM_HEADER_AUDIO, sha)
                ai = core.AudioStream()
                (type, ssize, timeunit, ai.samplerate, ai.length, buffersize, \
                 ai.bitrate, ai.channels, bloc, ai.bitrate) = streamheader
                self.samplerate = ai.samplerate
                log.debug(u'Samplerate %d' % self.samplerate)
                self.audio.append(ai)
                self.all_streams.append(ai)

            elif htype[:4] == 'text':
                subtitle = core.Subtitle()
                # FIXME: add more info
                self.subtitles.append(subtitle)
                self.all_streams.append(subtitle)

        else:
            log.debug(u'Unknown Header')


    def _extractHeaderString(self, header):
        """
        Read one length-prefixed UTF-8 string.
        Returns (bytes consumed, unicode string or None).
        """
        # NOTE(review): `len` shadows the builtin -- kept as in the original
        len = struct.unpack('<I', header[:4])[0]
        try:
            return (len + 4, unicode(header[4:4 + len], 'utf-8'))
        except (KeyError, IndexError, UnicodeDecodeError):
            return (len + 4, None)


Parser = Ogm
diff --git a/lib/enzyme/real.py b/lib/enzyme/real.py
new file mode 100644
index 0000000000000000000000000000000000000000..e7c69e930f324dfb45142d47cdfa255ad588c200
--- /dev/null
+++ b/lib/enzyme/real.py
@@ -0,0 +1,118 @@
# -*- coding: utf-8 -*-
# enzyme - Video metadata parser
# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com>
# Copyright 2003-2006 Thomas Schueppel <stain@acm.org>
# Copyright 2003-2006 Dirk Meyer <dischi@freevo.org>
#
# This file is part of enzyme.
#
# enzyme is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# enzyme is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with enzyme. If not, see <http://www.gnu.org/licenses/>.
__all__ = ['Parser']

import struct
import logging
from exceptions import ParseError
import core

# http://www.pcisys.net/~melanson/codecs/rmff.htm
# http://www.pcisys.net/~melanson/codecs/

# get logging object
log = logging.getLogger(__name__)

class RealVideo(core.AVContainer):
    """RealMedia (.RMF) container parser for basic stream metadata."""
    def __init__(self, file):
        core.AVContainer.__init__(self)
        self.mime = 'video/real'
        self.type = 'Real Video'
        h = file.read(10)
        try:
            (object_id, object_size, object_version) = struct.unpack('>4sIH', h)
        except struct.error:
            # EOF.
            raise ParseError()

        if not object_id == '.RMF':
            raise ParseError()

        file_version, num_headers = struct.unpack('>II', file.read(8))
        log.debug(u'size: %d, ver: %d, headers: %d' % \
                  (object_size, file_version, num_headers))
        for _ in range(0, num_headers):
            try:
                oi = struct.unpack('>4sIH', file.read(10))
            except (struct.error, IOError):
                # Header data we expected wasn't there. File may be
                # only partially complete.
                break

            if object_id == 'DATA' and oi[0] != 'INDX':
                log.debug(u'INDX chunk expected after DATA but not found -- file corrupt')
                break

            (object_id, object_size, object_version) = oi
            if object_id == 'DATA':
                # Seek over the data chunk rather than reading it in.
                file.seek(object_size - 10, 1)
            else:
                self._read_header(object_id, file.read(object_size - 10))
            log.debug(u'%r [%d]' % (object_id, object_size - 10))
        # Read all the following headers


    def _read_header(self, object_id, s):
        """
        Parse one RMFF top-level chunk body `s` identified by `object_id`
        (PROP / MDPR / CONT), filling length, streams and title metadata.
        """
        if object_id == 'PROP':
            prop = struct.unpack('>9IHH', s)
            log.debug(u'PROP: %r' % prop)
        if object_id == 'MDPR':
            # media properties: one per stream
            mdpr = struct.unpack('>H7I', s[:30])
            log.debug(u'MDPR: %r' % mdpr)
            # duration is stored in milliseconds
            self.length = mdpr[7] / 1000.0
            (stream_name_size,) = struct.unpack('>B', s[30:31])
            stream_name = s[31:31 + stream_name_size]
            pos = 31 + stream_name_size
            (mime_type_size,) = struct.unpack('>B', s[pos:pos + 1])
            mime = s[pos + 1:pos + 1 + mime_type_size]
            pos += mime_type_size + 1
            (type_specific_len,) = struct.unpack('>I', s[pos:pos + 4])
            type_specific = s[pos + 4:pos + 4 + type_specific_len]
            pos += 4 + type_specific_len
            if mime[:5] == 'audio':
                ai = core.AudioStream()
                ai.id = mdpr[0]
                ai.bitrate = mdpr[2]
                self.audio.append(ai)
            elif mime[:5] == 'video':
                vi = core.VideoStream()
                vi.id = mdpr[0]
                vi.bitrate = mdpr[2]
                self.video.append(vi)
            else:
                log.debug(u'Unknown: %r' % mime)
        if object_id == 'CONT':
            # content description: four length-prefixed strings in order
            pos = 0
            (title_len,) = struct.unpack('>H', s[pos:pos + 2])
            self.title = s[2:title_len + 2]
            pos += title_len + 2
            (author_len,) = struct.unpack('>H', s[pos:pos + 2])
            self.artist = s[pos + 2:pos + author_len + 2]
            pos += author_len + 2
            (copyright_len,) = struct.unpack('>H', s[pos:pos + 2])
            self.copyright = s[pos + 2:pos + copyright_len + 2]
            pos += copyright_len + 2
            (comment_len,) = struct.unpack('>H', s[pos:pos + 2])
            self.comment = s[pos + 2:pos + comment_len + 2]


Parser = RealVideo
diff --git a/lib/enzyme/riff.py b/lib/enzyme/riff.py
new file mode 100644
index 0000000000000000000000000000000000000000..516c727b10f951ff45feff64089024341ecf3899
--- /dev/null
+++ b/lib/enzyme/riff.py
@@ -0,0 +1,566 @@
# -*- coding: utf-8 -*-
# enzyme - Video metadata parser
# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com>
# Copyright 2003-2006 Thomas Schueppel <stain@acm.org>
# Copyright 2003-2006 Dirk Meyer <dischi@freevo.org>
#
# This file is part of enzyme.
#
# enzyme is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# enzyme is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with enzyme. If not, see <http://www.gnu.org/licenses/>.
__all__ = ['Parser']

import os
import struct
import string
import logging
import time
from exceptions import ParseError
import core

# get logging object
log = logging.getLogger(__name__)

# List of tags
# http://kibus1.narod.ru/frames_eng.htm?sof/abcavi/infotags.htm
# http://www.divx-digest.com/software/avitags_dll.html
# File Format: google for odmlff2.pdf

AVIINFO = {
    'INAM': 'title',
    'IART': 'artist',
    'IPRD': 'product',
    'ISFT': 'software',
    'ICMT': 'comment',
    'ILNG': 'language',
    'IKEY': 'keywords',
    'IPRT': 'trackno',
    'IFRM': 'trackof',
    'IPRO': 'producer',
    'IWRI': 'writer',
    'IGNR': 'genre',
    'ICOP': 'copyright'
}

# Taken from libavcodec/mpeg4data.h (pixel_aspect struct)
PIXEL_ASPECT = {
    1: (1, 1),
    2: (12, 11),
    3: (10, 11),
    4: (16, 11),
    5: (40, 33)
}


class Riff(core.AVContainer):
    """
    AVI parser also parsing metadata like title, languages, etc.
    """
    table_mapping = { 'AVIINFO' : AVIINFO }

    def __init__(self, file):
        core.AVContainer.__init__(self)
        # read the header
        h = file.read(12)
        if h[:4] != "RIFF" and h[:4] != 'SDSS':
            raise ParseError()

        self.has_idx = False
        self.header = {}
        self.junkStart = None
        self.infoStart = None
        # 'AVI ' or 'WAVE' form type
        self.type = h[8:12]
        if self.type == 'AVI ':
            self.mime = 'video/avi'
        elif self.type == 'WAVE':
            self.mime = 'audio/wav'
        try:
            # _parseRIFFChunk is defined later in this file (outside this chunk)
            while self._parseRIFFChunk(file):
                pass
        except IOError:
            log.exception(u'error in file, stop parsing')

        self._find_subtitles(file.name)

        if not self.has_idx and isinstance(self, core.AVContainer):
            log.debug(u'WARNING: avi has no index')
            self._set('corrupt', True)


    def _find_subtitles(self, filename):
        """
        Search for subtitle files. Right now only VobSub is supported
        """
        base = os.path.splitext(filename)[0]
        if os.path.isfile(base + '.idx') and \
               (os.path.isfile(base + '.sub') or os.path.isfile(base + '.rar')):
            file = open(base + '.idx')
            if file.readline().find('VobSub index file') > 0:
                for line in file.readlines():
                    if line.find('id') == 0:
                        sub = core.Subtitle()
                        sub.language = line[4:6]
                        sub.trackno = base + '.idx'  # Maybe not?
                        self.subtitles.append(sub)
            file.close()


    def _parseAVIH(self, t):
        """Parse the 56-byte 'avih' main AVI header into a dict."""
        retval = {}
        v = struct.unpack('<IIIIIIIIIIIIII', t[0:56])
        (retval['dwMicroSecPerFrame'],
         retval['dwMaxBytesPerSec'],
         retval['dwPaddingGranularity'],
         retval['dwFlags'],
         retval['dwTotalFrames'],
         retval['dwInitialFrames'],
         retval['dwStreams'],
         retval['dwSuggestedBufferSize'],
         retval['dwWidth'],
         retval['dwHeight'],
         retval['dwScale'],
         retval['dwRate'],
         retval['dwStart'],
         retval['dwLength']) = v
        if retval['dwMicroSecPerFrame'] == 0:
            log.warning(u'ERROR: Corrupt AVI')
            raise ParseError()

        return retval


    def _parseSTRH(self, t):
        """Parse a 'strh' stream header; for audio also derives self.delay."""
        retval = {}
        retval['fccType'] = t[0:4]
        log.debug(u'_parseSTRH(%r) : %d bytes' % (retval['fccType'], len(t)))
        if retval['fccType'] != 'auds':
            retval['fccHandler'] = t[4:8]
            v = struct.unpack('<IHHIIIIIIIII', t[8:52])
            (retval['dwFlags'],
             retval['wPriority'],
             retval['wLanguage'],
             retval['dwInitialFrames'],
             retval['dwScale'],
             retval['dwRate'],
             retval['dwStart'],
             retval['dwLength'],
             retval['dwSuggestedBufferSize'],
             retval['dwQuality'],
             retval['dwSampleSize'],
             retval['rcFrame']) = v
        else:
            try:
                v = struct.unpack('<IHHIIIIIIIII', t[8:52])
                (retval['dwFlags'],
                 retval['wPriority'],
                 retval['wLanguage'],
                 retval['dwInitialFrames'],
                 retval['dwScale'],
                 retval['dwRate'],
                 retval['dwStart'],
                 retval['dwLength'],
                 retval['dwSuggestedBufferSize'],
                 retval['dwQuality'],
                 retval['dwSampleSize'],
                 retval['rcFrame']) = v
                # audio start delay in seconds: dwStart / (dwRate/dwScale)
                self.delay = float(retval['dwStart']) / \
                             (float(retval['dwRate']) / retval['dwScale'])
            except (KeyError, IndexError, ValueError, ZeroDivisionError):
                pass

        return retval


    def _parseSTRF(self, t, strh):
        """Parse a 'strf' stream format chunk, creating the stream object."""
        fccType = strh['fccType']
        retval = {}
        if fccType == 'auds':
            # WAVEFORMATEX prefix
            v = struct.unpack('<HHHHHH', t[0:12])
            (retval['wFormatTag'],
             retval['nChannels'],
             retval['nSamplesPerSec'],
             retval['nAvgBytesPerSec'],
             retval['nBlockAlign'],
             retval['nBitsPerSample'],
             ) = v
            ai = core.AudioStream()
            ai.samplerate = retval['nSamplesPerSec']
            ai.channels = retval['nChannels']
            # FIXME: Bitrate calculation is completely wrong.
            #ai.samplebits = retval['nBitsPerSample']
            #ai.bitrate = retval['nAvgBytesPerSec'] * 8

            # TODO: set code if possible
            # http://www.stats.uwa.edu.au/Internal/Specs/DXALL/FileSpec/\
            #    Languages
            # ai.language = strh['wLanguage']
            ai.codec = retval['wFormatTag']
            self.audio.append(ai)
        elif fccType == 'vids':
            # BITMAPINFOHEADER
            v = struct.unpack('<IIIHH', t[0:16])
            (retval['biSize'],
             retval['biWidth'],
             retval['biHeight'],
             retval['biPlanes'],
             retval['biBitCount']) = v
            v = struct.unpack('IIIII', t[20:40])
            (retval['biSizeImage'],
             retval['biXPelsPerMeter'],
             retval['biYPelsPerMeter'],
             retval['biClrUsed'],
             retval['biClrImportant']) = v
            vi = core.VideoStream()
            # biCompression fourcc at offset 16
            vi.codec = t[16:20]
            vi.width = retval['biWidth']
            vi.height = retval['biHeight']
            # FIXME: Bitrate calculation is completely wrong.
            #vi.bitrate = strh['dwRate']
            vi.fps = float(strh['dwRate']) / strh['dwScale']
            vi.length = strh['dwLength'] / vi.fps
            self.video.append(vi)
        return retval


    def _parseSTRL(self, t):
        """Parse a 'strl' LIST: sequence of strh/strf sub-chunks."""
        retval = {}
        size = len(t)
        i = 0

        while i < len(t) - 8:
            key = t[i:i + 4]
            sz = struct.unpack('<I', t[i + 4:i + 8])[0]
            i += 8
            value = t[i:]

            if key == 'strh':
                retval[key] = self._parseSTRH(value)
            elif key == 'strf':
                # strf needs the preceding strh for fccType/rates
                retval[key] = self._parseSTRF(value, retval['strh'])
            else:
                log.debug(u'_parseSTRL: unsupported stream tag %r', key)

            i += sz

        return retval, i


    def _parseODML(self, t):
        """Parse (skip) an OpenDML 'odml' list; only validates 'dmlh'."""
        retval = {}
        size = len(t)
        i = 0
        key = t[i:i + 4]
        sz = struct.unpack('<I', t[i + 4:i + 8])[0]
        i += 8
        value = t[i:]
        if key != 'dmlh':
            log.debug(u'_parseODML: Error')

        i += sz - 8
        return (retval, i)


    def _parseVPRP(self, t):
        """Parse a 'vprp' video properties chunk; sets video aspect ratio."""
        retval = {}
        v = struct.unpack('<IIIIIIIIII', t[:4 * 10])

        (retval['VideoFormat'],
         retval['VideoStandard'],
         retval['RefreshRate'],
         retval['HTotalIn'],
         retval['VTotalIn'],
         retval['FrameAspectRatio'],
         retval['wPixel'],
         retval['hPixel']) = v[1:-1]

        # I need an avi with more informations
        # enum {FORMAT_UNKNOWN, FORMAT_PAL_SQUARE, FORMAT_PAL_CCIR_601,
        #       FORMAT_NTSC_SQUARE, FORMAT_NTSC_CCIR_601,...} VIDEO_FORMAT;
        # enum {STANDARD_UNKNOWN, STANDARD_PAL, STANDARD_NTSC, STANDARD_SECAM}
        #       VIDEO_STANDARD;
        #
        r = retval['FrameAspectRatio']
        # packed as 16.16 ratio: high word / low word
        r = float(r >> 16) / (r & 0xFFFF)
        retval['FrameAspectRatio'] = r
        if self.video:
            map(lambda v: setattr(v, 'aspect', r), self.video)
        return (retval, v[0])


    def _parseLISTmovi(self, size, file):
        """
        Digs into movi list, looking for a Video Object Layer header in an
        mpeg4 stream in order to determine aspect ratio.
        """
        i = 0
        n_dc = 0
        done = False
        # If the VOL header doesn't appear within 5MB or 5 video chunks,
        # give up. The 5MB limit is not likely to apply except in
        # pathological cases.
        while i < min(1024 * 1024 * 5, size - 8) and n_dc < 5:
            data = file.read(8)
            if ord(data[0]) == 0:
                # Eat leading nulls.
                data = data[1:] + file.read(1)
                i += 1

            key, sz = struct.unpack('<4sI', data)
            if key[2:] != 'dc' or sz > 1024 * 500:
                # This chunk is not video or is unusually big (> 500KB);
                # skip it.
                file.seek(sz, 1)
                i += 8 + sz
                continue

            n_dc += 1
            # Read video chunk into memory
            data = file.read(sz)

            #for p in range(0,min(80, sz)):
            #    print "%02x " % ord(data[p]),
            #print "\n\n"

            # Look through the picture header for VOL startcode. The basic
            # logic for this is taken from libavcodec, h263.c
            pos = 0
            startcode = 0xff
            def bits(v, o, n):
                # Returns n bits in v, offset o bits.
                return (v & 2 ** n - 1 << (64 - n - o)) >> 64 - n - o

            while pos < sz:
                startcode = ((startcode << 8) | ord(data[pos])) & 0xffffffff
                pos += 1
                if startcode & 0xFFFFFF00 != 0x100:
                    # No startcode found yet
                    continue

                if startcode >= 0x120 and startcode <= 0x12F:
                    # We have the VOL startcode. Pull 64 bits of it and treat
                    # as a bitstream
                    v = struct.unpack(">Q", data[pos : pos + 8])[0]
                    offset = 10
                    if bits(v, 9, 1):
                        # is_ol_id, skip over vo_ver_id and vo_priority
                        offset += 7
                    ar_info = bits(v, offset, 4)
                    if ar_info == 15:
                        # Extended aspect
                        num = bits(v, offset + 4, 8)
                        den = bits(v, offset + 12, 8)
                    else:
                        # A standard pixel aspect
                        num, den = PIXEL_ASPECT.get(ar_info, (0, 0))

                    # num/den indicates pixel aspect; convert to video aspect,
                    # so we need frame width and height.
                    if 0 not in [num, den]:
                        width, height = self.video[-1].width, self.video[-1].height
                        self.video[-1].aspect = num / float(den) * width / height

                    done = True
                    break

                startcode = 0xff

            i += 8 + len(data)

            if done:
                # We have the aspect, no need to continue parsing the movi
                # list, so break out of the loop.
                break


        if i < size:
            # Seek past whatever might be remaining of the movi list.
+ file.seek(size - i, 1) + + + + def _parseLIST(self, t): + retval = {} + i = 0 + size = len(t) + + while i < size - 8: + # skip zero + if ord(t[i]) == 0: i += 1 + key = t[i:i + 4] + sz = 0 + + if key == 'LIST': + sz = struct.unpack('<I', t[i + 4:i + 8])[0] + i += 8 + key = "LIST:" + t[i:i + 4] + value = self._parseLIST(t[i:i + sz]) + if key == 'strl': + for k in value.keys(): + retval[k] = value[k] + else: + retval[key] = value + i += sz + elif key == 'avih': + sz = struct.unpack('<I', t[i + 4:i + 8])[0] + i += 8 + value = self._parseAVIH(t[i:i + sz]) + i += sz + retval[key] = value + elif key == 'strl': + i += 4 + (value, sz) = self._parseSTRL(t[i:]) + key = value['strh']['fccType'] + i += sz + retval[key] = value + elif key == 'odml': + i += 4 + (value, sz) = self._parseODML(t[i:]) + i += sz + elif key == 'vprp': + i += 4 + (value, sz) = self._parseVPRP(t[i:]) + retval[key] = value + i += sz + elif key == 'JUNK': + sz = struct.unpack('<I', t[i + 4:i + 8])[0] + i += sz + 8 + else: + sz = struct.unpack('<I', t[i + 4:i + 8])[0] + i += 8 + # in most cases this is some info stuff + if not key in AVIINFO.keys() and key != 'IDIT': + log.debug(u'Unknown Key: %r, len: %d' % (key, sz)) + value = t[i:i + sz] + if key == 'ISFT': + # product information + if value.find('\0') > 0: + # works for Casio S500 camera videos + value = value[:value.find('\0')] + value = value.replace('\0', '').lstrip().rstrip() + value = value.replace('\0', '').lstrip().rstrip() + if value: + retval[key] = value + if key in ['IDIT', 'ICRD']: + # Timestamp the video was created. 
Spec says it + # should be a format like "Wed Jan 02 02:03:55 1990" + # Casio S500 uses "2005/12/24/ 14:11", but I've + # also seen "December 24, 2005" + specs = ('%a %b %d %H:%M:%S %Y', '%Y/%m/%d/ %H:%M', '%B %d, %Y') + for tmspec in specs: + try: + tm = time.strptime(value, tmspec) + # save timestamp as int + self.timestamp = int(time.mktime(tm)) + break + except ValueError: + pass + else: + log.debug(u'no support for time format %r', value) + i += sz + return retval + + + def _parseRIFFChunk(self, file): + h = file.read(8) + if len(h) < 8: + return False + name = h[:4] + size = struct.unpack('<I', h[4:8])[0] + + if name == 'LIST': + pos = file.tell() - 8 + key = file.read(4) + if key == 'movi' and self.video and not self.video[-1].aspect and \ + self.video[-1].width and self.video[-1].height and \ + self.video[-1].format in ['DIVX', 'XVID', 'FMP4']: # any others? + # If we don't have the aspect (i.e. it isn't in odml vprp + # header), but we do know the video's dimensions, and + # we're dealing with an mpeg4 stream, try to get the aspect + # from the VOL header in the mpeg4 stream. 
+ self._parseLISTmovi(size - 4, file) + return True + elif size > 80000: + log.debug(u'RIFF LIST %r too long to parse: %r bytes' % (key, size)) + t = file.seek(size - 4, 1) + return True + elif size < 5: + log.debug(u'RIFF LIST %r too short: %r bytes' % (key, size)) + return True + + t = file.read(size - 4) + log.debug(u'parse RIFF LIST %r: %d bytes' % (key, size)) + value = self._parseLIST(t) + self.header[key] = value + if key == 'INFO': + self.infoStart = pos + self._appendtable('AVIINFO', value) + elif key == 'MID ': + self._appendtable('AVIMID', value) + elif key == 'hdrl': + # no need to add this info to a table + pass + else: + log.debug(u'Skipping table info %r' % key) + + elif name == 'JUNK': + self.junkStart = file.tell() - 8 + self.junkSize = size + file.seek(size, 1) + elif name == 'idx1': + self.has_idx = True + log.debug(u'idx1: %r bytes' % size) + # no need to parse this + t = file.seek(size, 1) + elif name == 'RIFF': + log.debug(u'New RIFF chunk, extended avi [%i]' % size) + type = file.read(4) + if type != 'AVIX': + log.debug(u'Second RIFF chunk is %r, not AVIX, skipping', type) + file.seek(size - 4, 1) + # that's it, no new informations should be in AVIX + return False + elif name == 'fmt ' and size <= 50: + # This is a wav file. + data = file.read(size) + fmt = struct.unpack("<HHLLHH", data[:16]) + self._set('codec', hex(fmt[0])) + self._set('samplerate', fmt[2]) + # fmt[3] is average bytes per second, so we must divide it + # by 125 to get kbits per second + self._set('bitrate', fmt[3] / 125) + # ugly hack: remember original rate in bytes per second + # so that the length can be calculated in next elif block + self._set('byterate', fmt[3]) + # Set a dummy fourcc so codec will be resolved in finalize. + self._set('fourcc', 'dummy') + elif name == 'data': + # XXX: this is naive and may not be right. For example if the + # stream is something that supports VBR like mp3, the value + # will be off. 
The only way to properly deal with this issue + # is to decode part of the stream based on its codec, but + # kaa.metadata doesn't have this capability (yet?) + # ugly hack: use original rate in bytes per second + self._set('length', size / float(self.byterate)) + file.seek(size, 1) + elif not name.strip(string.printable + string.whitespace): + # check if name is something usefull at all, maybe it is no + # avi or broken + t = file.seek(size, 1) + log.debug(u'Skipping %r [%i]' % (name, size)) + else: + # bad avi + log.debug(u'Bad or broken avi') + return False + return True + + +Parser = Riff diff --git a/lib/enzyme/strutils.py b/lib/enzyme/strutils.py new file mode 100644 index 0000000000000000000000000000000000000000..8578aefa85abd123818bfb6ed79ccbe1d4b0529e --- /dev/null +++ b/lib/enzyme/strutils.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- +# enzyme - Video metadata parser +# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com> +# Copyright 2006-2009 Dirk Meyer <dischi@freevo.org> +# Copyright 2006-2009 Jason Tackaberry +# +# This file is part of enzyme. +# +# enzyme is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# enzyme is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with enzyme. If not, see <http://www.gnu.org/licenses/>. 
+__all__ = ['ENCODING', 'str_to_unicode', 'unicode_to_str'] + +import locale + +# find the correct encoding +try: + ENCODING = locale.getdefaultlocale()[1] + ''.encode(ENCODING) +except (UnicodeError, TypeError): + ENCODING = 'latin-1' + + +def str_to_unicode(s, encoding=None): + """ + Attempts to convert a string of unknown character set to a unicode + string. First it tries to decode the string based on the locale's + preferred encoding, and if that fails, fall back to UTF-8 and then + latin-1. If all fails, it will force encoding to the preferred + charset, replacing unknown characters. If the given object is no + string, this function will return the given object. + """ + if not type(s) == str: + return s + + if not encoding: + encoding = ENCODING + + for c in [encoding, "utf-8", "latin-1"]: + try: + return s.decode(c) + except UnicodeDecodeError: + pass + + return s.decode(encoding, "replace") + + +def unicode_to_str(s, encoding=None): + """ + Attempts to convert a unicode string of unknown character set to a + string. First it tries to encode the string based on the locale's + preferred encoding, and if that fails, fall back to UTF-8 and then + latin-1. If all fails, it will force encoding to the preferred + charset, replacing unknown characters. If the given object is no + unicode string, this function will return the given object. 
+ """ + if not type(s) == unicode: + return s + + if not encoding: + encoding = ENCODING + + for c in [encoding, "utf-8", "latin-1"]: + try: + return s.encode(c) + except UnicodeDecodeError: + pass + + return s.encode(encoding, "replace") diff --git a/lib/guessit/ISO-3166-1_utf8.txt b/lib/guessit/ISO-3166-1_utf8.txt new file mode 100644 index 0000000000000000000000000000000000000000..7022040d91cffda6e191b7a7db0cac71200f3bd3 --- /dev/null +++ b/lib/guessit/ISO-3166-1_utf8.txt @@ -0,0 +1,249 @@ +Afghanistan|AF|AFG|004|ISO 3166-2:AF +Ã…land Islands|AX|ALA|248|ISO 3166-2:AX +Albania|AL|ALB|008|ISO 3166-2:AL +Algeria|DZ|DZA|012|ISO 3166-2:DZ +American Samoa|AS|ASM|016|ISO 3166-2:AS +Andorra|AD|AND|020|ISO 3166-2:AD +Angola|AO|AGO|024|ISO 3166-2:AO +Anguilla|AI|AIA|660|ISO 3166-2:AI +Antarctica|AQ|ATA|010|ISO 3166-2:AQ +Antigua and Barbuda|AG|ATG|028|ISO 3166-2:AG +Argentina|AR|ARG|032|ISO 3166-2:AR +Armenia|AM|ARM|051|ISO 3166-2:AM +Aruba|AW|ABW|533|ISO 3166-2:AW +Australia|AU|AUS|036|ISO 3166-2:AU +Austria|AT|AUT|040|ISO 3166-2:AT +Azerbaijan|AZ|AZE|031|ISO 3166-2:AZ +Bahamas|BS|BHS|044|ISO 3166-2:BS +Bahrain|BH|BHR|048|ISO 3166-2:BH +Bangladesh|BD|BGD|050|ISO 3166-2:BD +Barbados|BB|BRB|052|ISO 3166-2:BB +Belarus|BY|BLR|112|ISO 3166-2:BY +Belgium|BE|BEL|056|ISO 3166-2:BE +Belize|BZ|BLZ|084|ISO 3166-2:BZ +Benin|BJ|BEN|204|ISO 3166-2:BJ +Bermuda|BM|BMU|060|ISO 3166-2:BM +Bhutan|BT|BTN|064|ISO 3166-2:BT +Bolivia, Plurinational State of|BO|BOL|068|ISO 3166-2:BO +Bonaire, Sint Eustatius and Saba|BQ|BES|535|ISO 3166-2:BQ +Bosnia and Herzegovina|BA|BIH|070|ISO 3166-2:BA +Botswana|BW|BWA|072|ISO 3166-2:BW +Bouvet Island|BV|BVT|074|ISO 3166-2:BV +Brazil|BR|BRA|076|ISO 3166-2:BR +British Indian Ocean Territory|IO|IOT|086|ISO 3166-2:IO +Brunei Darussalam|BN|BRN|096|ISO 3166-2:BN +Bulgaria|BG|BGR|100|ISO 3166-2:BG +Burkina Faso|BF|BFA|854|ISO 3166-2:BF +Burundi|BI|BDI|108|ISO 3166-2:BI +Cambodia|KH|KHM|116|ISO 3166-2:KH +Cameroon|CM|CMR|120|ISO 3166-2:CM +Canada|CA|CAN|124|ISO 
3166-2:CA +Cape Verde|CV|CPV|132|ISO 3166-2:CV +Cayman Islands|KY|CYM|136|ISO 3166-2:KY +Central African Republic|CF|CAF|140|ISO 3166-2:CF +Chad|TD|TCD|148|ISO 3166-2:TD +Chile|CL|CHL|152|ISO 3166-2:CL +China|CN|CHN|156|ISO 3166-2:CN +Christmas Island|CX|CXR|162|ISO 3166-2:CX +Cocos (Keeling) Islands|CC|CCK|166|ISO 3166-2:CC +Colombia|CO|COL|170|ISO 3166-2:CO +Comoros|KM|COM|174|ISO 3166-2:KM +Congo|CG|COG|178|ISO 3166-2:CG +Congo, the Democratic Republic of the|CD|COD|180|ISO 3166-2:CD +Cook Islands|CK|COK|184|ISO 3166-2:CK +Costa Rica|CR|CRI|188|ISO 3166-2:CR +Côte d'Ivoire|CI|CIV|384|ISO 3166-2:CI +Croatia|HR|HRV|191|ISO 3166-2:HR +Cuba|CU|CUB|192|ISO 3166-2:CU +Curaçao|CW|CUW|531|ISO 3166-2:CW +Cyprus|CY|CYP|196|ISO 3166-2:CY +Czech Republic|CZ|CZE|203|ISO 3166-2:CZ +Denmark|DK|DNK|208|ISO 3166-2:DK +Djibouti|DJ|DJI|262|ISO 3166-2:DJ +Dominica|DM|DMA|212|ISO 3166-2:DM +Dominican Republic|DO|DOM|214|ISO 3166-2:DO +Ecuador|EC|ECU|218|ISO 3166-2:EC +Egypt|EG|EGY|818|ISO 3166-2:EG +El Salvador|SV|SLV|222|ISO 3166-2:SV +Equatorial Guinea|GQ|GNQ|226|ISO 3166-2:GQ +Eritrea|ER|ERI|232|ISO 3166-2:ER +Estonia|EE|EST|233|ISO 3166-2:EE +Ethiopia|ET|ETH|231|ISO 3166-2:ET +Falkland Islands (Malvinas|FK|FLK|238|ISO 3166-2:FK +Faroe Islands|FO|FRO|234|ISO 3166-2:FO +Fiji|FJ|FJI|242|ISO 3166-2:FJ +Finland|FI|FIN|246|ISO 3166-2:FI +France|FR|FRA|250|ISO 3166-2:FR +French Guiana|GF|GUF|254|ISO 3166-2:GF +French Polynesia|PF|PYF|258|ISO 3166-2:PF +French Southern Territories|TF|ATF|260|ISO 3166-2:TF +Gabon|GA|GAB|266|ISO 3166-2:GA +Gambia|GM|GMB|270|ISO 3166-2:GM +Georgia|GE|GEO|268|ISO 3166-2:GE +Germany|DE|DEU|276|ISO 3166-2:DE +Ghana|GH|GHA|288|ISO 3166-2:GH +Gibraltar|GI|GIB|292|ISO 3166-2:GI +Greece|GR|GRC|300|ISO 3166-2:GR +Greenland|GL|GRL|304|ISO 3166-2:GL +Grenada|GD|GRD|308|ISO 3166-2:GD +Guadeloupe|GP|GLP|312|ISO 3166-2:GP +Guam|GU|GUM|316|ISO 3166-2:GU +Guatemala|GT|GTM|320|ISO 3166-2:GT +Guernsey|GG|GGY|831|ISO 3166-2:GG +Guinea|GN|GIN|324|ISO 3166-2:GN 
+Guinea-Bissau|GW|GNB|624|ISO 3166-2:GW +Guyana|GY|GUY|328|ISO 3166-2:GY +Haiti|HT|HTI|332|ISO 3166-2:HT +Heard Island and McDonald Islands|HM|HMD|334|ISO 3166-2:HM +Holy See (Vatican City State|VA|VAT|336|ISO 3166-2:VA +Honduras|HN|HND|340|ISO 3166-2:HN +Hong Kong|HK|HKG|344|ISO 3166-2:HK +Hungary|HU|HUN|348|ISO 3166-2:HU +Iceland|IS|ISL|352|ISO 3166-2:IS +India|IN|IND|356|ISO 3166-2:IN +Indonesia|ID|IDN|360|ISO 3166-2:ID +Iran, Islamic Republic of|IR|IRN|364|ISO 3166-2:IR +Iraq|IQ|IRQ|368|ISO 3166-2:IQ +Ireland|IE|IRL|372|ISO 3166-2:IE +Isle of Man|IM|IMN|833|ISO 3166-2:IM +Israel|IL|ISR|376|ISO 3166-2:IL +Italy|IT|ITA|380|ISO 3166-2:IT +Jamaica|JM|JAM|388|ISO 3166-2:JM +Japan|JP|JPN|392|ISO 3166-2:JP +Jersey|JE|JEY|832|ISO 3166-2:JE +Jordan|JO|JOR|400|ISO 3166-2:JO +Kazakhstan|KZ|KAZ|398|ISO 3166-2:KZ +Kenya|KE|KEN|404|ISO 3166-2:KE +Kiribati|KI|KIR|296|ISO 3166-2:KI +Korea, Democratic People's Republic of|KP|PRK|408|ISO 3166-2:KP +Korea, Republic of|KR|KOR|410|ISO 3166-2:KR +Kuwait|KW|KWT|414|ISO 3166-2:KW +Kyrgyzstan|KG|KGZ|417|ISO 3166-2:KG +Lao People's Democratic Republic|LA|LAO|418|ISO 3166-2:LA +Latvia|LV|LVA|428|ISO 3166-2:LV +Lebanon|LB|LBN|422|ISO 3166-2:LB +Lesotho|LS|LSO|426|ISO 3166-2:LS +Liberia|LR|LBR|430|ISO 3166-2:LR +Libya|LY|LBY|434|ISO 3166-2:LY +Liechtenstein|LI|LIE|438|ISO 3166-2:LI +Lithuania|LT|LTU|440|ISO 3166-2:LT +Luxembourg|LU|LUX|442|ISO 3166-2:LU +Macao|MO|MAC|446|ISO 3166-2:MO +Macedonia, the former Yugoslav Republic of|MK|MKD|807|ISO 3166-2:MK +Madagascar|MG|MDG|450|ISO 3166-2:MG +Malawi|MW|MWI|454|ISO 3166-2:MW +Malaysia|MY|MYS|458|ISO 3166-2:MY +Maldives|MV|MDV|462|ISO 3166-2:MV +Mali|ML|MLI|466|ISO 3166-2:ML +Malta|MT|MLT|470|ISO 3166-2:MT +Marshall Islands|MH|MHL|584|ISO 3166-2:MH +Martinique|MQ|MTQ|474|ISO 3166-2:MQ +Mauritania|MR|MRT|478|ISO 3166-2:MR +Mauritius|MU|MUS|480|ISO 3166-2:MU +Mayotte|YT|MYT|175|ISO 3166-2:YT +Mexico|MX|MEX|484|ISO 3166-2:MX +Micronesia, Federated States of|FM|FSM|583|ISO 3166-2:FM +Moldova, 
Republic of|MD|MDA|498|ISO 3166-2:MD +Monaco|MC|MCO|492|ISO 3166-2:MC +Mongolia|MN|MNG|496|ISO 3166-2:MN +Montenegro|ME|MNE|499|ISO 3166-2:ME +Montserrat|MS|MSR|500|ISO 3166-2:MS +Morocco|MA|MAR|504|ISO 3166-2:MA +Mozambique|MZ|MOZ|508|ISO 3166-2:MZ +Myanmar|MM|MMR|104|ISO 3166-2:MM +Namibia|NA|NAM|516|ISO 3166-2:NA +Nauru|NR|NRU|520|ISO 3166-2:NR +Nepal|NP|NPL|524|ISO 3166-2:NP +Netherlands|NL|NLD|528|ISO 3166-2:NL +New Caledonia|NC|NCL|540|ISO 3166-2:NC +New Zealand|NZ|NZL|554|ISO 3166-2:NZ +Nicaragua|NI|NIC|558|ISO 3166-2:NI +Niger|NE|NER|562|ISO 3166-2:NE +Nigeria|NG|NGA|566|ISO 3166-2:NG +Niue|NU|NIU|570|ISO 3166-2:NU +Norfolk Island|NF|NFK|574|ISO 3166-2:NF +Northern Mariana Islands|MP|MNP|580|ISO 3166-2:MP +Norway|NO|NOR|578|ISO 3166-2:NO +Oman|OM|OMN|512|ISO 3166-2:OM +Pakistan|PK|PAK|586|ISO 3166-2:PK +Palau|PW|PLW|585|ISO 3166-2:PW +Palestinian Territory, Occupied|PS|PSE|275|ISO 3166-2:PS +Panama|PA|PAN|591|ISO 3166-2:PA +Papua New Guinea|PG|PNG|598|ISO 3166-2:PG +Paraguay|PY|PRY|600|ISO 3166-2:PY +Peru|PE|PER|604|ISO 3166-2:PE +Philippines|PH|PHL|608|ISO 3166-2:PH +Pitcairn|PN|PCN|612|ISO 3166-2:PN +Poland|PL|POL|616|ISO 3166-2:PL +Portugal|PT|PRT|620|ISO 3166-2:PT +Puerto Rico|PR|PRI|630|ISO 3166-2:PR +Qatar|QA|QAT|634|ISO 3166-2:QA +Réunion|RE|REU|638|ISO 3166-2:RE +Romania|RO|ROU|642|ISO 3166-2:RO +Russian Federation|RU|RUS|643|ISO 3166-2:RU +Rwanda|RW|RWA|646|ISO 3166-2:RW +Saint Barthélemy|BL|BLM|652|ISO 3166-2:BL +Saint Helena, Ascension and Tristan da Cunha|SH|SHN|654|ISO 3166-2:SH +Saint Kitts and Nevis|KN|KNA|659|ISO 3166-2:KN +Saint Lucia|LC|LCA|662|ISO 3166-2:LC +Saint Martin (French part|MF|MAF|663|ISO 3166-2:MF +Saint Pierre and Miquelon|PM|SPM|666|ISO 3166-2:PM +Saint Vincent and the Grenadines|VC|VCT|670|ISO 3166-2:VC +Samoa|WS|WSM|882|ISO 3166-2:WS +San Marino|SM|SMR|674|ISO 3166-2:SM +Sao Tome and Principe|ST|STP|678|ISO 3166-2:ST +Saudi Arabia|SA|SAU|682|ISO 3166-2:SA +Senegal|SN|SEN|686|ISO 3166-2:SN +Serbia|RS|SRB|688|ISO 3166-2:RS 
+Seychelles|SC|SYC|690|ISO 3166-2:SC +Sierra Leone|SL|SLE|694|ISO 3166-2:SL +Singapore|SG|SGP|702|ISO 3166-2:SG +Sint Maarten (Dutch part|SX|SXM|534|ISO 3166-2:SX +Slovakia|SK|SVK|703|ISO 3166-2:SK +Slovenia|SI|SVN|705|ISO 3166-2:SI +Solomon Islands|SB|SLB|090|ISO 3166-2:SB +Somalia|SO|SOM|706|ISO 3166-2:SO +South Africa|ZA|ZAF|710|ISO 3166-2:ZA +South Georgia and the South Sandwich Islands|GS|SGS|239|ISO 3166-2:GS +South Sudan|SS|SSD|728|ISO 3166-2:SS +Spain|ES|ESP|724|ISO 3166-2:ES +Sri Lanka|LK|LKA|144|ISO 3166-2:LK +Sudan|SD|SDN|729|ISO 3166-2:SD +Suriname|SR|SUR|740|ISO 3166-2:SR +Svalbard and Jan Mayen|SJ|SJM|744|ISO 3166-2:SJ +Swaziland|SZ|SWZ|748|ISO 3166-2:SZ +Sweden|SE|SWE|752|ISO 3166-2:SE +Switzerland|CH|CHE|756|ISO 3166-2:CH +Syrian Arab Republic|SY|SYR|760|ISO 3166-2:SY +Taiwan, Province of China|TW|TWN|158|ISO 3166-2:TW +Tajikistan|TJ|TJK|762|ISO 3166-2:TJ +Tanzania, United Republic of|TZ|TZA|834|ISO 3166-2:TZ +Thailand|TH|THA|764|ISO 3166-2:TH +Timor-Leste|TL|TLS|626|ISO 3166-2:TL +Togo|TG|TGO|768|ISO 3166-2:TG +Tokelau|TK|TKL|772|ISO 3166-2:TK +Tonga|TO|TON|776|ISO 3166-2:TO +Trinidad and Tobago|TT|TTO|780|ISO 3166-2:TT +Tunisia|TN|TUN|788|ISO 3166-2:TN +Turkey|TR|TUR|792|ISO 3166-2:TR +Turkmenistan|TM|TKM|795|ISO 3166-2:TM +Turks and Caicos Islands|TC|TCA|796|ISO 3166-2:TC +Tuvalu|TV|TUV|798|ISO 3166-2:TV +Uganda|UG|UGA|800|ISO 3166-2:UG +Ukraine|UA|UKR|804|ISO 3166-2:UA +United Arab Emirates|AE|ARE|784|ISO 3166-2:AE +United Kingdom|GB|GBR|826|ISO 3166-2:GB +United States|US|USA|840|ISO 3166-2:US +United States Minor Outlying Islands|UM|UMI|581|ISO 3166-2:UM +Uruguay|UY|URY|858|ISO 3166-2:UY +Uzbekistan|UZ|UZB|860|ISO 3166-2:UZ +Vanuatu|VU|VUT|548|ISO 3166-2:VU +Venezuela, Bolivarian Republic of|VE|VEN|862|ISO 3166-2:VE +Viet Nam|VN|VNM|704|ISO 3166-2:VN +Virgin Islands, British|VG|VGB|092|ISO 3166-2:VG +Virgin Islands, U.S|VI|VIR|850|ISO 3166-2:VI +Wallis and Futuna|WF|WLF|876|ISO 3166-2:WF +Western Sahara|EH|ESH|732|ISO 3166-2:EH 
+Yemen|YE|YEM|887|ISO 3166-2:YE +Zambia|ZM|ZMB|894|ISO 3166-2:ZM +Zimbabwe|ZW|ZWE|716|ISO 3166-2:ZW diff --git a/lib/guessit/ISO-639-2_utf-8.txt b/lib/guessit/ISO-639-2_utf-8.txt new file mode 100644 index 0000000000000000000000000000000000000000..2961d219f391d7cc2ccdb943cd3de4597298d7dd --- /dev/null +++ b/lib/guessit/ISO-639-2_utf-8.txt @@ -0,0 +1,485 @@ +aar||aa|Afar|afar +abk||ab|Abkhazian|abkhaze +ace|||Achinese|aceh +ach|||Acoli|acoli +ada|||Adangme|adangme +ady|||Adyghe; Adygei|adyghé +afa|||Afro-Asiatic languages|afro-asiatiques, langues +afh|||Afrihili|afrihili +afr||af|Afrikaans|afrikaans +ain|||Ainu|aïnou +aka||ak|Akan|akan +akk|||Akkadian|akkadien +alb|sqi|sq|Albanian|albanais +ale|||Aleut|aléoute +alg|||Algonquian languages|algonquines, langues +alt|||Southern Altai|altai du Sud +amh||am|Amharic|amharique +ang|||English, Old (ca.450-1100)|anglo-saxon (ca.450-1100) +anp|||Angika|angika +apa|||Apache languages|apaches, langues +ara||ar|Arabic|arabe +arc|||Official Aramaic (700-300 BCE); Imperial Aramaic (700-300 BCE)|araméen d'empire (700-300 BCE) +arg||an|Aragonese|aragonais +arm|hye|hy|Armenian|arménien +arn|||Mapudungun; Mapuche|mapudungun; mapuche; mapuce +arp|||Arapaho|arapaho +art|||Artificial languages|artificielles, langues +arw|||Arawak|arawak +asm||as|Assamese|assamais +ast|||Asturian; Bable; Leonese; Asturleonese|asturien; bable; léonais; asturoléonais +ath|||Athapascan languages|athapascanes, langues +aus|||Australian languages|australiennes, langues +ava||av|Avaric|avar +ave||ae|Avestan|avestique +awa|||Awadhi|awadhi +aym||ay|Aymara|aymara +aze||az|Azerbaijani|azéri +bad|||Banda languages|banda, langues +bai|||Bamileke languages|bamiléké, langues +bak||ba|Bashkir|bachkir +bal|||Baluchi|baloutchi +bam||bm|Bambara|bambara +ban|||Balinese|balinais +baq|eus|eu|Basque|basque +bas|||Basa|basa +bat|||Baltic languages|baltes, langues +bej|||Beja; Bedawiyet|bedja +bel||be|Belarusian|biélorusse +bem|||Bemba|bemba +ben||bn|Bengali|bengali +ber|||Berber 
languages|berbères, langues +bho|||Bhojpuri|bhojpuri +bih||bh|Bihari languages|langues biharis +bik|||Bikol|bikol +bin|||Bini; Edo|bini; edo +bis||bi|Bislama|bichlamar +bla|||Siksika|blackfoot +bnt|||Bantu (Other)|bantoues, autres langues +bos||bs|Bosnian|bosniaque +bra|||Braj|braj +bre||br|Breton|breton +btk|||Batak languages|batak, langues +bua|||Buriat|bouriate +bug|||Buginese|bugi +bul||bg|Bulgarian|bulgare +bur|mya|my|Burmese|birman +byn|||Blin; Bilin|blin; bilen +cad|||Caddo|caddo +cai|||Central American Indian languages|amérindiennes de L'Amérique centrale, langues +car|||Galibi Carib|karib; galibi; carib +cat||ca|Catalan; Valencian|catalan; valencien +cau|||Caucasian languages|caucasiennes, langues +ceb|||Cebuano|cebuano +cel|||Celtic languages|celtiques, langues; celtes, langues +cha||ch|Chamorro|chamorro +chb|||Chibcha|chibcha +che||ce|Chechen|tchétchène +chg|||Chagatai|djaghataï +chi|zho|zh|Chinese|chinois +chk|||Chuukese|chuuk +chm|||Mari|mari +chn|||Chinook jargon|chinook, jargon +cho|||Choctaw|choctaw +chp|||Chipewyan; Dene Suline|chipewyan +chr|||Cherokee|cherokee +chu||cu|Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic|slavon d'église; vieux slave; slavon liturgique; vieux bulgare +chv||cv|Chuvash|tchouvache +chy|||Cheyenne|cheyenne +cmc|||Chamic languages|chames, langues +cop|||Coptic|copte +cor||kw|Cornish|cornique +cos||co|Corsican|corse +cpe|||Creoles and pidgins, English based|créoles et pidgins basés sur l'anglais +cpf|||Creoles and pidgins, French-based |créoles et pidgins basés sur le français +cpp|||Creoles and pidgins, Portuguese-based |créoles et pidgins basés sur le portugais +cre||cr|Cree|cree +crh|||Crimean Tatar; Crimean Turkish|tatar de Crimé +crp|||Creoles and pidgins |créoles et pidgins +csb|||Kashubian|kachoube +cus|||Cushitic languages|couchitiques, langues +cze|ces|cs|Czech|tchèque +dak|||Dakota|dakota +dan||da|Danish|danois +dar|||Dargwa|dargwa +day|||Land Dayak languages|dayak, langues 
+del|||Delaware|delaware +den|||Slave (Athapascan)|esclave (athapascan) +dgr|||Dogrib|dogrib +din|||Dinka|dinka +div||dv|Divehi; Dhivehi; Maldivian|maldivien +doi|||Dogri|dogri +dra|||Dravidian languages|dravidiennes, langues +dsb|||Lower Sorbian|bas-sorabe +dua|||Duala|douala +dum|||Dutch, Middle (ca.1050-1350)|néerlandais moyen (ca. 1050-1350) +dut|nld|nl|Dutch; Flemish|néerlandais; flamand +dyu|||Dyula|dioula +dzo||dz|Dzongkha|dzongkha +efi|||Efik|efik +egy|||Egyptian (Ancient)|égyptien +eka|||Ekajuk|ekajuk +elx|||Elamite|élamite +eng||en|English|anglais +enm|||English, Middle (1100-1500)|anglais moyen (1100-1500) +epo||eo|Esperanto|espéranto +est||et|Estonian|estonien +ewe||ee|Ewe|éwé +ewo|||Ewondo|éwondo +fan|||Fang|fang +fao||fo|Faroese|féroïen +fat|||Fanti|fanti +fij||fj|Fijian|fidjien +fil|||Filipino; Pilipino|filipino; pilipino +fin||fi|Finnish|finnois +fiu|||Finno-Ugrian languages|finno-ougriennes, langues +fon|||Fon|fon +fre|fra|fr|French|français +frm|||French, Middle (ca.1400-1600)|français moyen (1400-1600) +fro|||French, Old (842-ca.1400)|français ancien (842-ca.1400) +frr|||Northern Frisian|frison septentrional +frs|||Eastern Frisian|frison oriental +fry||fy|Western Frisian|frison occidental +ful||ff|Fulah|peul +fur|||Friulian|frioulan +gaa|||Ga|ga +gay|||Gayo|gayo +gba|||Gbaya|gbaya +gem|||Germanic languages|germaniques, langues +geo|kat|ka|Georgian|géorgien +ger|deu|de|German|allemand +gez|||Geez|guèze +gil|||Gilbertese|kiribati +gla||gd|Gaelic; Scottish Gaelic|gaélique; gaélique écossais +gle||ga|Irish|irlandais +glg||gl|Galician|galicien +glv||gv|Manx|manx; mannois +gmh|||German, Middle High (ca.1050-1500)|allemand, moyen haut (ca. 1050-1500) +goh|||German, Old High (ca.750-1050)|allemand, vieux haut (ca. 
750-1050) +gon|||Gondi|gond +gor|||Gorontalo|gorontalo +got|||Gothic|gothique +grb|||Grebo|grebo +grc|||Greek, Ancient (to 1453)|grec ancien (jusqu'à 1453) +gre|ell|el|Greek, Modern (1453-)|grec moderne (après 1453) +grn||gn|Guarani|guarani +gsw|||Swiss German; Alemannic; Alsatian|suisse alémanique; alémanique; alsacien +guj||gu|Gujarati|goudjrati +gwi|||Gwich'in|gwich'in +hai|||Haida|haida +hat||ht|Haitian; Haitian Creole|haïtien; créole haïtien +hau||ha|Hausa|haoussa +haw|||Hawaiian|hawaïen +heb||he|Hebrew|hébreu +her||hz|Herero|herero +hil|||Hiligaynon|hiligaynon +him|||Himachali languages; Western Pahari languages|langues himachalis; langues paharis occidentales +hin||hi|Hindi|hindi +hit|||Hittite|hittite +hmn|||Hmong; Mong|hmong +hmo||ho|Hiri Motu|hiri motu +hrv||hr|Croatian|croate +hsb|||Upper Sorbian|haut-sorabe +hun||hu|Hungarian|hongrois +hup|||Hupa|hupa +iba|||Iban|iban +ibo||ig|Igbo|igbo +ice|isl|is|Icelandic|islandais +ido||io|Ido|ido +iii||ii|Sichuan Yi; Nuosu|yi de Sichuan +ijo|||Ijo languages|ijo, langues +iku||iu|Inuktitut|inuktitut +ile||ie|Interlingue; Occidental|interlingue +ilo|||Iloko|ilocano +ina||ia|Interlingua (International Auxiliary Language Association)|interlingua (langue auxiliaire internationale) +inc|||Indic languages|indo-aryennes, langues +ind||id|Indonesian|indonésien +ine|||Indo-European languages|indo-européennes, langues +inh|||Ingush|ingouche +ipk||ik|Inupiaq|inupiaq +ira|||Iranian languages|iraniennes, langues +iro|||Iroquoian languages|iroquoises, langues +ita||it|Italian|italien +jav||jv|Javanese|javanais +jbo|||Lojban|lojban +jpn||ja|Japanese|japonais +jpr|||Judeo-Persian|judéo-persan +jrb|||Judeo-Arabic|judéo-arabe +kaa|||Kara-Kalpak|karakalpak +kab|||Kabyle|kabyle +kac|||Kachin; Jingpho|kachin; jingpho +kal||kl|Kalaallisut; Greenlandic|groenlandais +kam|||Kamba|kamba +kan||kn|Kannada|kannada +kar|||Karen languages|karen, langues +kas||ks|Kashmiri|kashmiri +kau||kr|Kanuri|kanouri +kaw|||Kawi|kawi +kaz||kk|Kazakh|kazakh 
+kbd|||Kabardian|kabardien +kha|||Khasi|khasi +khi|||Khoisan languages|khoïsan, langues +khm||km|Central Khmer|khmer central +kho|||Khotanese; Sakan|khotanais; sakan +kik||ki|Kikuyu; Gikuyu|kikuyu +kin||rw|Kinyarwanda|rwanda +kir||ky|Kirghiz; Kyrgyz|kirghiz +kmb|||Kimbundu|kimbundu +kok|||Konkani|konkani +kom||kv|Komi|kom +kon||kg|Kongo|kongo +kor||ko|Korean|coréen +kos|||Kosraean|kosrae +kpe|||Kpelle|kpellé +krc|||Karachay-Balkar|karatchai balkar +krl|||Karelian|carélien +kro|||Kru languages|krou, langues +kru|||Kurukh|kurukh +kua||kj|Kuanyama; Kwanyama|kuanyama; kwanyama +kum|||Kumyk|koumyk +kur||ku|Kurdish|kurde +kut|||Kutenai|kutenai +lad|||Ladino|judéo-espagnol +lah|||Lahnda|lahnda +lam|||Lamba|lamba +lao||lo|Lao|lao +lat||la|Latin|latin +lav||lv|Latvian|letton +lez|||Lezghian|lezghien +lim||li|Limburgan; Limburger; Limburgish|limbourgeois +lin||ln|Lingala|lingala +lit||lt|Lithuanian|lituanien +lol|||Mongo|mongo +loz|||Lozi|lozi +ltz||lb|Luxembourgish; Letzeburgesch|luxembourgeois +lua|||Luba-Lulua|luba-lulua +lub||lu|Luba-Katanga|luba-katanga +lug||lg|Ganda|ganda +lui|||Luiseno|luiseno +lun|||Lunda|lunda +luo|||Luo (Kenya and Tanzania)|luo (Kenya et Tanzanie) +lus|||Lushai|lushai +mac|mkd|mk|Macedonian|macédonien +mad|||Madurese|madourais +mag|||Magahi|magahi +mah||mh|Marshallese|marshall +mai|||Maithili|maithili +mak|||Makasar|makassar +mal||ml|Malayalam|malayalam +man|||Mandingo|mandingue +mao|mri|mi|Maori|maori +map|||Austronesian languages|austronésiennes, langues +mar||mr|Marathi|marathe +mas|||Masai|massaï +may|msa|ms|Malay|malais +mdf|||Moksha|moksa +mdr|||Mandar|mandar +men|||Mende|mendé +mga|||Irish, Middle (900-1200)|irlandais moyen (900-1200) +mic|||Mi'kmaq; Micmac|mi'kmaq; micmac +min|||Minangkabau|minangkabau +mis|||Uncoded languages|langues non codées +mkh|||Mon-Khmer languages|môn-khmer, langues +mlg||mg|Malagasy|malgache +mlt||mt|Maltese|maltais +mnc|||Manchu|mandchou +mni|||Manipuri|manipuri +mno|||Manobo languages|manobo, langues 
+moh|||Mohawk|mohawk +mon||mn|Mongolian|mongol +mos|||Mossi|moré +mul|||Multiple languages|multilingue +mun|||Munda languages|mounda, langues +mus|||Creek|muskogee +mwl|||Mirandese|mirandais +mwr|||Marwari|marvari +myn|||Mayan languages|maya, langues +myv|||Erzya|erza +nah|||Nahuatl languages|nahuatl, langues +nai|||North American Indian languages|nord-amérindiennes, langues +nap|||Neapolitan|napolitain +nau||na|Nauru|nauruan +nav||nv|Navajo; Navaho|navaho +nbl||nr|Ndebele, South; South Ndebele|ndébélé du Sud +nde||nd|Ndebele, North; North Ndebele|ndébélé du Nord +ndo||ng|Ndonga|ndonga +nds|||Low German; Low Saxon; German, Low; Saxon, Low|bas allemand; bas saxon; allemand, bas; saxon, bas +nep||ne|Nepali|népalais +new|||Nepal Bhasa; Newari|nepal bhasa; newari +nia|||Nias|nias +nic|||Niger-Kordofanian languages|nigéro-kordofaniennes, langues +niu|||Niuean|niué +nno||nn|Norwegian Nynorsk; Nynorsk, Norwegian|norvégien nynorsk; nynorsk, norvégien +nob||nb|BokmÃ¥l, Norwegian; Norwegian BokmÃ¥l|norvégien bokmÃ¥l +nog|||Nogai|nogaï; nogay +non|||Norse, Old|norrois, vieux +nor||no|Norwegian|norvégien +nqo|||N'Ko|n'ko +nso|||Pedi; Sepedi; Northern Sotho|pedi; sepedi; sotho du Nord +nub|||Nubian languages|nubiennes, langues +nwc|||Classical Newari; Old Newari; Classical Nepal Bhasa|newari classique +nya||ny|Chichewa; Chewa; Nyanja|chichewa; chewa; nyanja +nym|||Nyamwezi|nyamwezi +nyn|||Nyankole|nyankolé +nyo|||Nyoro|nyoro +nzi|||Nzima|nzema +oci||oc|Occitan (post 1500); Provençal|occitan (après 1500); provençal +oji||oj|Ojibwa|ojibwa +ori||or|Oriya|oriya +orm||om|Oromo|galla +osa|||Osage|osage +oss||os|Ossetian; Ossetic|ossète +ota|||Turkish, Ottoman (1500-1928)|turc ottoman (1500-1928) +oto|||Otomian languages|otomi, langues +paa|||Papuan languages|papoues, langues +pag|||Pangasinan|pangasinan +pal|||Pahlavi|pahlavi +pam|||Pampanga; Kapampangan|pampangan +pan||pa|Panjabi; Punjabi|pendjabi +pap|||Papiamento|papiamento +pau|||Palauan|palau +peo|||Persian, Old (ca.600-400 
B.C.)|perse, vieux (ca. 600-400 av. J.-C.) +per|fas|fa|Persian|persan +phi|||Philippine languages|philippines, langues +phn|||Phoenician|phénicien +pli||pi|Pali|pali +pol||pl|Polish|polonais +pon|||Pohnpeian|pohnpei +por||pt|Portuguese|portugais +pra|||Prakrit languages|prâkrit, langues +pro|||Provençal, Old (to 1500)|provençal ancien (jusqu'à 1500) +pus||ps|Pushto; Pashto|pachto +qaa-qtz|||Reserved for local use|réservée à l'usage local +que||qu|Quechua|quechua +raj|||Rajasthani|rajasthani +rap|||Rapanui|rapanui +rar|||Rarotongan; Cook Islands Maori|rarotonga; maori des îles Cook +roa|||Romance languages|romanes, langues +roh||rm|Romansh|romanche +rom|||Romany|tsigane +rum|ron|ro|Romanian; Moldavian; Moldovan|roumain; moldave +run||rn|Rundi|rundi +rup|||Aromanian; Arumanian; Macedo-Romanian|aroumain; macédo-roumain +rus||ru|Russian|russe +sad|||Sandawe|sandawe +sag||sg|Sango|sango +sah|||Yakut|iakoute +sai|||South American Indian (Other)|indiennes d'Amérique du Sud, autres langues +sal|||Salishan languages|salishennes, langues +sam|||Samaritan Aramaic|samaritain +san||sa|Sanskrit|sanskrit +sas|||Sasak|sasak +sat|||Santali|santal +scn|||Sicilian|sicilien +sco|||Scots|écossais +sel|||Selkup|selkoupe +sem|||Semitic languages|sémitiques, langues +sga|||Irish, Old (to 900)|irlandais ancien (jusqu'à 900) +sgn|||Sign Languages|langues des signes +shn|||Shan|chan +sid|||Sidamo|sidamo +sin||si|Sinhala; Sinhalese|singhalais +sio|||Siouan languages|sioux, langues +sit|||Sino-Tibetan languages|sino-tibétaines, langues +sla|||Slavic languages|slaves, langues +slo|slk|sk|Slovak|slovaque +slv||sl|Slovenian|slovène +sma|||Southern Sami|sami du Sud +sme||se|Northern Sami|sami du Nord +smi|||Sami languages|sames, langues +smj|||Lule Sami|sami de Lule +smn|||Inari Sami|sami d'Inari +smo||sm|Samoan|samoan +sms|||Skolt Sami|sami skolt +sna||sn|Shona|shona +snd||sd|Sindhi|sindhi +snk|||Soninke|soninké +sog|||Sogdian|sogdien +som||so|Somali|somali +son|||Songhai languages|songhai, 
langues +sot||st|Sotho, Southern|sotho du Sud +spa||es|Spanish; Castilian|espagnol; castillan +srd||sc|Sardinian|sarde +srn|||Sranan Tongo|sranan tongo +srp||sr|Serbian|serbe +srr|||Serer|sérère +ssa|||Nilo-Saharan languages|nilo-sahariennes, langues +ssw||ss|Swati|swati +suk|||Sukuma|sukuma +sun||su|Sundanese|soundanais +sus|||Susu|soussou +sux|||Sumerian|sumérien +swa||sw|Swahili|swahili +swe||sv|Swedish|suédois +syc|||Classical Syriac|syriaque classique +syr|||Syriac|syriaque +tah||ty|Tahitian|tahitien +tai|||Tai languages|tai, langues +tam||ta|Tamil|tamoul +tat||tt|Tatar|tatar +tel||te|Telugu|télougou +tem|||Timne|temne +ter|||Tereno|tereno +tet|||Tetum|tetum +tgk||tg|Tajik|tadjik +tgl||tl|Tagalog|tagalog +tha||th|Thai|thaï +tib|bod|bo|Tibetan|tibétain +tig|||Tigre|tigré +tir||ti|Tigrinya|tigrigna +tiv|||Tiv|tiv +tkl|||Tokelau|tokelau +tlh|||Klingon; tlhIngan-Hol|klingon +tli|||Tlingit|tlingit +tmh|||Tamashek|tamacheq +tog|||Tonga (Nyasa)|tonga (Nyasa) +ton||to|Tonga (Tonga Islands)|tongan (ÃŽles Tonga) +tpi|||Tok Pisin|tok pisin +tsi|||Tsimshian|tsimshian +tsn||tn|Tswana|tswana +tso||ts|Tsonga|tsonga +tuk||tk|Turkmen|turkmène +tum|||Tumbuka|tumbuka +tup|||Tupi languages|tupi, langues +tur||tr|Turkish|turc +tut|||Altaic languages|altaïques, langues +tvl|||Tuvalu|tuvalu +twi||tw|Twi|twi +tyv|||Tuvinian|touva +udm|||Udmurt|oudmourte +uga|||Ugaritic|ougaritique +uig||ug|Uighur; Uyghur|ouïgour +ukr||uk|Ukrainian|ukrainien +umb|||Umbundu|umbundu +und|||Undetermined|indéterminée +urd||ur|Urdu|ourdou +uzb||uz|Uzbek|ouszbek +vai|||Vai|vaï +ven||ve|Venda|venda +vie||vi|Vietnamese|vietnamien +vol||vo|Volapük|volapük +vot|||Votic|vote +wak|||Wakashan languages|wakashanes, langues +wal|||Walamo|walamo +war|||Waray|waray +was|||Washo|washo +wel|cym|cy|Welsh|gallois +wen|||Sorbian languages|sorabes, langues +wln||wa|Walloon|wallon +wol||wo|Wolof|wolof +xal|||Kalmyk; Oirat|kalmouk; oïrat +xho||xh|Xhosa|xhosa +yao|||Yao|yao +yap|||Yapese|yapois +yid||yi|Yiddish|yiddish 
+yor||yo|Yoruba|yoruba +ypk|||Yupik languages|yupik, langues +zap|||Zapotec|zapotèque +zbl|||Blissymbols; Blissymbolics; Bliss|symboles Bliss; Bliss +zen|||Zenaga|zenaga +zha||za|Zhuang; Chuang|zhuang; chuang +znd|||Zande languages|zandé, langues +zul||zu|Zulu|zoulou +zun|||Zuni|zuni +zxx|||No linguistic content; Not applicable|pas de contenu linguistique; non applicable +zza|||Zaza; Dimili; Dimli; Kirdki; Kirmanjki; Zazaki|zaza; dimili; dimli; kirdki; kirmanjki; zazaki \ No newline at end of file diff --git a/lib/guessit/__init__.py b/lib/guessit/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9c1609883ef84f0a67e2664cc4a62040beb5ffa2 --- /dev/null +++ b/lib/guessit/__init__.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com> +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# + + +__version__ = '0.6-dev' +__all__ = ['Guess', 'Language', + 'guess_file_info', 'guess_video_info', + 'guess_movie_info', 'guess_episode_info'] + + +# Do python3 detection before importing any other module, to be sure that +# it will then always be available +# with code from http://lucumr.pocoo.org/2011/1/22/forwards-compatible-python/ +import sys +if sys.version_info[0] >= 3: + PY3 = True + unicode_text_type = str + native_text_type = str + base_text_type = str + def u(x): + return str(x) + def s(x): + return x + class UnicodeMixin(object): + __str__ = lambda x: x.__unicode__() + import binascii + def to_hex(x): + return binascii.hexlify(x).decode('utf-8') + +else: + PY3 = False + __all__ = [ str(s) for s in __all__ ] # fix imports for python2 + unicode_text_type = unicode + native_text_type = str + base_text_type = basestring + def u(x): + if isinstance(x, str): + return x.decode('utf-8') + return unicode(x) + def s(x): + if isinstance(x, unicode): + return x.encode('utf-8') + if isinstance(x, list): + return [ s(y) for y in x ] + if isinstance(x, tuple): + return tuple(s(y) for y in x) + if isinstance(x, dict): + return dict((s(key), s(value)) for key, value in x.items()) + return x + class UnicodeMixin(object): + __str__ = lambda x: unicode(x).encode('utf-8') + def to_hex(x): + return x.encode('hex') + + +from guessit.guess import Guess, merge_all +from guessit.language import Language +from guessit.matcher import IterativeMatcher +import logging + +log = logging.getLogger(__name__) + + + +class NullHandler(logging.Handler): + def emit(self, record): + pass + +# let's be a nicely behaving library +h = NullHandler() +log.addHandler(h) + + +def guess_file_info(filename, filetype, info=None): + """info can contain the names of the various plugins, such as 'filename' to + detect filename info, or 'hash_md5' to get the md5 hash of the file. 
+ + >>> guess_file_info('tests/dummy.srt', 'autodetect', info = ['hash_md5', 'hash_sha1']) + {'hash_md5': 'e781de9b94ba2753a8e2945b2c0a123d', 'hash_sha1': 'bfd18e2f4e5d59775c2bc14d80f56971891ed620'} + """ + result = [] + hashers = [] + + # Force unicode as soon as possible + filename = u(filename) + + if info is None: + info = ['filename'] + + if isinstance(info, base_text_type): + info = [info] + + for infotype in info: + if infotype == 'filename': + m = IterativeMatcher(filename, filetype=filetype) + result.append(m.matched()) + + elif infotype == 'hash_mpc': + from guessit.hash_mpc import hash_file + try: + result.append(Guess({'hash_mpc': hash_file(filename)}, + confidence=1.0)) + except Exception as e: + log.warning('Could not compute MPC-style hash because: %s' % e) + + elif infotype == 'hash_ed2k': + from guessit.hash_ed2k import hash_file + try: + result.append(Guess({'hash_ed2k': hash_file(filename)}, + confidence=1.0)) + except Exception as e: + log.warning('Could not compute ed2k hash because: %s' % e) + + elif infotype.startswith('hash_'): + import hashlib + hashname = infotype[5:] + try: + hasher = getattr(hashlib, hashname)() + hashers.append((infotype, hasher)) + except AttributeError: + log.warning('Could not compute %s hash because it is not available from python\'s hashlib module' % hashname) + + else: + log.warning('Invalid infotype: %s' % infotype) + + # do all the hashes now, but on a single pass + if hashers: + try: + blocksize = 8192 + hasherobjs = dict(hashers).values() + + with open(filename, 'rb') as f: + chunk = f.read(blocksize) + while chunk: + for hasher in hasherobjs: + hasher.update(chunk) + chunk = f.read(blocksize) + + for infotype, hasher in hashers: + result.append(Guess({infotype: hasher.hexdigest()}, + confidence=1.0)) + except Exception as e: + log.warning('Could not compute hash because: %s' % e) + + result = merge_all(result) + + # last minute adjustments + + # if country is in the guessed properties, make it part of the 
filename + if 'country' in result: + result['series'] += ' (%s)' % result['country'].alpha2.upper() + + + return result + + +def guess_video_info(filename, info=None): + return guess_file_info(filename, 'autodetect', info) + + +def guess_movie_info(filename, info=None): + return guess_file_info(filename, 'movie', info) + + +def guess_episode_info(filename, info=None): + return guess_file_info(filename, 'episode', info) diff --git a/lib/guessit/country.py b/lib/guessit/country.py new file mode 100644 index 0000000000000000000000000000000000000000..944b7df6da2c47f1e0b3f1c6e234c0aec7e815aa --- /dev/null +++ b/lib/guessit/country.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com> +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +from __future__ import unicode_literals +from guessit import UnicodeMixin, base_text_type, u +from guessit.fileutils import load_file_in_same_dir +import logging + +__all__ = [ 'Country' ] + +log = logging.getLogger(__name__) + + +# parsed from http://en.wikipedia.org/wiki/ISO_3166-1 +# +# Description of the fields: +# "An English name, an alpha-2 code (when given), +# an alpha-3 code (when given), a numeric code, and an ISO 31666-2 code +# are all separated by pipe (|) characters." 
+_iso3166_contents = load_file_in_same_dir(__file__, 'ISO-3166-1_utf8.txt') + +country_matrix = [ l.strip().split('|') + for l in _iso3166_contents.strip().split('\n') ] + +country_matrix += [ [ 'Unknown', 'un', 'unk', '', '' ], + [ 'Latin America', '', 'lat', '', '' ] + ] + +country_to_alpha3 = dict((c[0].lower(), c[2].lower()) for c in country_matrix) +country_to_alpha3.update(dict((c[1].lower(), c[2].lower()) for c in country_matrix)) +country_to_alpha3.update(dict((c[2].lower(), c[2].lower()) for c in country_matrix)) + +# add here exceptions / non ISO representations +# Note: remember to put those exceptions in lower-case, they won't work otherwise +country_to_alpha3.update({ 'latinoamérica': 'lat', + 'brazilian': 'bra', + 'españa': 'esp', + 'uk': 'gbr' + }) + +country_alpha3_to_en_name = dict((c[2].lower(), c[0]) for c in country_matrix) +country_alpha3_to_alpha2 = dict((c[2].lower(), c[1].lower()) for c in country_matrix) + + + +class Country(UnicodeMixin): + """This class represents a country. + + You can initialize it with pretty much anything, as it knows conversion + from ISO-3166 2-letter and 3-letter codes, and an English name. 
+ """ + + def __init__(self, country, strict=False): + country = u(country.strip().lower()) + self.alpha3 = country_to_alpha3.get(country) + + if self.alpha3 is None and strict: + msg = 'The given string "%s" could not be identified as a country' + raise ValueError(msg % country) + + if self.alpha3 is None: + self.alpha3 = 'unk' + + + @property + def alpha2(self): + return country_alpha3_to_alpha2[self.alpha3] + + @property + def english_name(self): + return country_alpha3_to_en_name[self.alpha3] + + def __hash__(self): + return hash(self.alpha3) + + def __eq__(self, other): + if isinstance(other, Country): + return self.alpha3 == other.alpha3 + + if isinstance(other, base_text_type): + try: + return self == Country(other) + except ValueError: + return False + + return False + + def __ne__(self, other): + return not self == other + + def __unicode__(self): + return self.english_name + + def __repr__(self): + return 'Country(%s)' % self.english_name diff --git a/lib/guessit/date.py b/lib/guessit/date.py new file mode 100644 index 0000000000000000000000000000000000000000..bd84c65da66d8fd21dbe7e49f180679ac79a9f3c --- /dev/null +++ b/lib/guessit/date.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com> +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. 
If not, see <http://www.gnu.org/licenses/>. +# + +from __future__ import unicode_literals +import datetime +import re + +def valid_year(year): + return 1920 < year < datetime.date.today().year + 5 + +def search_year(string): + """Looks for year patterns, and if found return the year and group span. + Assumes there are sentinels at the beginning and end of the string that + always allow matching a non-digit delimiting the date. + + Note this only looks for valid production years, that is between 1920 + and now + 5 years, so for instance 2000 would be returned as a valid + year but 1492 would not. + + >>> search_year('in the year 2000...') + (2000, (12, 16)) + + >>> search_year('they arrived in 1492.') + (None, None) + """ + match = re.search(r'[^0-9]([0-9]{4})[^0-9]', string) + if match: + year = int(match.group(1)) + if valid_year(year): + return (year, match.span(1)) + + return (None, None) + + +def search_date(string): + """Looks for date patterns, and if found return the date and group span. + Assumes there are sentinels at the beginning and end of the string that + always allow matching a non-digit delimiting the date. 
+ + >>> search_date('This happened on 2002-04-22.') + (datetime.date(2002, 4, 22), (17, 27)) + + >>> search_date('And this on 17-06-1998.') + (datetime.date(1998, 6, 17), (12, 22)) + + >>> search_date('no date in here') + (None, None) + """ + + dsep = r'[-/ \.]' + + date_rexps = [ + # 20010823 + r'[^0-9]' + + r'(?P<year>[0-9]{4})' + + r'(?P<month>[0-9]{2})' + + r'(?P<day>[0-9]{2})' + + r'[^0-9]', + + # 2001-08-23 + r'[^0-9]' + + r'(?P<year>[0-9]{4})' + dsep + + r'(?P<month>[0-9]{2})' + dsep + + r'(?P<day>[0-9]{2})' + + r'[^0-9]', + + # 23-08-2001 + r'[^0-9]' + + r'(?P<day>[0-9]{2})' + dsep + + r'(?P<month>[0-9]{2})' + dsep + + r'(?P<year>[0-9]{4})' + + r'[^0-9]', + + # 23-08-01 + r'[^0-9]' + + r'(?P<day>[0-9]{2})' + dsep + + r'(?P<month>[0-9]{2})' + dsep + + r'(?P<year>[0-9]{2})' + + r'[^0-9]', + ] + + for drexp in date_rexps: + match = re.search(drexp, string) + if match: + d = match.groupdict() + year, month, day = int(d['year']), int(d['month']), int(d['day']) + # years specified as 2 digits should be adjusted here + if year < 100: + if year > (datetime.date.today().year % 100) + 5: + year = 1900 + year + else: + year = 2000 + year + + date = None + try: + date = datetime.date(year, month, day) + except ValueError: + try: + date = datetime.date(year, day, month) + except ValueError: + pass + + if date is None: + continue + + # check date plausibility + if not 1900 < date.year < datetime.date.today().year + 5: + continue + + # looks like we have a valid date + # note: span is [+1,-1] because we don't want to include the + # non-digit char + start, end = match.span() + return (date, (start + 1, end - 1)) + + return None, None diff --git a/lib/guessit/fileutils.py b/lib/guessit/fileutils.py new file mode 100644 index 0000000000000000000000000000000000000000..2fca6b7b299d2e0ce5445b115c6803bd2a11cb7d --- /dev/null +++ b/lib/guessit/fileutils.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from 
filenames +# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com> +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +from __future__ import unicode_literals +from guessit import s, u +import os.path +import zipfile + + +def split_path(path): + r"""Splits the given path into the list of folders and the filename (or the + last folder if you gave it a folder path. + + If the given path was an absolute path, the first element will always be: + - the '/' root folder on Unix systems + - the drive letter on Windows systems (eg: r'C:\') + - the mount point '\\' on Windows systems (eg: r'\\host\share') + + >>> s(split_path('/usr/bin/smewt')) + ['/', 'usr', 'bin', 'smewt'] + + >>> s(split_path('relative_path/to/my_folder/')) + ['relative_path', 'to', 'my_folder'] + + """ + result = [] + while True: + head, tail = os.path.split(path) + + # on Unix systems, the root folder is '/' + if head == '/' and tail == '': + return ['/'] + result + + # on Windows, the root folder is a drive letter (eg: 'C:\') or for shares \\ + if ((len(head) == 3 and head[1:] == ':\\') or (len(head) == 2 and head == '\\\\')) and tail == '': + return [head] + result + + if head == '' and tail == '': + return result + + # we just split a directory ending with '/', so tail is empty + if not tail: + path = head + continue + + result = [tail] + result + path = head + + +def file_in_same_dir(ref_file, 
desired_file): + """Return the path for a file in the same dir as a given reference file. + + >>> s(file_in_same_dir('~/smewt/smewt.db', 'smewt.settings')) + '~/smewt/smewt.settings' + + """ + return os.path.join(*(split_path(ref_file)[:-1] + [desired_file])) + + +def load_file_in_same_dir(ref_file, filename): + """Load a given file. Works even when the file is contained inside a zip.""" + path = split_path(ref_file)[:-1] + [filename] + + for i, p in enumerate(path): + if p.endswith('.zip'): + zfilename = os.path.join(*path[:i + 1]) + zfile = zipfile.ZipFile(zfilename) + return zfile.read('/'.join(path[i + 1:])) + + return u(open(os.path.join(*path)).read()) diff --git a/lib/guessit/guess.py b/lib/guessit/guess.py new file mode 100644 index 0000000000000000000000000000000000000000..62385e8c2a58e984926937ebb8701d8d19b535df --- /dev/null +++ b/lib/guessit/guess.py @@ -0,0 +1,318 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com> +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# + +from __future__ import unicode_literals +from guessit import UnicodeMixin, s, u, base_text_type +from guessit.language import Language +from guessit.country import Country +import json +import datetime +import logging + +log = logging.getLogger(__name__) + + +class Guess(UnicodeMixin, dict): + """A Guess is a dictionary which has an associated confidence for each of + its values. + + As it is a subclass of dict, you can use it everywhere you expect a + simple dict.""" + + def __init__(self, *args, **kwargs): + try: + confidence = kwargs.pop('confidence') + except KeyError: + confidence = 0 + + dict.__init__(self, *args, **kwargs) + + self._confidence = {} + for prop in self: + self._confidence[prop] = confidence + + + def to_dict(self): + data = dict(self) + for prop, value in data.items(): + if isinstance(value, datetime.date): + data[prop] = value.isoformat() + elif isinstance(value, (Language, Country, base_text_type)): + data[prop] = u(value) + elif isinstance(value, list): + data[prop] = [u(x) for x in value] + + return data + + def nice_string(self): + data = self.to_dict() + + parts = json.dumps(data, indent=4).split('\n') + for i, p in enumerate(parts): + if p[:5] != ' "': + continue + + prop = p.split('"')[1] + parts[i] = (' [%.2f] "' % self.confidence(prop)) + p[5:] + + return '\n'.join(parts) + + def __unicode__(self): + return u(self.to_dict()) + + def confidence(self, prop): + return self._confidence.get(prop, -1) + + def set(self, prop, value, confidence=None): + self[prop] = value + if confidence is not None: + self._confidence[prop] = confidence + + def set_confidence(self, prop, value): + self._confidence[prop] = value + + def update(self, other, confidence=None): + dict.update(self, other) + if isinstance(other, Guess): + for prop in other: + self._confidence[prop] = other.confidence(prop) + + if confidence is not None: + for prop in other: + self._confidence[prop] = confidence + + def update_highest_confidence(self, other): + """Update this 
guess with the values from the given one. In case + there is property present in both, only the one with the highest one + is kept.""" + if not isinstance(other, Guess): + raise ValueError('Can only call this function on Guess instances') + + for prop in other: + if prop in self and self.confidence(prop) >= other.confidence(prop): + continue + self[prop] = other[prop] + self._confidence[prop] = other.confidence(prop) + + +def choose_int(g1, g2): + """Function used by merge_similar_guesses to choose between 2 possible + properties when they are integers.""" + v1, c1 = g1 # value, confidence + v2, c2 = g2 + if (v1 == v2): + return (v1, 1 - (1 - c1) * (1 - c2)) + else: + if c1 > c2: + return (v1, c1 - c2) + else: + return (v2, c2 - c1) + + +def choose_string(g1, g2): + """Function used by merge_similar_guesses to choose between 2 possible + properties when they are strings. + + If the 2 strings are similar, or one is contained in the other, the latter is returned + with an increased confidence. + + If the 2 strings are dissimilar, the one with the higher confidence is returned, with + a weaker confidence. + + Note that here, 'similar' means that 2 strings are either equal, or that they + differ very little, such as one string being the other one with the 'the' word + prepended to it. 
+ + >>> s(choose_string(('Hello', 0.75), ('World', 0.5))) + ('Hello', 0.25) + + >>> s(choose_string(('Hello', 0.5), ('hello', 0.5))) + ('Hello', 0.75) + + >>> s(choose_string(('Hello', 0.4), ('Hello World', 0.4))) + ('Hello', 0.64) + + >>> s(choose_string(('simpsons', 0.5), ('The Simpsons', 0.5))) + ('The Simpsons', 0.75) + + """ + v1, c1 = g1 # value, confidence + v2, c2 = g2 + + if not v1: + return g2 + elif not v2: + return g1 + + v1, v2 = v1.strip(), v2.strip() + v1l, v2l = v1.lower(), v2.lower() + + combined_prob = 1 - (1 - c1) * (1 - c2) + + if v1l == v2l: + return (v1, combined_prob) + + # check for common patterns + elif v1l == 'the ' + v2l: + return (v1, combined_prob) + elif v2l == 'the ' + v1l: + return (v2, combined_prob) + + # if one string is contained in the other, return the shortest one + elif v2l in v1l: + return (v2, combined_prob) + elif v1l in v2l: + return (v1, combined_prob) + + # in case of conflict, return the one with highest priority + else: + if c1 > c2: + return (v1, c1 - c2) + else: + return (v2, c2 - c1) + + +def _merge_similar_guesses_nocheck(guesses, prop, choose): + """Take a list of guesses and merge those which have the same properties, + increasing or decreasing the confidence depending on whether their values + are similar. 
+ + This function assumes there are at least 2 valid guesses.""" + + similar = [guess for guess in guesses if prop in guess] + + g1, g2 = similar[0], similar[1] + + other_props = set(g1) & set(g2) - set([prop]) + if other_props: + log.debug('guess 1: %s' % g1) + log.debug('guess 2: %s' % g2) + for prop in other_props: + if g1[prop] != g2[prop]: + log.warning('both guesses to be merged have more than one ' + 'different property in common, bailing out...') + return + + # merge all props of s2 into s1, updating the confidence for the + # considered property + v1, v2 = g1[prop], g2[prop] + c1, c2 = g1.confidence(prop), g2.confidence(prop) + + new_value, new_confidence = choose((v1, c1), (v2, c2)) + if new_confidence >= c1: + msg = "Updating matching property '%s' with confidence %.2f" + else: + msg = "Updating non-matching property '%s' with confidence %.2f" + log.debug(msg % (prop, new_confidence)) + + g2[prop] = new_value + g2.set_confidence(prop, new_confidence) + + g1.update(g2) + guesses.remove(g2) + + +def merge_similar_guesses(guesses, prop, choose): + """Take a list of guesses and merge those which have the same properties, + increasing or decreasing the confidence depending on whether their values + are similar.""" + + similar = [guess for guess in guesses if prop in guess] + if len(similar) < 2: + # nothing to merge + return + + if len(similar) == 2: + _merge_similar_guesses_nocheck(guesses, prop, choose) + + if len(similar) > 2: + log.debug('complex merge, trying our best...') + before = len(guesses) + _merge_similar_guesses_nocheck(guesses, prop, choose) + after = len(guesses) + if after < before: + # recurse only when the previous call actually did something, + # otherwise we end up in an infinite loop + merge_similar_guesses(guesses, prop, choose) + + +def merge_all(guesses, append=None): + """Merge all the guesses in a single result, remove very unlikely values, + and return it. 
+ You can specify a list of properties that should be appended into a list + instead of being merged. + + >>> s(merge_all([ Guess({'season': 2}, confidence=0.6), + ... Guess({'episodeNumber': 13}, confidence=0.8) ])) + {'season': 2, 'episodeNumber': 13} + + >>> s(merge_all([ Guess({'episodeNumber': 27}, confidence=0.02), + ... Guess({'season': 1}, confidence=0.2) ])) + {'season': 1} + + >>> s(merge_all([ Guess({'other': 'PROPER'}, confidence=0.8), + ... Guess({'releaseGroup': '2HD'}, confidence=0.8) ], + ... append=['other'])) + {'releaseGroup': '2HD', 'other': ['PROPER']} + + + """ + if not guesses: + return Guess() + + result = guesses[0] + if append is None: + append = [] + + for g in guesses[1:]: + # first append our appendable properties + for prop in append: + if prop in g: + result.set(prop, result.get(prop, []) + [g[prop]], + # TODO: what to do with confidence here? maybe an + # arithmetic mean... + confidence=g.confidence(prop)) + + del g[prop] + + # then merge the remaining ones + dups = set(result) & set(g) + if dups: + log.warning('duplicate properties %s in merged result...' 
% dups) + + result.update_highest_confidence(g) + + # delete very unlikely values + for p in list(result.keys()): + if result.confidence(p) < 0.05: + del result[p] + + # make sure our appendable properties contain unique values + for prop in append: + try: + value = result[prop] + if isinstance(value, list): + result[prop] = list(set(value)) + else: + result[prop] = [ value ] + except KeyError: + pass + + return result diff --git a/lib/guessit/hash_ed2k.py b/lib/guessit/hash_ed2k.py new file mode 100644 index 0000000000000000000000000000000000000000..7422d4e90aa78906d95415495062528769e78192 --- /dev/null +++ b/lib/guessit/hash_ed2k.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com> +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +from __future__ import unicode_literals +from guessit import s, to_hex +import hashlib +import os.path + + +def hash_file(filename): + """Returns the ed2k hash of a given file. + + >>> s(hash_file('tests/dummy.srt')) + 'ed2k://|file|dummy.srt|44|1CA0B9DED3473B926AA93A0A546138BB|/' + """ + return 'ed2k://|file|%s|%d|%s|/' % (os.path.basename(filename), + os.path.getsize(filename), + hash_filehash(filename).upper()) + + +def hash_filehash(filename): + """Returns the ed2k hash of a given file. 
+ + This function is taken from: + http://www.radicand.org/blog/orz/2010/2/21/edonkey2000-hash-in-python/ + """ + md4 = hashlib.new('md4').copy + + def gen(f): + while True: + x = f.read(9728000) + if x: + yield x + else: + return + + def md4_hash(data): + m = md4() + m.update(data) + return m + + with open(filename, 'rb') as f: + a = gen(f) + hashes = [md4_hash(data).digest() for data in a] + if len(hashes) == 1: + return to_hex(hashes[0]) + else: + return md4_hash(reduce(lambda a, d: a + d, hashes, "")).hexdigest() diff --git a/lib/guessit/hash_mpc.py b/lib/guessit/hash_mpc.py new file mode 100644 index 0000000000000000000000000000000000000000..c9dd4292c51edd8cd3c8d9857224e5140ac6a552 --- /dev/null +++ b/lib/guessit/hash_mpc.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com> +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>.
def hash_file(filename):
    """Compute the OpenSubtitles hash of a video file.

    The hash is the file size plus the 64-bit sums of the first and last
    64 KiB of the file (read as native long longs), truncated to 64 bits
    and rendered as a 16-character lowercase hex string.

    Algorithm taken from:
    http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes
    and licensed under the GPL.

    :param filename: path of the file to hash
    :raises Exception: when the file is smaller than 128 KiB (the
        algorithm needs two full 64 KiB windows)
    """
    longlongformat = 'q'  # native signed 64-bit integer
    bytesize = struct.calcsize(longlongformat)

    filesize = os.path.getsize(filename)
    if filesize < 65536 * 2:
        raise Exception("SizeError: size is %d, should be > 132K..." % filesize)

    hash_value = filesize

    # 'with' guarantees the handle is closed even if unpacking fails
    # (the original leaked the descriptor on error).
    with open(filename, "rb") as f:
        # one 64 KiB window at the start of the file, one at the end
        for window_start in (0, max(0, filesize - 65536)):
            f.seek(window_start, 0)
            # integer division: '/' would produce a float under Python 3
            # and make range() raise TypeError
            for _ in range(65536 // bytesize):
                (l_value,) = struct.unpack(longlongformat, f.read(bytesize))
                hash_value += l_value
                hash_value &= 0xFFFFFFFFFFFFFFFF  # stay within 64 bits

    return "%016x" % hash_value
+# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +from __future__ import unicode_literals +from guessit import UnicodeMixin, base_text_type, u, s +from guessit.fileutils import load_file_in_same_dir +from guessit.country import Country +import re +import logging + +__all__ = [ 'is_iso_language', 'is_language', 'lang_set', 'Language', + 'ALL_LANGUAGES', 'ALL_LANGUAGES_NAMES', 'UNDETERMINED', + 'search_language', 'guess_language' ] + + +log = logging.getLogger(__name__) + + +# downloaded from http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt +# +# Description of the fields: +# "An alpha-3 (bibliographic) code, an alpha-3 (terminologic) code (when given), +# an alpha-2 code (when given), an English name, and a French name of a language +# are all separated by pipe (|) characters." +_iso639_contents = load_file_in_same_dir(__file__, 'ISO-639-2_utf-8.txt') + +# drop the BOM from the beginning of the file +_iso639_contents = _iso639_contents[1:] + +language_matrix = [ l.strip().split('|') + for l in _iso639_contents.strip().split('\n') ] + + +# update information in the language matrix +language_matrix += [['mol', '', 'mo', 'Moldavian', 'moldave'], + ['ass', '', '', 'Assyrian', 'assyrien']] + +for lang in language_matrix: + # remove unused languages that shadow other common ones with a non-official form + if (lang[2] == 'se' or # Northern Sami shadows Swedish + lang[2] == 'br'): # Breton shadows Brazilian + lang[2] = '' + # add missing information + if lang[0] == 'und': + lang[2] = 'un' + if lang[0] == 'srp': + lang[1] = 'scc' # from OpenSubtitles + + +lng3 = frozenset(l[0] for l in language_matrix if l[0]) +lng3term = frozenset(l[1] for l in language_matrix if l[1]) +lng2 = frozenset(l[2] for l in language_matrix if l[2]) +lng_en_name = frozenset(lng for l in language_matrix + for lng in l[3].lower().split('; ') if lng) +lng_fr_name = frozenset(lng for l in 
language_matrix + for lng in l[4].lower().split('; ') if lng) +lng_all_names = lng3 | lng3term | lng2 | lng_en_name | lng_fr_name + +lng3_to_lng3term = dict((l[0], l[1]) for l in language_matrix if l[1]) +lng3term_to_lng3 = dict((l[1], l[0]) for l in language_matrix if l[1]) + +lng3_to_lng2 = dict((l[0], l[2]) for l in language_matrix if l[2]) +lng2_to_lng3 = dict((l[2], l[0]) for l in language_matrix if l[2]) + +# we only return the first given english name, hoping it is the most used one +lng3_to_lng_en_name = dict((l[0], l[3].split('; ')[0]) + for l in language_matrix if l[3]) +lng_en_name_to_lng3 = dict((en_name.lower(), l[0]) + for l in language_matrix if l[3] + for en_name in l[3].split('; ')) + +# we only return the first given french name, hoping it is the most used one +lng3_to_lng_fr_name = dict((l[0], l[4].split('; ')[0]) + for l in language_matrix if l[4]) +lng_fr_name_to_lng3 = dict((fr_name.lower(), l[0]) + for l in language_matrix if l[4] + for fr_name in l[4].split('; ')) + +# contains a list of exceptions: strings that should be parsed as a language +# but which are not in an ISO form +lng_exceptions = { 'unknown': ('und', None), + 'inconnu': ('und', None), + 'unk': ('und', None), + 'un': ('und', None), + 'gr': ('gre', None), + 'greek': ('gre', None), + 'esp': ('spa', None), + 'español': ('spa', None), + 'se': ('swe', None), + 'po': ('pt', 'br'), + 'pb': ('pt', 'br'), + 'pob': ('pt', 'br'), + 'br': ('pt', 'br'), + 'brazilian': ('pt', 'br'), + 'català ': ('cat', None), + 'cz': ('cze', None), + 'ua': ('ukr', None), + 'cn': ('chi', None), + 'chs': ('chi', None), + 'jp': ('jpn', None), + 'scr': ('hrv', None) + } + + +def is_iso_language(language): + return language.lower() in lng_all_names + +def is_language(language): + return is_iso_language(language) or language in lng_exceptions + +def lang_set(languages, strict=False): + """Return a set of guessit.Language created from their given string + representation. 
class Language(UnicodeMixin):
    """Representation of a human language, with an optional country variant.

    A Language can be built from nearly any common spelling: ISO-639
    2-letter and 3-letter codes, English or French names, and a handful of
    widespread non-ISO abbreviations.  A country-specific variant such as
    Brazilian Portuguese can be given either inline ('pt(br)', 'pt-br') or
    through the *country* argument.

    Dedicated properties expose the representation a specific consumer
    needs, e.g. .alpha3 for the ISO 3-letter code or .opensubtitles for
    the OpenSubtitles language code.

    >>> Language('fr')
    Language(French)

    >>> s(Language('eng').french_name)
    'anglais'

    >>> s(Language('pt(br)').opensubtitles)
    'pob'

    >>> s(Language('zz', strict=False).english_name)
    'Undetermined'
    """

    _with_country_regexp = re.compile('(.*)\((.*)\)')
    _with_country_regexp2 = re.compile('(.*)-(.*)')

    def __init__(self, language, country=None, strict=False, scheme=None):
        language = u(language.strip().lower())

        # inline country variant, either 'lang(country)' or 'lang-country'
        country_match = (Language._with_country_regexp.match(language) or
                        Language._with_country_regexp2.match(language))
        if country_match:
            self.lang = Language(country_match.group(1)).lang
            self.country = Country(country_match.group(2))
            return

        self.lang = None
        self.country = Country(country) if country else None

        # scheme-specific codes take precedence over the ISO tables
        if scheme == 'opensubtitles':
            if language == 'br':
                self.lang = 'bre'
                return
            if language == 'se':
                self.lang = 'sme'
                return
        elif scheme is not None:
            log.warning('Unrecognized scheme: "%s" - Proceeding with standard one' % scheme)

        # ISO-639 lookup, keyed on the length of the given code
        if len(language) == 2:
            self.lang = lng2_to_lng3.get(language)
        elif len(language) == 3:
            self.lang = (language
                         if language in lng3
                         else lng3term_to_lng3.get(language))
        else:
            self.lang = (lng_en_name_to_lng3.get(language) or
                         lng_fr_name_to_lng3.get(language))

        # common non-ISO spellings ('gr', 'pob', 'español', ...)
        if self.lang is None and language in lng_exceptions:
            lang, country = lng_exceptions[language]
            self.lang = Language(lang).alpha3
            self.country = Country(country) if country else None

        msg = 'The given string "%s" could not be identified as a language' % language

        if self.lang is None and strict:
            raise ValueError(msg)

        if self.lang is None:
            log.debug(msg)
            self.lang = 'und'

    @property
    def alpha2(self):
        # ISO 2-letter code; KeyError for languages that have none
        return lng3_to_lng2[self.lang]

    @property
    def alpha3(self):
        # ISO 3-letter (bibliographic) code -- the internal representation
        return self.lang

    @property
    def alpha3term(self):
        # ISO 3-letter terminologic code; KeyError when absent
        return lng3_to_lng3term[self.lang]

    @property
    def english_name(self):
        return lng3_to_lng_en_name[self.lang]

    @property
    def french_name(self):
        return lng3_to_lng_fr_name[self.lang]

    @property
    def opensubtitles(self):
        # OpenSubtitles deviates from ISO for Brazilian Portuguese,
        # Greek and Serbian
        if self.lang == 'por' and self.country and self.country.alpha2 == 'br':
            return 'pob'
        if self.lang in ['gre', 'srp']:
            return self.alpha3term
        return self.alpha3

    @property
    def tmdb(self):
        # TheMovieDB wants 'xx-YY' when a country variant is present
        if not self.country:
            return self.alpha2
        return '%s-%s' % (self.alpha2, self.country.alpha2.upper())

    def __hash__(self):
        return hash(self.lang)

    def __eq__(self, other):
        if isinstance(other, Language):
            return self.lang == other.lang
        if not isinstance(other, base_text_type):
            return False
        # try to parse the string; unparseable strings are simply not equal
        try:
            return self == Language(other)
        except ValueError:
            return False

    def __ne__(self, other):
        return not (self == other)

    def __nonzero__(self):
        # a Language is falsy only when undetermined
        return self.lang != 'und'

    def __unicode__(self):
        name = self.english_name
        return '%s(%s)' % (name, self.country.alpha2) if self.country else name

    def __repr__(self):
        if self.country:
            return 'Language(%s, country=%s)' % (self.english_name, self.country)
        return 'Language(%s)' % self.english_name
def guess_language(text):
    """Guess the language in which a body of text is written.

    Delegates to the optional third-party 'guess-language' module.  When
    that dependency is missing, the problem is logged and
    Language(Undetermined) is returned instead of raising.
    """
    try:
        from guess_language import guessLanguage
        detected = guessLanguage(text)
    except ImportError:
        log.error('Cannot detect the language of the given text body, missing dependency: guess-language')
        log.error('Please install it from PyPI, by doing eg: pip install guess-language')
        return UNDETERMINED
    return Language(detected)
+ + The recognized 'filetype' values are: + [ autodetect, subtitle, movie, moviesubtitle, episode, episodesubtitle ] + + + The IterativeMatcher works mainly in 2 steps: + + First, it splits the filename into a match_tree, which is a tree of groups + which have a semantic meaning, such as episode number, movie title, + etc... + + The match_tree created looks like the following: + + 0000000000000000000000000000000000000000000000000000000000000000000000000000000000 111 + 0000011111111111112222222222222233333333444444444444444455555555666777777778888888 000 + 0000000000000000000000000000000001111112011112222333333401123334000011233340000000 000 + __________________(The.Prestige).______.[____.HP.______.{__-___}.St{__-___}.Chaps].___ + xxxxxttttttttttttt ffffff vvvv xxxxxx ll lll xx xxx ccc + [XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv + + The first 3 lines indicates the group index in which a char in the + filename is located. So for instance, x264 is the group (0, 4, 1), and + it corresponds to a video codec, denoted by the letter'v' in the 4th line. + (for more info, see guess.matchtree.to_string) + + + Second, it tries to merge all this information into a single object + containing all the found properties, and does some (basic) conflict + resolution when they arise. + """ + + valid_filetypes = ('autodetect', 'subtitle', 'video', + 'movie', 'moviesubtitle', + 'episode', 'episodesubtitle') + if filetype not in valid_filetypes: + raise ValueError("filetype needs to be one of %s" % valid_filetypes) + if not PY3 and not isinstance(filename, unicode): + log.warning('Given filename to matcher is not unicode...') + + self.match_tree = MatchTree(filename) + mtree = self.match_tree + mtree.guess.set('type', filetype, confidence=1.0) + + def apply_transfo(transfo_name, *args, **kwargs): + transfo = __import__('guessit.transfo.' 
+ transfo_name, + globals=globals(), locals=locals(), + fromlist=['process'], level=0) + transfo.process(mtree, *args, **kwargs) + + # 1- first split our path into dirs + basename + ext + apply_transfo('split_path_components') + + # 2- guess the file type now (will be useful later) + apply_transfo('guess_filetype', filetype) + if mtree.guess['type'] == 'unknown': + return + + # 3- split each of those into explicit groups (separated by parentheses + # or square brackets) + apply_transfo('split_explicit_groups') + + # 4- try to match information for specific patterns + # NOTE: order needs to comply to the following: + # - website before language (eg: tvu.org.ru vs russian) + # - language before episodes_rexps + # - properties before language (eg: he-aac vs hebrew) + # - release_group before properties (eg: XviD-?? vs xvid) + if mtree.guess['type'] in ('episode', 'episodesubtitle'): + strategy = [ 'guess_date', 'guess_website', 'guess_release_group', + 'guess_properties', 'guess_language', + 'guess_video_rexps', + 'guess_episodes_rexps', 'guess_weak_episodes_rexps' ] + else: + strategy = [ 'guess_date', 'guess_website', 'guess_release_group', + 'guess_properties', 'guess_language', + 'guess_video_rexps' ] + + for name in strategy: + apply_transfo(name) + + # more guessers for both movies and episodes + for name in ['guess_bonus_features', 'guess_year', 'guess_country']: + apply_transfo(name) + + # split into '-' separated subgroups (with required separator chars + # around the dash) + apply_transfo('split_on_dash') + + # 5- try to identify the remaining unknown groups by looking at their + # position relative to other known elements + if mtree.guess['type'] in ('episode', 'episodesubtitle'): + apply_transfo('guess_episode_info_from_position') + else: + apply_transfo('guess_movie_title_from_position') + + # 6- perform some post-processing steps + apply_transfo('post_process') + + log.debug('Found match tree:\n%s' % u(mtree)) + + def matched(self): + return 
self.match_tree.matched() diff --git a/lib/guessit/matchtree.py b/lib/guessit/matchtree.py new file mode 100644 index 0000000000000000000000000000000000000000..2853c3a01c12d192bc0f3f8c42d1512d3b7b29ae --- /dev/null +++ b/lib/guessit/matchtree.py @@ -0,0 +1,287 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com> +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# + +from __future__ import unicode_literals +from guessit import UnicodeMixin, base_text_type, Guess +from guessit.textutils import clean_string, str_fill +from guessit.patterns import group_delimiters +from guessit.guess import (merge_similar_guesses, merge_all, + choose_int, choose_string) +import copy +import logging + +log = logging.getLogger(__name__) + + +class BaseMatchTree(UnicodeMixin): + """A MatchTree represents the hierarchical split of a string into its + constituent semantic groups.""" + + def __init__(self, string='', span=None, parent=None): + self.string = string + self.span = span or (0, len(string)) + self.parent = parent + self.children = [] + self.guess = Guess() + + @property + def value(self): + return self.string[self.span[0]:self.span[1]] + + @property + def clean_value(self): + return clean_string(self.value) + + @property + def offset(self): + return self.span[0] + + @property + def info(self): + result = dict(self.guess) + + for c in self.children: + result.update(c.info) + + return result + + @property + def root(self): + if not self.parent: + return self + + return self.parent.root + + @property + def depth(self): + if self.is_leaf(): + return 0 + + return 1 + max(c.depth for c in self.children) + + def is_leaf(self): + return self.children == [] + + def add_child(self, span): + child = MatchTree(self.string, span=span, parent=self) + self.children.append(child) + + def partition(self, indices): + indices = sorted(indices) + if indices[0] != 0: + indices.insert(0, 0) + if indices[-1] != len(self.value): + indices.append(len(self.value)) + + for start, end in zip(indices[:-1], indices[1:]): + self.add_child(span=(self.offset + start, + self.offset + end)) + + def split_on_components(self, components): + offset = 0 + for c in components: + start = self.value.find(c, offset) + end = start + len(c) + self.add_child(span=(self.offset + start, + self.offset + end)) + offset = end + + def nodes_at_depth(self, depth): + if depth == 0: + 
yield self + + for child in self.children: + for node in child.nodes_at_depth(depth - 1): + yield node + + @property + def node_idx(self): + if self.parent is None: + return () + return self.parent.node_idx + (self.parent.children.index(self),) + + def node_at(self, idx): + if not idx: + return self + + try: + return self.children[idx[0]].node_at(idx[1:]) + except: + raise ValueError('Non-existent node index: %s' % (idx,)) + + def nodes(self): + yield self + for child in self.children: + for node in child.nodes(): + yield node + + def _leaves(self): + if self.is_leaf(): + yield self + else: + for child in self.children: + # pylint: disable=W0212 + for leaf in child._leaves(): + yield leaf + + def leaves(self): + return list(self._leaves()) + + def to_string(self): + empty_line = ' ' * len(self.string) + + def to_hex(x): + if isinstance(x, int): + return str(x) if x < 10 else chr(55 + x) + return x + + def meaning(result): + mmap = { 'episodeNumber': 'E', + 'season': 'S', + 'extension': 'e', + 'format': 'f', + 'language': 'l', + 'country': 'C', + 'videoCodec': 'v', + 'audioCodec': 'a', + 'website': 'w', + 'container': 'c', + 'series': 'T', + 'title': 't', + 'date': 'd', + 'year': 'y', + 'releaseGroup': 'r', + 'screenSize': 's' + } + + if result is None: + return ' ' + + for prop, l in mmap.items(): + if prop in result: + return l + + return 'x' + + lines = [ empty_line ] * (self.depth + 2) # +2: remaining, meaning + lines[-2] = self.string + + for node in self.nodes(): + if node == self: + continue + + idx = node.node_idx + depth = len(idx) - 1 + if idx: + lines[depth] = str_fill(lines[depth], node.span, + to_hex(idx[-1])) + if node.guess: + lines[-2] = str_fill(lines[-2], node.span, '_') + lines[-1] = str_fill(lines[-1], node.span, meaning(node.guess)) + + lines.append(self.string) + + return '\n'.join(lines) + + def __unicode__(self): + return self.to_string() + + +class MatchTree(BaseMatchTree): + """The MatchTree contains a few "utility" methods which are not 
necessary + for the BaseMatchTree, but add a lot of convenience for writing + higher-level rules.""" + + def _unidentified_leaves(self, + valid=lambda leaf: len(leaf.clean_value) >= 2): + for leaf in self._leaves(): + if not leaf.guess and valid(leaf): + yield leaf + + def unidentified_leaves(self, + valid=lambda leaf: len(leaf.clean_value) >= 2): + return list(self._unidentified_leaves(valid)) + + def _leaves_containing(self, property_name): + if isinstance(property_name, base_text_type): + property_name = [ property_name ] + + for leaf in self._leaves(): + for prop in property_name: + if prop in leaf.guess: + yield leaf + break + + def leaves_containing(self, property_name): + return list(self._leaves_containing(property_name)) + + def first_leaf_containing(self, property_name): + try: + return next(self._leaves_containing(property_name)) + except StopIteration: + return None + + def _previous_unidentified_leaves(self, node): + node_idx = node.node_idx + for leaf in self._unidentified_leaves(): + if leaf.node_idx < node_idx: + yield leaf + + def previous_unidentified_leaves(self, node): + return list(self._previous_unidentified_leaves(node)) + + def _previous_leaves_containing(self, node, property_name): + node_idx = node.node_idx + for leaf in self._leaves_containing(property_name): + if leaf.node_idx < node_idx: + yield leaf + + def previous_leaves_containing(self, node, property_name): + return list(self._previous_leaves_containing(node, property_name)) + + def is_explicit(self): + """Return whether the group was explicitly enclosed by + parentheses/square brackets/etc.""" + return (self.value[0] + self.value[-1]) in group_delimiters + + def matched(self): + # we need to make a copy here, as the merge functions work in place and + # calling them on the match tree would modify it + parts = [node.guess for node in self.nodes() if node.guess] + parts = copy.deepcopy(parts) + + # 1- try to merge similar information together and give it a higher + # confidence + 
for int_part in ('year', 'season', 'episodeNumber'): + merge_similar_guesses(parts, int_part, choose_int) + + for string_part in ('title', 'series', 'container', 'format', + 'releaseGroup', 'website', 'audioCodec', + 'videoCodec', 'screenSize', 'episodeFormat', + 'audioChannels'): + merge_similar_guesses(parts, string_part, choose_string) + + # 2- merge the rest, potentially discarding information not properly + # merged before + result = merge_all(parts, + append=['language', 'subtitleLanguage', 'other']) + + log.debug('Final result: ' + result.nice_string()) + return result diff --git a/lib/guessit/patterns.py b/lib/guessit/patterns.py new file mode 100644 index 0000000000000000000000000000000000000000..63aa6251069565738f990899a5cff5d81eefe845 --- /dev/null +++ b/lib/guessit/patterns.py @@ -0,0 +1,228 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com> +# Copyright (c) 2011 Ricard Marxer <ricardmp@gmail.com> +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# + +from __future__ import unicode_literals +import re + + +subtitle_exts = [ 'srt', 'idx', 'sub', 'ssa' ] + +video_exts = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2', + 'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm', + 'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv'] + +group_delimiters = [ '()', '[]', '{}' ] + +# separator character regexp +sep = r'[][)(}{+ /\._-]' # regexp art, hehe :D + +# character used to represent a deleted char (when matching groups) +deleted = '_' + +# format: [ (regexp, confidence, span_adjust) ] +episode_rexps = [ # ... Season 2 ... + (r'season (?P<season>[0-9]+)', 1.0, (0, 0)), + (r'saison (?P<season>[0-9]+)', 1.0, (0, 0)), + + # ... s02e13 ... + (r'[Ss](?P<season>[0-9]{1,2}).?(?P<episodeNumber>(?:[Ee-][0-9]{1,2})+)[^0-9]', 1.0, (0, -1)), + + # ... s03-x02 ... + (r'[Ss](?P<season>[0-9]{1,2}).?(?P<bonusNumber>(?:[Xx][0-9]{1,2})+)[^0-9]', 1.0, (0, -1)), + + # ... 2x13 ... + (r'[^0-9](?P<season>[0-9]{1,2}).?(?P<episodeNumber>(?:[xX][0-9]{1,2})+)[^0-9]', 0.8, (1, -1)), + + # ... s02 ... + #(sep + r's(?P<season>[0-9]{1,2})' + sep, 0.6, (1, -1)), + (r's(?P<season>[0-9]{1,2})[^0-9]', 0.6, (0, -1)), + + # v2 or v3 for some mangas which have multiples rips + (r'(?P<episodeNumber>[0-9]{1,3})v[23]' + sep, 0.6, (0, 0)), + + # ... ep 23 ... + ('ep' + sep + r'(?P<episodeNumber>[0-9]{1,2})[^0-9]', 0.7, (0, -1)), + + # ... e13 ... for a mini-series without a season number + (sep + r'e(?P<episodeNumber>[0-9]{1,2})' + sep, 0.6, (1, -1)) + + ] + + +weak_episode_rexps = [ # ... 213 or 0106 ... 
+ (sep + r'(?P<episodeNumber>[0-9]{2,4})' + sep, (1, -1)) + ] + +non_episode_title = [ 'extras', 'rip' ] + + +video_rexps = [ # cd number + (r'cd ?(?P<cdNumber>[0-9])( ?of ?(?P<cdNumberTotal>[0-9]))?', 1.0, (0, 0)), + (r'(?P<cdNumberTotal>[1-9]) cds?', 0.9, (0, 0)), + + # special editions + (r'edition' + sep + r'(?P<edition>collector)', 1.0, (0, 0)), + (r'(?P<edition>collector)' + sep + 'edition', 1.0, (0, 0)), + (r'(?P<edition>special)' + sep + 'edition', 1.0, (0, 0)), + (r'(?P<edition>criterion)' + sep + 'edition', 1.0, (0, 0)), + + # director's cut + (r"(?P<edition>director'?s?" + sep + "cut)", 1.0, (0, 0)), + + # video size + (r'(?P<width>[0-9]{3,4})x(?P<height>[0-9]{3,4})', 0.9, (0, 0)), + + # website + (r'(?P<website>www(\.[a-zA-Z0-9]+){2,3})', 0.8, (0, 0)), + + # bonusNumber: ... x01 ... + (r'x(?P<bonusNumber>[0-9]{1,2})', 1.0, (0, 0)), + + # filmNumber: ... f01 ... + (r'f(?P<filmNumber>[0-9]{1,2})', 1.0, (0, 0)) + ] + +websites = [ 'tvu.org.ru', 'emule-island.com', 'UsaBit.com', 'www.divx-overnet.com', + 'sharethefiles.com' ] + +unlikely_series = [ 'series' ] + + +# prop_multi is a dict of { property_name: { canonical_form: [ pattern ] } } +# pattern is a string considered as a regexp, with the addition that dashes are +# replaced with '([ \.-_])?' which matches more types of separators (or none) +# note: simpler patterns need to be at the end of the list to not shadow more +# complete ones, eg: 'AAC' needs to come after 'He-AAC' +# ie: from most specific to less specific +prop_multi = { 'format': { 'DVD': [ 'DVD', 'DVD-Rip', 'VIDEO-TS', 'DVDivX' ], + 'HD-DVD': [ 'HD-(?:DVD)?-Rip', 'HD-DVD' ], + 'BluRay': [ 'Blu-ray', 'B[DR]Rip' ], + 'HDTV': [ 'HD-TV' ], + 'DVB': [ 'DVB-Rip', 'DVB', 'PD-TV' ], + 'WEBRip': [ 'WEB-Rip' ], + 'Screener': [ 'DVD-SCR', 'Screener' ], + 'VHS': [ 'VHS' ], + 'WEB-DL': [ 'WEB-DL' ] }, + + 'screenSize': { '480p': [ '480p?' ], + '720p': [ '720p?' ], + '1080p': [ '1080p?' 
] }, + + 'videoCodec': { 'XviD': [ 'Xvid' ], + 'DivX': [ 'DVDivX', 'DivX' ], + 'h264': [ '[hx]-264' ], + 'Rv10': [ 'Rv10' ] }, + + 'audioCodec': { 'AC3': [ 'AC3' ], + 'DTS': [ 'DTS' ], + 'AAC': [ 'He-AAC', 'AAC-He', 'AAC' ] }, + + 'audioChannels': { '5.1': [ r'5\.1', 'DD5\.1', '5ch' ] }, + + 'episodeFormat': { 'Minisode': [ 'Minisodes?' ] } + + } + +# prop_single dict of { property_name: [ canonical_form ] } +prop_single = { 'releaseGroup': [ 'ESiR', 'WAF', 'SEPTiC', r'\[XCT\]', 'iNT', 'PUKKA', + 'CHD', 'ViTE', 'TLF', 'DEiTY', 'FLAiTE', + 'MDX', 'GM4F', 'DVL', 'SVD', 'iLUMiNADOS', 'FiNaLe', + 'UnSeeN', 'aXXo', 'KLAXXON', 'NoTV', 'ZeaL', 'LOL', + 'SiNNERS', 'DiRTY', 'REWARD', 'ECI', 'KiNGS', 'CLUE', + 'CtrlHD', 'POD', 'WiKi', 'DIMENSION', 'IMMERSE', 'FQM', + '2HD', 'REPTiLE', 'CTU', 'HALCYON', 'EbP', 'SiTV', + 'SAiNTS', 'HDBRiSe', 'AlFleNi-TeaM', 'EVOLVE', '0TV', + 'TLA', 'NTB', 'ASAP', 'MOMENTUM' ], + + 'other': [ 'PROPER', 'REPACK', 'LIMITED', 'DualAudio', 'Audiofixed', 'R5', + 'complete', 'classic', # not so sure about these ones, could appear in a title + 'ws' ] # widescreen + } + +_dash = '-' +_psep = '[-\. _]?' 
+ +def _to_rexp(prop): + return re.compile(prop.replace(_dash, _psep), re.IGNORECASE) + +# properties_rexps dict of { property_name: { canonical_form: [ rexp ] } } +# containing the rexps compiled from both prop_multi and prop_single +properties_rexps = dict((type, dict((canonical_form, + [ _to_rexp(pattern) for pattern in patterns ]) + for canonical_form, patterns in props.items())) + for type, props in prop_multi.items()) + +properties_rexps.update(dict((type, dict((canonical_form, [ _to_rexp(canonical_form) ]) + for canonical_form in props)) + for type, props in prop_single.items())) + + + +def find_properties(string): + result = [] + for property_name, props in properties_rexps.items(): + for canonical_form, rexps in props.items(): + for value_rexp in rexps: + match = value_rexp.search(string) + if match: + start, end = match.span() + # make sure our word is always surrounded by separators + # note: sep is a regexp, but in this case using it as + # a char sequence achieves the same goal + if ((start > 0 and string[start-1] not in sep) or + (end < len(string) and string[end] not in sep)): + continue + + result.append((property_name, canonical_form, start, end)) + return result + + +property_synonyms = { 'Special Edition': [ 'Special' ], + 'Collector Edition': [ 'Collector' ], + 'Criterion Edition': [ 'Criterion' ] + } + + +def revert_synonyms(): + reverse = {} + + for canonical, synonyms in property_synonyms.items(): + for synonym in synonyms: + reverse[synonym.lower()] = canonical + + return reverse + + +reverse_synonyms = revert_synonyms() + + +def canonical_form(string): + return reverse_synonyms.get(string.lower(), string) + + +def compute_canonical_form(property_name, value): + """Return the canonical form of a property given its type if it is a valid + one, None otherwise.""" + for canonical_form, rexps in properties_rexps[property_name].items(): + for rexp in rexps: + if rexp.match(value): + return canonical_form + return None diff --git 
a/lib/guessit/slogging.py b/lib/guessit/slogging.py new file mode 100644 index 0000000000000000000000000000000000000000..1d51b0f45d7e1a7910e53c68096c5680276a03ce --- /dev/null +++ b/lib/guessit/slogging.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Smewt - A smart collection manager +# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com> +# +# Smewt is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# Smewt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +from __future__ import unicode_literals +import logging +import sys + +GREEN_FONT = "\x1B[0;32m" +YELLOW_FONT = "\x1B[0;33m" +BLUE_FONT = "\x1B[0;34m" +RED_FONT = "\x1B[0;31m" +RESET_FONT = "\x1B[0m" + + +def setupLogging(colored=True): + """Set up a nice colored logger as the main application logger.""" + + class SimpleFormatter(logging.Formatter): + def __init__(self): + self.fmt = '%(levelname)-8s %(module)s:%(funcName)s -- %(message)s' + logging.Formatter.__init__(self, self.fmt) + + class ColoredFormatter(logging.Formatter): + def __init__(self): + self.fmt = ('%(levelname)-8s ' + + BLUE_FONT + '%(mname)-8s %(mmodule)s:%(funcName)s' + + RESET_FONT + ' -- %(message)s') + logging.Formatter.__init__(self, self.fmt) + + def format(self, record): + modpath = record.name.split('.') + record.mname = modpath[0] + record.mmodule = '.'.join(modpath[1:]) + result = logging.Formatter.format(self, record) + if record.levelno == logging.DEBUG: + return BLUE_FONT + result + elif 
record.levelno == logging.INFO: + return GREEN_FONT + result + elif record.levelno == logging.WARNING: + return YELLOW_FONT + result + else: + return RED_FONT + result + + ch = logging.StreamHandler() + if colored and sys.platform != 'win32': + ch.setFormatter(ColoredFormatter()) + else: + ch.setFormatter(SimpleFormatter()) + logging.getLogger().addHandler(ch) diff --git a/lib/guessit/textutils.py b/lib/guessit/textutils.py new file mode 100644 index 0000000000000000000000000000000000000000..e729067bc9a7000e2ebed1ff4329f0c275ade656 --- /dev/null +++ b/lib/guessit/textutils.py @@ -0,0 +1,210 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Smewt - A smart collection manager +# Copyright (c) 2008-2012 Nicolas Wack <wackou@gmail.com> +# +# Smewt is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# Smewt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# + +from __future__ import unicode_literals +from guessit import s +from guessit.patterns import sep +import functools +import unicodedata + +# string-related functions + + +def strip_brackets(s): + if not s: + return s + + if ((s[0] == '[' and s[-1] == ']') or + (s[0] == '(' and s[-1] == ')') or + (s[0] == '{' and s[-1] == '}')): + return s[1:-1] + + return s + + +def clean_string(s): + for c in sep[:-2]: # do not remove dashes ('-') + s = s.replace(c, ' ') + parts = s.split() + result = ' '.join(p for p in parts if p != '') + + # now also remove dashes on the outer part of the string + while result and result[0] in sep: + result = result[1:] + while result and result[-1] in sep: + result = result[:-1] + + return result + +def reorder_title(title): + ltitle = title.lower() + if ltitle[-4:] == ',the': + return title[-3:] + ' ' + title[:-4] + if ltitle[-5:] == ', the': + return title[-3:] + ' ' + title[:-5] + return title + + +def str_replace(string, pos, c): + return string[:pos] + c + string[pos+1:] + + +def str_fill(string, region, c): + start, end = region + return string[:start] + c * (end - start) + string[end:] + + + +def levenshtein(a, b): + if not a: + return len(b) + if not b: + return len(a) + + m = len(a) + n = len(b) + d = [] + for i in range(m+1): + d.append([0] * (n+1)) + + for i in range(m+1): + d[i][0] = i + + for j in range(n+1): + d[0][j] = j + + for i in range(1, m+1): + for j in range(1, n+1): + if a[i-1] == b[j-1]: + cost = 0 + else: + cost = 1 + + d[i][j] = min(d[i-1][j] + 1, # deletion + d[i][j-1] + 1, # insertion + d[i-1][j-1] + cost # substitution + ) + + return d[m][n] + + +# group-related functions + +def find_first_level_groups_span(string, enclosing): + """Return a list of pairs (start, end) for the groups delimited by the given + enclosing characters. + This does not return nested groups, ie: '(ab(c)(d))' will return a single group + containing the whole string. 
+ + >>> find_first_level_groups_span('abcd', '()') + [] + + >>> find_first_level_groups_span('abc(de)fgh', '()') + [(3, 7)] + + >>> find_first_level_groups_span('(ab(c)(d))', '()') + [(0, 10)] + + >>> find_first_level_groups_span('ab[c]de[f]gh(i)', '[]') + [(2, 5), (7, 10)] + """ + opening, closing = enclosing + depth = [] # depth is a stack of indices where we opened a group + result = [] + for i, c, in enumerate(string): + if c == opening: + depth.append(i) + elif c == closing: + try: + start = depth.pop() + end = i + if not depth: + # we emptied our stack, so we have a 1st level group + result.append((start, end+1)) + except IndexError: + # we closed a group which was not opened before + pass + + return result + + +def split_on_groups(string, groups): + """Split the given string using the different known groups for boundaries. + >>> s(split_on_groups('0123456789', [ (2, 4) ])) + ['01', '23', '456789'] + + >>> s(split_on_groups('0123456789', [ (2, 4), (4, 6) ])) + ['01', '23', '45', '6789'] + + >>> s(split_on_groups('0123456789', [ (5, 7), (2, 4) ])) + ['01', '23', '4', '56', '789'] + + """ + if not groups: + return [ string ] + + boundaries = sorted(set(functools.reduce(lambda l, x: l + list(x), groups, []))) + if boundaries[0] != 0: + boundaries.insert(0, 0) + if boundaries[-1] != len(string): + boundaries.append(len(string)) + + groups = [ string[start:end] for start, end in zip(boundaries[:-1], + boundaries[1:]) ] + + return [ g for g in groups if g ] # return only non-empty groups + + +def find_first_level_groups(string, enclosing, blank_sep=None): + """Return a list of groups that could be split because of explicit grouping. + The groups are delimited by the given enclosing characters. + + You can also specify if you want to blank the separator chars in the returned + list of groups by specifying a character for it. None means it won't be replaced. 
+ + This does not return nested groups, ie: '(ab(c)(d))' will return a single group + containing the whole string. + + >>> s(find_first_level_groups('', '()')) + [''] + + >>> s(find_first_level_groups('abcd', '()')) + ['abcd'] + + >>> s(find_first_level_groups('abc(de)fgh', '()')) + ['abc', '(de)', 'fgh'] + + >>> s(find_first_level_groups('(ab(c)(d))', '()', blank_sep = '_')) + ['_ab(c)(d)_'] + + >>> s(find_first_level_groups('ab[c]de[f]gh(i)', '[]')) + ['ab', '[c]', 'de', '[f]', 'gh(i)'] + + >>> s(find_first_level_groups('()[]()', '()', blank_sep = '-')) + ['--', '[]', '--'] + + """ + groups = find_first_level_groups_span(string, enclosing) + if blank_sep: + for start, end in groups: + string = str_replace(string, start, blank_sep) + string = str_replace(string, end-1, blank_sep) + + return split_on_groups(string, groups) diff --git a/lib/guessit/transfo/__init__.py b/lib/guessit/transfo/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..820690a78265a1e5ab9e059b35e312cc731a6ef9 --- /dev/null +++ b/lib/guessit/transfo/__init__.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com> +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# + +from __future__ import unicode_literals +from guessit import base_text_type, Guess +from guessit.patterns import canonical_form +from guessit.textutils import clean_string +import logging + +log = logging.getLogger(__name__) + + +def found_property(node, name, confidence): + node.guess = Guess({name: node.clean_value}, confidence=confidence) + log.debug('Found with confidence %.2f: %s' % (confidence, node.guess)) + + +def format_guess(guess): + """Format all the found values to their natural type. + For instance, a year would be stored as an int value, etc... + + Note that this modifies the dictionary given as input. + """ + for prop, value in guess.items(): + if prop in ('season', 'episodeNumber', 'year', 'cdNumber', + 'cdNumberTotal', 'bonusNumber', 'filmNumber'): + guess[prop] = int(guess[prop]) + elif isinstance(value, base_text_type): + if prop in ('edition',): + value = clean_string(value) + guess[prop] = canonical_form(value).replace('\\', '') + + return guess + + +def find_and_split_node(node, strategy, logger): + string = ' %s ' % node.value # add sentinels + for matcher, confidence in strategy: + if getattr(matcher, 'use_node', False): + result, span = matcher(string, node) + else: + result, span = matcher(string) + + if result: + # readjust span to compensate for sentinels + span = (span[0] - 1, span[1] - 1) + + if isinstance(result, Guess): + if confidence is None: + confidence = result.confidence(list(result.keys())[0]) + else: + if confidence is None: + confidence = 1.0 + + guess = format_guess(Guess(result, confidence=confidence)) + msg = 'Found with confidence %.2f: %s' % (confidence, guess) + (logger or log).debug(msg) + + node.partition(span) + absolute_span = (span[0] + node.offset, span[1] + node.offset) + for child in node.children: + if child.span == absolute_span: + child.guess = guess + else: + find_and_split_node(child, strategy, logger) + return + + +class SingleNodeGuesser(object): + def __init__(self, guess_func, confidence, 
logger=None): + self.guess_func = guess_func + self.confidence = confidence + self.logger = logger + + def process(self, mtree): + # strategy is a list of pairs (guesser, confidence) + # - if the guesser returns a guessit.Guess and confidence is specified, + # it will override it, otherwise it will leave the guess confidence + # - if the guesser returns a simple dict as a guess and confidence is + # specified, it will use it, or 1.0 otherwise + strategy = [ (self.guess_func, self.confidence) ] + + for node in mtree.unidentified_leaves(): + find_and_split_node(node, strategy, self.logger) diff --git a/lib/guessit/transfo/guess_bonus_features.py b/lib/guessit/transfo/guess_bonus_features.py new file mode 100644 index 0000000000000000000000000000000000000000..8c7ac0136b4e769204a86c22351bc5b2c85bb9ee --- /dev/null +++ b/lib/guessit/transfo/guess_bonus_features.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com> +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# + +from __future__ import unicode_literals +from guessit.transfo import found_property +import logging + +log = logging.getLogger(__name__) + + +def process(mtree): + def previous_group(g): + for leaf in mtree.unidentified_leaves()[::-1]: + if leaf.node_idx < g.node_idx: + return leaf + + def next_group(g): + for leaf in mtree.unidentified_leaves(): + if leaf.node_idx > g.node_idx: + return leaf + + def same_group(g1, g2): + return g1.node_idx[:2] == g2.node_idx[:2] + + bonus = [ node for node in mtree.leaves() if 'bonusNumber' in node.guess ] + if bonus: + bonusTitle = next_group(bonus[0]) + if same_group(bonusTitle, bonus[0]): + found_property(bonusTitle, 'bonusTitle', 0.8) + + filmNumber = [ node for node in mtree.leaves() + if 'filmNumber' in node.guess ] + if filmNumber: + filmSeries = previous_group(filmNumber[0]) + found_property(filmSeries, 'filmSeries', 0.9) + + title = next_group(filmNumber[0]) + found_property(title, 'title', 0.9) + + season = [ node for node in mtree.leaves() if 'season' in node.guess ] + if season and 'bonusNumber' in mtree.info: + series = previous_group(season[0]) + if same_group(series, season[0]): + found_property(series, 'series', 0.9) diff --git a/lib/guessit/transfo/guess_country.py b/lib/guessit/transfo/guess_country.py new file mode 100644 index 0000000000000000000000000000000000000000..e77ff59118f842f81f1c560a200a50a1df12fb60 --- /dev/null +++ b/lib/guessit/transfo/guess_country.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com> +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. 
+# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +from __future__ import unicode_literals +from guessit.country import Country +from guessit import Guess +import logging + +log = logging.getLogger(__name__) + +# list of common words which could be interpreted as countries, but which +# are far too common to be able to say they represent a country +country_common_words = frozenset([ 'bt' ]) + +def process(mtree): + for node in mtree.unidentified_leaves(): + if len(node.node_idx) == 2: + c = node.value[1:-1].lower() + if c in country_common_words: + continue + + # only keep explicit groups (enclosed in parentheses/brackets) + if node.value[0] + node.value[-1] not in ['()', '[]', '{}']: + continue + + try: + country = Country(c, strict=True) + except ValueError: + continue + + node.guess = Guess(country=country, confidence=1.0) diff --git a/lib/guessit/transfo/guess_date.py b/lib/guessit/transfo/guess_date.py new file mode 100644 index 0000000000000000000000000000000000000000..34a859896405412a9aea145b59d07793971c07a5 --- /dev/null +++ b/lib/guessit/transfo/guess_date.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com> +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. 
+# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +from __future__ import unicode_literals +from guessit.transfo import SingleNodeGuesser +from guessit.date import search_date +import logging + +log = logging.getLogger(__name__) + + +def guess_date(string): + date, span = search_date(string) + if date: + return { 'date': date }, span + else: + return None, None + + +def process(mtree): + SingleNodeGuesser(guess_date, 1.0, log).process(mtree) diff --git a/lib/guessit/transfo/guess_episode_info_from_position.py b/lib/guessit/transfo/guess_episode_info_from_position.py new file mode 100644 index 0000000000000000000000000000000000000000..967c33416ce75cf321a24a688341b54fb2b62d38 --- /dev/null +++ b/lib/guessit/transfo/guess_episode_info_from_position.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com> +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# + +from __future__ import unicode_literals +from guessit.transfo import found_property +from guessit.patterns import non_episode_title, unlikely_series +import logging + +log = logging.getLogger(__name__) + + +def match_from_epnum_position(mtree, node): + epnum_idx = node.node_idx + + # a few helper functions to be able to filter using high-level semantics + def before_epnum_in_same_pathgroup(): + return [ leaf for leaf in mtree.unidentified_leaves() + if (leaf.node_idx[0] == epnum_idx[0] and + leaf.node_idx[1:] < epnum_idx[1:]) ] + + def after_epnum_in_same_pathgroup(): + return [ leaf for leaf in mtree.unidentified_leaves() + if (leaf.node_idx[0] == epnum_idx[0] and + leaf.node_idx[1:] > epnum_idx[1:]) ] + + def after_epnum_in_same_explicitgroup(): + return [ leaf for leaf in mtree.unidentified_leaves() + if (leaf.node_idx[:2] == epnum_idx[:2] and + leaf.node_idx[2:] > epnum_idx[2:]) ] + + # epnumber is the first group and there are only 2 after it in same + # path group + # -> series title - episode title + title_candidates = [ n for n in after_epnum_in_same_pathgroup() + if n.clean_value.lower() not in non_episode_title ] + if ('title' not in mtree.info and # no title + before_epnum_in_same_pathgroup() == [] and # no groups before + len(title_candidates) == 2): # only 2 groups after + + found_property(title_candidates[0], 'series', confidence=0.4) + found_property(title_candidates[1], 'title', confidence=0.4) + return + + # if we have at least 1 valid group before the episodeNumber, then it's + # probably the series name + series_candidates = before_epnum_in_same_pathgroup() + if len(series_candidates) >= 1: + found_property(series_candidates[0], 'series', confidence=0.7) + + # only 1 group after (in the same path group) and it's probably the + # episode title + title_candidates = [ n for n in after_epnum_in_same_pathgroup() + if n.clean_value.lower() not in non_episode_title ] + + if len(title_candidates) == 1: + found_property(title_candidates[0], 'title', 
confidence=0.5) + return + else: + # try in the same explicit group, with lower confidence + title_candidates = [ n for n in after_epnum_in_same_explicitgroup() + if n.clean_value.lower() not in non_episode_title + ] + if len(title_candidates) == 1: + found_property(title_candidates[0], 'title', confidence=0.4) + return + elif len(title_candidates) > 1: + found_property(title_candidates[0], 'title', confidence=0.3) + return + + # get the one with the longest value + title_candidates = [ n for n in after_epnum_in_same_pathgroup() + if n.clean_value.lower() not in non_episode_title ] + if title_candidates: + maxidx = -1 + maxv = -1 + for i, c in enumerate(title_candidates): + if len(c.clean_value) > maxv: + maxidx = i + maxv = len(c.clean_value) + found_property(title_candidates[maxidx], 'title', confidence=0.3) + + +def process(mtree): + eps = [node for node in mtree.leaves() if 'episodeNumber' in node.guess] + if eps: + match_from_epnum_position(mtree, eps[0]) + + else: + # if we don't have the episode number, but at least 2 groups in the + # basename, then it's probably series - eptitle + basename = mtree.node_at((-2,)) + title_candidates = [ n for n in basename.unidentified_leaves() + if n.clean_value.lower() not in non_episode_title + ] + + if len(title_candidates) >= 2: + found_property(title_candidates[0], 'series', 0.4) + found_property(title_candidates[1], 'title', 0.4) + elif len(title_candidates) == 1: + # but if there's only one candidate, it's probably the series name + found_property(title_candidates[0], 'series', 0.4) + + # if we only have 1 remaining valid group in the folder containing the + # file, then it's likely that it is the series name + try: + series_candidates = mtree.node_at((-3,)).unidentified_leaves() + except ValueError: + series_candidates = [] + + if len(series_candidates) == 1: + found_property(series_candidates[0], 'series', 0.3) + + # if there's a path group that only contains the season info, then the + # previous one is most 
likely the series title (ie: ../series/season X/..) + eps = [ node for node in mtree.nodes() + if 'season' in node.guess and 'episodeNumber' not in node.guess ] + + if eps: + previous = [ node for node in mtree.unidentified_leaves() + if node.node_idx[0] == eps[0].node_idx[0] - 1 ] + if len(previous) == 1: + found_property(previous[0], 'series', 0.5) + + # reduce the confidence of unlikely series + for node in mtree.nodes(): + if 'series' in node.guess: + if node.guess['series'].lower() in unlikely_series: + new_confidence = node.guess.confidence('series') * 0.5 + node.guess.set_confidence('series', new_confidence) diff --git a/lib/guessit/transfo/guess_episodes_rexps.py b/lib/guessit/transfo/guess_episodes_rexps.py new file mode 100644 index 0000000000000000000000000000000000000000..4ebfb547e736d22b973e299ebf00e410770bc53c --- /dev/null +++ b/lib/guessit/transfo/guess_episodes_rexps.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com> +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# + +from __future__ import unicode_literals +from guessit import Guess +from guessit.transfo import SingleNodeGuesser +from guessit.patterns import episode_rexps +import re +import logging + +log = logging.getLogger(__name__) + +def number_list(s): + return list(re.sub('[^0-9]+', ' ', s).split()) + +def guess_episodes_rexps(string): + for rexp, confidence, span_adjust in episode_rexps: + match = re.search(rexp, string, re.IGNORECASE) + if match: + guess = Guess(match.groupdict(), confidence=confidence) + span = (match.start() + span_adjust[0], + match.end() + span_adjust[1]) + + # episodes which have a season > 25 are most likely errors + # (Simpsons is at 24!) + if int(guess.get('season', 0)) > 25: + continue + + # decide whether we have only a single episode number or an + # episode list + if guess.get('episodeNumber'): + eplist = number_list(guess['episodeNumber']) + guess.set('episodeNumber', int(eplist[0]), confidence=confidence) + + if len(eplist) > 1: + guess.set('episodeList', list(map(int, eplist)), confidence=confidence) + + if guess.get('bonusNumber'): + eplist = number_list(guess['bonusNumber']) + guess.set('bonusNumber', int(eplist[0]), confidence=confidence) + + return guess, span + + return None, None + + +def process(mtree): + SingleNodeGuesser(guess_episodes_rexps, None, log).process(mtree) diff --git a/lib/guessit/transfo/guess_filetype.py b/lib/guessit/transfo/guess_filetype.py new file mode 100644 index 0000000000000000000000000000000000000000..4d98d016e0e0280602dc6be2a34855b7b006d087 --- /dev/null +++ b/lib/guessit/transfo/guess_filetype.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com> +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# 
(at your option) any later version. +# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +from __future__ import unicode_literals +from guessit import Guess +from guessit.patterns import (subtitle_exts, video_exts, episode_rexps, + find_properties, compute_canonical_form) +from guessit.date import valid_year +from guessit.textutils import clean_string +import os.path +import re +import mimetypes +import logging + +log = logging.getLogger(__name__) + +# List of well known movies and series, hardcoded because they cannot be +# guessed appropriately otherwise +MOVIES = [ 'OSS 117' ] +SERIES = [ 'Band of Brothers' ] + +MOVIES = [ m.lower() for m in MOVIES ] +SERIES = [ s.lower() for s in SERIES ] + +def guess_filetype(mtree, filetype): + # put the filetype inside a dummy container to be able to have the + # following functions work correctly as closures + # this is a workaround for python 2 which doesn't have the + # 'nonlocal' keyword (python 3 does have it) + filetype_container = [filetype] + other = {} + filename = mtree.string + + def upgrade_episode(): + if filetype_container[0] == 'video': + filetype_container[0] = 'episode' + elif filetype_container[0] == 'subtitle': + filetype_container[0] = 'episodesubtitle' + + def upgrade_movie(): + if filetype_container[0] == 'video': + filetype_container[0] = 'movie' + elif filetype_container[0] == 'subtitle': + filetype_container[0] = 'moviesubtitle' + + def upgrade_subtitle(): + if 'movie' in filetype_container[0]: + filetype_container[0] = 'moviesubtitle' + elif 'episode' in filetype_container[0]: + filetype_container[0] = 'episodesubtitle' + else: + filetype_container[0] = 
'subtitle' + + def upgrade(type='unknown'): + if filetype_container[0] == 'autodetect': + filetype_container[0] = type + + + # look at the extension first + fileext = os.path.splitext(filename)[1][1:].lower() + if fileext in subtitle_exts: + upgrade_subtitle() + other = { 'container': fileext } + elif fileext in video_exts: + upgrade(type='video') + other = { 'container': fileext } + else: + upgrade(type='unknown') + other = { 'extension': fileext } + + + + # check whether we are in a 'Movies', 'Tv Shows', ... folder + folder_rexps = [ (r'Movies?', upgrade_movie), + (r'Tv[ _-]?Shows?', upgrade_episode), + (r'Series', upgrade_episode) + ] + for frexp, upgrade_func in folder_rexps: + frexp = re.compile(frexp, re.IGNORECASE) + for pathgroup in mtree.children: + if frexp.match(pathgroup.value): + upgrade_func() + + # check for a few specific cases which will unintentionally make the + # following heuristics confused (eg: OSS 117 will look like an episode, + # season 1, epnum 17, when it is in fact a movie) + fname = clean_string(filename).lower() + for m in MOVIES: + if m in fname: + upgrade_movie() + for s in SERIES: + if s in fname: + upgrade_episode() + + # now look whether there are some specific hints for episode vs movie + if filetype_container[0] in ('video', 'subtitle'): + # if we have an episode_rexp (eg: s02e13), it is an episode + for rexp, _, _ in episode_rexps: + match = re.search(rexp, filename, re.IGNORECASE) + if match: + upgrade_episode() + break + + # if we have a 3-4 digit number that's not a year, maybe an episode + match = re.search(r'[^0-9]([0-9]{3,4})[^0-9]', filename) + if match: + fullnumber = int(match.group()[1:-1]) + #season = fullnumber // 100 + epnumber = fullnumber % 100 + possible = True + + # check for validity + if epnumber > 40: + possible = False + if valid_year(fullnumber): + possible = False + + if possible: + upgrade_episode() + + # if we have certain properties characteristic of episodes, it is an ep + for prop, value, _, _ in 
find_properties(filename): + log.debug('prop: %s = %s' % (prop, value)) + if prop == 'episodeFormat': + upgrade_episode() + break + + elif compute_canonical_form('format', value) == 'DVB': + upgrade_episode() + break + + # origin-specific type + if 'tvu.org.ru' in filename: + upgrade_episode() + + # if no episode info found, assume it's a movie + upgrade_movie() + + filetype = filetype_container[0] + return filetype, other + + +def process(mtree, filetype='autodetect'): + filetype, other = guess_filetype(mtree, filetype) + + mtree.guess.set('type', filetype, confidence=1.0) + log.debug('Found with confidence %.2f: %s' % (1.0, mtree.guess)) + + filetype_info = Guess(other, confidence=1.0) + # guess the mimetype of the filename + # TODO: handle other mimetypes not found on the default type_maps + # mimetypes.types_map['.srt']='text/subtitle' + mime, _ = mimetypes.guess_type(mtree.string, strict=False) + if mime is not None: + filetype_info.update({'mimetype': mime}, confidence=1.0) + + node_ext = mtree.node_at((-1,)) + node_ext.guess = filetype_info + log.debug('Found with confidence %.2f: %s' % (1.0, node_ext.guess)) diff --git a/lib/guessit/transfo/guess_language.py b/lib/guessit/transfo/guess_language.py new file mode 100644 index 0000000000000000000000000000000000000000..fe547e614da5a043e7beab327244bd30bf1a5258 --- /dev/null +++ b/lib/guessit/transfo/guess_language.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com> +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. 
+# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +from __future__ import unicode_literals +from guessit import Guess +from guessit.transfo import SingleNodeGuesser +from guessit.language import search_language +from guessit.textutils import clean_string +import logging + +log = logging.getLogger(__name__) + + +def guess_language(string): + language, span, confidence = search_language(string) + if language: + # is it a subtitle language? + if 'sub' in clean_string(string[:span[0]]).lower().split(' '): + return (Guess({'subtitleLanguage': language}, + confidence=confidence), + span) + else: + return (Guess({'language': language}, + confidence=confidence), + span) + + return None, None + + +def process(mtree): + SingleNodeGuesser(guess_language, None, log).process(mtree) diff --git a/lib/guessit/transfo/guess_movie_title_from_position.py b/lib/guessit/transfo/guess_movie_title_from_position.py new file mode 100644 index 0000000000000000000000000000000000000000..8b6f5d0a5cc201b81ba4507253ece9506a2011b4 --- /dev/null +++ b/lib/guessit/transfo/guess_movie_title_from_position.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com> +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. 
+# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +from __future__ import unicode_literals +from guessit import Guess +import logging + +log = logging.getLogger(__name__) + + +def process(mtree): + def found_property(node, name, value, confidence): + node.guess = Guess({ name: value }, + confidence=confidence) + log.debug('Found with confidence %.2f: %s' % (confidence, node.guess)) + + def found_title(node, confidence): + found_property(node, 'title', node.clean_value, confidence) + + basename = mtree.node_at((-2,)) + all_valid = lambda leaf: len(leaf.clean_value) > 0 + basename_leftover = basename.unidentified_leaves(valid=all_valid) + + try: + folder = mtree.node_at((-3,)) + folder_leftover = folder.unidentified_leaves() + except ValueError: + folder = None + folder_leftover = [] + + log.debug('folder: %s' % folder_leftover) + log.debug('basename: %s' % basename_leftover) + + # specific cases: + # if we find the same group both in the folder name and the filename, + # it's a good candidate for title + if (folder_leftover and basename_leftover and + folder_leftover[0].clean_value == basename_leftover[0].clean_value): + + found_title(folder_leftover[0], confidence=0.8) + return + + # specific cases: + # if the basename contains a number first followed by an unidentified + # group, and the folder only contains 1 unidentified one, then we have + # a series + # ex: Millenium Trilogy (2009)/(1)The Girl With The Dragon Tattoo(2009).mkv + try: + series = folder_leftover[0] + filmNumber = basename_leftover[0] + title = basename_leftover[1] + + basename_leaves = basename.leaves() + + num = int(filmNumber.clean_value) + + 
log.debug('series: %s' % series.clean_value) + log.debug('title: %s' % title.clean_value) + if (series.clean_value != title.clean_value and + series.clean_value != filmNumber.clean_value and + basename_leaves.index(filmNumber) == 0 and + basename_leaves.index(title) == 1): + + found_title(title, confidence=0.6) + found_property(series, 'filmSeries', + series.clean_value, confidence=0.6) + found_property(filmNumber, 'filmNumber', + num, confidence=0.6) + return + except Exception: + pass + + # specific cases: + # - movies/tttttt (yyyy)/tttttt.ccc + try: + if mtree.node_at((-4, 0)).value.lower() == 'movies': + folder = mtree.node_at((-3,)) + + # Note:too generic, might solve all the unittests as they all + # contain 'movies' in their path + # + #if containing_folder.is_leaf() and not containing_folder.guess: + # containing_folder.guess = + # Guess({ 'title': clean_string(containing_folder.value) }, + # confidence=0.7) + + year_group = folder.first_leaf_containing('year') + groups_before = folder.previous_unidentified_leaves(year_group) + + found_title(groups_before[0], confidence=0.8) + return + + except Exception: + pass + + # if we have either format or videoCodec in the folder containing the file + # or one of its parents, then we should probably look for the title in + # there rather than in the basename + try: + props = mtree.previous_leaves_containing(mtree.children[-2], + [ 'videoCodec', 'format', + 'language' ]) + except IndexError: + props = [] + + if props: + group_idx = props[0].node_idx[0] + if all(g.node_idx[0] == group_idx for g in props): + # if they're all in the same group, take leftover info from there + leftover = mtree.node_at((group_idx,)).unidentified_leaves() + + if leftover: + found_title(leftover[0], confidence=0.7) + return + + # look for title in basename if there are some remaining undidentified + # groups there + if basename_leftover: + title_candidate = basename_leftover[0] + + # if basename is only one word and the containing folder has 
at least + # 3 words in it, we should take the title from the folder name + # ex: Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi + # ex: Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi <-- TODO: gets caught here? + if (title_candidate.clean_value.count(' ') == 0 and + folder_leftover and + folder_leftover[0].clean_value.count(' ') >= 2): + + found_title(folder_leftover[0], confidence=0.7) + return + + # if there are only 2 unidentified groups, the first of which is inside + # brackets or parentheses, we take the second one for the title: + # ex: Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi + if len(basename_leftover) == 2 and basename_leftover[0].is_explicit(): + found_title(basename_leftover[1], confidence=0.8) + return + + # if all else fails, take the first remaining unidentified group in the + # basename as title + found_title(title_candidate, confidence=0.6) + return + + # if there are no leftover groups in the basename, look in the folder name + if folder_leftover: + found_title(folder_leftover[0], confidence=0.5) + return + + # if nothing worked, look if we have a very small group at the beginning + # of the basename + basename = mtree.node_at((-2,)) + basename_leftover = basename.unidentified_leaves(valid=lambda leaf: True) + if basename_leftover: + found_title(basename_leftover[0], confidence=0.4) + return diff --git a/lib/guessit/transfo/guess_properties.py b/lib/guessit/transfo/guess_properties.py new file mode 100644 index 0000000000000000000000000000000000000000..6c72dfd5e60a83f157c148ac4f0da0eb9c4dcea1 --- /dev/null +++ b/lib/guessit/transfo/guess_properties.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com> +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either 
version 3 of the License, or +# (at your option) any later version. +# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +from __future__ import unicode_literals +from guessit.transfo import SingleNodeGuesser +from guessit.patterns import find_properties +import logging + +log = logging.getLogger(__name__) + + +def guess_properties(string): + try: + prop, value, pos, end = find_properties(string)[0] + return { prop: value }, (pos, end) + except IndexError: + return None, None + + +def process(mtree): + SingleNodeGuesser(guess_properties, 1.0, log).process(mtree) diff --git a/lib/guessit/transfo/guess_release_group.py b/lib/guessit/transfo/guess_release_group.py new file mode 100644 index 0000000000000000000000000000000000000000..4c9025499be9cd7190b7d22e30751b0512420a59 --- /dev/null +++ b/lib/guessit/transfo/guess_release_group.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com> +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. 
If not, see <http://www.gnu.org/licenses/>. +# + +from __future__ import unicode_literals +from guessit.transfo import SingleNodeGuesser +from guessit.patterns import prop_multi, compute_canonical_form, _dash, _psep +import re +import logging + +log = logging.getLogger(__name__) + +def get_patterns(property_name): + return [ p.replace(_dash, _psep) for patterns in prop_multi[property_name].values() for p in patterns ] + +CODECS = get_patterns('videoCodec') +FORMATS = get_patterns('format') + +def adjust_metadata(md): + return dict((property_name, compute_canonical_form(property_name, value) or value) + for property_name, value in md.items()) + + +def guess_release_group(string): + # first try to see whether we have both a known codec and a known release group + group_names = [ r'(?P<videoCodec>' + codec + r')-?(?P<releaseGroup>.*?)[ \.]' + for codec in CODECS ] + group_names += [ r'(?P<format>' + fmt + r')-?(?P<releaseGroup>.*?)[ \.]' + for fmt in FORMATS ] + + for rexp in group_names: + match = re.search(rexp, string, re.IGNORECASE) + if match: + metadata = match.groupdict() + release_group = compute_canonical_form('releaseGroup', metadata['releaseGroup']) + if release_group: + return adjust_metadata(metadata), (match.start(1), match.end(2)) + + # pick anything as releaseGroup as long as we have a codec in front + # this doesn't include a potential dash ('-') ending the release group + # eg: [...].X264-HiS@SiLUHD-English.[...] 
+ group_names = [ r'\.(?P<videoCodec>' + codec + r')-(?P<releaseGroup>.*?)(-(.*?))?[ \.]' + for codec in CODECS ] + group_names += [ r'\.(?P<format>' + fmt + r')-(?P<releaseGroup>.*?)(-(.*?))?[ \.]' + for fmt in FORMATS ] + + for rexp in group_names: + match = re.search(rexp, string, re.IGNORECASE) + if match: + return adjust_metadata(match.groupdict()), (match.start(1), match.end(2)) + + return None, None + + +def process(mtree): + SingleNodeGuesser(guess_release_group, 0.8, log).process(mtree) diff --git a/lib/guessit/transfo/guess_video_rexps.py b/lib/guessit/transfo/guess_video_rexps.py new file mode 100644 index 0000000000000000000000000000000000000000..8ae9e6c659879c3440cd5a7d85cb14c533059944 --- /dev/null +++ b/lib/guessit/transfo/guess_video_rexps.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com> +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# + +from __future__ import unicode_literals +from guessit import Guess +from guessit.transfo import SingleNodeGuesser +from guessit.patterns import video_rexps, sep +import re +import logging + +log = logging.getLogger(__name__) + + +def guess_video_rexps(string): + string = '-' + string + '-' + for rexp, confidence, span_adjust in video_rexps: + match = re.search(sep + rexp + sep, string, re.IGNORECASE) + if match: + metadata = match.groupdict() + # is this the better place to put it? (maybe, as it is at least + # the soonest that we can catch it) + if metadata.get('cdNumberTotal', -1) is None: + del metadata['cdNumberTotal'] + return (Guess(metadata, confidence=confidence), + (match.start() + span_adjust[0], + match.end() + span_adjust[1] - 2)) + + return None, None + + +def process(mtree): + SingleNodeGuesser(guess_video_rexps, None, log).process(mtree) diff --git a/lib/guessit/transfo/guess_weak_episodes_rexps.py b/lib/guessit/transfo/guess_weak_episodes_rexps.py new file mode 100644 index 0000000000000000000000000000000000000000..8436ade85553639be452905b5365964e7ef5c8ad --- /dev/null +++ b/lib/guessit/transfo/guess_weak_episodes_rexps.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com> +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. 
If not, see <http://www.gnu.org/licenses/>. +# + +from __future__ import unicode_literals +from guessit import Guess +from guessit.transfo import SingleNodeGuesser +from guessit.patterns import weak_episode_rexps +import re +import logging + +log = logging.getLogger(__name__) + + +def guess_weak_episodes_rexps(string, node): + if 'episodeNumber' in node.root.info: + return None, None + + for rexp, span_adjust in weak_episode_rexps: + match = re.search(rexp, string, re.IGNORECASE) + if match: + metadata = match.groupdict() + span = (match.start() + span_adjust[0], + match.end() + span_adjust[1]) + + epnum = int(metadata['episodeNumber']) + if epnum > 100: + season, epnum = epnum // 100, epnum % 100 + # episodes which have a season > 25 are most likely errors + # (Simpsons is at 23!) + if season > 25: + continue + return Guess({ 'season': season, + 'episodeNumber': epnum }, + confidence=0.6), span + else: + return Guess(metadata, confidence=0.3), span + + return None, None + + +guess_weak_episodes_rexps.use_node = True + + +def process(mtree): + SingleNodeGuesser(guess_weak_episodes_rexps, 0.6, log).process(mtree) diff --git a/lib/guessit/transfo/guess_website.py b/lib/guessit/transfo/guess_website.py new file mode 100644 index 0000000000000000000000000000000000000000..acfd8e119d81eb59ad75842370d40be24705decb --- /dev/null +++ b/lib/guessit/transfo/guess_website.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com> +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. 
+# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +from __future__ import unicode_literals +from guessit.transfo import SingleNodeGuesser +from guessit.patterns import websites +import logging + +log = logging.getLogger(__name__) + + +def guess_website(string): + low = string.lower() + for site in websites: + pos = low.find(site.lower()) + if pos != -1: + return {'website': site}, (pos, pos + len(site)) + return None, None + + +def process(mtree): + SingleNodeGuesser(guess_website, 1.0, log).process(mtree) diff --git a/lib/guessit/transfo/guess_year.py b/lib/guessit/transfo/guess_year.py new file mode 100644 index 0000000000000000000000000000000000000000..4bc9b867e06daa845b4c0c76799d90c84355edba --- /dev/null +++ b/lib/guessit/transfo/guess_year.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com> +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# + +from __future__ import unicode_literals +from guessit.transfo import SingleNodeGuesser +from guessit.date import search_year +import logging + +log = logging.getLogger(__name__) + + +def guess_year(string): + year, span = search_year(string) + if year: + return { 'year': year }, span + else: + return None, None + + +def process(mtree): + SingleNodeGuesser(guess_year, 1.0, log).process(mtree) diff --git a/lib/guessit/transfo/post_process.py b/lib/guessit/transfo/post_process.py new file mode 100644 index 0000000000000000000000000000000000000000..ec69d7ebb526de48ee59e905c0b8db8b69dcdfc0 --- /dev/null +++ b/lib/guessit/transfo/post_process.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com> +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# + +from __future__ import unicode_literals +from guessit.patterns import subtitle_exts +from guessit.textutils import reorder_title +import logging + +log = logging.getLogger(__name__) + + +def process(mtree): + # 1- try to promote language to subtitle language where it makes sense + for node in mtree.nodes(): + if 'language' not in node.guess: + continue + + def promote_subtitle(): + # pylint: disable=W0631 + node.guess.set('subtitleLanguage', node.guess['language'], + confidence=node.guess.confidence('language')) + del node.guess['language'] + + # - if we matched a language in a file with a sub extension and that + # the group is the last group of the filename, it is probably the + # language of the subtitle + # (eg: 'xxx.english.srt') + if (mtree.node_at((-1,)).value.lower() in subtitle_exts and + node == mtree.leaves()[-2]): + promote_subtitle() + + # - if a language is in an explicit group just preceded by "st", + # it is a subtitle language (eg: '...st[fr-eng]...') + try: + idx = node.node_idx + previous = mtree.node_at((idx[0], idx[1] - 1)).leaves()[-1] + if previous.value.lower()[-2:] == 'st': + promote_subtitle() + except IndexError: + pass + + # 2- ", the" at the end of a series title should be prepended to it + for node in mtree.nodes(): + if 'series' not in node.guess: + continue + + node.guess['series'] = reorder_title(node.guess['series']) diff --git a/lib/guessit/transfo/split_explicit_groups.py b/lib/guessit/transfo/split_explicit_groups.py new file mode 100644 index 0000000000000000000000000000000000000000..7ae5787d49b192457086f7b4f274138060b2fff4 --- /dev/null +++ b/lib/guessit/transfo/split_explicit_groups.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com> +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the 
Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +from __future__ import unicode_literals +from guessit.textutils import find_first_level_groups +from guessit.patterns import group_delimiters +import functools +import logging + +log = logging.getLogger(__name__) + + +def process(mtree): + """return the string split into explicit groups, that is, those either + between parenthese, square brackets or curly braces, and those separated + by a dash.""" + for c in mtree.children: + groups = find_first_level_groups(c.value, group_delimiters[0]) + for delimiters in group_delimiters: + flatten = lambda l, x: l + find_first_level_groups(x, delimiters) + groups = functools.reduce(flatten, groups, []) + + # do not do this at this moment, it is not strong enough and can break other + # patterns, such as dates, etc... 
+ #groups = functools.reduce(lambda l, x: l + x.split('-'), groups, []) + + c.split_on_components(groups) diff --git a/lib/guessit/transfo/split_on_dash.py b/lib/guessit/transfo/split_on_dash.py new file mode 100644 index 0000000000000000000000000000000000000000..031baff61fff7b2f3247f44e5ec261ffcb0bbb46 --- /dev/null +++ b/lib/guessit/transfo/split_on_dash.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com> +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# + +from __future__ import unicode_literals +from guessit.patterns import sep +import re +import logging + +log = logging.getLogger(__name__) + + +def process(mtree): + for node in mtree.unidentified_leaves(): + indices = [] + + didx = 0 + pattern = re.compile(sep + '-' + sep) + match = pattern.search(node.value) + while match: + span = match.span() + indices.extend([ span[0], span[1] ]) + match = pattern.search(node.value, span[1]) + + if indices: + node.partition(indices) diff --git a/lib/guessit/transfo/split_path_components.py b/lib/guessit/transfo/split_path_components.py new file mode 100644 index 0000000000000000000000000000000000000000..35fab405d6fdbca7cc1060479ad58ebe7115310b --- /dev/null +++ b/lib/guessit/transfo/split_path_components.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com> +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# + +from __future__ import unicode_literals +from guessit import fileutils +import os.path +import logging + +log = logging.getLogger(__name__) + + +def process(mtree): + """Returns the filename split into [ dir*, basename, ext ].""" + components = fileutils.split_path(mtree.value) + basename = components.pop(-1) + components += list(os.path.splitext(basename)) + components[-1] = components[-1][1:] # remove the '.' from the extension + + mtree.split_on_components(components) diff --git a/lib/subliminal/__init__.py b/lib/subliminal/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..77297f5a3f28bd573c2a1bf0c0f50b174ba1581c --- /dev/null +++ b/lib/subliminal/__init__.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com> +# +# This file is part of subliminal. +# +# subliminal is free software; you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# subliminal is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with subliminal. If not, see <http://www.gnu.org/licenses/>. 
+from .api import list_subtitles, download_subtitles +from .async import Pool +from .core import (SERVICES, LANGUAGE_INDEX, SERVICE_INDEX, SERVICE_CONFIDENCE, + MATCHING_CONFIDENCE) +from .infos import __version__ +import logging +try: + from logging import NullHandler +except ImportError: + class NullHandler(logging.Handler): + def emit(self, record): + pass + + +__all__ = ['SERVICES', 'LANGUAGE_INDEX', 'SERVICE_INDEX', 'SERVICE_CONFIDENCE', + 'MATCHING_CONFIDENCE', 'list_subtitles', 'download_subtitles', 'Pool'] +logging.getLogger("subliminal").addHandler(NullHandler()) diff --git a/lib/subliminal/api.py b/lib/subliminal/api.py new file mode 100644 index 0000000000000000000000000000000000000000..3b6f9139d25a8afd336b25a0c944e1442102b38e --- /dev/null +++ b/lib/subliminal/api.py @@ -0,0 +1,109 @@ +# -*- coding: utf-8 -*- +# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com> +# +# This file is part of subliminal. +# +# subliminal is free software; you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# subliminal is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with subliminal. If not, see <http://www.gnu.org/licenses/>. 
+from .core import (SERVICES, LANGUAGE_INDEX, SERVICE_INDEX, SERVICE_CONFIDENCE, + MATCHING_CONFIDENCE, create_list_tasks, consume_task, create_download_tasks, + group_by_video, key_subtitles) +from .language import language_set, language_list, LANGUAGES +import logging + + +__all__ = ['list_subtitles', 'download_subtitles'] +logger = logging.getLogger("subliminal") + + +def list_subtitles(paths, languages=None, services=None, force=True, multi=False, cache_dir=None, max_depth=3, scan_filter=None): + """List subtitles in given paths according to the criteria + + :param paths: path(s) to video file or folder + :type paths: string or list + :param languages: languages to search for, in preferred order + :type languages: list of :class:`~subliminal.language.Language` or string + :param list services: services to use for the search, in preferred order + :param bool force: force searching for subtitles even if some are detected + :param bool multi: search multiple languages for the same video + :param string cache_dir: path to the cache directory to use + :param int max_depth: maximum depth for scanning entries + :param function scan_filter: filter function that takes a path as argument and returns a boolean indicating whether it has to be filtered out (``True``) or not (``False``) + :return: found subtitles + :rtype: dict of :class:`~subliminal.videos.Video` => [:class:`~subliminal.subtitles.ResultSubtitle`] + + """ + services = services or SERVICES + languages = language_set(languages) if languages is not None else language_set(LANGUAGES) + if isinstance(paths, basestring): + paths = [paths] + if any([not isinstance(p, unicode) for p in paths]): + logger.warning(u'Not all entries are unicode') + results = [] + service_instances = {} + tasks = create_list_tasks(paths, languages, services, force, multi, cache_dir, max_depth, scan_filter) + for task in tasks: + try: + result = consume_task(task, service_instances) + results.append((task.video, result)) + except: + 
logger.error(u'Error consuming task %r' % task, exc_info=True) + for service_instance in service_instances.itervalues(): + service_instance.terminate() + return group_by_video(results) + + +def download_subtitles(paths, languages=None, services=None, force=True, multi=False, cache_dir=None, max_depth=3, scan_filter=None, order=None): + """Download subtitles in given paths according to the criteria + + :param paths: path(s) to video file or folder + :type paths: string or list + :param languages: languages to search for, in preferred order + :type languages: list of :class:`~subliminal.language.Language` or string + :param list services: services to use for the search, in preferred order + :param bool force: force searching for subtitles even if some are detected + :param bool multi: search multiple languages for the same video + :param string cache_dir: path to the cache directory to use + :param int max_depth: maximum depth for scanning entries + :param function scan_filter: filter function that takes a path as argument and returns a boolean indicating whether it has to be filtered out (``True``) or not (``False``) + :param order: preferred order for subtitles sorting + :type list: list of :data:`~subliminal.core.LANGUAGE_INDEX`, :data:`~subliminal.core.SERVICE_INDEX`, :data:`~subliminal.core.SERVICE_CONFIDENCE`, :data:`~subliminal.core.MATCHING_CONFIDENCE` + :return: downloaded subtitles + :rtype: dict of :class:`~subliminal.videos.Video` => [:class:`~subliminal.subtitles.ResultSubtitle`] + + .. note:: + + If you use ``multi=True``, :data:`~subliminal.core.LANGUAGE_INDEX` has to be the first item of the ``order`` list + or you might get unexpected results. 
+ + """ + services = services or SERVICES + languages = language_list(languages) if languages is not None else language_list(LANGUAGES) + if isinstance(paths, basestring): + paths = [paths] + order = order or [LANGUAGE_INDEX, SERVICE_INDEX, SERVICE_CONFIDENCE, MATCHING_CONFIDENCE] + subtitles_by_video = list_subtitles(paths, languages, services, force, multi, cache_dir, max_depth, scan_filter) + for video, subtitles in subtitles_by_video.iteritems(): + subtitles.sort(key=lambda s: key_subtitles(s, video, languages, services, order), reverse=True) + results = [] + service_instances = {} + tasks = create_download_tasks(subtitles_by_video, languages, multi) + for task in tasks: + try: + result = consume_task(task, service_instances) + results.append((task.video, result)) + except: + logger.error(u'Error consuming task %r' % task, exc_info=True) + for service_instance in service_instances.itervalues(): + service_instance.terminate() + return group_by_video(results) diff --git a/lib/subliminal/async.py b/lib/subliminal/async.py new file mode 100644 index 0000000000000000000000000000000000000000..ff42764b830ae94640addffcd8e2e4496b37f6c1 --- /dev/null +++ b/lib/subliminal/async.py @@ -0,0 +1,142 @@ +# -*- coding: utf-8 -*- +# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com> +# +# This file is part of subliminal. +# +# subliminal is free software; you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# subliminal is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with subliminal. If not, see <http://www.gnu.org/licenses/>. 
+from .core import (consume_task, LANGUAGE_INDEX, SERVICE_INDEX, + SERVICE_CONFIDENCE, MATCHING_CONFIDENCE, SERVICES, create_list_tasks, + create_download_tasks, group_by_video, key_subtitles) +from .language import language_list, language_set, LANGUAGES +from .tasks import StopTask +import Queue +import logging +import threading + + +__all__ = ['Worker', 'Pool'] +logger = logging.getLogger("subliminal") + + +class Worker(threading.Thread): + """Consume tasks and put the result in the queue""" + def __init__(self, tasks, results): + super(Worker, self).__init__() + self.tasks = tasks + self.results = results + self.services = {} + + def run(self): + while 1: + result = [] + try: + task = self.tasks.get(block=True) + if isinstance(task, StopTask): + break + result = consume_task(task, self.services) + self.results.put((task.video, result)) + except: + logger.error(u'Exception raised in worker %s' % self.name, exc_info=True) + finally: + self.tasks.task_done() + self.terminate() + logger.debug(u'Thread %s terminated' % self.name) + + def terminate(self): + """Terminate instantiated services""" + for service_name, service in self.services.iteritems(): + try: + service.terminate() + except: + logger.error(u'Exception raised when terminating service %s' % service_name, exc_info=True) + + +class Pool(object): + """Pool of workers""" + def __init__(self, size): + self.tasks = Queue.Queue() + self.results = Queue.Queue() + self.workers = [] + for _ in range(size): + self.workers.append(Worker(self.tasks, self.results)) + + def __enter__(self): + self.start() + return self + + def __exit__(self, *args): + self.stop() + self.join() + + def start(self): + """Start workers""" + for worker in self.workers: + worker.start() + + def stop(self): + """Stop workers""" + for _ in self.workers: + self.tasks.put(StopTask()) + + def join(self): + """Join the task queue""" + self.tasks.join() + + def collect(self): + """Collect available results + + :return: results of tasks + :rtype: 
list of :class:`~subliminal.tasks.Task` + + """ + results = [] + while 1: + try: + result = self.results.get(block=False) + results.append(result) + except Queue.Empty: + break + return results + + def list_subtitles(self, paths, languages=None, services=None, force=True, multi=False, cache_dir=None, max_depth=3, scan_filter=None): + """See :meth:`subliminal.list_subtitles`""" + services = services or SERVICES + languages = language_set(languages) if languages is not None else language_set(LANGUAGES) + if isinstance(paths, basestring): + paths = [paths] + if any([not isinstance(p, unicode) for p in paths]): + logger.warning(u'Not all entries are unicode') + tasks = create_list_tasks(paths, languages, services, force, multi, cache_dir, max_depth, scan_filter) + for task in tasks: + self.tasks.put(task) + self.join() + results = self.collect() + return group_by_video(results) + + def download_subtitles(self, paths, languages=None, services=None, force=True, multi=False, cache_dir=None, max_depth=3, scan_filter=None, order=None): + """See :meth:`subliminal.download_subtitles`""" + services = services or SERVICES + languages = language_list(languages) if languages is not None else language_list(LANGUAGES) + if isinstance(paths, basestring): + paths = [paths] + order = order or [LANGUAGE_INDEX, SERVICE_INDEX, SERVICE_CONFIDENCE, MATCHING_CONFIDENCE] + subtitles_by_video = self.list_subtitles(paths, languages, services, force, multi, cache_dir, max_depth, scan_filter) + for video, subtitles in subtitles_by_video.iteritems(): + subtitles.sort(key=lambda s: key_subtitles(s, video, languages, services, order), reverse=True) + tasks = create_download_tasks(subtitles_by_video, languages, multi) + for task in tasks: + self.tasks.put(task) + self.join() + results = self.collect() + return group_by_video(results) diff --git a/lib/subliminal/cache.py b/lib/subliminal/cache.py new file mode 100644 index 
0000000000000000000000000000000000000000..31275e00ef151d05184bfdf195f41a0fe01ee71a --- /dev/null +++ b/lib/subliminal/cache.py @@ -0,0 +1,134 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 Nicolas Wack <wackou@gmail.com> +# +# This file is part of subliminal. +# +# subliminal is free software; you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# subliminal is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with subliminal. If not, see <http://www.gnu.org/licenses/>. +from collections import defaultdict +from functools import wraps +import logging +import os.path +import threading +try: + import cPickle as pickle +except ImportError: + import pickle + + +__all__ = ['Cache', 'cachedmethod'] +logger = logging.getLogger("subliminal") + + +class Cache(object): + """A Cache object contains cached values for methods. 
It can have + separate internal caches, one for each service + + """ + def __init__(self, cache_dir): + self.cache_dir = cache_dir + self.cache = defaultdict(dict) + self.lock = threading.RLock() + + def __del__(self): + for service_name in self.cache: + self.save(service_name) + + def cache_location(self, service_name): + return os.path.join(self.cache_dir, 'subliminal_%s.cache' % service_name) + + def load(self, service_name): + with self.lock: + if service_name in self.cache: + # already loaded + return + + self.cache[service_name] = defaultdict(dict) + filename = self.cache_location(service_name) + logger.debug(u'Cache: loading cache from %s' % filename) + try: + self.cache[service_name] = pickle.load(open(filename, 'rb')) + except IOError: + logger.info('Cache: Cache file "%s" doesn\'t exist, creating it' % filename) + except EOFError: + logger.error('Cache: cache file "%s" is corrupted... Removing it.' % filename) + os.remove(filename) + + def save(self, service_name): + filename = self.cache_location(service_name) + logger.debug(u'Cache: saving cache to %s' % filename) + with self.lock: + pickle.dump(self.cache[service_name], open(filename, 'wb')) + + def clear(self, service_name): + try: + os.remove(self.cache_location(service_name)) + except OSError: + pass + self.cache[service_name] = defaultdict(dict) + + def cached_func_key(self, func, cls=None): + try: + cls = func.im_class + except: + pass + return ('%s.%s' % (cls.__module__, cls.__name__), func.__name__) + + def function_cache(self, service_name, func): + func_key = self.cached_func_key(func) + return self.cache[service_name][func_key] + + def cache_for(self, service_name, func, args, result): + # no need to lock here, dict ops are atomic + self.function_cache(service_name, func)[args] = result + + def cached_value(self, service_name, func, args): + """Raises KeyError if not found""" + # no need to lock here, dict ops are atomic + return self.function_cache(service_name, func)[args] + + +def 
cachedmethod(function): + """Decorator to make a method use the cache. + + .. note:: + + This can NOT be used with static functions, it has to be used on + methods of some class + + """ + @wraps(function) + def cached(*args): + c = args[0].config.cache + service_name = args[0].__class__.__name__ + func_key = c.cached_func_key(function, cls=args[0].__class__) + func_cache = c.cache[service_name][func_key] + + # we need to remove the first element of args for the key, as it is the + # instance pointer and we don't want the cache to know which instance + # called it, it is shared among all instances of the same class + key = args[1:] + + if key in func_cache: + result = func_cache[key] + logger.debug(u'Using cached value for %s(%s), returns: %s' % (func_key, key, result)) + return result + + result = function(*args) + + # note: another thread could have already cached a value in the + # meantime, but that's ok as we prefer to keep the latest value in + # the cache + func_cache[key] = result + return result + return cached diff --git a/lib/subliminal/core.py b/lib/subliminal/core.py new file mode 100644 index 0000000000000000000000000000000000000000..1b8c840d12c761a9e510758c30cd422f278e8c54 --- /dev/null +++ b/lib/subliminal/core.py @@ -0,0 +1,275 @@ +# -*- coding: utf-8 -*- +# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com> +# +# This file is part of subliminal. +# +# subliminal is free software; you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# subliminal is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with subliminal. If not, see <http://www.gnu.org/licenses/>. +from .exceptions import DownloadFailedError +from .services import ServiceConfig +from .tasks import DownloadTask, ListTask +from .utils import get_keywords +from .videos import Episode, Movie, scan +from .language import Language +from collections import defaultdict +from itertools import groupby +import bs4 +import guessit +import logging + + +__all__ = ['SERVICES', 'LANGUAGE_INDEX', 'SERVICE_INDEX', 'SERVICE_CONFIDENCE', 'MATCHING_CONFIDENCE', + 'create_list_tasks', 'create_download_tasks', 'consume_task', 'matching_confidence', + 'key_subtitles', 'group_by_video'] +logger = logging.getLogger("subliminal") +SERVICES = ['opensubtitles', 'bierdopje', 'subswiki', 'subtitulos', 'thesubdb', 'addic7ed', 'tvsubtitles'] +LANGUAGE_INDEX, SERVICE_INDEX, SERVICE_CONFIDENCE, MATCHING_CONFIDENCE = range(4) + + +def create_list_tasks(paths, languages, services, force, multi, cache_dir, max_depth, scan_filter): + """Create a list of :class:`~subliminal.tasks.ListTask` from one or more paths using the given criteria + + :param paths: path(s) to video file or folder + :type paths: string or list + :param set languages: languages to search for + :param list services: services to use for the search + :param bool force: force searching for subtitles even if some are detected + :param bool multi: search multiple languages for the same video + :param string cache_dir: path to the cache directory to use + :param int max_depth: maximum depth for scanning entries + :param function scan_filter: filter function that takes a path as argument and returns a boolean indicating whether it has to be filtered out (``True``) or not (``False``) + :return: the created tasks + :rtype: list of :class:`~subliminal.tasks.ListTask` + + """ + scan_result = [] + for p in paths: + scan_result.extend(scan(p, max_depth, scan_filter)) + logger.debug(u'Found %d 
videos in %r with maximum depth %d' % (len(scan_result), paths, max_depth)) + tasks = [] + config = ServiceConfig(multi, cache_dir) + services = filter_services(services) + for video, detected_subtitles in scan_result: + detected_languages = set(s.language for s in detected_subtitles) + wanted_languages = languages.copy() + if not force and multi: + wanted_languages -= detected_languages + if not wanted_languages: + logger.debug(u'No need to list multi subtitles %r for %r because %r detected' % (languages, video, detected_languages)) + continue + if not force and not multi and Language('Undetermined') in detected_languages: + logger.debug(u'No need to list single subtitles %r for %r because one detected' % (languages, video)) + continue + logger.debug(u'Listing subtitles %r for %r with services %r' % (wanted_languages, video, services)) + for service_name in services: + mod = __import__('services.' + service_name, globals=globals(), locals=locals(), fromlist=['Service'], level=-1) + service = mod.Service + if not service.check_validity(video, wanted_languages): + continue + task = ListTask(video, wanted_languages & service.languages, service_name, config) + logger.debug(u'Created task %r' % task) + tasks.append(task) + return tasks + + +def create_download_tasks(subtitles_by_video, languages, multi): + """Create a list of :class:`~subliminal.tasks.DownloadTask` from a list results grouped by video + + :param subtitles_by_video: :class:`~subliminal.tasks.ListTask` results with ordered subtitles + :type subtitles_by_video: dict of :class:`~subliminal.videos.Video` => [:class:`~subliminal.subtitles.Subtitle`] + :param languages: languages in preferred order + :type languages: :class:`~subliminal.language.language_list` + :param bool multi: download multiple languages for the same video + :return: the created tasks + :rtype: list of :class:`~subliminal.tasks.DownloadTask` + + """ + tasks = [] + for video, subtitles in subtitles_by_video.iteritems(): + if not subtitles: 
+ continue + if not multi: + task = DownloadTask(video, list(subtitles)) + logger.debug(u'Created task %r' % task) + tasks.append(task) + continue + for _, by_language in groupby(subtitles, lambda s: languages.index(s.language)): + task = DownloadTask(video, list(by_language)) + logger.debug(u'Created task %r' % task) + tasks.append(task) + return tasks + + +def consume_task(task, services=None): + """Consume a task. If the ``services`` parameter is given, the function will attempt + to get the service from it. In case the service is not in ``services``, it will be initialized + and put in ``services`` + + :param task: task to consume + :type task: :class:`~subliminal.tasks.ListTask` or :class:`~subliminal.tasks.DownloadTask` + :param dict services: mapping between the service name and an instance of this service + :return: the result of the task + :rtype: list of :class:`~subliminal.subtitles.ResultSubtitle` + + """ + if services is None: + services = {} + logger.info(u'Consuming %r' % task) + result = None + if isinstance(task, ListTask): + service = get_service(services, task.service, config=task.config) + result = service.list(task.video, task.languages) + elif isinstance(task, DownloadTask): + for subtitle in task.subtitles: + service = get_service(services, subtitle.service) + try: + service.download(subtitle) + result = [subtitle] + break + except DownloadFailedError: + logger.warning(u'Could not download subtitle %r, trying next' % subtitle) + continue + if result is None: + logger.error(u'No subtitles could be downloaded for video %r' % task.video) + return result + + +def matching_confidence(video, subtitle): + """Compute the probability (confidence) that the subtitle matches the video + + :param video: video to match + :type video: :class:`~subliminal.videos.Video` + :param subtitle: subtitle to match + :type subtitle: :class:`~subliminal.subtitles.Subtitle` + :return: the matching probability + :rtype: float + + """ + guess = 
guessit.guess_file_info(subtitle.release, 'autodetect') + video_keywords = get_keywords(video.guess) + subtitle_keywords = get_keywords(guess) | subtitle.keywords + logger.debug(u'Video keywords %r - Subtitle keywords %r' % (video_keywords, subtitle_keywords)) + replacement = {'keywords': len(video_keywords & subtitle_keywords)} + if isinstance(video, Episode): + replacement.update({'series': 0, 'season': 0, 'episode': 0}) + matching_format = '{series:b}{season:b}{episode:b}{keywords:03b}' + best = matching_format.format(series=1, season=1, episode=1, keywords=len(video_keywords)) + if guess['type'] in ['episode', 'episodesubtitle']: + if 'series' in guess and guess['series'].lower() == video.series.lower(): + replacement['series'] = 1 + if 'season' in guess and guess['season'] == video.season: + replacement['season'] = 1 + if 'episodeNumber' in guess and guess['episodeNumber'] == video.episode: + replacement['episode'] = 1 + elif isinstance(video, Movie): + replacement.update({'title': 0, 'year': 0}) + matching_format = '{title:b}{year:b}{keywords:03b}' + best = matching_format.format(title=1, year=1, keywords=len(video_keywords)) + if guess['type'] in ['movie', 'moviesubtitle']: + if 'title' in guess and guess['title'].lower() == video.title.lower(): + replacement['title'] = 1 + if 'year' in guess and guess['year'] == video.year: + replacement['year'] = 1 + else: + logger.debug(u'Not able to compute confidence for %r' % video) + return 0.0 + logger.debug(u'Found %r' % replacement) + confidence = float(int(matching_format.format(**replacement), 2)) / float(int(best, 2)) + logger.info(u'Computed confidence %.4f for %r and %r' % (confidence, video, subtitle)) + return confidence + + +def get_service(services, service_name, config=None): + """Get a service from its name in the service dict with the specified config. + If the service does not exist in the service dict, it is created and added to the dict. 
+ + :param dict services: dict where to get existing services or put created ones + :param string service_name: name of the service to get + :param config: config to use for the service + :type config: :class:`~subliminal.services.ServiceConfig` or None + :return: the corresponding service + :rtype: :class:`~subliminal.services.ServiceBase` + + """ + if service_name not in services: + mod = __import__('services.' + service_name, globals=globals(), locals=locals(), fromlist=['Service'], level=-1) + services[service_name] = mod.Service() + services[service_name].init() + services[service_name].config = config + return services[service_name] + + +def key_subtitles(subtitle, video, languages, services, order): + """Create a key to sort subtitle using the given order + + :param subtitle: subtitle to sort + :type subtitle: :class:`~subliminal.subtitles.ResultSubtitle` + :param video: video to match + :type video: :class:`~subliminal.videos.Video` + :param list languages: languages in preferred order + :param list services: services in preferred order + :param order: preferred order for subtitles sorting + :type list: list of :data:`LANGUAGE_INDEX`, :data:`SERVICE_INDEX`, :data:`SERVICE_CONFIDENCE`, :data:`MATCHING_CONFIDENCE` + :return: a key ready to use for subtitles sorting + :rtype: int + + """ + key = '' + for sort_item in order: + if sort_item == LANGUAGE_INDEX: + key += '{0:03d}'.format(len(languages) - languages.index(subtitle.language) - 1) + key += '{0:01d}'.format(subtitle.language == languages[languages.index(subtitle.language)]) + elif sort_item == SERVICE_INDEX: + key += '{0:02d}'.format(len(services) - services.index(subtitle.service) - 1) + elif sort_item == SERVICE_CONFIDENCE: + key += '{0:04d}'.format(int(subtitle.confidence * 1000)) + elif sort_item == MATCHING_CONFIDENCE: + confidence = 0 + if subtitle.release: + confidence = matching_confidence(video, subtitle) + key += '{0:04d}'.format(int(confidence * 1000)) + return int(key) + + +def 
group_by_video(list_results): + """Group the results of :class:`ListTasks <subliminal.tasks.ListTask>` into a + dictionary of :class:`~subliminal.videos.Video` => :class:`~subliminal.subtitles.Subtitle` + + :param list_results: + :type list_results: list of result of :class:`~subliminal.tasks.ListTask` + :return: subtitles grouped by videos + :rtype: dict of :class:`~subliminal.videos.Video` => [:class:`~subliminal.subtitles.Subtitle`] + + """ + result = defaultdict(list) + for video, subtitles in list_results: + result[video] += subtitles or [] + return result + + +def filter_services(services): + """Filter out services that are not available because of a missing feature + + :param list services: service names to filter + :return: a copy of the initial list of service names without unavailable ones + :rtype: list + + """ + filtered_services = services[:] + for service_name in services: + mod = __import__('services.' + service_name, globals=globals(), locals=locals(), fromlist=['Service'], level=-1) + service = mod.Service + if service.required_features is not None and bs4.builder_registry.lookup(*service.required_features) is None: + logger.warning(u'Service %s not available: none of available features could be used. One of %r required' % (service_name, service.required_features)) + filtered_services.remove(service_name) + return filtered_services diff --git a/lib/subliminal/exceptions.py b/lib/subliminal/exceptions.py new file mode 100644 index 0000000000000000000000000000000000000000..66e3dd51c2adce7f4e6f4302311911ffe76554c4 --- /dev/null +++ b/lib/subliminal/exceptions.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com> +# +# This file is part of subliminal. 
+# +# subliminal is free software; you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# subliminal is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with subliminal. If not, see <http://www.gnu.org/licenses/>. + + +class Error(Exception): + """Base class for exceptions in subliminal""" + pass + + +class ServiceError(Error): + """"Exception raised by services""" + pass + + +class DownloadFailedError(Error): + """"Exception raised when a download task has failed in service""" + pass diff --git a/lib/subliminal/infos.py b/lib/subliminal/infos.py new file mode 100644 index 0000000000000000000000000000000000000000..5ab2084ac5f7f17bcd7578a2b93efa4a0c38bb7f --- /dev/null +++ b/lib/subliminal/infos.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com> +# +# This file is part of subliminal. +# +# subliminal is free software; you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# subliminal is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with subliminal. If not, see <http://www.gnu.org/licenses/>. 
+__version__ = '0.6.3' diff --git a/lib/subliminal/language.py b/lib/subliminal/language.py new file mode 100644 index 0000000000000000000000000000000000000000..c89e7abc0b00eddaf0211454a1cd62789ed6e56d --- /dev/null +++ b/lib/subliminal/language.py @@ -0,0 +1,1047 @@ +# -*- coding: utf-8 -*- +# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com> +# +# This file is part of subliminal. +# +# subliminal is free software; you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# subliminal is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with subliminal. If not, see <http://www.gnu.org/licenses/>. 
+from .utils import to_unicode +import re +import logging + + +logger = logging.getLogger("subliminal") + + +COUNTRIES = [('AF', 'AFG', '004', u'Afghanistan'), + ('AX', 'ALA', '248', u'Ã…land Islands'), + ('AL', 'ALB', '008', u'Albania'), + ('DZ', 'DZA', '012', u'Algeria'), + ('AS', 'ASM', '016', u'American Samoa'), + ('AD', 'AND', '020', u'Andorra'), + ('AO', 'AGO', '024', u'Angola'), + ('AI', 'AIA', '660', u'Anguilla'), + ('AQ', 'ATA', '010', u'Antarctica'), + ('AG', 'ATG', '028', u'Antigua and Barbuda'), + ('AR', 'ARG', '032', u'Argentina'), + ('AM', 'ARM', '051', u'Armenia'), + ('AW', 'ABW', '533', u'Aruba'), + ('AU', 'AUS', '036', u'Australia'), + ('AT', 'AUT', '040', u'Austria'), + ('AZ', 'AZE', '031', u'Azerbaijan'), + ('BS', 'BHS', '044', u'Bahamas'), + ('BH', 'BHR', '048', u'Bahrain'), + ('BD', 'BGD', '050', u'Bangladesh'), + ('BB', 'BRB', '052', u'Barbados'), + ('BY', 'BLR', '112', u'Belarus'), + ('BE', 'BEL', '056', u'Belgium'), + ('BZ', 'BLZ', '084', u'Belize'), + ('BJ', 'BEN', '204', u'Benin'), + ('BM', 'BMU', '060', u'Bermuda'), + ('BT', 'BTN', '064', u'Bhutan'), + ('BO', 'BOL', '068', u'Bolivia, Plurinational State of'), + ('BQ', 'BES', '535', u'Bonaire, Sint Eustatius and Saba'), + ('BA', 'BIH', '070', u'Bosnia and Herzegovina'), + ('BW', 'BWA', '072', u'Botswana'), + ('BV', 'BVT', '074', u'Bouvet Island'), + ('BR', 'BRA', '076', u'Brazil'), + ('IO', 'IOT', '086', u'British Indian Ocean Territory'), + ('BN', 'BRN', '096', u'Brunei Darussalam'), + ('BG', 'BGR', '100', u'Bulgaria'), + ('BF', 'BFA', '854', u'Burkina Faso'), + ('BI', 'BDI', '108', u'Burundi'), + ('KH', 'KHM', '116', u'Cambodia'), + ('CM', 'CMR', '120', u'Cameroon'), + ('CA', 'CAN', '124', u'Canada'), + ('CV', 'CPV', '132', u'Cape Verde'), + ('KY', 'CYM', '136', u'Cayman Islands'), + ('CF', 'CAF', '140', u'Central African Republic'), + ('TD', 'TCD', '148', u'Chad'), + ('CL', 'CHL', '152', u'Chile'), + ('CN', 'CHN', '156', u'China'), + ('CX', 'CXR', '162', u'Christmas Island'), + ('CC', 
'CCK', '166', u'Cocos (Keeling) Islands'), + ('CO', 'COL', '170', u'Colombia'), + ('KM', 'COM', '174', u'Comoros'), + ('CG', 'COG', '178', u'Congo'), + ('CD', 'COD', '180', u'Congo, The Democratic Republic of the'), + ('CK', 'COK', '184', u'Cook Islands'), + ('CR', 'CRI', '188', u'Costa Rica'), + ('CI', 'CIV', '384', u'Côte d\'Ivoire'), + ('HR', 'HRV', '191', u'Croatia'), + ('CU', 'CUB', '192', u'Cuba'), + ('CW', 'CUW', '531', u'Curaçao'), + ('CY', 'CYP', '196', u'Cyprus'), + ('CZ', 'CZE', '203', u'Czech Republic'), + ('DK', 'DNK', '208', u'Denmark'), + ('DJ', 'DJI', '262', u'Djibouti'), + ('DM', 'DMA', '212', u'Dominica'), + ('DO', 'DOM', '214', u'Dominican Republic'), + ('EC', 'ECU', '218', u'Ecuador'), + ('EG', 'EGY', '818', u'Egypt'), + ('SV', 'SLV', '222', u'El Salvador'), + ('GQ', 'GNQ', '226', u'Equatorial Guinea'), + ('ER', 'ERI', '232', u'Eritrea'), + ('EE', 'EST', '233', u'Estonia'), + ('ET', 'ETH', '231', u'Ethiopia'), + ('FK', 'FLK', '238', u'Falkland Islands (Malvinas)'), + ('FO', 'FRO', '234', u'Faroe Islands'), + ('FJ', 'FJI', '242', u'Fiji'), + ('FI', 'FIN', '246', u'Finland'), + ('FR', 'FRA', '250', u'France'), + ('GF', 'GUF', '254', u'French Guiana'), + ('PF', 'PYF', '258', u'French Polynesia'), + ('TF', 'ATF', '260', u'French Southern Territories'), + ('GA', 'GAB', '266', u'Gabon'), + ('GM', 'GMB', '270', u'Gambia'), + ('GE', 'GEO', '268', u'Georgia'), + ('DE', 'DEU', '276', u'Germany'), + ('GH', 'GHA', '288', u'Ghana'), + ('GI', 'GIB', '292', u'Gibraltar'), + ('GR', 'GRC', '300', u'Greece'), + ('GL', 'GRL', '304', u'Greenland'), + ('GD', 'GRD', '308', u'Grenada'), + ('GP', 'GLP', '312', u'Guadeloupe'), + ('GU', 'GUM', '316', u'Guam'), + ('GT', 'GTM', '320', u'Guatemala'), + ('GG', 'GGY', '831', u'Guernsey'), + ('GN', 'GIN', '324', u'Guinea'), + ('GW', 'GNB', '624', u'Guinea-Bissau'), + ('GY', 'GUY', '328', u'Guyana'), + ('HT', 'HTI', '332', u'Haiti'), + ('HM', 'HMD', '334', u'Heard Island and McDonald Islands'), + ('VA', 'VAT', '336', u'Holy See 
(Vatican City State)'), + ('HN', 'HND', '340', u'Honduras'), + ('HK', 'HKG', '344', u'Hong Kong'), + ('HU', 'HUN', '348', u'Hungary'), + ('IS', 'ISL', '352', u'Iceland'), + ('IN', 'IND', '356', u'India'), + ('ID', 'IDN', '360', u'Indonesia'), + ('IR', 'IRN', '364', u'Iran, Islamic Republic of'), + ('IQ', 'IRQ', '368', u'Iraq'), + ('IE', 'IRL', '372', u'Ireland'), + ('IM', 'IMN', '833', u'Isle of Man'), + ('IL', 'ISR', '376', u'Israel'), + ('IT', 'ITA', '380', u'Italy'), + ('JM', 'JAM', '388', u'Jamaica'), + ('JP', 'JPN', '392', u'Japan'), + ('JE', 'JEY', '832', u'Jersey'), + ('JO', 'JOR', '400', u'Jordan'), + ('KZ', 'KAZ', '398', u'Kazakhstan'), + ('KE', 'KEN', '404', u'Kenya'), + ('KI', 'KIR', '296', u'Kiribati'), + ('KP', 'PRK', '408', u'Korea, Democratic People\'s Republic of'), + ('KR', 'KOR', '410', u'Korea, Republic of'), + ('KW', 'KWT', '414', u'Kuwait'), + ('KG', 'KGZ', '417', u'Kyrgyzstan'), + ('LA', 'LAO', '418', u'Lao People\'s Democratic Republic'), + ('LV', 'LVA', '428', u'Latvia'), + ('LB', 'LBN', '422', u'Lebanon'), + ('LS', 'LSO', '426', u'Lesotho'), + ('LR', 'LBR', '430', u'Liberia'), + ('LY', 'LBY', '434', u'Libya'), + ('LI', 'LIE', '438', u'Liechtenstein'), + ('LT', 'LTU', '440', u'Lithuania'), + ('LU', 'LUX', '442', u'Luxembourg'), + ('MO', 'MAC', '446', u'Macao'), + ('MK', 'MKD', '807', u'Macedonia, Republic of'), + ('MG', 'MDG', '450', u'Madagascar'), + ('MW', 'MWI', '454', u'Malawi'), + ('MY', 'MYS', '458', u'Malaysia'), + ('MV', 'MDV', '462', u'Maldives'), + ('ML', 'MLI', '466', u'Mali'), + ('MT', 'MLT', '470', u'Malta'), + ('MH', 'MHL', '584', u'Marshall Islands'), + ('MQ', 'MTQ', '474', u'Martinique'), + ('MR', 'MRT', '478', u'Mauritania'), + ('MU', 'MUS', '480', u'Mauritius'), + ('YT', 'MYT', '175', u'Mayotte'), + ('MX', 'MEX', '484', u'Mexico'), + ('FM', 'FSM', '583', u'Micronesia, Federated States of'), + ('MD', 'MDA', '498', u'Moldova, Republic of'), + ('MC', 'MCO', '492', u'Monaco'), + ('MN', 'MNG', '496', u'Mongolia'), + ('ME', 
'MNE', '499', u'Montenegro'), + ('MS', 'MSR', '500', u'Montserrat'), + ('MA', 'MAR', '504', u'Morocco'), + ('MZ', 'MOZ', '508', u'Mozambique'), + ('MM', 'MMR', '104', u'Myanmar'), + ('NA', 'NAM', '516', u'Namibia'), + ('NR', 'NRU', '520', u'Nauru'), + ('NP', 'NPL', '524', u'Nepal'), + ('NL', 'NLD', '528', u'Netherlands'), + ('NC', 'NCL', '540', u'New Caledonia'), + ('NZ', 'NZL', '554', u'New Zealand'), + ('NI', 'NIC', '558', u'Nicaragua'), + ('NE', 'NER', '562', u'Niger'), + ('NG', 'NGA', '566', u'Nigeria'), + ('NU', 'NIU', '570', u'Niue'), + ('NF', 'NFK', '574', u'Norfolk Island'), + ('MP', 'MNP', '580', u'Northern Mariana Islands'), + ('NO', 'NOR', '578', u'Norway'), + ('OM', 'OMN', '512', u'Oman'), + ('PK', 'PAK', '586', u'Pakistan'), + ('PW', 'PLW', '585', u'Palau'), + ('PS', 'PSE', '275', u'Palestinian Territory, Occupied'), + ('PA', 'PAN', '591', u'Panama'), + ('PG', 'PNG', '598', u'Papua New Guinea'), + ('PY', 'PRY', '600', u'Paraguay'), + ('PE', 'PER', '604', u'Peru'), + ('PH', 'PHL', '608', u'Philippines'), + ('PN', 'PCN', '612', u'Pitcairn'), + ('PL', 'POL', '616', u'Poland'), + ('PT', 'PRT', '620', u'Portugal'), + ('PR', 'PRI', '630', u'Puerto Rico'), + ('QA', 'QAT', '634', u'Qatar'), + ('RE', 'REU', '638', u'Réunion'), + ('RO', 'ROU', '642', u'Romania'), + ('RU', 'RUS', '643', u'Russian Federation'), + ('RW', 'RWA', '646', u'Rwanda'), + ('BL', 'BLM', '652', u'Saint Barthélemy'), + ('SH', 'SHN', '654', u'Saint Helena, Ascension and Tristan da Cunha'), + ('KN', 'KNA', '659', u'Saint Kitts and Nevis'), + ('LC', 'LCA', '662', u'Saint Lucia'), + ('MF', 'MAF', '663', u'Saint Martin (French part)'), + ('PM', 'SPM', '666', u'Saint Pierre and Miquelon'), + ('VC', 'VCT', '670', u'Saint Vincent and the Grenadines'), + ('WS', 'WSM', '882', u'Samoa'), + ('SM', 'SMR', '674', u'San Marino'), + ('ST', 'STP', '678', u'Sao Tome and Principe'), + ('SA', 'SAU', '682', u'Saudi Arabia'), + ('SN', 'SEN', '686', u'Senegal'), + ('RS', 'SRB', '688', u'Serbia'), + ('SC', 'SYC', 
'690', u'Seychelles'), + ('SL', 'SLE', '694', u'Sierra Leone'), + ('SG', 'SGP', '702', u'Singapore'), + ('SX', 'SXM', '534', u'Sint Maarten (Dutch part)'), + ('SK', 'SVK', '703', u'Slovakia'), + ('SI', 'SVN', '705', u'Slovenia'), + ('SB', 'SLB', '090', u'Solomon Islands'), + ('SO', 'SOM', '706', u'Somalia'), + ('ZA', 'ZAF', '710', u'South Africa'), + ('GS', 'SGS', '239', u'South Georgia and the South Sandwich Islands'), + ('ES', 'ESP', '724', u'Spain'), + ('LK', 'LKA', '144', u'Sri Lanka'), + ('SD', 'SDN', '729', u'Sudan'), + ('SR', 'SUR', '740', u'Suriname'), + ('SS', 'SSD', '728', u'South Sudan'), + ('SJ', 'SJM', '744', u'Svalbard and Jan Mayen'), + ('SZ', 'SWZ', '748', u'Swaziland'), + ('SE', 'SWE', '752', u'Sweden'), + ('CH', 'CHE', '756', u'Switzerland'), + ('SY', 'SYR', '760', u'Syrian Arab Republic'), + ('TW', 'TWN', '158', u'Taiwan, Province of China'), + ('TJ', 'TJK', '762', u'Tajikistan'), + ('TZ', 'TZA', '834', u'Tanzania, United Republic of'), + ('TH', 'THA', '764', u'Thailand'), + ('TL', 'TLS', '626', u'Timor-Leste'), + ('TG', 'TGO', '768', u'Togo'), + ('TK', 'TKL', '772', u'Tokelau'), + ('TO', 'TON', '776', u'Tonga'), + ('TT', 'TTO', '780', u'Trinidad and Tobago'), + ('TN', 'TUN', '788', u'Tunisia'), + ('TR', 'TUR', '792', u'Turkey'), + ('TM', 'TKM', '795', u'Turkmenistan'), + ('TC', 'TCA', '796', u'Turks and Caicos Islands'), + ('TV', 'TUV', '798', u'Tuvalu'), + ('UG', 'UGA', '800', u'Uganda'), + ('UA', 'UKR', '804', u'Ukraine'), + ('AE', 'ARE', '784', u'United Arab Emirates'), + ('GB', 'GBR', '826', u'United Kingdom'), + ('US', 'USA', '840', u'United States'), + ('UM', 'UMI', '581', u'United States Minor Outlying Islands'), + ('UY', 'URY', '858', u'Uruguay'), + ('UZ', 'UZB', '860', u'Uzbekistan'), + ('VU', 'VUT', '548', u'Vanuatu'), + ('VE', 'VEN', '862', u'Venezuela, Bolivarian Republic of'), + ('VN', 'VNM', '704', u'Viet Nam'), + ('VG', 'VGB', '092', u'Virgin Islands, British'), + ('VI', 'VIR', '850', u'Virgin Islands, U.S.'), + ('WF', 'WLF', 
'876', u'Wallis and Futuna'), + ('EH', 'ESH', '732', u'Western Sahara'), + ('YE', 'YEM', '887', u'Yemen'), + ('ZM', 'ZMB', '894', u'Zambia'), + ('ZW', 'ZWE', '716', u'Zimbabwe')] + + +LANGUAGES = [('aar', '', 'aa', u'Afar', u'afar'), + ('abk', '', 'ab', u'Abkhazian', u'abkhaze'), + ('ace', '', '', u'Achinese', u'aceh'), + ('ach', '', '', u'Acoli', u'acoli'), + ('ada', '', '', u'Adangme', u'adangme'), + ('ady', '', '', u'Adyghe; Adygei', u'adyghé'), + ('afa', '', '', u'Afro-Asiatic languages', u'afro-asiatiques, langues'), + ('afh', '', '', u'Afrihili', u'afrihili'), + ('afr', '', 'af', u'Afrikaans', u'afrikaans'), + ('ain', '', '', u'Ainu', u'aïnou'), + ('aka', '', 'ak', u'Akan', u'akan'), + ('akk', '', '', u'Akkadian', u'akkadien'), + ('alb', 'sqi', 'sq', u'Albanian', u'albanais'), + ('ale', '', '', u'Aleut', u'aléoute'), + ('alg', '', '', u'Algonquian languages', u'algonquines, langues'), + ('alt', '', '', u'Southern Altai', u'altai du Sud'), + ('amh', '', 'am', u'Amharic', u'amharique'), + ('ang', '', '', u'English, Old (ca.450-1100)', u'anglo-saxon (ca.450-1100)'), + ('anp', '', '', u'Angika', u'angika'), + ('apa', '', '', u'Apache languages', u'apaches, langues'), + ('ara', '', 'ar', u'Arabic', u'arabe'), + ('arc', '', '', u'Official Aramaic (700-300 BCE); Imperial Aramaic (700-300 BCE)', u'araméen d\'empire (700-300 BCE)'), + ('arg', '', 'an', u'Aragonese', u'aragonais'), + ('arm', 'hye', 'hy', u'Armenian', u'arménien'), + ('arn', '', '', u'Mapudungun; Mapuche', u'mapudungun; mapuche; mapuce'), + ('arp', '', '', u'Arapaho', u'arapaho'), + ('art', '', '', u'Artificial languages', u'artificielles, langues'), + ('arw', '', '', u'Arawak', u'arawak'), + ('asm', '', 'as', u'Assamese', u'assamais'), + ('ast', '', '', u'Asturian; Bable; Leonese; Asturleonese', u'asturien; bable; léonais; asturoléonais'), + ('ath', '', '', u'Athapascan languages', u'athapascanes, langues'), + ('aus', '', '', u'Australian languages', u'australiennes, langues'), + ('ava', '', 'av', 
u'Avaric', u'avar'), + ('ave', '', 'ae', u'Avestan', u'avestique'), + ('awa', '', '', u'Awadhi', u'awadhi'), + ('aym', '', 'ay', u'Aymara', u'aymara'), + ('aze', '', 'az', u'Azerbaijani', u'azéri'), + ('bad', '', '', u'Banda languages', u'banda, langues'), + ('bai', '', '', u'Bamileke languages', u'bamiléké, langues'), + ('bak', '', 'ba', u'Bashkir', u'bachkir'), + ('bal', '', '', u'Baluchi', u'baloutchi'), + ('bam', '', 'bm', u'Bambara', u'bambara'), + ('ban', '', '', u'Balinese', u'balinais'), + ('baq', 'eus', 'eu', u'Basque', u'basque'), + ('bas', '', '', u'Basa', u'basa'), + ('bat', '', '', u'Baltic languages', u'baltes, langues'), + ('bej', '', '', u'Beja; Bedawiyet', u'bedja'), + ('bel', '', 'be', u'Belarusian', u'biélorusse'), + ('bem', '', '', u'Bemba', u'bemba'), + ('ben', '', 'bn', u'Bengali', u'bengali'), + ('ber', '', '', u'Berber languages', u'berbères, langues'), + ('bho', '', '', u'Bhojpuri', u'bhojpuri'), + ('bih', '', 'bh', u'Bihari languages', u'langues biharis'), + ('bik', '', '', u'Bikol', u'bikol'), + ('bin', '', '', u'Bini; Edo', u'bini; edo'), + ('bis', '', 'bi', u'Bislama', u'bichlamar'), + ('bla', '', '', u'Siksika', u'blackfoot'), + ('bnt', '', '', u'Bantu (Other)', u'bantoues, autres langues'), + ('bos', '', 'bs', u'Bosnian', u'bosniaque'), + ('bra', '', '', u'Braj', u'braj'), + ('bre', '', 'br', u'Breton', u'breton'), + ('btk', '', '', u'Batak languages', u'batak, langues'), + ('bua', '', '', u'Buriat', u'bouriate'), + ('bug', '', '', u'Buginese', u'bugi'), + ('bul', '', 'bg', u'Bulgarian', u'bulgare'), + ('bur', 'mya', 'my', u'Burmese', u'birman'), + ('byn', '', '', u'Blin; Bilin', u'blin; bilen'), + ('cad', '', '', u'Caddo', u'caddo'), + ('cai', '', '', u'Central American Indian languages', u'amérindiennes de L\'Amérique centrale, langues'), + ('car', '', '', u'Galibi Carib', u'karib; galibi; carib'), + ('cat', '', 'ca', u'Catalan; Valencian', u'catalan; valencien'), + ('cau', '', '', u'Caucasian languages', u'caucasiennes, langues'), 
+ ('ceb', '', '', u'Cebuano', u'cebuano'), + ('cel', '', '', u'Celtic languages', u'celtiques, langues; celtes, langues'), + ('cha', '', 'ch', u'Chamorro', u'chamorro'), + ('chb', '', '', u'Chibcha', u'chibcha'), + ('che', '', 'ce', u'Chechen', u'tchétchène'), + ('chg', '', '', u'Chagatai', u'djaghataï'), + ('chi', 'zho', 'zh', u'Chinese', u'chinois'), + ('chk', '', '', u'Chuukese', u'chuuk'), + ('chm', '', '', u'Mari', u'mari'), + ('chn', '', '', u'Chinook jargon', u'chinook, jargon'), + ('cho', '', '', u'Choctaw', u'choctaw'), + ('chp', '', '', u'Chipewyan; Dene Suline', u'chipewyan'), + ('chr', '', '', u'Cherokee', u'cherokee'), + ('chu', '', 'cu', u'Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic', u'slavon d\'église; vieux slave; slavon liturgique; vieux bulgare'), + ('chv', '', 'cv', u'Chuvash', u'tchouvache'), + ('chy', '', '', u'Cheyenne', u'cheyenne'), + ('cmc', '', '', u'Chamic languages', u'chames, langues'), + ('cop', '', '', u'Coptic', u'copte'), + ('cor', '', 'kw', u'Cornish', u'cornique'), + ('cos', '', 'co', u'Corsican', u'corse'), + ('cpe', '', '', u'Creoles and pidgins, English based', u'créoles et pidgins basés sur l\'anglais'), + ('cpf', '', '', u'Creoles and pidgins, French-based ', u'créoles et pidgins basés sur le français'), + ('cpp', '', '', u'Creoles and pidgins, Portuguese-based ', u'créoles et pidgins basés sur le portugais'), + ('cre', '', 'cr', u'Cree', u'cree'), + ('crh', '', '', u'Crimean Tatar; Crimean Turkish', u'tatar de Crimé'), + ('crp', '', '', u'Creoles and pidgins ', u'créoles et pidgins'), + ('csb', '', '', u'Kashubian', u'kachoube'), + ('cus', '', '', u'Cushitic languages', u'couchitiques, langues'), + ('cze', 'ces', 'cs', u'Czech', u'tchèque'), + ('dak', '', '', u'Dakota', u'dakota'), + ('dan', '', 'da', u'Danish', u'danois'), + ('dar', '', '', u'Dargwa', u'dargwa'), + ('day', '', '', u'Land Dayak languages', u'dayak, langues'), + ('del', '', '', u'Delaware', u'delaware'), + ('den', '', '', 
u'Slave (Athapascan)', u'esclave (athapascan)'), + ('dgr', '', '', u'Dogrib', u'dogrib'), + ('din', '', '', u'Dinka', u'dinka'), + ('div', '', 'dv', u'Divehi; Dhivehi; Maldivian', u'maldivien'), + ('doi', '', '', u'Dogri', u'dogri'), + ('dra', '', '', u'Dravidian languages', u'dravidiennes, langues'), + ('dsb', '', '', u'Lower Sorbian', u'bas-sorabe'), + ('dua', '', '', u'Duala', u'douala'), + ('dum', '', '', u'Dutch, Middle (ca.1050-1350)', u'néerlandais moyen (ca. 1050-1350)'), + ('dut', 'nld', 'nl', u'Dutch; Flemish', u'néerlandais; flamand'), + ('dyu', '', '', u'Dyula', u'dioula'), + ('dzo', '', 'dz', u'Dzongkha', u'dzongkha'), + ('efi', '', '', u'Efik', u'efik'), + ('egy', '', '', u'Egyptian (Ancient)', u'égyptien'), + ('eka', '', '', u'Ekajuk', u'ekajuk'), + ('elx', '', '', u'Elamite', u'élamite'), + ('eng', '', 'en', u'English', u'anglais'), + ('enm', '', '', u'English, Middle (1100-1500)', u'anglais moyen (1100-1500)'), + ('epo', '', 'eo', u'Esperanto', u'espéranto'), + ('est', '', 'et', u'Estonian', u'estonien'), + ('ewe', '', 'ee', u'Ewe', u'éwé'), + ('ewo', '', '', u'Ewondo', u'éwondo'), + ('fan', '', '', u'Fang', u'fang'), + ('fao', '', 'fo', u'Faroese', u'féroïen'), + ('fat', '', '', u'Fanti', u'fanti'), + ('fij', '', 'fj', u'Fijian', u'fidjien'), + ('fil', '', '', u'Filipino; Pilipino', u'filipino; pilipino'), + ('fin', '', 'fi', u'Finnish', u'finnois'), + ('fiu', '', '', u'Finno-Ugrian languages', u'finno-ougriennes, langues'), + ('fon', '', '', u'Fon', u'fon'), + ('fre', 'fra', 'fr', u'French', u'français'), + ('frm', '', '', u'French, Middle (ca.1400-1600)', u'français moyen (1400-1600)'), + ('fro', '', '', u'French, Old (842-ca.1400)', u'français ancien (842-ca.1400)'), + ('frr', '', '', u'Northern Frisian', u'frison septentrional'), + ('frs', '', '', u'Eastern Frisian', u'frison oriental'), + ('fry', '', 'fy', u'Western Frisian', u'frison occidental'), + ('ful', '', 'ff', u'Fulah', u'peul'), + ('fur', '', '', u'Friulian', u'frioulan'), + ('gaa', 
'', '', u'Ga', u'ga'), + ('gay', '', '', u'Gayo', u'gayo'), + ('gba', '', '', u'Gbaya', u'gbaya'), + ('gem', '', '', u'Germanic languages', u'germaniques, langues'), + ('geo', 'kat', 'ka', u'Georgian', u'géorgien'), + ('ger', 'deu', 'de', u'German', u'allemand'), + ('gez', '', '', u'Geez', u'guèze'), + ('gil', '', '', u'Gilbertese', u'kiribati'), + ('gla', '', 'gd', u'Gaelic; Scottish Gaelic', u'gaélique; gaélique écossais'), + ('gle', '', 'ga', u'Irish', u'irlandais'), + ('glg', '', 'gl', u'Galician', u'galicien'), + ('glv', '', 'gv', u'Manx', u'manx; mannois'), + ('gmh', '', '', u'German, Middle High (ca.1050-1500)', u'allemand, moyen haut (ca. 1050-1500)'), + ('goh', '', '', u'German, Old High (ca.750-1050)', u'allemand, vieux haut (ca. 750-1050)'), + ('gon', '', '', u'Gondi', u'gond'), + ('gor', '', '', u'Gorontalo', u'gorontalo'), + ('got', '', '', u'Gothic', u'gothique'), + ('grb', '', '', u'Grebo', u'grebo'), + ('grc', '', '', u'Greek, Ancient (to 1453)', u'grec ancien (jusqu\'à 1453)'), + ('gre', 'ell', 'el', u'Greek, Modern (1453-)', u'grec moderne (après 1453)'), + ('grn', '', 'gn', u'Guarani', u'guarani'), + ('gsw', '', '', u'Swiss German; Alemannic; Alsatian', u'suisse alémanique; alémanique; alsacien'), + ('guj', '', 'gu', u'Gujarati', u'goudjrati'), + ('gwi', '', '', u'Gwich\'in', u'gwich\'in'), + ('hai', '', '', u'Haida', u'haida'), + ('hat', '', 'ht', u'Haitian; Haitian Creole', u'haïtien; créole haïtien'), + ('hau', '', 'ha', u'Hausa', u'haoussa'), + ('haw', '', '', u'Hawaiian', u'hawaïen'), + ('heb', '', 'he', u'Hebrew', u'hébreu'), + ('her', '', 'hz', u'Herero', u'herero'), + ('hil', '', '', u'Hiligaynon', u'hiligaynon'), + ('him', '', '', u'Himachali languages; Western Pahari languages', u'langues himachalis; langues paharis occidentales'), + ('hin', '', 'hi', u'Hindi', u'hindi'), + ('hit', '', '', u'Hittite', u'hittite'), + ('hmn', '', '', u'Hmong; Mong', u'hmong'), + ('hmo', '', 'ho', u'Hiri Motu', u'hiri motu'), + ('hrv', '', 'hr', 
u'Croatian', u'croate'), + ('hsb', '', '', u'Upper Sorbian', u'haut-sorabe'), + ('hun', '', 'hu', u'Hungarian', u'hongrois'), + ('hup', '', '', u'Hupa', u'hupa'), + ('iba', '', '', u'Iban', u'iban'), + ('ibo', '', 'ig', u'Igbo', u'igbo'), + ('ice', 'isl', 'is', u'Icelandic', u'islandais'), + ('ido', '', 'io', u'Ido', u'ido'), + ('iii', '', 'ii', u'Sichuan Yi; Nuosu', u'yi de Sichuan'), + ('ijo', '', '', u'Ijo languages', u'ijo, langues'), + ('iku', '', 'iu', u'Inuktitut', u'inuktitut'), + ('ile', '', 'ie', u'Interlingue; Occidental', u'interlingue'), + ('ilo', '', '', u'Iloko', u'ilocano'), + ('ina', '', 'ia', u'Interlingua (International Auxiliary Language Association)', u'interlingua (langue auxiliaire internationale)'), + ('inc', '', '', u'Indic languages', u'indo-aryennes, langues'), + ('ind', '', 'id', u'Indonesian', u'indonésien'), + ('ine', '', '', u'Indo-European languages', u'indo-européennes, langues'), + ('inh', '', '', u'Ingush', u'ingouche'), + ('ipk', '', 'ik', u'Inupiaq', u'inupiaq'), + ('ira', '', '', u'Iranian languages', u'iraniennes, langues'), + ('iro', '', '', u'Iroquoian languages', u'iroquoises, langues'), + ('ita', '', 'it', u'Italian', u'italien'), + ('jav', '', 'jv', u'Javanese', u'javanais'), + ('jbo', '', '', u'Lojban', u'lojban'), + ('jpn', '', 'ja', u'Japanese', u'japonais'), + ('jpr', '', '', u'Judeo-Persian', u'judéo-persan'), + ('jrb', '', '', u'Judeo-Arabic', u'judéo-arabe'), + ('kaa', '', '', u'Kara-Kalpak', u'karakalpak'), + ('kab', '', '', u'Kabyle', u'kabyle'), + ('kac', '', '', u'Kachin; Jingpho', u'kachin; jingpho'), + ('kal', '', 'kl', u'Kalaallisut; Greenlandic', u'groenlandais'), + ('kam', '', '', u'Kamba', u'kamba'), + ('kan', '', 'kn', u'Kannada', u'kannada'), + ('kar', '', '', u'Karen languages', u'karen, langues'), + ('kas', '', 'ks', u'Kashmiri', u'kashmiri'), + ('kau', '', 'kr', u'Kanuri', u'kanouri'), + ('kaw', '', '', u'Kawi', u'kawi'), + ('kaz', '', 'kk', u'Kazakh', u'kazakh'), + ('kbd', '', '', u'Kabardian', 
u'kabardien'), + ('kha', '', '', u'Khasi', u'khasi'), + ('khi', '', '', u'Khoisan languages', u'khoïsan, langues'), + ('khm', '', 'km', u'Central Khmer', u'khmer central'), + ('kho', '', '', u'Khotanese; Sakan', u'khotanais; sakan'), + ('kik', '', 'ki', u'Kikuyu; Gikuyu', u'kikuyu'), + ('kin', '', 'rw', u'Kinyarwanda', u'rwanda'), + ('kir', '', 'ky', u'Kirghiz; Kyrgyz', u'kirghiz'), + ('kmb', '', '', u'Kimbundu', u'kimbundu'), + ('kok', '', '', u'Konkani', u'konkani'), + ('kom', '', 'kv', u'Komi', u'kom'), + ('kon', '', 'kg', u'Kongo', u'kongo'), + ('kor', '', 'ko', u'Korean', u'coréen'), + ('kos', '', '', u'Kosraean', u'kosrae'), + ('kpe', '', '', u'Kpelle', u'kpellé'), + ('krc', '', '', u'Karachay-Balkar', u'karatchai balkar'), + ('krl', '', '', u'Karelian', u'carélien'), + ('kro', '', '', u'Kru languages', u'krou, langues'), + ('kru', '', '', u'Kurukh', u'kurukh'), + ('kua', '', 'kj', u'Kuanyama; Kwanyama', u'kuanyama; kwanyama'), + ('kum', '', '', u'Kumyk', u'koumyk'), + ('kur', '', 'ku', u'Kurdish', u'kurde'), + ('kut', '', '', u'Kutenai', u'kutenai'), + ('lad', '', '', u'Ladino', u'judéo-espagnol'), + ('lah', '', '', u'Lahnda', u'lahnda'), + ('lam', '', '', u'Lamba', u'lamba'), + ('lao', '', 'lo', u'Lao', u'lao'), + ('lat', '', 'la', u'Latin', u'latin'), + ('lav', '', 'lv', u'Latvian', u'letton'), + ('lez', '', '', u'Lezghian', u'lezghien'), + ('lim', '', 'li', u'Limburgan; Limburger; Limburgish', u'limbourgeois'), + ('lin', '', 'ln', u'Lingala', u'lingala'), + ('lit', '', 'lt', u'Lithuanian', u'lituanien'), + ('lol', '', '', u'Mongo', u'mongo'), + ('loz', '', '', u'Lozi', u'lozi'), + ('ltz', '', 'lb', u'Luxembourgish; Letzeburgesch', u'luxembourgeois'), + ('lua', '', '', u'Luba-Lulua', u'luba-lulua'), + ('lub', '', 'lu', u'Luba-Katanga', u'luba-katanga'), + ('lug', '', 'lg', u'Ganda', u'ganda'), + ('lui', '', '', u'Luiseno', u'luiseno'), + ('lun', '', '', u'Lunda', u'lunda'), + ('luo', '', '', u'Luo (Kenya and Tanzania)', u'luo (Kenya et Tanzanie)'), + 
('lus', '', '', u'Lushai', u'lushai'), + ('mac', 'mkd', 'mk', u'Macedonian', u'macédonien'), + ('mad', '', '', u'Madurese', u'madourais'), + ('mag', '', '', u'Magahi', u'magahi'), + ('mah', '', 'mh', u'Marshallese', u'marshall'), + ('mai', '', '', u'Maithili', u'maithili'), + ('mak', '', '', u'Makasar', u'makassar'), + ('mal', '', 'ml', u'Malayalam', u'malayalam'), + ('man', '', '', u'Mandingo', u'mandingue'), + ('mao', 'mri', 'mi', u'Maori', u'maori'), + ('map', '', '', u'Austronesian languages', u'austronésiennes, langues'), + ('mar', '', 'mr', u'Marathi', u'marathe'), + ('mas', '', '', u'Masai', u'massaï'), + ('may', 'msa', 'ms', u'Malay', u'malais'), + ('mdf', '', '', u'Moksha', u'moksa'), + ('mdr', '', '', u'Mandar', u'mandar'), + ('men', '', '', u'Mende', u'mendé'), + ('mga', '', '', u'Irish, Middle (900-1200)', u'irlandais moyen (900-1200)'), + ('mic', '', '', u'Mi\'kmaq; Micmac', u'mi\'kmaq; micmac'), + ('min', '', '', u'Minangkabau', u'minangkabau'), + ('mkh', '', '', u'Mon-Khmer languages', u'môn-khmer, langues'), + ('mlg', '', 'mg', u'Malagasy', u'malgache'), + ('mlt', '', 'mt', u'Maltese', u'maltais'), + ('mnc', '', '', u'Manchu', u'mandchou'), + ('mni', '', '', u'Manipuri', u'manipuri'), + ('mno', '', '', u'Manobo languages', u'manobo, langues'), + ('moh', '', '', u'Mohawk', u'mohawk'), + ('mon', '', 'mn', u'Mongolian', u'mongol'), + ('mos', '', '', u'Mossi', u'moré'), + ('mun', '', '', u'Munda languages', u'mounda, langues'), + ('mus', '', '', u'Creek', u'muskogee'), + ('mwl', '', '', u'Mirandese', u'mirandais'), + ('mwr', '', '', u'Marwari', u'marvari'), + ('myn', '', '', u'Mayan languages', u'maya, langues'), + ('myv', '', '', u'Erzya', u'erza'), + ('nah', '', '', u'Nahuatl languages', u'nahuatl, langues'), + ('nai', '', '', u'North American Indian languages', u'nord-amérindiennes, langues'), + ('nap', '', '', u'Neapolitan', u'napolitain'), + ('nau', '', 'na', u'Nauru', u'nauruan'), + ('nav', '', 'nv', u'Navajo; Navaho', u'navaho'), + ('nbl', '', 
'nr', u'Ndebele, South; South Ndebele', u'ndébélé du Sud'), + ('nde', '', 'nd', u'Ndebele, North; North Ndebele', u'ndébélé du Nord'), + ('ndo', '', 'ng', u'Ndonga', u'ndonga'), + ('nds', '', '', u'Low German; Low Saxon; German, Low; Saxon, Low', u'bas allemand; bas saxon; allemand, bas; saxon, bas'), + ('nep', '', 'ne', u'Nepali', u'népalais'), + ('new', '', '', u'Nepal Bhasa; Newari', u'nepal bhasa; newari'), + ('nia', '', '', u'Nias', u'nias'), + ('nic', '', '', u'Niger-Kordofanian languages', u'nigéro-kordofaniennes, langues'), + ('niu', '', '', u'Niuean', u'niué'), + ('nno', '', 'nn', u'Norwegian Nynorsk; Nynorsk, Norwegian', u'norvégien nynorsk; nynorsk, norvégien'), + ('nob', '', 'nb', u'BokmÃ¥l, Norwegian; Norwegian BokmÃ¥l', u'norvégien bokmÃ¥l'), + ('nog', '', '', u'Nogai', u'nogaï; nogay'), + ('non', '', '', u'Norse, Old', u'norrois, vieux'), + ('nor', '', 'no', u'Norwegian', u'norvégien'), + ('nqo', '', '', u'N\'Ko', u'n\'ko'), + ('nso', '', '', u'Pedi; Sepedi; Northern Sotho', u'pedi; sepedi; sotho du Nord'), + ('nub', '', '', u'Nubian languages', u'nubiennes, langues'), + ('nwc', '', '', u'Classical Newari; Old Newari; Classical Nepal Bhasa', u'newari classique'), + ('nya', '', 'ny', u'Chichewa; Chewa; Nyanja', u'chichewa; chewa; nyanja'), + ('nym', '', '', u'Nyamwezi', u'nyamwezi'), + ('nyn', '', '', u'Nyankole', u'nyankolé'), + ('nyo', '', '', u'Nyoro', u'nyoro'), + ('nzi', '', '', u'Nzima', u'nzema'), + ('oci', '', 'oc', u'Occitan (post 1500); Provençal', u'occitan (après 1500); provençal'), + ('oji', '', 'oj', u'Ojibwa', u'ojibwa'), + ('ori', '', 'or', u'Oriya', u'oriya'), + ('orm', '', 'om', u'Oromo', u'galla'), + ('osa', '', '', u'Osage', u'osage'), + ('oss', '', 'os', u'Ossetian; Ossetic', u'ossète'), + ('ota', '', '', u'Turkish, Ottoman (1500-1928)', u'turc ottoman (1500-1928)'), + ('oto', '', '', u'Otomian languages', u'otomi, langues'), + ('paa', '', '', u'Papuan languages', u'papoues, langues'), + ('pag', '', '', u'Pangasinan', 
u'pangasinan'), + ('pal', '', '', u'Pahlavi', u'pahlavi'), + ('pam', '', '', u'Pampanga; Kapampangan', u'pampangan'), + ('pan', '', 'pa', u'Panjabi; Punjabi', u'pendjabi'), + ('pap', '', '', u'Papiamento', u'papiamento'), + ('pau', '', '', u'Palauan', u'palau'), + ('peo', '', '', u'Persian, Old (ca.600-400 B.C.)', u'perse, vieux (ca. 600-400 av. J.-C.)'), + ('per', 'fas', 'fa', u'Persian', u'persan'), + ('phi', '', '', u'Philippine languages', u'philippines, langues'), + ('phn', '', '', u'Phoenician', u'phénicien'), + ('pli', '', 'pi', u'Pali', u'pali'), + ('pol', '', 'pl', u'Polish', u'polonais'), + ('pon', '', '', u'Pohnpeian', u'pohnpei'), + ('por', '', 'pt', u'Portuguese', u'portugais'), + ('pra', '', '', u'Prakrit languages', u'prâkrit, langues'), + ('pro', '', '', u'Provençal, Old (to 1500)', u'provençal ancien (jusqu\'à 1500)'), + ('pus', '', 'ps', u'Pushto; Pashto', u'pachto'), + ('que', '', 'qu', u'Quechua', u'quechua'), + ('raj', '', '', u'Rajasthani', u'rajasthani'), + ('rap', '', '', u'Rapanui', u'rapanui'), + ('rar', '', '', u'Rarotongan; Cook Islands Maori', u'rarotonga; maori des îles Cook'), + ('roa', '', '', u'Romance languages', u'romanes, langues'), + ('roh', '', 'rm', u'Romansh', u'romanche'), + ('rom', '', '', u'Romany', u'tsigane'), + ('rum', 'ron', 'ro', u'Romanian; Moldavian; Moldovan', u'roumain; moldave'), + ('run', '', 'rn', u'Rundi', u'rundi'), + ('rup', '', '', u'Aromanian; Arumanian; Macedo-Romanian', u'aroumain; macédo-roumain'), + ('rus', '', 'ru', u'Russian', u'russe'), + ('sad', '', '', u'Sandawe', u'sandawe'), + ('sag', '', 'sg', u'Sango', u'sango'), + ('sah', '', '', u'Yakut', u'iakoute'), + ('sai', '', '', u'South American Indian (Other)', u'indiennes d\'Amérique du Sud, autres langues'), + ('sal', '', '', u'Salishan languages', u'salishennes, langues'), + ('sam', '', '', u'Samaritan Aramaic', u'samaritain'), + ('san', '', 'sa', u'Sanskrit', u'sanskrit'), + ('sas', '', '', u'Sasak', u'sasak'), + ('sat', '', '', u'Santali', 
u'santal'), + ('scn', '', '', u'Sicilian', u'sicilien'), + ('sco', '', '', u'Scots', u'écossais'), + ('sel', '', '', u'Selkup', u'selkoupe'), + ('sem', '', '', u'Semitic languages', u'sémitiques, langues'), + ('sga', '', '', u'Irish, Old (to 900)', u'irlandais ancien (jusqu\'à 900)'), + ('sgn', '', '', u'Sign Languages', u'langues des signes'), + ('shn', '', '', u'Shan', u'chan'), + ('sid', '', '', u'Sidamo', u'sidamo'), + ('sin', '', 'si', u'Sinhala; Sinhalese', u'singhalais'), + ('sio', '', '', u'Siouan languages', u'sioux, langues'), + ('sit', '', '', u'Sino-Tibetan languages', u'sino-tibétaines, langues'), + ('sla', '', '', u'Slavic languages', u'slaves, langues'), + ('slo', 'slk', 'sk', u'Slovak', u'slovaque'), + ('slv', '', 'sl', u'Slovenian', u'slovène'), + ('sma', '', '', u'Southern Sami', u'sami du Sud'), + ('sme', '', 'se', u'Northern Sami', u'sami du Nord'), + ('smi', '', '', u'Sami languages', u'sames, langues'), + ('smj', '', '', u'Lule Sami', u'sami de Lule'), + ('smn', '', '', u'Inari Sami', u'sami d\'Inari'), + ('smo', '', 'sm', u'Samoan', u'samoan'), + ('sms', '', '', u'Skolt Sami', u'sami skolt'), + ('sna', '', 'sn', u'Shona', u'shona'), + ('snd', '', 'sd', u'Sindhi', u'sindhi'), + ('snk', '', '', u'Soninke', u'soninké'), + ('sog', '', '', u'Sogdian', u'sogdien'), + ('som', '', 'so', u'Somali', u'somali'), + ('son', '', '', u'Songhai languages', u'songhai, langues'), + ('sot', '', 'st', u'Sotho, Southern', u'sotho du Sud'), + ('spa', '', 'es', u'Spanish; Castilian', u'espagnol; castillan'), + ('srd', '', 'sc', u'Sardinian', u'sarde'), + ('srn', '', '', u'Sranan Tongo', u'sranan tongo'), + ('srp', '', 'sr', u'Serbian', u'serbe'), + ('srr', '', '', u'Serer', u'sérère'), + ('ssa', '', '', u'Nilo-Saharan languages', u'nilo-sahariennes, langues'), + ('ssw', '', 'ss', u'Swati', u'swati'), + ('suk', '', '', u'Sukuma', u'sukuma'), + ('sun', '', 'su', u'Sundanese', u'soundanais'), + ('sus', '', '', u'Susu', u'soussou'), + ('sux', '', '', u'Sumerian', 
u'sumérien'), + ('swa', '', 'sw', u'Swahili', u'swahili'), + ('swe', '', 'sv', u'Swedish', u'suédois'), + ('syc', '', '', u'Classical Syriac', u'syriaque classique'), + ('syr', '', '', u'Syriac', u'syriaque'), + ('tah', '', 'ty', u'Tahitian', u'tahitien'), + ('tai', '', '', u'Tai languages', u'tai, langues'), + ('tam', '', 'ta', u'Tamil', u'tamoul'), + ('tat', '', 'tt', u'Tatar', u'tatar'), + ('tel', '', 'te', u'Telugu', u'télougou'), + ('tem', '', '', u'Timne', u'temne'), + ('ter', '', '', u'Tereno', u'tereno'), + ('tet', '', '', u'Tetum', u'tetum'), + ('tgk', '', 'tg', u'Tajik', u'tadjik'), + ('tgl', '', 'tl', u'Tagalog', u'tagalog'), + ('tha', '', 'th', u'Thai', u'thaï'), + ('tib', 'bod', 'bo', u'Tibetan', u'tibétain'), + ('tig', '', '', u'Tigre', u'tigré'), + ('tir', '', 'ti', u'Tigrinya', u'tigrigna'), + ('tiv', '', '', u'Tiv', u'tiv'), + ('tkl', '', '', u'Tokelau', u'tokelau'), + ('tlh', '', '', u'Klingon; tlhIngan-Hol', u'klingon'), + ('tli', '', '', u'Tlingit', u'tlingit'), + ('tmh', '', '', u'Tamashek', u'tamacheq'), + ('tog', '', '', u'Tonga (Nyasa)', u'tonga (Nyasa)'), + ('ton', '', 'to', u'Tonga (Tonga Islands)', u'tongan (ÃŽles Tonga)'), + ('tpi', '', '', u'Tok Pisin', u'tok pisin'), + ('tsi', '', '', u'Tsimshian', u'tsimshian'), + ('tsn', '', 'tn', u'Tswana', u'tswana'), + ('tso', '', 'ts', u'Tsonga', u'tsonga'), + ('tuk', '', 'tk', u'Turkmen', u'turkmène'), + ('tum', '', '', u'Tumbuka', u'tumbuka'), + ('tup', '', '', u'Tupi languages', u'tupi, langues'), + ('tur', '', 'tr', u'Turkish', u'turc'), + ('tut', '', '', u'Altaic languages', u'altaïques, langues'), + ('tvl', '', '', u'Tuvalu', u'tuvalu'), + ('twi', '', 'tw', u'Twi', u'twi'), + ('tyv', '', '', u'Tuvinian', u'touva'), + ('udm', '', '', u'Udmurt', u'oudmourte'), + ('uga', '', '', u'Ugaritic', u'ougaritique'), + ('uig', '', 'ug', u'Uighur; Uyghur', u'ouïgour'), + ('ukr', '', 'uk', u'Ukrainian', u'ukrainien'), + ('umb', '', '', u'Umbundu', u'umbundu'), + ('und', '', '', u'Undetermined', 
u'indéterminée'), + ('urd', '', 'ur', u'Urdu', u'ourdou'), + ('uzb', '', 'uz', u'Uzbek', u'ouszbek'), + ('vai', '', '', u'Vai', u'vaï'), + ('ven', '', 've', u'Venda', u'venda'), + ('vie', '', 'vi', u'Vietnamese', u'vietnamien'), + ('vol', '', 'vo', u'Volapük', u'volapük'), + ('vot', '', '', u'Votic', u'vote'), + ('wak', '', '', u'Wakashan languages', u'wakashanes, langues'), + ('wal', '', '', u'Walamo', u'walamo'), + ('war', '', '', u'Waray', u'waray'), + ('was', '', '', u'Washo', u'washo'), + ('wel', 'cym', 'cy', u'Welsh', u'gallois'), + ('wen', '', '', u'Sorbian languages', u'sorabes, langues'), + ('wln', '', 'wa', u'Walloon', u'wallon'), + ('wol', '', 'wo', u'Wolof', u'wolof'), + ('xal', '', '', u'Kalmyk; Oirat', u'kalmouk; oïrat'), + ('xho', '', 'xh', u'Xhosa', u'xhosa'), + ('yao', '', '', u'Yao', u'yao'), + ('yap', '', '', u'Yapese', u'yapois'), + ('yid', '', 'yi', u'Yiddish', u'yiddish'), + ('yor', '', 'yo', u'Yoruba', u'yoruba'), + ('ypk', '', '', u'Yupik languages', u'yupik, langues'), + ('zap', '', '', u'Zapotec', u'zapotèque'), + ('zbl', '', '', u'Blissymbols; Blissymbolics; Bliss', u'symboles Bliss; Bliss'), + ('zen', '', '', u'Zenaga', u'zenaga'), + ('zha', '', 'za', u'Zhuang; Chuang', u'zhuang; chuang'), + ('znd', '', '', u'Zande languages', u'zandé, langues'), + ('zul', '', 'zu', u'Zulu', u'zoulou'), + ('zun', '', '', u'Zuni', u'zuni'), + ('zza', '', '', u'Zaza; Dimili; Dimli; Kirdki; Kirmanjki; Zazaki', u'zaza; dimili; dimli; kirdki; kirmanjki; zazaki')] + + +class Country(object): + """Country according to ISO-3166 + + :param string country: country name, alpha2 code, alpha3 code or numeric code + :param list countries: all countries + :type countries: see :data:`~subliminal.language.COUNTRIES` + + """ + def __init__(self, country, countries=None): + countries = countries or COUNTRIES + country = to_unicode(country.strip().lower()) + country_tuple = None + + # Try to find the country + if len(country) == 2: + country_tuple = dict((c[0].lower(), c) 
for c in countries).get(country) + elif len(country) == 3 and not country.isdigit(): + country_tuple = dict((c[1].lower(), c) for c in countries).get(country) + elif len(country) == 3 and country.isdigit(): + country_tuple = dict((c[2].lower(), c) for c in countries).get(country) + if country_tuple is None: + country_tuple = dict((c[3].lower(), c) for c in countries).get(country) + + # Raise ValueError if nothing is found + if country_tuple is None: + raise ValueError('Country %s does not exist' % country) + + # Set default attrs + self.alpha2 = country_tuple[0] + self.alpha3 = country_tuple[1] + self.numeric = country_tuple[2] + self.name = country_tuple[3] + + def __hash__(self): + return hash(self.alpha3) + + def __eq__(self, other): + if isinstance(other, Country): + return self.alpha3 == other.alpha3 + return False + + def __ne__(self, other): + return not self == other + + def __unicode__(self): + return self.name + + def __str__(self): + return unicode(self).encode('utf-8') + + def __repr__(self): + return 'Country(%s)' % self + + +class Language(object): + """Language according to ISO-639 + + :param string language: language name (english or french), alpha2 code, alpha3 code, terminologic code or numeric code, eventually with a country + :param country: country of the language + :type country: :class:`Country` or string + :param languages: all languages + :type languages: see :data:`~subliminal.language.LANGUAGES` + :param countries: all countries + :type countries: see :data:`~subliminal.language.COUNTRIES` + :param bool strict: whether to raise a ValueError on unknown language or not + + :class:`Language` implements the inclusion test, with the ``in`` keyword:: + + >>> Language('pt-BR') in Language('pt') # Portuguese (Brazil) is included in Portuguese + True + >>> Language('pt') in Language('pt-BR') # Portuguese is not included in Portuguese (Brazil) + False + + """ + with_country_regexps = [re.compile('(.*)\((.*)\)'), re.compile('(.*)[-_](.*)')] + + def 
__init__(self, language, country=None, languages=None, countries=None, strict=True): + languages = languages or LANGUAGES + countries = countries or COUNTRIES + + # Get the country + self.country = None + if isinstance(country, Country): + self.country = country + elif isinstance(country, basestring): + try: + self.country = Country(country, countries) + except ValueError: + logger.warning(u'Country %s could not be identified' % country) + if strict: + raise + + # Language + Country format + #TODO: Improve this part + if country is None: + for regexp in [r.match(language) for r in self.with_country_regexps]: + if regexp: + language = regexp.group(1) + try: + self.country = Country(regexp.group(2), countries) + except ValueError: + logger.warning(u'Country %s could not be identified' % country) + if strict: + raise + break + + # Try to find the language + language = to_unicode(language.strip().lower()) + language_tuple = None + if len(language) == 2: + language_tuple = dict((l[2].lower(), l) for l in languages).get(language) + elif len(language) == 3: + language_tuple = dict((l[0].lower(), l) for l in languages).get(language) + if language_tuple is None: + language_tuple = dict((l[1].lower(), l) for l in languages).get(language) + if language_tuple is None: + language_tuple = dict((l[3].split('; ')[0].lower(), l) for l in languages).get(language) + if language_tuple is None: + language_tuple = dict((l[4].split('; ')[0].lower(), l) for l in languages).get(language) + + # Raise ValueError if strict or continue with Undetermined + if language_tuple is None: + if strict: + raise ValueError('Language %s does not exist' % language) + language_tuple = dict((l[0].lower(), l) for l in languages).get('und') + + # Set attributes + self.alpha2 = language_tuple[2] + self.alpha3 = language_tuple[0] + self.terminologic = language_tuple[1] + self.name = language_tuple[3] + self.french_name = language_tuple[4] + + def __hash__(self): + if self.country is None: + return 
hash(self.alpha3) + return hash(self.alpha3 + self.country.alpha3) + + def __eq__(self, other): + if isinstance(other, Language): + return self.alpha3 == other.alpha3 and self.country == other.country + return False + + def __contains__(self, item): + if isinstance(item, Language): + if self == item: + return True + if self.country is None: + return self.alpha3 == item.alpha3 + return False + + def __ne__(self, other): + return not self == other + + def __nonzero__(self): + return self.alpha3 != 'und' + + def __unicode__(self): + if self.country is None: + return self.name + return '%s (%s)' % (self.name, self.country) + + def __str__(self): + return unicode(self).encode('utf-8') + + def __repr__(self): + if self.country is None: + return 'Language(%s)' % self.name.encode('utf-8') + return 'Language(%s, country=%s)' % (self.name.encode('utf-8'), self.country) + + +class language_set(set): + """Set of :class:`Language` with some specificities. + + :param iterable: where to take elements from + :type iterable: iterable of :class:`Languages <Language>` or string + :param languages: all languages + :type languages: see :data:`~subliminal.language.LANGUAGES` + :param bool strict: whether to raise a ValueError on invalid language or not + + The following redefinitions are meant to reflect the inclusion logic in :class:`Language` + + * Inclusion test, with the ``in`` keyword + * Intersection + * Substraction + + Here is an illustration of the previous points:: + + >>> Language('en') in language_set(['en-US', 'en-CA']) + False + >>> Language('en-US') in language_set(['en', 'fr']) + True + >>> language_set(['en']) & language_set(['en-US', 'en-CA']) + language_set([Language(English, country=Canada), Language(English, country=United States)]) + >>> language_set(['en-US', 'en-CA', 'fr']) - language_set(['en']) + language_set([Language(French)]) + + """ + def __init__(self, iterable=None, languages=None, strict=True): + iterable = iterable or [] + languages = languages or 
LANGUAGES + items = [] + for i in iterable: + if isinstance(i, Language): + items.append(i) + continue + if isinstance(i, tuple): + items.append(Language(i[0], languages=languages, strict=strict)) + continue + items.append(Language(i, languages=languages, strict=strict)) + super(language_set, self).__init__(items) + + def __contains__(self, item): + for i in self: + if item in i: + return True + return super(language_set, self).__contains__(item) + + def __and__(self, other): + results = language_set() + for i in self: + for j in other: + if i in j: + results.add(i) + for i in other: + for j in self: + if i in j: + results.add(i) + return results + + def __sub__(self, other): + results = language_set() + for i in self: + if i not in other: + results.add(i) + return results + + +class language_list(list): + """List of :class:`Language` with some specificities. + + :param iterable: where to take elements from + :type iterable: iterable of :class:`Languages <Language>` or string + :param languages: all languages + :type languages: see :data:`~subliminal.language.LANGUAGES` + :param bool strict: whether to raise a ValueError on invalid language or not + + The following redefinitions are meant to reflect the inclusion logic in :class:`Language` + + * Inclusion test, with the ``in`` keyword + * Index + + Here is an illustration of the previous points:: + + >>> Language('en') in language_list(['en-US', 'en-CA']) + False + >>> Language('en-US') in language_list(['en', 'fr-BE']) + True + >>> language_list(['en', 'fr-BE']).index(Language('en-US')) + 0 + + """ + def __init__(self, iterable=None, languages=None, strict=True): + iterable = iterable or [] + languages = languages or LANGUAGES + items = [] + for i in iterable: + if isinstance(i, Language): + items.append(i) + continue + if isinstance(i, tuple): + items.append(Language(i[0], languages=languages, strict=strict)) + continue + items.append(Language(i, languages=languages, strict=strict)) + super(language_list, 
self).__init__(items) + + def __contains__(self, item): + for i in self: + if item in i: + return True + return super(language_list, self).__contains__(item) + + def index(self, x, strict=False): + if not strict: + for i in range(len(self)): + if x in self[i]: + return i + return super(language_list, self).index(x) diff --git a/lib/subliminal/services/__init__.py b/lib/subliminal/services/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7cad1cd6a11656bbff8b7835e5bae4997fdd9ee4 --- /dev/null +++ b/lib/subliminal/services/__init__.py @@ -0,0 +1,266 @@ +# -*- coding: utf-8 -*- +# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com> +# +# This file is part of subliminal. +# +# subliminal is free software; you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# subliminal is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with subliminal. If not, see <http://www.gnu.org/licenses/>. 
+from ..cache import Cache +from ..exceptions import DownloadFailedError, ServiceError +from ..language import language_set, Language +from ..subtitles import EXTENSIONS +import logging +import os +import requests +import threading +import zipfile + + +__all__ = ['ServiceBase', 'ServiceConfig'] +logger = logging.getLogger("subliminal") + + +class ServiceBase(object): + """Service base class + + :param config: service configuration + :type config: :class:`ServiceConfig` + + """ + #: URL to the service server + server_url = '' + + #: User Agent for any HTTP-based requests + user_agent = 'subliminal v0.6' + + #: Whether based on an API or not + api_based = False + + #: Timeout for web requests + timeout = 5 + + #: :class:`~subliminal.language.language_set` of available languages + languages = language_set() + + #: Map between language objects and language codes used in the service + language_map = {} + + #: Default attribute of a :class:`~subliminal.language.Language` to get with :meth:`get_code` + language_code = 'alpha2' + + #: Accepted video classes (:class:`~subliminal.videos.Episode`, :class:`~subliminal.videos.Movie`, :class:`~subliminal.videos.UnknownVideo`) + videos = [] + + #: Whether the video has to exist or not + require_video = False + + #: List of required features for BeautifulSoup + required_features = None + + def __init__(self, config=None): + self.config = config or ServiceConfig() + self.session = None + + def __enter__(self): + self.init() + return self + + def __exit__(self, *args): + self.terminate() + + def init(self): + """Initialize connection""" + logger.debug(u'Initializing %s' % self.__class__.__name__) + self.session = requests.session(timeout=10, headers={'User-Agent': self.user_agent}) + + def init_cache(self): + """Initialize cache, make sure it is loaded from disk""" + if not self.config or not self.config.cache: + raise ServiceError('Cache directory is required') + self.config.cache.load(self.__class__.__name__) + + def 
save_cache(self): + self.config.cache.save(self.__class__.__name__) + + def clear_cache(self): + self.config.cache.clear(self.__class__.__name__) + + def cache_for(self, func, args, result): + return self.config.cache.cache_for(self.__class__.__name__, func, args, result) + + def cached_value(self, func, args): + return self.config.cache.cached_value(self.__class__.__name__, func, args) + + def terminate(self): + """Terminate connection""" + logger.debug(u'Terminating %s' % self.__class__.__name__) + + def get_code(self, language): + """Get the service code for a :class:`~subliminal.language.Language` + + It uses the :data:`language_map` and if there's no match, falls back + on the :data:`language_code` attribute of the given :class:`~subliminal.language.Language` + + """ + if language in self.language_map: + return self.language_map[language] + if self.language_code is None: + raise ValueError('%r has no matching code' % language) + return getattr(language, self.language_code) + + def get_language(self, code): + """Get a :class:`~subliminal.language.Language` from a service code + + It uses the :data:`language_map` and if there's no match, uses the + given code as ``language`` parameter for the :class:`~subliminal.language.Language` + constructor + + .. 
note:: + + A warning is emitted if the generated :class:`~subliminal.language.Language` + is "Undetermined" + + """ + if code in self.language_map: + return self.language_map[code] + language = Language(code, strict=False) + if language == Language('Undetermined'): + logger.warning(u'Code %s could not be identified as a language for %s' % (code, self.__class__.__name__)) + return language + + def query(self, *args): + """Make the actual query""" + raise NotImplementedError() + + def list(self, video, languages): + """List subtitles + + As a service writer, you can either override this method or implement + :meth:`list_checked` instead to have the languages pre-filtered for you + + """ + if not self.check_validity(video, languages): + return [] + return self.list_checked(video, languages) + + def list_checked(self, video, languages): + """List subtitles without having to check parameters for validity""" + raise NotImplementedError() + + def download(self, subtitle): + """Download a subtitle""" + self.download_file(subtitle.link, subtitle.path) + return subtitle + + @classmethod + def check_validity(cls, video, languages): + """Check for video and languages validity in the Service + + :param video: the video to check + :type video: :class:`~subliminal.videos.video` + :param languages: languages to check + :type languages: :class:`~subliminal.language.Language` + :rtype: bool + + """ + languages = (languages & cls.languages) - language_set(['Undetermined']) + if not languages: + logger.debug(u'No language available for service %s' % cls.__name__.lower()) + return False + if cls.require_video and not video.exists or not isinstance(video, tuple(cls.videos)): + logger.debug(u'%r is not valid for service %s' % (video, cls.__name__.lower())) + return False + return True + + def download_file(self, url, filepath): + """Attempt to download a file and remove it in case of failure + + :param string url: URL to download + :param string filepath: destination path + + """ + 
logger.info(u'Downloading %s in %s' % (url, filepath)) + try: + r = self.session.get(url, headers={'Referer': url, 'User-Agent': self.user_agent}) + with open(filepath, 'wb') as f: + f.write(r.content) + except Exception as e: + logger.error(u'Download failed: %s' % e) + if os.path.exists(filepath): + os.remove(filepath) + raise DownloadFailedError(str(e)) + logger.debug(u'Download finished') + + def download_zip_file(self, url, filepath): + """Attempt to download a zip file and extract any subtitle file from it, if any. + This cleans up after itself if anything fails. + + :param string url: URL of the zip file to download + :param string filepath: destination path for the subtitle + + """ + logger.info(u'Downloading %s in %s' % (url, filepath)) + try: + zippath = filepath + '.zip' + r = self.session.get(url, headers={'Referer': url, 'User-Agent': self.user_agent}) + with open(zippath, 'wb') as f: + f.write(r.content) + if not zipfile.is_zipfile(zippath): + # TODO: could check if maybe we already have a text file and + # download it directly + raise DownloadFailedError('Downloaded file is not a zip file') +# with zipfile.ZipFile(zippath) as zipsub: +# for subfile in zipsub.namelist(): +# if os.path.splitext(subfile)[1] in EXTENSIONS: +# with open(filepath, 'w') as f: +# f.write(zipsub.open(subfile).read()) +# break +# else: +# raise DownloadFailedError('No subtitles found in zip file') + zipsub = zipfile.ZipFile(zippath) + for subfile in zipsub.namelist(): + if os.path.splitext(subfile)[1] in EXTENSIONS: + with open(filepath, 'w') as f: + f.write(zipsub.open(subfile).read()) + break + else: + zipsub.close() + raise DownloadFailedError('No subtitles found in zip file') + zipsub.close() + os.remove(zippath) + except Exception as e: + logger.error(u'Download %s failed: %s' % (url, e)) + if os.path.exists(zippath): + os.remove(zippath) + if os.path.exists(filepath): + os.remove(filepath) + raise DownloadFailedError(str(e)) + logger.debug(u'Download finished') + + 
+class ServiceConfig(object): + """Configuration for any :class:`Service` + + :param bool multi: whether to download one subtitle per language or not + :param string cache_dir: cache directory + + """ + def __init__(self, multi=False, cache_dir=None): + self.multi = multi + self.cache_dir = cache_dir + self.cache = None + if cache_dir is not None: + self.cache = Cache(cache_dir) + + def __repr__(self): + return 'ServiceConfig(%r, %s)' % (self.multi, self.cache.cache_dir) diff --git a/lib/subliminal/services/addic7ed.py b/lib/subliminal/services/addic7ed.py new file mode 100644 index 0000000000000000000000000000000000000000..1080cb479830a298b0328ca696dcfde1ea5a6894 --- /dev/null +++ b/lib/subliminal/services/addic7ed.py @@ -0,0 +1,121 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 Olivier Leveau <olifozzy@gmail.com> +# Copyright 2012 Antoine Bertin <diaoulael@gmail.com> +# +# This file is part of subliminal. +# +# subliminal is free software; you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# subliminal is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with subliminal. If not, see <http://www.gnu.org/licenses/>. +from . 
import ServiceBase +from ..cache import cachedmethod +from ..exceptions import DownloadFailedError +from ..language import Language, language_set +from ..subtitles import get_subtitle_path, ResultSubtitle +from ..utils import get_keywords, split_keyword +from ..videos import Episode +from bs4 import BeautifulSoup +import logging +import os +import re + + +logger = logging.getLogger("subliminal") + + +class Addic7ed(ServiceBase): + server_url = 'http://www.addic7ed.com' + site_url = 'http://www.addic7ed.com' + api_based = False + #TODO: Complete this + languages = language_set(['ar', 'ca', 'de', 'el', 'en', 'es', 'eu', 'fr', 'ga', 'gl', 'he', 'hr', 'hu', + 'it', 'pl', 'pt', 'ro', 'ru', 'se', 'pt-br']) + language_map = {'Portuguese (Brazilian)': Language('por-BR'), 'Greek': Language('gre'), + 'Spanish (Latin America)': Language('spa'), 'Galego': Language('glg'), + u'Català ': Language('cat')} + videos = [Episode] + require_video = False + required_features = ['permissive'] + + @cachedmethod + def get_series_id(self, name): + """Get the show page and cache every show found in it""" + r = self.session.get('%s/shows.php' % self.server_url) + soup = BeautifulSoup(r.content, self.required_features) + for html_series in soup.select('h3 > a'): + series_name = html_series.text.lower() + match = re.search('show/([0-9]+)', html_series['href']) + if match is None: + continue + series_id = int(match.group(1)) + self.cache_for(self.get_series_id, args=(series_name,), result=series_id) + return self.cached_value(self.get_series_id, args=(name,)) + + def list_checked(self, video, languages): + return self.query(video.path or video.release, languages, get_keywords(video.guess), video.series, video.season, video.episode) + + def query(self, filepath, languages, keywords, series, season, episode): + logger.debug(u'Getting subtitles for %s season %d episode %d with languages %r' % (series, season, episode, languages)) + self.init_cache() + try: + series_id = 
self.get_series_id(series.lower()) + except KeyError: + logger.debug(u'Could not find series id for %s' % series) + return [] + r = self.session.get('%s/show/%d&season=%d' % (self.server_url, series_id, season)) + soup = BeautifulSoup(r.content, self.required_features) + subtitles = [] + for row in soup('tr', {'class': 'epeven completed'}): + cells = row('td') + if int(cells[0].text.strip()) != season or int(cells[1].text.strip()) != episode: + continue + if cells[6].text.strip(): + logger.debug(u'Skipping hearing impaired') + continue + sub_status = cells[5].text.strip() + if sub_status != 'Completed': + logger.debug(u'Wrong subtitle status %s' % sub_status) + continue + sub_language = self.get_language(cells[3].text.strip()) + if sub_language not in languages: + logger.debug(u'Language %r not in wanted languages %r' % (sub_language, languages)) + continue + sub_keywords = split_keyword(cells[4].text.strip().lower()) + #TODO: Maybe allow empty keywords here? (same in Subtitulos) + if keywords and not keywords & sub_keywords: + logger.debug(u'None of subtitle keywords %r in %r' % (sub_keywords, keywords)) + continue + sub_link = '%s/%s' % (self.server_url, cells[9].a['href']) + sub_path = get_subtitle_path(filepath, sub_language, self.config.multi) + subtitle = ResultSubtitle(sub_path, sub_language, self.__class__.__name__.lower(), sub_link, keywords=sub_keywords) + subtitles.append(subtitle) + return subtitles + + def download(self, subtitle): + logger.info(u'Downloading %s in %s' % (subtitle.link, subtitle.path)) + try: + r = self.session.get(subtitle.link, headers={'Referer': subtitle.link, 'User-Agent': self.user_agent}) + soup = BeautifulSoup(r.content, self.required_features) + if soup.title is not None and u'Addic7ed.com' in soup.title.text.strip(): + raise DownloadFailedError('Download limit exceeded') + with open(subtitle.path, 'wb') as f: + f.write(r.content) + except Exception as e: + logger.error(u'Download failed: %s' % e) + if 
os.path.exists(subtitle.path): + os.remove(subtitle.path) + raise DownloadFailedError(str(e)) + logger.debug(u'Download finished') + return subtitle + + +Service = Addic7ed diff --git a/lib/subliminal/services/bierdopje.py b/lib/subliminal/services/bierdopje.py new file mode 100644 index 0000000000000000000000000000000000000000..8642afb89fba2331895aac6d292968a207070228 --- /dev/null +++ b/lib/subliminal/services/bierdopje.py @@ -0,0 +1,104 @@ +# -*- coding: utf-8 -*- +# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com> +# +# This file is part of subliminal. +# +# subliminal is free software; you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# subliminal is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with subliminal. If not, see <http://www.gnu.org/licenses/>. +from . 
import ServiceBase +from ..cache import cachedmethod +from ..exceptions import ServiceError +from ..language import language_set +from ..subtitles import get_subtitle_path, ResultSubtitle, EXTENSIONS +from ..utils import to_unicode +from ..videos import Episode +from bs4 import BeautifulSoup +import logging +import urllib +try: + import cPickle as pickle +except ImportError: + import pickle + + +logger = logging.getLogger("subliminal") + + +class BierDopje(ServiceBase): + server_url = 'http://api.bierdopje.com/A2B638AC5D804C2E/' + site_url = 'http://www.bierdopje.com' + user_agent = 'Subliminal/0.6' + api_based = True + languages = language_set(['eng', 'dut']) + videos = [Episode] + require_video = False + required_features = ['xml'] + + @cachedmethod + def get_show_id(self, series): + r = self.session.get('%sGetShowByName/%s' % (self.server_url, urllib.quote(series.lower()))) + if r.status_code != 200: + logger.error(u'Request %s returned status code %d' % (r.url, r.status_code)) + return None + soup = BeautifulSoup(r.content, self.required_features) + if soup.status.contents[0] == 'false': + logger.debug(u'Could not find show %s' % series) + return None + return int(soup.showid.contents[0]) + + def load_cache(self): + logger.debug(u'Loading showids from cache...') + with self.lock: + with open(self.showids_cache, 'r') as f: + self.showids = pickle.load(f) + + def query(self, filepath, season, episode, languages, tvdbid=None, series=None): + self.init_cache() + if series: + request_id = self.get_show_id(series.lower()) + if request_id is None: + return [] + request_source = 'showid' + request_is_tvdbid = 'false' + elif tvdbid: + request_id = tvdbid + request_source = 'tvdbid' + request_is_tvdbid = 'true' + else: + raise ServiceError('One or more parameter missing') + subtitles = [] + for language in languages: + logger.debug(u'Getting subtitles for %s %d season %d episode %d with language %s' % (request_source, request_id, season, episode, language.alpha2)) + r = 
self.session.get('%sGetAllSubsFor/%s/%s/%s/%s/%s' % (self.server_url, request_id, season, episode, language.alpha2, request_is_tvdbid)) + if r.status_code != 200: + logger.error(u'Request %s returned status code %d' % (r.url, r.status_code)) + return [] + soup = BeautifulSoup(r.content, self.required_features) + if soup.status.contents[0] == 'false': + logger.debug(u'Could not find subtitles for %s %d season %d episode %d with language %s' % (request_source, request_id, season, episode, language.alpha2)) + continue + path = get_subtitle_path(filepath, language, self.config.multi) + for result in soup.results('result'): + release = to_unicode(result.filename.contents[0]) + if not release.endswith(tuple(EXTENSIONS)): + release += '.srt' + subtitle = ResultSubtitle(path, language, self.__class__.__name__.lower(), result.downloadlink.contents[0], + release=release) + subtitles.append(subtitle) + return subtitles + + def list_checked(self, video, languages): + return self.query(video.path or video.release, video.season, video.episode, languages, video.tvdbid, video.series) + + +Service = BierDopje diff --git a/lib/subliminal/services/opensubtitles.py b/lib/subliminal/services/opensubtitles.py new file mode 100644 index 0000000000000000000000000000000000000000..fba8e4091d59a898d9d0984385ee82c48953d089 --- /dev/null +++ b/lib/subliminal/services/opensubtitles.py @@ -0,0 +1,159 @@ +# -*- coding: utf-8 -*- +# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com> +# +# This file is part of subliminal. +# +# subliminal is free software; you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# subliminal is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with subliminal. If not, see <http://www.gnu.org/licenses/>. +from . import ServiceBase +from ..exceptions import ServiceError, DownloadFailedError +from ..language import Language, language_set +from ..subtitles import get_subtitle_path, ResultSubtitle +from ..utils import to_unicode +from ..videos import Episode, Movie +import gzip +import logging +import os.path +import xmlrpclib + + +logger = logging.getLogger("subliminal") + + +class OpenSubtitles(ServiceBase): + server_url = 'http://api.opensubtitles.org/xml-rpc' + site_url = 'http://www.opensubtitles.org' + api_based = True + # Source: http://www.opensubtitles.org/addons/export_languages.php + languages = language_set(['aar', 'abk', 'ace', 'ach', 'ada', 'ady', 'afa', 'afh', 'afr', 'ain', 'aka', 'akk', + 'alb', 'ale', 'alg', 'alt', 'amh', 'ang', 'apa', 'ara', 'arc', 'arg', 'arm', 'arn', + 'arp', 'art', 'arw', 'asm', 'ast', 'ath', 'aus', 'ava', 'ave', 'awa', 'aym', 'aze', + 'bad', 'bai', 'bak', 'bal', 'bam', 'ban', 'baq', 'bas', 'bat', 'bej', 'bel', 'bem', + 'ben', 'ber', 'bho', 'bih', 'bik', 'bin', 'bis', 'bla', 'bnt', 'bos', 'bra', 'bre', + 'btk', 'bua', 'bug', 'bul', 'bur', 'byn', 'cad', 'cai', 'car', 'cat', 'cau', 'ceb', + 'cel', 'cha', 'chb', 'che', 'chg', 'chi', 'chk', 'chm', 'chn', 'cho', 'chp', 'chr', + 'chu', 'chv', 'chy', 'cmc', 'cop', 'cor', 'cos', 'cpe', 'cpf', 'cpp', 'cre', 'crh', + 'crp', 'csb', 'cus', 'cze', 'dak', 'dan', 'dar', 'day', 'del', 'den', 'dgr', 'din', + 'div', 'doi', 'dra', 'dua', 'dum', 'dut', 'dyu', 'dzo', 'efi', 'egy', 'eka', 'ell', + 'elx', 'eng', 'enm', 'epo', 'est', 'ewe', 'ewo', 'fan', 'fao', 'fat', 'fij', 'fil', + 'fin', 'fiu', 'fon', 'fre', 'frm', 'fro', 'fry', 'ful', 'fur', 'gaa', 'gay', 'gba', + 'gem', 'geo', 'ger', 'gez', 'gil', 'gla', 'gle', 'glg', 'glv', 'gmh', 'goh', 'gon', + 'gor', 'got', 'grb', 'grc', 'grn', 'guj', 
'gwi', 'hai', 'hat', 'hau', 'haw', 'heb', + 'her', 'hil', 'him', 'hin', 'hit', 'hmn', 'hmo', 'hrv', 'hun', 'hup', 'iba', 'ibo', + 'ice', 'ido', 'iii', 'ijo', 'iku', 'ile', 'ilo', 'ina', 'inc', 'ind', 'ine', 'inh', + 'ipk', 'ira', 'iro', 'ita', 'jav', 'jpn', 'jpr', 'jrb', 'kaa', 'kab', 'kac', 'kal', + 'kam', 'kan', 'kar', 'kas', 'kau', 'kaw', 'kaz', 'kbd', 'kha', 'khi', 'khm', 'kho', + 'kik', 'kin', 'kir', 'kmb', 'kok', 'kom', 'kon', 'kor', 'kos', 'kpe', 'krc', 'kro', + 'kru', 'kua', 'kum', 'kur', 'kut', 'lad', 'lah', 'lam', 'lao', 'lat', 'lav', 'lez', + 'lim', 'lin', 'lit', 'lol', 'loz', 'ltz', 'lua', 'lub', 'lug', 'lui', 'lun', 'luo', + 'lus', 'mac', 'mad', 'mag', 'mah', 'mai', 'mak', 'mal', 'man', 'mao', 'map', 'mar', + 'mas', 'may', 'mdf', 'mdr', 'men', 'mga', 'mic', 'min', 'mkh', 'mlg', 'mlt', 'mnc', + 'mni', 'mno', 'moh', 'mon', 'mos', 'mun', 'mus', 'mwl', 'mwr', 'myn', 'myv', 'nah', + 'nai', 'nap', 'nau', 'nav', 'nbl', 'nde', 'ndo', 'nds', 'nep', 'new', 'nia', 'nic', + 'niu', 'nno', 'nob', 'nog', 'non', 'nor', 'nso', 'nub', 'nwc', 'nya', 'nym', 'nyn', + 'nyo', 'nzi', 'oci', 'oji', 'ori', 'orm', 'osa', 'oss', 'ota', 'oto', 'paa', 'pag', + 'pal', 'pam', 'pan', 'pap', 'pau', 'peo', 'per', 'phi', 'phn', 'pli', 'pol', 'pon', + 'por', 'pra', 'pro', 'pus', 'que', 'raj', 'rap', 'rar', 'roa', 'roh', 'rom', 'rum', + 'run', 'rup', 'rus', 'sad', 'sag', 'sah', 'sai', 'sal', 'sam', 'san', 'sas', 'sat', + 'scn', 'sco', 'sel', 'sem', 'sga', 'sgn', 'shn', 'sid', 'sin', 'sio', 'sit', 'sla', + 'slo', 'slv', 'sma', 'sme', 'smi', 'smj', 'smn', 'smo', 'sms', 'sna', 'snd', 'snk', + 'sog', 'som', 'son', 'sot', 'spa', 'srd', 'srp', 'srr', 'ssa', 'ssw', 'suk', 'sun', + 'sus', 'sux', 'swa', 'swe', 'syr', 'tah', 'tai', 'tam', 'tat', 'tel', 'tem', 'ter', + 'tet', 'tgk', 'tgl', 'tha', 'tib', 'tig', 'tir', 'tiv', 'tkl', 'tlh', 'tli', 'tmh', + 'tog', 'ton', 'tpi', 'tsi', 'tsn', 'tso', 'tuk', 'tum', 'tup', 'tur', 'tut', 'tvl', + 'twi', 'tyv', 'udm', 'uga', 'uig', 'ukr', 'umb', 'urd', 'uzb', 
'vai', 'ven', 'vie', + 'vol', 'vot', 'wak', 'wal', 'war', 'was', 'wel', 'wen', 'wln', 'wol', 'xal', 'xho', + 'yao', 'yap', 'yid', 'yor', 'ypk', 'zap', 'zen', 'zha', 'znd', 'zul', 'zun', + 'por-BR', 'rum-MD']) + language_map = {'mol': Language('rum-MD'), 'scc': Language('srp'), 'pob': Language('por-BR'), + Language('rum-MD'): 'mol', Language('srp'): 'scc', Language('por-BR'): 'pob'} + language_code = 'alpha3' + videos = [Episode, Movie] + require_video = False + confidence_order = ['moviehash', 'imdbid', 'fulltext'] + + def __init__(self, config=None): + super(OpenSubtitles, self).__init__(config) + self.server = xmlrpclib.ServerProxy(self.server_url) + self.token = None + + def init(self): + super(OpenSubtitles, self).init() + result = self.server.LogIn('', '', 'eng', self.user_agent) + if result['status'] != '200 OK': + raise ServiceError('Login failed') + self.token = result['token'] + + def terminate(self): + super(OpenSubtitles, self).terminate() + if self.token: + self.server.LogOut(self.token) + + def query(self, filepath, languages, moviehash=None, size=None, imdbid=None, query=None): + searches = [] + if moviehash and size: + searches.append({'moviehash': moviehash, 'moviebytesize': size}) + if imdbid: + searches.append({'imdbid': imdbid}) + if query: + searches.append({'query': query}) + if not searches: + raise ServiceError('One or more parameter missing') + for search in searches: + search['sublanguageid'] = ','.join(self.get_code(l) for l in languages) + logger.debug(u'Getting subtitles %r with token %s' % (searches, self.token)) + results = self.server.SearchSubtitles(self.token, searches) + if not results['data']: + logger.debug(u'Could not find subtitles for %r with token %s' % (searches, self.token)) + return [] + subtitles = [] + for result in results['data']: + language = self.get_language(result['SubLanguageID']) + path = get_subtitle_path(filepath, language, self.config.multi) + confidence = 1 - 
float(self.confidence_order.index(result['MatchedBy'])) / float(len(self.confidence_order)) + subtitle = ResultSubtitle(path, language, self.__class__.__name__.lower(), result['SubDownloadLink'], + release=to_unicode(result['SubFileName']), confidence=confidence) + subtitles.append(subtitle) + return subtitles + + def list_checked(self, video, languages): + results = [] + if video.exists: + results = self.query(video.path or video.release, languages, moviehash=video.hashes['OpenSubtitles'], size=str(video.size)) + elif video.imdbid: + results = self.query(video.path or video.release, languages, imdbid=video.imdbid) + elif isinstance(video, Episode): + results = self.query(video.path or video.release, languages, query=video.series) + elif isinstance(video, Movie): + results = self.query(video.path or video.release, languages, query=video.title) + return results + + def download(self, subtitle): + #TODO: Use OpenSubtitles DownloadSubtitles method + try: + self.download_file(subtitle.link, subtitle.path + '.gz') + with open(subtitle.path, 'wb') as dump: + gz = gzip.open(subtitle.path + '.gz') + dump.write(gz.read()) + gz.close() + except Exception as e: + if os.path.exists(subtitle.path): + os.remove(subtitle.path) + raise DownloadFailedError(str(e)) + finally: + if os.path.exists(subtitle.path + '.gz'): + os.remove(subtitle.path + '.gz') + return subtitle + + +Service = OpenSubtitles diff --git a/lib/subliminal/services/podnapisi.py b/lib/subliminal/services/podnapisi.py new file mode 100644 index 0000000000000000000000000000000000000000..108de211bae5aef010fdfd19cae27960b65b8d3e --- /dev/null +++ b/lib/subliminal/services/podnapisi.py @@ -0,0 +1,111 @@ +# -*- coding: utf-8 -*- +# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com> +# +# This file is part of subliminal. 
+# +# subliminal is free software; you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# subliminal is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with subliminal. If not, see <http://www.gnu.org/licenses/>. +from . import ServiceBase +from ..exceptions import ServiceError, DownloadFailedError +from ..language import language_set, Language +from ..subtitles import get_subtitle_path, ResultSubtitle +from ..utils import to_unicode +from ..videos import Episode, Movie +from hashlib import md5, sha256 +import logging +import xmlrpclib + + +logger = logging.getLogger("subliminal") + + +class Podnapisi(ServiceBase): + server_url = 'http://ssp.podnapisi.net:8000' + site_url = 'http://www.podnapisi.net' + api_based = True + languages = language_set(['ar', 'be', 'bg', 'bs', 'ca', 'ca', 'cs', 'da', 'de', 'el', 'en', + 'es', 'et', 'fa', 'fi', 'fr', 'ga', 'he', 'hi', 'hr', 'hu', 'id', + 'is', 'it', 'ja', 'ko', 'lt', 'lv', 'mk', 'ms', 'nl', 'nn', 'pl', + 'pt', 'ro', 'ru', 'sk', 'sl', 'sq', 'sr', 'sv', 'th', 'tr', 'uk', + 'vi', 'zh', 'es-ar', 'pt-br']) + language_map = {'jp': Language('jpn'), Language('jpn'): 'jp', + 'gr': Language('gre'), Language('gre'): 'gr', + 'pb': Language('por-BR'), Language('por-BR'): 'pb', + 'ag': Language('spa-AR'), Language('spa-AR'): 'ag', + 'cyr': Language('srp')} + videos = [Episode, Movie] + require_video = True + + def __init__(self, config=None): + super(Podnapisi, self).__init__(config) + self.server = xmlrpclib.ServerProxy(self.server_url) + self.token = None + + def init(self): + super(Podnapisi, 
self).init() + result = self.server.initiate(self.user_agent) + if result['status'] != 200: + raise ServiceError('Initiate failed') + username = 'python_subliminal' + password = sha256(md5('XWFXQ6gE5Oe12rv4qxXX').hexdigest() + result['nonce']).hexdigest() + self.token = result['session'] + result = self.server.authenticate(self.token, username, password) + if result['status'] != 200: + raise ServiceError('Authenticate failed') + + def terminate(self): + super(Podnapisi, self).terminate() + + def query(self, filepath, languages, moviehash): + results = self.server.search(self.token, [moviehash]) + if results['status'] != 200: + logger.error('Search failed with error code %d' % results['status']) + return [] + if not results['results'] or not results['results'][moviehash]['subtitles']: + logger.debug(u'Could not find subtitles for %r with token %s' % (moviehash, self.token)) + return [] + subtitles = [] + for result in results['results'][moviehash]['subtitles']: + language = self.get_language(result['lang']) + if language not in languages: + continue + path = get_subtitle_path(filepath, language, self.config.multi) + subtitle = ResultSubtitle(path, language, self.__class__.__name__.lower(), result['id'], + release=to_unicode(result['release']), confidence=result['weight']) + subtitles.append(subtitle) + if not subtitles: + return [] + # Convert weight to confidence + max_weight = float(max([s.confidence for s in subtitles])) + min_weight = float(min([s.confidence for s in subtitles])) + for subtitle in subtitles: + if max_weight == 0 and min_weight == 0: + subtitle.confidence = 1.0 + else: + subtitle.confidence = (subtitle.confidence - min_weight) / (max_weight - min_weight) + return subtitles + + def list_checked(self, video, languages): + results = self.query(video.path, languages, video.hashes['OpenSubtitles']) + return results + + def download(self, subtitle): + results = self.server.download(self.token, [subtitle.link]) + if results['status'] != 200: + raise 
DownloadFailedError() + subtitle.link = 'http://www.podnapisi.net/static/podnapisi/' + results['names'][0]['filename'] + self.download_file(subtitle.link, subtitle.path) + return subtitle + + +Service = Podnapisi diff --git a/lib/subliminal/services/podnapisiweb.py b/lib/subliminal/services/podnapisiweb.py new file mode 100644 index 0000000000000000000000000000000000000000..57397b751e94466a921e7ca7ae38d2135d084bc0 --- /dev/null +++ b/lib/subliminal/services/podnapisiweb.py @@ -0,0 +1,124 @@ +# -*- coding: utf-8 -*- +# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com> +# +# This file is part of subliminal. +# +# subliminal is free software; you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# subliminal is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with subliminal. If not, see <http://www.gnu.org/licenses/>. +from . 
import ServiceBase +from ..exceptions import DownloadFailedError +from ..language import Language, language_set +from ..subtitles import ResultSubtitle +from ..utils import get_keywords +from ..videos import Episode, Movie +from bs4 import BeautifulSoup +import guessit +import logging +import re +from subliminal.subtitles import get_subtitle_path + + +logger = logging.getLogger("subliminal") + + +class PodnapisiWeb(ServiceBase): + server_url = 'http://simple.podnapisi.net' + site_url = 'http://www.podnapisi.net' + api_based = True + user_agent = 'Subliminal/0.6' + videos = [Episode, Movie] + require_video = False + required_features = ['xml'] + languages = language_set(['Albanian', 'Arabic', 'Spanish (Argentina)', 'Belarusian', 'Bosnian', 'Portuguese (Brazil)', 'Bulgarian', 'Catalan', + 'Chinese', 'Croatian', 'Czech', 'Danish', 'Dutch', 'English', 'Estonian', 'Persian', + 'Finnish', 'French', 'German', 'gre', 'Kalaallisut', 'Hebrew', 'Hindi', 'Hungarian', + 'Icelandic', 'Indonesian', 'Irish', 'Italian', 'Japanese', 'Kazakh', 'Korean', 'Latvian', + 'Lithuanian', 'Macedonian', 'Malay', 'Norwegian', 'Polish', 'Portuguese', 'Romanian', + 'Russian', 'Serbian', 'Sinhala', 'Slovak', 'Slovenian', 'Spanish', 'Swedish', 'Thai', + 'Turkish', 'Ukrainian', 'Vietnamese']) + language_map = {Language('Albanian'): 29, Language('Arabic'): 12, Language('Spanish (Argentina)'): 14, Language('Belarusian'): 50, + Language('Bosnian'): 10, Language('Portuguese (Brazil)'): 48, Language('Bulgarian'): 33, Language('Catalan'): 53, + Language('Chinese'): 17, Language('Croatian'): 38, Language('Czech'): 7, Language('Danish'): 24, + Language('Dutch'): 23, Language('English'): 2, Language('Estonian'): 20, Language('Persian'): 52, + Language('Finnish'): 31, Language('French'): 8, Language('German'): 5, Language('gre'): 16, + Language('Kalaallisut'): 57, Language('Hebrew'): 22, Language('Hindi'): 42, Language('Hungarian'): 15, + Language('Icelandic'): 6, Language('Indonesian'): 54, 
Language('Irish'): 49, Language('Italian'): 9, + Language('Japanese'): 11, Language('Kazakh'): 58, Language('Korean'): 4, Language('Latvian'): 21, + Language('Lithuanian'): 19, Language('Macedonian'): 35, Language('Malay'): 55, + Language('Norwegian'): 3, Language('Polish'): 26, Language('Portuguese'): 32, Language('Romanian'): 13, + Language('Russian'): 27, Language('Serbian'): 36, Language('Sinhala'): 56, Language('Slovak'): 37, + Language('Slovenian'): 1, Language('Spanish'): 28, Language('Swedish'): 25, Language('Thai'): 44, + Language('Turkish'): 30, Language('Ukrainian'): 46, Language('Vietnamese'): 51, + 29: Language('Albanian'), 12: Language('Arabic'), 14: Language('Spanish (Argentina)'), 50: Language('Belarusian'), + 10: Language('Bosnian'), 48: Language('Portuguese (Brazil)'), 33: Language('Bulgarian'), 53: Language('Catalan'), + 17: Language('Chinese'), 38: Language('Croatian'), 7: Language('Czech'), 24: Language('Danish'), + 23: Language('Dutch'), 2: Language('English'), 20: Language('Estonian'), 52: Language('Persian'), + 31: Language('Finnish'), 8: Language('French'), 5: Language('German'), 16: Language('gre'), + 57: Language('Kalaallisut'), 22: Language('Hebrew'), 42: Language('Hindi'), 15: Language('Hungarian'), + 6: Language('Icelandic'), 54: Language('Indonesian'), 49: Language('Irish'), 9: Language('Italian'), + 11: Language('Japanese'), 58: Language('Kazakh'), 4: Language('Korean'), 21: Language('Latvian'), + 19: Language('Lithuanian'), 35: Language('Macedonian'), 55: Language('Malay'), 40: Language('Chinese'), + 3: Language('Norwegian'), 26: Language('Polish'), 32: Language('Portuguese'), 13: Language('Romanian'), + 27: Language('Russian'), 36: Language('Serbian'), 47: Language('Serbian'), 56: Language('Sinhala'), + 37: Language('Slovak'), 1: Language('Slovenian'), 28: Language('Spanish'), 25: Language('Swedish'), + 44: Language('Thai'), 30: Language('Turkish'), 46: Language('Ukrainian'), Language('Vietnamese'): 51} + + def list_checked(self, 
video, languages): + if isinstance(video, Movie): + return self.query(video.path or video.release, languages, video.title, year=video.year, + keywords=get_keywords(video.guess)) + if isinstance(video, Episode): + return self.query(video.path or video.release, languages, video.series, season=video.season, + episode=video.episode, keywords=get_keywords(video.guess)) + + def query(self, filepath, languages, title, season=None, episode=None, year=None, keywords=None): + params = {'sXML': 1, 'sK': title, 'sJ': ','.join([str(self.get_code(l)) for l in languages])} + if season is not None: + params['sTS'] = season + if episode is not None: + params['sTE'] = episode + if year is not None: + params['sY'] = year + if keywords is not None: + params['sR'] = keywords + r = self.session.get(self.server_url + '/ppodnapisi/search', params=params) + if r.status_code != 200: + logger.error(u'Request %s returned status code %d' % (r.url, r.status_code)) + return [] + subtitles = [] + soup = BeautifulSoup(r.content, self.required_features) + for sub in soup('subtitle'): + if 'n' in sub.flags: + logger.debug(u'Skipping hearing impaired') + continue + language = self.get_language(sub.languageId.text) + confidence = float(sub.rating.text) / 5.0 + sub_keywords = set() + for release in sub.release.text.split(): + sub_keywords |= get_keywords(guessit.guess_file_info(release + '.srt', 'autodetect')) + sub_path = get_subtitle_path(filepath, language, self.config.multi) + subtitle = ResultSubtitle(sub_path, language, self.__class__.__name__.lower(), + sub.url.text, confidence=confidence, keywords=sub_keywords) + subtitles.append(subtitle) + return subtitles + + def download(self, subtitle): + r = self.session.get(subtitle.link) + if r.status_code != 200: + raise DownloadFailedError() + soup = BeautifulSoup(r.content) + self.download_zip_file(self.server_url + soup.find('a', href=re.compile('download'))['href'], subtitle.path) + return subtitle + + +Service = PodnapisiWeb diff --git 
a/lib/subliminal/services/subswiki.py b/lib/subliminal/services/subswiki.py new file mode 100644 index 0000000000000000000000000000000000000000..2a3d57f8a55c0fd24a46fcab69605107f16b3e32 --- /dev/null +++ b/lib/subliminal/services/subswiki.py @@ -0,0 +1,100 @@ +# -*- coding: utf-8 -*- +# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com> +# +# This file is part of subliminal. +# +# subliminal is free software; you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# subliminal is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with subliminal. If not, see <http://www.gnu.org/licenses/>. +from . 
import ServiceBase +from ..exceptions import ServiceError +from ..language import language_set, Language +from ..subtitles import get_subtitle_path, ResultSubtitle +from ..utils import get_keywords, split_keyword +from ..videos import Episode, Movie +from bs4 import BeautifulSoup +import logging +import urllib + + +logger = logging.getLogger("subliminal") + + +class SubsWiki(ServiceBase): + server_url = 'http://www.subswiki.com' + site_url = 'http://www.subswiki.com' + api_based = False + languages = language_set(['eng-US', 'eng-GB', 'eng', 'fre', 'por-BR', 'por', 'spa-ES', u'spa', u'ita', u'cat']) + language_map = {u'Español': Language('spa'), u'Español (España)': Language('spa'), u'Español (Latinoamérica)': Language('spa'), + u'Català ': Language('cat'), u'Brazilian': Language('por-BR'), u'English (US)': Language('eng-US'), + u'English (UK)': Language('eng-GB')} + language_code = 'name' + videos = [Episode, Movie] + require_video = False + required_features = ['permissive'] + + def list_checked(self, video, languages): + results = [] + if isinstance(video, Episode): + results = self.query(video.path or video.release, languages, get_keywords(video.guess), series=video.series, season=video.season, episode=video.episode) + elif isinstance(video, Movie) and video.year: + results = self.query(video.path or video.release, languages, get_keywords(video.guess), movie=video.title, year=video.year) + return results + + def query(self, filepath, languages, keywords=None, series=None, season=None, episode=None, movie=None, year=None): + if series and season and episode: + request_series = series.lower().replace(' ', '_') + if isinstance(request_series, unicode): + request_series = request_series.encode('utf-8') + logger.debug(u'Getting subtitles for %s season %d episode %d with languages %r' % (series, season, episode, languages)) + r = self.session.get('%s/serie/%s/%s/%s/' % (self.server_url, urllib.quote(request_series), season, episode)) + if r.status_code == 404: + 
logger.debug(u'Could not find subtitles for %s season %d episode %d with languages %r' % (series, season, episode, languages)) + return [] + elif movie and year: + request_movie = movie.title().replace(' ', '_') + if isinstance(request_movie, unicode): + request_movie = request_movie.encode('utf-8') + logger.debug(u'Getting subtitles for %s (%d) with languages %r' % (movie, year, languages)) + r = self.session.get('%s/film/%s_(%d)' % (self.server_url, urllib.quote(request_movie), year)) + if r.status_code == 404: + logger.debug(u'Could not find subtitles for %s (%d) with languages %r' % (movie, year, languages)) + return [] + else: + raise ServiceError('One or more parameter missing') + if r.status_code != 200: + logger.error(u'Request %s returned status code %d' % (r.url, r.status_code)) + return [] + soup = BeautifulSoup(r.content, self.required_features) + subtitles = [] + for sub in soup('td', {'class': 'NewsTitle'}): + sub_keywords = split_keyword(sub.b.string.lower()) + if keywords and not keywords & sub_keywords: + logger.debug(u'None of subtitle keywords %r in %r' % (sub_keywords, keywords)) + continue + for html_language in sub.parent.parent.find_all('td', {'class': 'language'}): + language = self.get_language(html_language.string.strip()) + if language not in languages: + logger.debug(u'Language %r not in wanted languages %r' % (language, languages)) + continue + html_status = html_language.find_next_sibling('td') + status = html_status.strong.string.strip() + if status != 'Completado': + logger.debug(u'Wrong subtitle status %s' % status) + continue + path = get_subtitle_path(filepath, language, self.config.multi) + subtitle = ResultSubtitle(path, language, self.__class__.__name__.lower(), '%s%s' % (self.server_url, html_status.find_next('td').find('a')['href'])) + subtitles.append(subtitle) + return subtitles + + +Service = SubsWiki diff --git a/lib/subliminal/services/subtitulos.py b/lib/subliminal/services/subtitulos.py new file mode 100644 index 
0000000000000000000000000000000000000000..103b241c9797eb6241c1be0e3efd54d73adbf159 --- /dev/null +++ b/lib/subliminal/services/subtitulos.py @@ -0,0 +1,90 @@ +# -*- coding: utf-8 -*- +# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com> +# +# This file is part of subliminal. +# +# subliminal is free software; you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# subliminal is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with subliminal. If not, see <http://www.gnu.org/licenses/>. +from . import ServiceBase +from ..language import language_set, Language +from ..subtitles import get_subtitle_path, ResultSubtitle +from ..utils import get_keywords, split_keyword +from ..videos import Episode +from bs4 import BeautifulSoup +import logging +import re +import unicodedata +import urllib + + +logger = logging.getLogger("subliminal") + + +class Subtitulos(ServiceBase): + server_url = 'http://www.subtitulos.es' + site_url = 'http://www.subtitulos.es' + api_based = False + languages = language_set(['eng-US', 'eng-GB', 'eng', 'fre', 'por-BR', 'por', 'spa-ES', u'spa', u'ita', u'cat']) + language_map = {u'Español': Language('spa'), u'Español (España)': Language('spa'), #u'Español (Latinoamérica)': Language('spa'), + u'Català ': Language('cat'), u'Brazilian': Language('por-BR'), u'English (US)': Language('eng-US'), + u'English (UK)': Language('eng-GB'), 'Galego': Language('glg')} + language_code = 'name' + videos = [Episode] + require_video = False + required_features = ['permissive'] + # the '.+' in the pattern for Version allows 
us to match both 'ó' + # and the 'ó' char directly. This is because now BS4 converts the html + # code chars into their equivalent unicode char + release_pattern = re.compile('Versi.+n (.+) ([0-9]+).([0-9])+ megabytes') + extra_keywords_pattern = re.compile("(?:con|para)\s(?:720p)?(?:\-|\s)?([A-Za-z]+)(?:\-|\s)?(?:720p)?(?:\s|\.)(?:y\s)?(?:720p)?(?:\-\s)?([A-Za-z]+)?(?:\-\s)?(?:720p)?(?:\.)?"); + + def list_checked(self, video, languages): + return self.query(video.path or video.release, languages, get_keywords(video.guess), video.series, video.season, video.episode) + + def query(self, filepath, languages, keywords, series, season, episode): + request_series = series.lower().replace(' ', '_').replace('&', '@').replace('(','').replace(')','') + if isinstance(request_series, unicode): + request_series = unicodedata.normalize('NFKD', request_series).encode('ascii', 'ignore') + logger.debug(u'Getting subtitles for %s season %d episode %d with languages %r' % (series, season, episode, languages)) + r = self.session.get('%s/%s/%sx%.2d' % (self.server_url, urllib.quote(request_series), season, episode)) + if r.status_code == 404: + logger.debug(u'Could not find subtitles for %s season %d episode %d with languages %r' % (series, season, episode, languages)) + return [] + if r.status_code != 200: + logger.error(u'Request %s returned status code %d' % (r.url, r.status_code)) + return [] + soup = BeautifulSoup(r.content, self.required_features) + subtitles = [] + for sub in soup('div', {'id': 'version'}): + sub_keywords = split_keyword(self.release_pattern.search(sub.find('p', {'class': 'title-sub'}).contents[1]).group(1).lower()) + if keywords and not keywords & sub_keywords: + logger.debug(u'None of subtitle keywords %r in %r' % (sub_keywords, keywords)) + continue + for html_language in sub.findAllNext('ul', {'class': 'sslist'}): + language = self.get_language(html_language.findNext('li', {'class': 'li-idioma'}).find('strong').contents[0].string.strip()) + if language not 
in languages: + logger.debug(u'Language %r not in wanted languages %r' % (language, languages)) + continue + html_status = html_language.findNext('li', {'class': 'li-estado green'}) + status = html_status.contents[0].string.strip() + if status != 'Completado': + logger.debug(u'Wrong subtitle status %s' % status) + continue + path = get_subtitle_path(filepath, language, self.config.multi) + subtitle = ResultSubtitle(path, language, self.__class__.__name__.lower(), html_status.findNext('span', {'class': 'descargar green'}).find('a')['href'], + keywords=sub_keywords) + subtitles.append(subtitle) + return subtitles + + +Service = Subtitulos diff --git a/lib/subliminal/services/thesubdb.py b/lib/subliminal/services/thesubdb.py new file mode 100644 index 0000000000000000000000000000000000000000..9d2ced82bfcc5904721787d2dfbc738e2f96fc68 --- /dev/null +++ b/lib/subliminal/services/thesubdb.py @@ -0,0 +1,64 @@ +# -*- coding: utf-8 -*- +# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com> +# +# This file is part of subliminal. +# +# subliminal is free software; you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# subliminal is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with subliminal. If not, see <http://www.gnu.org/licenses/>. +from . 
import ServiceBase +from ..language import language_set +from ..subtitles import get_subtitle_path, ResultSubtitle +from ..videos import Episode, Movie, UnknownVideo +import logging + + +logger = logging.getLogger("subliminal") + + +class TheSubDB(ServiceBase): + server_url = 'http://api.thesubdb.com' + site_url = 'http://www.thesubdb.com/' + user_agent = 'SubDB/1.0 (subliminal/0.6; https://github.com/Diaoul/subliminal)' + api_based = True + # Source: http://api.thesubdb.com/?action=languages + languages = language_set(['af', 'cs', 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'id', 'it', + 'la', 'nl', 'no', 'oc', 'pl', 'pt', 'ro', 'ru', 'sl', 'sr', 'sv', + 'tr']) + videos = [Movie, Episode, UnknownVideo] + require_video = True + + def list_checked(self, video, languages): + return self.query(video.path, video.hashes['TheSubDB'], languages) + + def query(self, filepath, moviehash, languages): + r = self.session.get(self.server_url, params={'action': 'search', 'hash': moviehash}) + if r.status_code == 404: + logger.debug(u'Could not find subtitles for hash %s' % moviehash) + return [] + if r.status_code != 200: + logger.error(u'Request %s returned status code %d' % (r.url, r.status_code)) + return [] + available_languages = language_set(r.content.split(',')) + languages &= available_languages + if not languages: + logger.debug(u'Could not find subtitles for hash %s with languages %r (only %r available)' % (moviehash, languages, available_languages)) + return [] + subtitles = [] + for language in languages: + path = get_subtitle_path(filepath, language, self.config.multi) + subtitle = ResultSubtitle(path, language, self.__class__.__name__.lower(), '%s?action=download&hash=%s&language=%s' % (self.server_url, moviehash, language.alpha2)) + subtitles.append(subtitle) + return subtitles + + +Service = TheSubDB diff --git a/lib/subliminal/services/tvsubtitles.py b/lib/subliminal/services/tvsubtitles.py new file mode 100644 index 
0000000000000000000000000000000000000000..27992226d2d0b82d09bf4c4f2afa007a61624761 --- /dev/null +++ b/lib/subliminal/services/tvsubtitles.py @@ -0,0 +1,143 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 Nicolas Wack <wackou@gmail.com> +# +# This file is part of subliminal. +# +# subliminal is free software; you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# subliminal is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with subliminal. If not, see <http://www.gnu.org/licenses/>. +from . import ServiceBase +from ..cache import cachedmethod +from ..language import language_set, Language +from ..subtitles import get_subtitle_path, ResultSubtitle +from ..utils import get_keywords +from ..videos import Episode +from bs4 import BeautifulSoup +import logging +import re + + +logger = logging.getLogger("subliminal") + + +def match(pattern, string): + try: + return re.search(pattern, string).group(1) + except AttributeError: + logger.debug(u'Could not match %r on %r' % (pattern, string)) + return None + + +class TvSubtitles(ServiceBase): + server_url = 'http://www.tvsubtitles.net' + site_url = 'http://www.tvsubtitles.net' + api_based = False + languages = language_set(['ar', 'bg', 'cs', 'da', 'de', 'el', 'en', 'es', 'fi', 'fr', 'hu', + 'it', 'ja', 'ko', 'nl', 'pl', 'pt', 'ro', 'ru', 'sv', 'tr', 'uk', + 'zh', 'pt-br']) + #TODO: Find more exceptions + language_map = {'gr': Language('gre'), 'cz': Language('cze'), 'ua': Language('ukr'), + 'cn': Language('chi')} + videos = [Episode] + require_video = False + required_features = 
['permissive'] + + @cachedmethod + def get_likely_series_id(self, name): + r = self.session.post('%s/search.php' % self.server_url, data={'q': name}) + soup = BeautifulSoup(r.content, self.required_features) + maindiv = soup.find('div', 'left') + results = [] + for elem in maindiv.find_all('li'): + sid = int(match('tvshow-([0-9]+)\.html', elem.a['href'])) + show_name = match('(.*) \(', elem.a.text) + results.append((show_name, sid)) + #TODO: pick up the best one in a smart way + result = results[0] + return result[1] + + @cachedmethod + def get_episode_id(self, series_id, season, number): + """Get the TvSubtitles id for the given episode. Raises KeyError if none + could be found.""" + # download the page of the season, contains ids for all episodes + episode_id = None + r = self.session.get('%s/tvshow-%d-%d.html' % (self.server_url, series_id, season)) + soup = BeautifulSoup(r.content, self.required_features) + table = soup.find('table', id='table5') + for row in table.find_all('tr'): + cells = row.find_all('td') + if not cells: + continue + episode_number = match('x([0-9]+)', cells[0].text) + if not episode_number: + continue + episode_number = int(episode_number) + episode_id = int(match('episode-([0-9]+)', cells[1].a['href'])) + # we could just return the id of the queried episode, but as we + # already downloaded the whole page we might as well fill in the + # information for all the episodes of the season + self.cache_for(self.get_episode_id, args=(series_id, season, episode_number), result=episode_id) + # raises KeyError if not found + return self.cached_value(self.get_episode_id, args=(series_id, season, number)) + + # Do not cache this method in order to always check for the most recent + # subtitles + def get_sub_ids(self, episode_id): + subids = [] + r = self.session.get('%s/episode-%d.html' % (self.server_url, episode_id)) + epsoup = BeautifulSoup(r.content, self.required_features) + for subdiv in epsoup.find_all('a'): + if 'href' not in subdiv.attrs or 
not subdiv['href'].startswith('/subtitle'): + continue + subid = int(match('([0-9]+)', subdiv['href'])) + lang = self.get_language(match('flags/(.*).gif', subdiv.img['src'])) + result = {'subid': subid, 'language': lang} + for p in subdiv.find_all('p'): + if 'alt' in p.attrs and p['alt'] == 'rip': + result['rip'] = p.text.strip() + if 'alt' in p.attrs and p['alt'] == 'release': + result['release'] = p.text.strip() + subids.append(result) + return subids + + def list_checked(self, video, languages): + return self.query(video.path or video.release, languages, get_keywords(video.guess), video.series, video.season, video.episode) + + def query(self, filepath, languages, keywords, series, season, episode): + logger.debug(u'Getting subtitles for %s season %d episode %d with languages %r' % (series, season, episode, languages)) + self.init_cache() + sid = self.get_likely_series_id(series.lower()) + try: + ep_id = self.get_episode_id(sid, season, episode) + except KeyError: + logger.debug(u'Could not find episode id for %s season %d episode %d' % (series, season, episode)) + return [] + subids = self.get_sub_ids(ep_id) + # filter the subtitles with our queried languages + subtitles = [] + for subid in subids: + language = subid['language'] + if language not in languages: + continue + path = get_subtitle_path(filepath, language, self.config.multi) + subtitle = ResultSubtitle(path, language, self.__class__.__name__.lower(), '%s/download-%d.html' % (self.server_url, subid['subid']), + keywords=[subid['rip'], subid['release']]) + subtitles.append(subtitle) + return subtitles + + def download(self, subtitle): + self.download_zip_file(subtitle.link, subtitle.path) + return subtitle + + +Service = TvSubtitles diff --git a/lib/subliminal/subtitles.py b/lib/subliminal/subtitles.py new file mode 100644 index 0000000000000000000000000000000000000000..117871b38d971c250ecd8c026f1e9763de158768 --- /dev/null +++ b/lib/subliminal/subtitles.py @@ -0,0 +1,149 @@ +# -*- coding: utf-8 -*- +# 
Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com> +# +# This file is part of subliminal. +# +# subliminal is free software; you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# subliminal is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with subliminal. If not, see <http://www.gnu.org/licenses/>. +from .language import Language +from .utils import to_unicode +import os.path + + +__all__ = ['Subtitle', 'EmbeddedSubtitle', 'ExternalSubtitle', 'ResultSubtitle', 'get_subtitle_path'] + +#: Subtitles extensions +EXTENSIONS = ['.srt', '.sub', '.txt', '.ass'] + + +class Subtitle(object): + """Base class for subtitles + + :param string path: path to the subtitle + :param language: language of the subtitle + :type language: :class:`~subliminal.language.Language` + + """ + def __init__(self, path, language): + if not isinstance(language, Language): + raise TypeError('%r is not an instance of Language') + self.path = path + self.language = language + + @property + def exists(self): + """Whether the subtitle exists or not""" + if self.path: + return os.path.exists(self.path) + return False + + def __unicode__(self): + return to_unicode(self.path) + + def __str__(self): + return unicode(self).encode('utf-8') + + def __repr__(self): + return '%s(%s, %s)' % (self.__class__.__name__, self, self.language) + + +class EmbeddedSubtitle(Subtitle): + """Subtitle embedded in a container + + :param string path: path to the subtitle + :param language: language of the subtitle + :type language: :class:`~subliminal.language.Language` + 
:param int track_id: id of the subtitle track in the container + + """ + def __init__(self, path, language, track_id): + super(EmbeddedSubtitle, self).__init__(path, language) + self.track_id = track_id + + @classmethod + def from_enzyme(cls, path, subtitle): + language = Language(subtitle.language, strict=False) + return cls(path, language, subtitle.trackno) + + +class ExternalSubtitle(Subtitle): + """Subtitle in a file next to the video file""" + @classmethod + def from_path(cls, path): + """Create an :class:`ExternalSubtitle` from path""" + extension = None + for e in EXTENSIONS: + if path.endswith(e): + extension = e + break + if extension is None: + raise ValueError('Not a supported subtitle extension') + language = Language(os.path.splitext(path[:len(path) - len(extension)])[1][1:], strict=False) + return cls(path, language) + + +class ResultSubtitle(ExternalSubtitle): + """Subtitle found using :mod:`~subliminal.services` + + :param string path: path to the subtitle + :param language: language of the subtitle + :type language: :class:`~subliminal.language.Language` + :param string service: name of the service + :param string link: download link for the subtitle + :param string release: release name of the video + :param float confidence: confidence that the subtitle matches the video according to the service + :param set keywords: keywords that describe the subtitle + + """ + def __init__(self, path, language, service, link, release=None, confidence=1, keywords=None): + super(ResultSubtitle, self).__init__(path, language) + self.service = service + self.link = link + self.release = release + self.confidence = confidence + self.keywords = keywords or set() + + @property + def single(self): + """Whether this is a single subtitle or not. 
A single subtitle does not have + a language indicator in its file name + + :rtype: bool + + """ + return self.language == Language('Undetermined') + + def __repr__(self): + if not self.release: + return 'ResultSubtitle(%s, %s, %s, %.2f)' % (self.path, self.language, self.service, self.confidence) + return 'ResultSubtitle(%s, %s, %s, %.2f, release=%s)' % (self.path, self.language, self.service, self.confidence, self.release.encode('ascii', 'ignore')) + + +def get_subtitle_path(video_path, language, multi): + """Create the subtitle path from the given video path using language if multi + + :param string video_path: path to the video + :param language: language of the subtitle + :type language: :class:`~subliminal.language.Language` + :param bool multi: whether to use multi language naming or not + :return: path of the subtitle + :rtype: string + + """ + if not os.path.exists(video_path): + path = os.path.splitext(os.path.basename(video_path))[0] + else: + path = os.path.splitext(video_path)[0] + if multi and language: + return path + '.%s%s' % (language.alpha2, EXTENSIONS[0]) + return path + '%s' % EXTENSIONS[0] diff --git a/lib/subliminal/tasks.py b/lib/subliminal/tasks.py new file mode 100644 index 0000000000000000000000000000000000000000..bccf9ab53835710297d07ca8565fc7e209d57eb4 --- /dev/null +++ b/lib/subliminal/tasks.py @@ -0,0 +1,68 @@ +# -*- coding: utf-8 -*- +# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com> +# +# This file is part of subliminal. +# +# subliminal is free software; you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# subliminal is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with subliminal. If not, see <http://www.gnu.org/licenses/>. +__all__ = ['Task', 'ListTask', 'DownloadTask', 'StopTask'] + + +class Task(object): + """Base class for tasks to use in subliminal""" + pass + + +class ListTask(Task): + """List task used by the worker to search for subtitles + + :param video: video to search subtitles for + :type video: :class:`~subliminal.videos.Video` + :param list languages: languages to search for + :param string service: name of the service to use + :param config: configuration for the service + :type config: :class:`~subliminal.services.ServiceConfig` + + """ + def __init__(self, video, languages, service, config): + super(ListTask, self).__init__() + self.video = video + self.service = service + self.languages = languages + self.config = config + + def __repr__(self): + return 'ListTask(%r, %r, %s, %r)' % (self.video, self.languages, self.service, self.config) + + +class DownloadTask(Task): + """Download task used by the worker to download subtitles + + :param video: video to download subtitles for + :type video: :class:`~subliminal.videos.Video` + :param subtitles: subtitles to download in order of preference + :type subtitles: list of :class:`~subliminal.subtitles.Subtitle` + + """ + def __init__(self, video, subtitles): + super(DownloadTask, self).__init__() + self.video = video + self.subtitles = subtitles + + def __repr__(self): + return 'DownloadTask(%r, %r)' % (self.video, self.subtitles) + + +class StopTask(Task): + """Stop task that will stop the worker""" + pass diff --git a/lib/subliminal/utils.py b/lib/subliminal/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..e4fe4e8583045efa4c892869016bf25eee8721fd --- /dev/null +++ b/lib/subliminal/utils.py @@ -0,0 +1,69 @@ +# -*- coding: utf-8 -*- +# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com> +# +# This file is part of subliminal. 
+# +# subliminal is free software; you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# subliminal is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with subliminal. If not, see <http://www.gnu.org/licenses/>. +import re + + +__all__ = ['get_keywords', 'split_keyword', 'to_unicode'] + + +def get_keywords(guess): + """Retrieve keywords from guessed informations + + :param guess: guessed informations + :type guess: :class:`guessit.guess.Guess` + :return: lower case alphanumeric keywords + :rtype: set + + """ + keywords = set() + for k in ['releaseGroup', 'screenSize', 'videoCodec', 'format']: + if k in guess: + keywords = keywords | split_keyword(guess[k].lower()) + return keywords + + +def split_keyword(keyword): + """Split a keyword in multiple ones on any non-alphanumeric character + + :param string keyword: keyword + :return: keywords + :rtype: set + + """ + split = set(re.findall(r'\w+', keyword)) + return split + + +def to_unicode(data): + """Convert a basestring to unicode + + :param basestring data: data to decode + :return: data as unicode + :rtype: unicode + + """ + if not isinstance(data, basestring): + raise ValueError('Basestring expected') + if isinstance(data, unicode): + return data + for encoding in ('utf-8', 'latin-1'): + try: + return unicode(data, encoding) + except UnicodeDecodeError: + pass + return unicode(data, 'utf-8', 'replace') diff --git a/lib/subliminal/videos.py b/lib/subliminal/videos.py new file mode 100644 index 
0000000000000000000000000000000000000000..e9838329506b627fd6b02650e6e9e43a0fedeb89 --- /dev/null +++ b/lib/subliminal/videos.py @@ -0,0 +1,293 @@ +# -*- coding: utf-8 -*- +# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com> +# +# This file is part of subliminal. +# +# subliminal is free software; you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# subliminal is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with subliminal. If not, see <http://www.gnu.org/licenses/>. +from . import subtitles +from .language import Language +from .utils import to_unicode +import enzyme.core +import guessit +import hashlib +import logging +import mimetypes +import os +import struct + +from sickbeard import encodingKludge as ek +import sickbeard + + +__all__ = ['EXTENSIONS', 'MIMETYPES', 'Video', 'Episode', 'Movie', 'UnknownVideo', + 'scan', 'hash_opensubtitles', 'hash_thesubdb'] +logger = logging.getLogger("subliminal") + +#: Video extensions +EXTENSIONS = ['.avi', '.mkv', '.mpg', '.mp4', '.m4v', '.mov', '.ogm', '.ogv', '.wmv', + '.divx', '.asf'] + +#: Video mimetypes +MIMETYPES = ['video/mpeg', 'video/mp4', 'video/quicktime', 'video/x-ms-wmv', 'video/x-msvideo', + 'video/x-flv', 'video/x-matroska', 'video/x-matroska-3d'] + + +class Video(object): + """Base class for videos + + :param string path: path + :param guess: guessed informations + :type guess: :class:`~guessit.guess.Guess` + :param string imdbid: imdbid + + """ + def __init__(self, path, guess, imdbid=None): + self.release = path + self.guess = guess + self.imdbid = imdbid 
+ self._path = None + self.hashes = {} + if os.path.exists(path): + self._path = path + self.size = os.path.getsize(self._path) + self._compute_hashes() + + @classmethod + def from_path(cls, path): + """Create a :class:`Video` subclass guessing all informations from the given path + + :param string path: path + :return: video object + :rtype: :class:`Episode` or :class:`Movie` or :class:`UnknownVideo` + + """ + guess = guessit.guess_file_info(path, 'autodetect') + result = None + if guess['type'] == 'episode' and 'series' in guess and 'season' in guess and 'episodeNumber' in guess: + title = None + if 'title' in guess: + title = guess['title'] + result = Episode(path, guess['series'], guess['season'], guess['episodeNumber'], title, guess) + if guess['type'] == 'movie' and 'title' in guess: + year = None + if 'year' in guess: + year = guess['year'] + result = Movie(path, guess['title'], year, guess) + if not result: + result = UnknownVideo(path, guess) + if not isinstance(result, cls): + raise ValueError('Video is not of requested type') + return result + + @property + def exists(self): + """Whether the video exists or not""" + if self._path: + return os.path.exists(self._path) + return False + + @property + def path(self): + """Path to the video""" + return self._path + + @path.setter + def path(self, value): + if not os.path.exists(value): + raise ValueError('Path does not exists') + self._path = value + self.size = os.path.getsize(self._path) + self._compute_hashes() + + def _compute_hashes(self): + """Compute different hashes""" + self.hashes['OpenSubtitles'] = hash_opensubtitles(self.path) + self.hashes['TheSubDB'] = hash_thesubdb(self.path) + + def scan(self): + """Scan and return associated subtitles + + :return: associated subtitles + :rtype: list of :class:`~subliminal.subtitles.Subtitle` + + """ + if not self.exists: + return [] + basepath = os.path.splitext(self.path)[0] + results = [] + video_infos = None + try: + video_infos = enzyme.parse(self.path) + 
logger.debug(u'Succeeded parsing %s with enzyme: %r' % (self.path, video_infos)) + except: + logger.debug(u'Failed parsing %s with enzyme' % self.path) + if isinstance(video_infos, enzyme.core.AVContainer): + results.extend([subtitles.EmbeddedSubtitle.from_enzyme(self.path, s) for s in video_infos.subtitles]) + # cannot use glob here because it chokes if there are any square + # brackets inside the filename, so we have to use basic string + # startswith/endswith comparisons + folder, basename = os.path.split(basepath) + if folder == '': + folder = '.' + existing = [f for f in os.listdir(folder) if f.startswith(basename)] + if sickbeard.SUBTITLES_DIR: + subsDir = ek.ek(os.path.join, folder, sickbeard.SUBTITLES_DIR) + if ek.ek(os.path.isdir, subsDir): + existing.extend([f for f in os.listdir(subsDir) if f.startswith(basename)]) + for path in existing: + for ext in subtitles.EXTENSIONS: + if path.endswith(ext): + language = Language(path[len(basename) + 1:-len(ext)], strict=False) + results.append(subtitles.ExternalSubtitle(path, language)) + return results + + def __unicode__(self): + return to_unicode(self.path or self.release) + + def __str__(self): + return unicode(self).encode('utf-8') + + def __repr__(self): + return '%s(%s)' % (self.__class__.__name__, self) + + def __hash__(self): + return hash(self.path or self.release) + + +class Episode(Video): + """Episode :class:`Video` + + :param string path: path + :param string series: series + :param int season: season number + :param int episode: episode number + :param string title: title + :param guess: guessed informations + :type guess: :class:`~guessit.guess.Guess` + :param string tvdbid: tvdbid + :param string imdbid: imdbid + + """ + def __init__(self, path, series, season, episode, title=None, guess=None, tvdbid=None, imdbid=None): + super(Episode, self).__init__(path, guess, imdbid) + self.series = series + self.title = title + self.season = season + self.episode = episode + self.tvdbid = tvdbid + + +class 
Movie(Video): + """Movie :class:`Video` + + :param string path: path + :param string title: title + :param int year: year + :param guess: guessed informations + :type guess: :class:`~guessit.guess.Guess` + :param string imdbid: imdbid + + """ + def __init__(self, path, title, year=None, guess=None, imdbid=None): + super(Movie, self).__init__(path, guess, imdbid) + self.title = title + self.year = year + + +class UnknownVideo(Video): + """Unknown video""" + pass + + +def scan(entry, max_depth=3, scan_filter=None, depth=0): + """Scan a path for videos and subtitles + + :param string entry: path + :param int max_depth: maximum folder depth + :param function scan_filter: filter function that takes a path as argument and returns a boolean indicating whether it has to be filtered out (``True``) or not (``False``) + :param int depth: starting depth + :return: found videos and subtitles + :rtype: list of (:class:`Video`, [:class:`~subliminal.subtitles.Subtitle`]) + + """ + if depth > max_depth and max_depth != 0: # we do not want to search the whole file system except if max_depth = 0 + return [] + if os.path.isdir(entry): # a dir? 
recurse + logger.debug(u'Scanning directory %s with depth %d/%d' % (entry, depth, max_depth)) + result = [] + for e in os.listdir(entry): + result.extend(scan(os.path.join(entry, e), max_depth, scan_filter, depth + 1)) + return result + if os.path.isfile(entry) or depth == 0: + logger.debug(u'Scanning file %s with depth %d/%d' % (entry, depth, max_depth)) + if depth != 0: # trust the user: only check for valid format if recursing + if mimetypes.guess_type(entry)[0] not in MIMETYPES and os.path.splitext(entry)[1] not in EXTENSIONS: + return [] + if scan_filter is not None and scan_filter(entry): + return [] + video = Video.from_path(entry) + return [(video, video.scan())] + logger.warning(u'Scanning entry %s failed with depth %d/%d' % (entry, depth, max_depth)) + return [] # anything else + + +def hash_opensubtitles(path): + """Compute a hash using OpenSubtitles' algorithm + + :param string path: path + :return: hash + :rtype: string + + """ + longlongformat = 'q' # long long + bytesize = struct.calcsize(longlongformat) + with open(path, 'rb') as f: + filesize = os.path.getsize(path) + filehash = filesize + if filesize < 65536 * 2: + return None + for _ in range(65536 / bytesize): + filebuffer = f.read(bytesize) + (l_value,) = struct.unpack(longlongformat, filebuffer) + filehash += l_value + filehash = filehash & 0xFFFFFFFFFFFFFFFF # to remain as 64bit number + f.seek(max(0, filesize - 65536), 0) + for _ in range(65536 / bytesize): + filebuffer = f.read(bytesize) + (l_value,) = struct.unpack(longlongformat, filebuffer) + filehash += l_value + filehash = filehash & 0xFFFFFFFFFFFFFFFF + returnedhash = '%016x' % filehash + logger.debug(u'Computed OpenSubtitle hash %s for %s' % (returnedhash, path)) + return returnedhash + + +def hash_thesubdb(path): + """Compute a hash using TheSubDB's algorithm + + :param string path: path + :return: hash + :rtype: string + + """ + readsize = 64 * 1024 + if os.path.getsize(path) < readsize: + return None + with open(path, 'rb') as f: 
+ data = f.read(readsize) + f.seek(-readsize, os.SEEK_END) + data += f.read(readsize) + returnedhash = hashlib.md5(data).hexdigest() + logger.debug(u'Computed TheSubDB hash %s for %s' % (returnedhash, path)) + return returnedhash diff --git a/sickbeard/__init__.py b/sickbeard/__init__.py index 608e35ac08500383620727199eecae98c807b9df..80eeede0f82011e704289fc249f58143321254a3 100755 --- a/sickbeard/__init__.py +++ b/sickbeard/__init__.py @@ -33,7 +33,7 @@ from sickbeard import providers, metadata from providers import ezrss, tvtorrents, torrentleech, btn, nzbsrus, newznab, womble, nzbx, omgwtfnzbs, binnewz, t411 from sickbeard.config import CheckSection, check_setting_int, check_setting_str, ConfigMigrator -from sickbeard import searchCurrent, searchBacklog, showUpdater, versionChecker, properFinder, autoPostProcesser +from sickbeard import searchCurrent, searchBacklog, showUpdater, versionChecker, properFinder, autoPostProcesser, subtitles from sickbeard import helpers, db, exceptions, show_queue, search_queue, scheduler from sickbeard import logger from sickbeard import naming @@ -76,7 +76,7 @@ showQueueScheduler = None searchQueueScheduler = None properFinderScheduler = None autoPostProcesserScheduler = None - +subtitlesFinderScheduler = None showList = None loadingShowList = None @@ -127,6 +127,7 @@ QUALITY_DEFAULT = None STATUS_DEFAULT = None FLATTEN_FOLDERS_DEFAULT = None AUDIO_SHOW_DEFAULT = None +SUBTITLES_DEFAULT = None PROVIDER_ORDER = [] NAMING_MULTI_EP = None @@ -229,6 +230,7 @@ TORRENT_LABEL = '' USE_XBMC = False XBMC_NOTIFY_ONSNATCH = False XBMC_NOTIFY_ONDOWNLOAD = False +XBMC_NOTIFY_ONSUBTITLEDOWNLOAD = False XBMC_UPDATE_LIBRARY = False XBMC_UPDATE_FULL = False XBMC_UPDATE_ONLYFIRST = False @@ -239,6 +241,7 @@ XBMC_PASSWORD = None USE_PLEX = False PLEX_NOTIFY_ONSNATCH = False PLEX_NOTIFY_ONDOWNLOAD = False +PLEX_NOTIFY_ONSUBTITLEDOWNLOAD = False PLEX_UPDATE_LIBRARY = False PLEX_SERVER_HOST = None PLEX_HOST = None @@ -248,18 +251,21 @@ PLEX_PASSWORD = 
None USE_GROWL = False GROWL_NOTIFY_ONSNATCH = False GROWL_NOTIFY_ONDOWNLOAD = False +GROWL_NOTIFY_ONSUBTITLEDOWNLOAD = False GROWL_HOST = '' GROWL_PASSWORD = None USE_PROWL = False PROWL_NOTIFY_ONSNATCH = False PROWL_NOTIFY_ONDOWNLOAD = False +PROWL_NOTIFY_ONSUBTITLEDOWNLOAD = False PROWL_API = None PROWL_PRIORITY = 0 USE_TWITTER = False TWITTER_NOTIFY_ONSNATCH = False TWITTER_NOTIFY_ONDOWNLOAD = False +TWITTER_NOTIFY_ONSUBTITLEDOWNLOAD = False TWITTER_USERNAME = None TWITTER_PASSWORD = None TWITTER_PREFIX = None @@ -267,6 +273,7 @@ TWITTER_PREFIX = None USE_NOTIFO = False NOTIFO_NOTIFY_ONSNATCH = False NOTIFO_NOTIFY_ONDOWNLOAD = False +NOTIFO_NOTIFY_ONSUBTITLEDOWNLOAD = False NOTIFO_USERNAME = None NOTIFO_APISECRET = None NOTIFO_PREFIX = None @@ -274,6 +281,7 @@ NOTIFO_PREFIX = None USE_BOXCAR = False BOXCAR_NOTIFY_ONSNATCH = False BOXCAR_NOTIFY_ONDOWNLOAD = False +BOXCAR_NOTIFY_ONSUBTITLEDOWNLOAD = False BOXCAR_USERNAME = None BOXCAR_PASSWORD = None BOXCAR_PREFIX = None @@ -281,11 +289,13 @@ BOXCAR_PREFIX = None USE_PUSHOVER = False PUSHOVER_NOTIFY_ONSNATCH = False PUSHOVER_NOTIFY_ONDOWNLOAD = False +PUSHOVER_NOTIFY_ONSUBTITLEDOWNLOAD = False PUSHOVER_USERKEY = None USE_LIBNOTIFY = False LIBNOTIFY_NOTIFY_ONSNATCH = False LIBNOTIFY_NOTIFY_ONDOWNLOAD = False +LIBNOTIFY_NOTIFY_ONSUBTITLEDOWNLOAD = False USE_NMJ = False NMJ_HOST = None @@ -307,6 +317,7 @@ TRAKT_API = '' USE_PYTIVO = False PYTIVO_NOTIFY_ONSNATCH = False PYTIVO_NOTIFY_ONDOWNLOAD = False +PYTIVO_NOTIFY_ONSUBTITLEDOWNLOAD = False PYTIVO_UPDATE_LIBRARY = False PYTIVO_HOST = '' PYTIVO_SHARE_NAME = '' @@ -315,18 +326,27 @@ PYTIVO_TIVO_NAME = '' USE_NMA = False NMA_NOTIFY_ONSNATCH = False NMA_NOTIFY_ONDOWNLOAD = False +NMA_NOTIFY_ONSUBTITLEDOWNLOAD = False NMA_API = None NMA_PRIORITY = 0 COMING_EPS_LAYOUT = None COMING_EPS_DISPLAY_PAUSED = None COMING_EPS_SORT = None +COMING_EPS_MISSED_RANGE = None + +USE_SUBTITLES = False +SUBTITLES_LANGUAGES = [] +SUBTITLES_DIR = '' +SUBTITLES_SERVICES_LIST = [] 
+SUBTITLES_SERVICES_ENABLED = [] +SUBTITLES_HISTORY = False EXTRA_SCRIPTS = [] GIT_PATH = None -IGNORE_WORDS = "german,french,core2hd,dutch,swedish" +IGNORE_WORDS = "german,spanish,core2hd,dutch,swedish" __INITIALIZED__ = False @@ -345,21 +365,21 @@ def initialize(consoleLogging=True): SAB_USERNAME, SAB_PASSWORD, SAB_APIKEY, SAB_CATEGORY, SAB_HOST, \ NZBGET_PASSWORD, NZBGET_CATEGORY, NZBGET_HOST, currentSearchScheduler, backlogSearchScheduler, \ TORRENT_USERNAME, TORRENT_PASSWORD, TORRENT_HOST, TORRENT_PATH, TORRENT_RATIO, TORRENT_PAUSED, TORRENT_LABEL, \ - USE_XBMC, XBMC_NOTIFY_ONSNATCH, XBMC_NOTIFY_ONDOWNLOAD, XBMC_UPDATE_FULL, XBMC_UPDATE_ONLYFIRST, \ + USE_XBMC, XBMC_NOTIFY_ONSNATCH, XBMC_NOTIFY_ONDOWNLOAD, XBMC_NOTIFY_ONSUBTITLEDOWNLOAD, XBMC_UPDATE_FULL, XBMC_UPDATE_ONLYFIRST, \ XBMC_UPDATE_LIBRARY, XBMC_HOST, XBMC_USERNAME, XBMC_PASSWORD, \ USE_TRAKT, TRAKT_USERNAME, TRAKT_PASSWORD, TRAKT_API, \ - USE_PLEX, PLEX_NOTIFY_ONSNATCH, PLEX_NOTIFY_ONDOWNLOAD, PLEX_UPDATE_LIBRARY, \ + USE_PLEX, PLEX_NOTIFY_ONSNATCH, PLEX_NOTIFY_ONDOWNLOAD, PLEX_NOTIFY_ONSUBTITLEDOWNLOAD, PLEX_UPDATE_LIBRARY, \ PLEX_SERVER_HOST, PLEX_HOST, PLEX_USERNAME, PLEX_PASSWORD, \ showUpdateScheduler, __INITIALIZED__, LAUNCH_BROWSER, showList, loadingShowList, \ NZBS, NZBS_UID, NZBS_HASH, EZRSS, TVTORRENTS, TVTORRENTS_DIGEST, TVTORRENTS_HASH, BTN, BTN_API_KEY, TORRENTLEECH, TORRENTLEECH_KEY, TORRENT_DIR, USENET_RETENTION, SOCKET_TIMEOUT, \ BINNEWZ, \ T411, T411_USERNAME, T411_PASSWORD, \ SEARCH_FREQUENCY, DEFAULT_SEARCH_FREQUENCY, BACKLOG_SEARCH_FREQUENCY, \ - QUALITY_DEFAULT, FLATTEN_FOLDERS_DEFAULT, STATUS_DEFAULT, AUDIO_SHOW_DEFAULT, \ - GROWL_NOTIFY_ONSNATCH, GROWL_NOTIFY_ONDOWNLOAD, TWITTER_NOTIFY_ONSNATCH, TWITTER_NOTIFY_ONDOWNLOAD, \ - USE_GROWL, GROWL_HOST, GROWL_PASSWORD, USE_PROWL, PROWL_NOTIFY_ONSNATCH, PROWL_NOTIFY_ONDOWNLOAD, PROWL_API, PROWL_PRIORITY, PROG_DIR, NZBMATRIX, NZBMATRIX_USERNAME, \ - USE_PYTIVO, PYTIVO_NOTIFY_ONSNATCH, PYTIVO_NOTIFY_ONDOWNLOAD, PYTIVO_UPDATE_LIBRARY, 
PYTIVO_HOST, PYTIVO_SHARE_NAME, PYTIVO_TIVO_NAME, \ - USE_NMA, NMA_NOTIFY_ONSNATCH, NMA_NOTIFY_ONDOWNLOAD, NMA_API, NMA_PRIORITY, \ + QUALITY_DEFAULT, FLATTEN_FOLDERS_DEFAULT, SUBTITLES_DEFAULT, STATUS_DEFAULT, AUDIO_SHOW_DEFAULT, \ + GROWL_NOTIFY_ONSNATCH, GROWL_NOTIFY_ONDOWNLOAD, GROWL_NOTIFY_ONSUBTITLEDOWNLOAD, TWITTER_NOTIFY_ONSNATCH, TWITTER_NOTIFY_ONDOWNLOAD, TWITTER_NOTIFY_ONSUBTITLEDOWNLOAD, \ + USE_GROWL, GROWL_HOST, GROWL_PASSWORD, USE_PROWL, PROWL_NOTIFY_ONSNATCH, PROWL_NOTIFY_ONDOWNLOAD, PROWL_NOTIFY_ONSUBTITLEDOWNLOAD, PROWL_API, PROWL_PRIORITY, PROG_DIR, NZBMATRIX, NZBMATRIX_USERNAME, \ + USE_PYTIVO, PYTIVO_NOTIFY_ONSNATCH, PYTIVO_NOTIFY_ONDOWNLOAD, PYTIVO_NOTIFY_ONSUBTITLEDOWNLOAD, PYTIVO_UPDATE_LIBRARY, PYTIVO_HOST, PYTIVO_SHARE_NAME, PYTIVO_TIVO_NAME, \ + USE_NMA, NMA_NOTIFY_ONSNATCH, NMA_NOTIFY_ONDOWNLOAD, NMA_NOTIFY_ONSUBTITLEDOWNLOAD, NMA_API, NMA_PRIORITY, \ NZBMATRIX_APIKEY, versionCheckScheduler, VERSION_NOTIFY, PROCESS_AUTOMATICALLY, \ KEEP_PROCESSED_DIR, TV_DOWNLOAD_DIR, TVDB_BASE_URL, MIN_SEARCH_FREQUENCY, \ showQueueScheduler, searchQueueScheduler, ROOT_DIRS, CACHE_DIR, ACTUAL_CACHE_DIR, TVDB_API_PARMS, \ @@ -367,14 +387,14 @@ def initialize(consoleLogging=True): RENAME_EPISODES, properFinderScheduler, PROVIDER_ORDER, autoPostProcesserScheduler, \ NZBSRUS, NZBSRUS_UID, NZBSRUS_HASH, WOMBLE, NZBX, NZBX_COMPLETION, OMGWTFNZBS, OMGWTFNZBS_UID, OMGWTFNZBS_KEY, providerList, newznabProviderList, \ EXTRA_SCRIPTS, USE_TWITTER, TWITTER_USERNAME, TWITTER_PASSWORD, TWITTER_PREFIX, \ - USE_NOTIFO, NOTIFO_USERNAME, NOTIFO_APISECRET, NOTIFO_NOTIFY_ONDOWNLOAD, NOTIFO_NOTIFY_ONSNATCH, \ - USE_BOXCAR, BOXCAR_USERNAME, BOXCAR_PASSWORD, BOXCAR_NOTIFY_ONDOWNLOAD, BOXCAR_NOTIFY_ONSNATCH, \ - USE_PUSHOVER, PUSHOVER_USERKEY, PUSHOVER_NOTIFY_ONDOWNLOAD, PUSHOVER_NOTIFY_ONSNATCH, \ - USE_LIBNOTIFY, LIBNOTIFY_NOTIFY_ONSNATCH, LIBNOTIFY_NOTIFY_ONDOWNLOAD, USE_NMJ, NMJ_HOST, NMJ_DATABASE, NMJ_MOUNT, USE_NMJv2, NMJv2_HOST, NMJv2_DATABASE, NMJv2_DBLOC, USE_SYNOINDEX, 
\ + USE_NOTIFO, NOTIFO_USERNAME, NOTIFO_APISECRET, NOTIFO_NOTIFY_ONDOWNLOAD, NOTIFO_NOTIFY_ONSUBTITLEDOWNLOAD, NOTIFO_NOTIFY_ONSNATCH, \ + USE_BOXCAR, BOXCAR_USERNAME, BOXCAR_PASSWORD, BOXCAR_NOTIFY_ONDOWNLOAD, BOXCAR_NOTIFY_ONSUBTITLEDOWNLOAD, BOXCAR_NOTIFY_ONSNATCH, \ + USE_PUSHOVER, PUSHOVER_USERKEY, PUSHOVER_NOTIFY_ONDOWNLOAD, PUSHOVER_NOTIFY_ONSUBTITLEDOWNLOAD, PUSHOVER_NOTIFY_ONSNATCH, \ + USE_LIBNOTIFY, LIBNOTIFY_NOTIFY_ONSNATCH, LIBNOTIFY_NOTIFY_ONDOWNLOAD, LIBNOTIFY_NOTIFY_ONSUBTITLEDOWNLOAD, USE_NMJ, NMJ_HOST, NMJ_DATABASE, NMJ_MOUNT, USE_NMJv2, NMJv2_HOST, NMJv2_DATABASE, NMJv2_DBLOC, USE_SYNOINDEX, \ USE_BANNER, USE_LISTVIEW, METADATA_XBMC, METADATA_MEDIABROWSER, METADATA_PS3, METADATA_SYNOLOGY, metadata_provider_dict, \ NEWZBIN, NEWZBIN_USERNAME, NEWZBIN_PASSWORD, GIT_PATH, MOVE_ASSOCIATED_FILES, \ - COMING_EPS_LAYOUT, COMING_EPS_SORT, COMING_EPS_DISPLAY_PAUSED, METADATA_WDTV, METADATA_TIVO, IGNORE_WORDS, CREATE_MISSING_SHOW_DIRS, \ - ADD_SHOWS_WO_DIR + COMING_EPS_LAYOUT, COMING_EPS_SORT, COMING_EPS_DISPLAY_PAUSED, COMING_EPS_MISSED_RANGE, METADATA_WDTV, METADATA_TIVO, IGNORE_WORDS, CREATE_MISSING_SHOW_DIRS, \ + ADD_SHOWS_WO_DIR, USE_SUBTITLES, SUBTITLES_LANGUAGES, SUBTITLES_DIR, SUBTITLES_SERVICES_LIST, SUBTITLES_SERVICES_ENABLED, SUBTITLES_HISTORY, subtitlesFinderScheduler if __INITIALIZED__: return False @@ -645,6 +665,7 @@ def initialize(consoleLogging=True): USE_XBMC = bool(check_setting_int(CFG, 'XBMC', 'use_xbmc', 0)) XBMC_NOTIFY_ONSNATCH = bool(check_setting_int(CFG, 'XBMC', 'xbmc_notify_onsnatch', 0)) XBMC_NOTIFY_ONDOWNLOAD = bool(check_setting_int(CFG, 'XBMC', 'xbmc_notify_ondownload', 0)) + XBMC_NOTIFY_ONSUBTITLEDOWNLOAD = bool(check_setting_int(CFG, 'XBMC', 'xbmc_notify_onsubtitledownload', 0)) XBMC_UPDATE_LIBRARY = bool(check_setting_int(CFG, 'XBMC', 'xbmc_update_library', 0)) XBMC_UPDATE_FULL = bool(check_setting_int(CFG, 'XBMC', 'xbmc_update_full', 0)) XBMC_UPDATE_ONLYFIRST = bool(check_setting_int(CFG, 'XBMC', 'xbmc_update_onlyfirst', 
0)) @@ -656,6 +677,7 @@ def initialize(consoleLogging=True): USE_PLEX = bool(check_setting_int(CFG, 'Plex', 'use_plex', 0)) PLEX_NOTIFY_ONSNATCH = bool(check_setting_int(CFG, 'Plex', 'plex_notify_onsnatch', 0)) PLEX_NOTIFY_ONDOWNLOAD = bool(check_setting_int(CFG, 'Plex', 'plex_notify_ondownload', 0)) + PLEX_NOTIFY_ONSUBTITLEDOWNLOAD = bool(check_setting_int(CFG, 'Plex', 'plex_notify_onsubtitledownload', 0)) PLEX_UPDATE_LIBRARY = bool(check_setting_int(CFG, 'Plex', 'plex_update_library', 0)) PLEX_SERVER_HOST = check_setting_str(CFG, 'Plex', 'plex_server_host', '') PLEX_HOST = check_setting_str(CFG, 'Plex', 'plex_host', '') @@ -666,6 +688,7 @@ def initialize(consoleLogging=True): USE_GROWL = bool(check_setting_int(CFG, 'Growl', 'use_growl', 0)) GROWL_NOTIFY_ONSNATCH = bool(check_setting_int(CFG, 'Growl', 'growl_notify_onsnatch', 0)) GROWL_NOTIFY_ONDOWNLOAD = bool(check_setting_int(CFG, 'Growl', 'growl_notify_ondownload', 0)) + GROWL_NOTIFY_ONSUBTITLEDOWNLOAD = bool(check_setting_int(CFG, 'Growl', 'growl_notify_onsubtitledownload', 0)) GROWL_HOST = check_setting_str(CFG, 'Growl', 'growl_host', '') GROWL_PASSWORD = check_setting_str(CFG, 'Growl', 'growl_password', '') @@ -673,6 +696,7 @@ def initialize(consoleLogging=True): USE_PROWL = bool(check_setting_int(CFG, 'Prowl', 'use_prowl', 0)) PROWL_NOTIFY_ONSNATCH = bool(check_setting_int(CFG, 'Prowl', 'prowl_notify_onsnatch', 0)) PROWL_NOTIFY_ONDOWNLOAD = bool(check_setting_int(CFG, 'Prowl', 'prowl_notify_ondownload', 0)) + PROWL_NOTIFY_ONSUBTITLEDOWNLOAD = bool(check_setting_int(CFG, 'Prowl', 'prowl_notify_onsubtitledownload', 0)) PROWL_API = check_setting_str(CFG, 'Prowl', 'prowl_api', '') PROWL_PRIORITY = check_setting_str(CFG, 'Prowl', 'prowl_priority', "0") @@ -680,6 +704,7 @@ def initialize(consoleLogging=True): USE_TWITTER = bool(check_setting_int(CFG, 'Twitter', 'use_twitter', 0)) TWITTER_NOTIFY_ONSNATCH = bool(check_setting_int(CFG, 'Twitter', 'twitter_notify_onsnatch', 0)) TWITTER_NOTIFY_ONDOWNLOAD = 
bool(check_setting_int(CFG, 'Twitter', 'twitter_notify_ondownload', 0)) + TWITTER_NOTIFY_ONSUBTITLEDOWNLOAD = bool(check_setting_int(CFG, 'Twitter', 'twitter_notify_onsubtitledownload', 0)) TWITTER_USERNAME = check_setting_str(CFG, 'Twitter', 'twitter_username', '') TWITTER_PASSWORD = check_setting_str(CFG, 'Twitter', 'twitter_password', '') TWITTER_PREFIX = check_setting_str(CFG, 'Twitter', 'twitter_prefix', 'Sick Beard') @@ -688,6 +713,7 @@ def initialize(consoleLogging=True): USE_NOTIFO = bool(check_setting_int(CFG, 'Notifo', 'use_notifo', 0)) NOTIFO_NOTIFY_ONSNATCH = bool(check_setting_int(CFG, 'Notifo', 'notifo_notify_onsnatch', 0)) NOTIFO_NOTIFY_ONDOWNLOAD = bool(check_setting_int(CFG, 'Notifo', 'notifo_notify_ondownload', 0)) + NOTIFO_NOTIFY_ONSUBTITLEDOWNLOAD = bool(check_setting_int(CFG, 'Notifo', 'notifo_notify_onsubtitledownload', 0)) NOTIFO_USERNAME = check_setting_str(CFG, 'Notifo', 'notifo_username', '') NOTIFO_APISECRET = check_setting_str(CFG, 'Notifo', 'notifo_apisecret', '') @@ -695,18 +721,21 @@ def initialize(consoleLogging=True): USE_BOXCAR = bool(check_setting_int(CFG, 'Boxcar', 'use_boxcar', 0)) BOXCAR_NOTIFY_ONSNATCH = bool(check_setting_int(CFG, 'Boxcar', 'boxcar_notify_onsnatch', 0)) BOXCAR_NOTIFY_ONDOWNLOAD = bool(check_setting_int(CFG, 'Boxcar', 'boxcar_notify_ondownload', 0)) + BOXCAR_NOTIFY_ONSUBTITLEDOWNLOAD = bool(check_setting_int(CFG, 'Boxcar', 'boxcar_notify_onsubtitledownload', 0)) BOXCAR_USERNAME = check_setting_str(CFG, 'Boxcar', 'boxcar_username', '') CheckSection(CFG, 'Pushover') USE_PUSHOVER = bool(check_setting_int(CFG, 'Pushover', 'use_pushover', 0)) PUSHOVER_NOTIFY_ONSNATCH = bool(check_setting_int(CFG, 'Pushover', 'pushover_notify_onsnatch', 0)) PUSHOVER_NOTIFY_ONDOWNLOAD = bool(check_setting_int(CFG, 'Pushover', 'pushover_notify_ondownload', 0)) + PUSHOVER_NOTIFY_ONSUBTITLEDOWNLOAD = bool(check_setting_int(CFG, 'Pushover', 'pushover_notify_onsubtitledownload', 0)) PUSHOVER_USERKEY = check_setting_str(CFG, 'Pushover', 
'pushover_userkey', '') CheckSection(CFG, 'Libnotify') USE_LIBNOTIFY = bool(check_setting_int(CFG, 'Libnotify', 'use_libnotify', 0)) LIBNOTIFY_NOTIFY_ONSNATCH = bool(check_setting_int(CFG, 'Libnotify', 'libnotify_notify_onsnatch', 0)) LIBNOTIFY_NOTIFY_ONDOWNLOAD = bool(check_setting_int(CFG, 'Libnotify', 'libnotify_notify_ondownload', 0)) + LIBNOTIFY_NOTIFY_ONSUBTITLEDOWNLOAD = bool(check_setting_int(CFG, 'Libnotify', 'libnotify_notify_onsubtitledownload', 0)) CheckSection(CFG, 'NMJ') USE_NMJ = bool(check_setting_int(CFG, 'NMJ', 'use_nmj', 0)) @@ -733,6 +762,7 @@ def initialize(consoleLogging=True): USE_PYTIVO = bool(check_setting_int(CFG, 'pyTivo', 'use_pytivo', 0)) PYTIVO_NOTIFY_ONSNATCH = bool(check_setting_int(CFG, 'pyTivo', 'pytivo_notify_onsnatch', 0)) PYTIVO_NOTIFY_ONDOWNLOAD = bool(check_setting_int(CFG, 'pyTivo', 'pytivo_notify_ondownload', 0)) + PYTIVO_NOTIFY_ONSUBTITLEDOWNLOAD = bool(check_setting_int(CFG, 'pyTivo', 'pytivo_notify_onsubtitledownload', 0)) PYTIVO_UPDATE_LIBRARY = bool(check_setting_int(CFG, 'pyTivo', 'pyTivo_update_library', 0)) PYTIVO_HOST = check_setting_str(CFG, 'pyTivo', 'pytivo_host', '') PYTIVO_SHARE_NAME = check_setting_str(CFG, 'pyTivo', 'pytivo_share_name', '') @@ -742,9 +772,19 @@ def initialize(consoleLogging=True): USE_NMA = bool(check_setting_int(CFG, 'NMA', 'use_nma', 0)) NMA_NOTIFY_ONSNATCH = bool(check_setting_int(CFG, 'NMA', 'nma_notify_onsnatch', 0)) NMA_NOTIFY_ONDOWNLOAD = bool(check_setting_int(CFG, 'NMA', 'nma_notify_ondownload', 0)) + NMA_NOTIFY_ONSUBTITLEDOWNLOAD = bool(check_setting_int(CFG, 'NMA', 'nma_notify_onsubtitledownload', 0)) NMA_API = check_setting_str(CFG, 'NMA', 'nma_api', '') NMA_PRIORITY = check_setting_str(CFG, 'NMA', 'nma_priority', "0") + USE_SUBTITLES = bool(check_setting_int(CFG, 'Subtitles', 'use_subtitles', 0)) + SUBTITLES_LANGUAGES = check_setting_str(CFG, 'Subtitles', 'subtitles_languages', '').split(',') + if SUBTITLES_LANGUAGES[0] == '': + SUBTITLES_LANGUAGES = [] + SUBTITLES_DIR = 
check_setting_str(CFG, 'Subtitles', 'subtitles_dir', '') + SUBTITLES_SERVICES_LIST = check_setting_str(CFG, 'Subtitles', 'SUBTITLES_SERVICES_LIST', '').split(',') + SUBTITLES_SERVICES_ENABLED = [int(x) for x in check_setting_str(CFG, 'Subtitles', 'SUBTITLES_SERVICES_ENABLED', '').split('|') if x] + SUBTITLES_DEFAULT = bool(check_setting_int(CFG, 'Subtitles', 'subtitles_default', 0)) + SUBTITLES_HISTORY = bool(check_setting_int(CFG, 'Subtitles', 'subtitles_history', 0)) # start up all the threads logger.sb_log_instance.initLogging(consoleLogging=consoleLogging) @@ -805,6 +845,12 @@ def initialize(consoleLogging=True): runImmediately=True) backlogSearchScheduler.action.cycleTime = BACKLOG_SEARCH_FREQUENCY + + subtitlesFinderScheduler = scheduler.Scheduler(subtitles.SubtitlesFinder(), + cycleTime=datetime.timedelta(hours=1), + threadName="FINDSUBTITLES", + runImmediately=True) + showList = [] loadingShowList = {} @@ -817,6 +863,7 @@ def start(): global __INITIALIZED__, currentSearchScheduler, backlogSearchScheduler, \ showUpdateScheduler, versionCheckScheduler, showQueueScheduler, \ properFinderScheduler, autoPostProcesserScheduler, searchQueueScheduler, \ + subtitlesFinderScheduler, started, USE_SUBTITLES, \ started with INIT_LOCK: @@ -847,13 +894,18 @@ def start(): # start the proper finder autoPostProcesserScheduler.thread.start() - started = True + # start the subtitles finder + if USE_SUBTITLES: + subtitlesFinderScheduler.thread.start() + + started = True def halt(): global __INITIALIZED__, currentSearchScheduler, backlogSearchScheduler, showUpdateScheduler, \ showQueueScheduler, properFinderScheduler, autoPostProcesserScheduler, searchQueueScheduler, \ + subtitlesFinderScheduler, started, \ started with INIT_LOCK: @@ -920,6 +972,14 @@ def halt(): except: pass + subtitlesFinderScheduler.abort = True + logger.log(u"Waiting for the SUBTITLESFINDER thread to exit") + try: + subtitlesFinderScheduler.thread.join(10) + except: + pass + + __INITIALIZED__ = False @@ 
-1157,6 +1217,7 @@ def save_config(): new_config['XBMC']['use_xbmc'] = int(USE_XBMC) new_config['XBMC']['xbmc_notify_onsnatch'] = int(XBMC_NOTIFY_ONSNATCH) new_config['XBMC']['xbmc_notify_ondownload'] = int(XBMC_NOTIFY_ONDOWNLOAD) + new_config['XBMC']['xbmc_notify_onsubtitledownload'] = int(XBMC_NOTIFY_ONSUBTITLEDOWNLOAD) new_config['XBMC']['xbmc_update_library'] = int(XBMC_UPDATE_LIBRARY) new_config['XBMC']['xbmc_update_full'] = int(XBMC_UPDATE_FULL) new_config['XBMC']['xbmc_update_onlyfirst'] = int(XBMC_UPDATE_ONLYFIRST) @@ -1168,6 +1229,7 @@ def save_config(): new_config['Plex']['use_plex'] = int(USE_PLEX) new_config['Plex']['plex_notify_onsnatch'] = int(PLEX_NOTIFY_ONSNATCH) new_config['Plex']['plex_notify_ondownload'] = int(PLEX_NOTIFY_ONDOWNLOAD) + new_config['Plex']['plex_notify_onsubtitledownload'] = int(PLEX_NOTIFY_ONSUBTITLEDOWNLOAD) new_config['Plex']['plex_update_library'] = int(PLEX_UPDATE_LIBRARY) new_config['Plex']['plex_server_host'] = PLEX_SERVER_HOST new_config['Plex']['plex_host'] = PLEX_HOST @@ -1178,6 +1240,7 @@ def save_config(): new_config['Growl']['use_growl'] = int(USE_GROWL) new_config['Growl']['growl_notify_onsnatch'] = int(GROWL_NOTIFY_ONSNATCH) new_config['Growl']['growl_notify_ondownload'] = int(GROWL_NOTIFY_ONDOWNLOAD) + new_config['Growl']['growl_notify_onsubtitledownload'] = int(GROWL_NOTIFY_ONSUBTITLEDOWNLOAD) new_config['Growl']['growl_host'] = GROWL_HOST new_config['Growl']['growl_password'] = GROWL_PASSWORD @@ -1185,6 +1248,7 @@ def save_config(): new_config['Prowl']['use_prowl'] = int(USE_PROWL) new_config['Prowl']['prowl_notify_onsnatch'] = int(PROWL_NOTIFY_ONSNATCH) new_config['Prowl']['prowl_notify_ondownload'] = int(PROWL_NOTIFY_ONDOWNLOAD) + new_config['Prowl']['prowl_notify_onsubtitledownload'] = int(PROWL_NOTIFY_ONSUBTITLEDOWNLOAD) new_config['Prowl']['prowl_api'] = PROWL_API new_config['Prowl']['prowl_priority'] = PROWL_PRIORITY @@ -1192,6 +1256,7 @@ def save_config(): new_config['Twitter']['use_twitter'] = 
int(USE_TWITTER) new_config['Twitter']['twitter_notify_onsnatch'] = int(TWITTER_NOTIFY_ONSNATCH) new_config['Twitter']['twitter_notify_ondownload'] = int(TWITTER_NOTIFY_ONDOWNLOAD) + new_config['Twitter']['twitter_notify_onsubtitledownload'] = int(TWITTER_NOTIFY_ONSUBTITLEDOWNLOAD) new_config['Twitter']['twitter_username'] = TWITTER_USERNAME new_config['Twitter']['twitter_password'] = TWITTER_PASSWORD new_config['Twitter']['twitter_prefix'] = TWITTER_PREFIX @@ -1200,6 +1265,7 @@ def save_config(): new_config['Notifo']['use_notifo'] = int(USE_NOTIFO) new_config['Notifo']['notifo_notify_onsnatch'] = int(NOTIFO_NOTIFY_ONSNATCH) new_config['Notifo']['notifo_notify_ondownload'] = int(NOTIFO_NOTIFY_ONDOWNLOAD) + new_config['Notifo']['notifo_notify_onsubtitledownload'] = int(NOTIFO_NOTIFY_ONSUBTITLEDOWNLOAD) new_config['Notifo']['notifo_username'] = NOTIFO_USERNAME new_config['Notifo']['notifo_apisecret'] = NOTIFO_APISECRET @@ -1207,18 +1273,21 @@ def save_config(): new_config['Boxcar']['use_boxcar'] = int(USE_BOXCAR) new_config['Boxcar']['boxcar_notify_onsnatch'] = int(BOXCAR_NOTIFY_ONSNATCH) new_config['Boxcar']['boxcar_notify_ondownload'] = int(BOXCAR_NOTIFY_ONDOWNLOAD) + new_config['Boxcar']['boxcar_notify_onsubtitledownload'] = int(BOXCAR_NOTIFY_ONSUBTITLEDOWNLOAD) new_config['Boxcar']['boxcar_username'] = BOXCAR_USERNAME new_config['Pushover'] = {} new_config['Pushover']['use_pushover'] = int(USE_PUSHOVER) new_config['Pushover']['pushover_notify_onsnatch'] = int(PUSHOVER_NOTIFY_ONSNATCH) new_config['Pushover']['pushover_notify_ondownload'] = int(PUSHOVER_NOTIFY_ONDOWNLOAD) + new_config['Pushover']['pushover_notify_onsubtitledownload'] = int(PUSHOVER_NOTIFY_ONSUBTITLEDOWNLOAD) new_config['Pushover']['pushover_userkey'] = PUSHOVER_USERKEY new_config['Libnotify'] = {} new_config['Libnotify']['use_libnotify'] = int(USE_LIBNOTIFY) new_config['Libnotify']['libnotify_notify_onsnatch'] = int(LIBNOTIFY_NOTIFY_ONSNATCH) new_config['Libnotify']['libnotify_notify_ondownload'] = 
int(LIBNOTIFY_NOTIFY_ONDOWNLOAD) + new_config['Libnotify']['libnotify_notify_onsubtitledownload'] = int(LIBNOTIFY_NOTIFY_ONSUBTITLEDOWNLOAD) new_config['NMJ'] = {} new_config['NMJ']['use_nmj'] = int(USE_NMJ) @@ -1245,6 +1314,7 @@ def save_config(): new_config['pyTivo']['use_pytivo'] = int(USE_PYTIVO) new_config['pyTivo']['pytivo_notify_onsnatch'] = int(PYTIVO_NOTIFY_ONSNATCH) new_config['pyTivo']['pytivo_notify_ondownload'] = int(PYTIVO_NOTIFY_ONDOWNLOAD) + new_config['pyTivo']['pytivo_notify_onsubtitledownload'] = int(PYTIVO_NOTIFY_ONSUBTITLEDOWNLOAD) new_config['pyTivo']['pyTivo_update_library'] = int(PYTIVO_UPDATE_LIBRARY) new_config['pyTivo']['pytivo_host'] = PYTIVO_HOST new_config['pyTivo']['pytivo_share_name'] = PYTIVO_SHARE_NAME @@ -1265,6 +1335,15 @@ def save_config(): new_config['GUI']['coming_eps_display_paused'] = int(COMING_EPS_DISPLAY_PAUSED) new_config['GUI']['coming_eps_sort'] = COMING_EPS_SORT + new_config['Subtitles'] = {} + new_config['Subtitles']['use_subtitles'] = int(USE_SUBTITLES) + new_config['Subtitles']['subtitles_languages'] = ','.join(SUBTITLES_LANGUAGES) + new_config['Subtitles']['SUBTITLES_SERVICES_LIST'] = ','.join(SUBTITLES_SERVICES_LIST) + new_config['Subtitles']['SUBTITLES_SERVICES_ENABLED'] = '|'.join([str(x) for x in SUBTITLES_SERVICES_ENABLED]) + new_config['Subtitles']['subtitles_dir'] = SUBTITLES_DIR + new_config['Subtitles']['subtitles_default'] = int(SUBTITLES_DEFAULT) + new_config['Subtitles']['subtitles_history'] = int(SUBTITLES_HISTORY) + new_config['General']['config_version'] = CONFIG_VERSION new_config.write() diff --git a/sickbeard/common.py b/sickbeard/common.py index 3c82b0e6e9661e3d3af7245d4918ea29019c80f9..af46b18fed4db48f08a5a7b3df9cf87a89bd4e0d 100644 --- a/sickbeard/common.py +++ b/sickbeard/common.py @@ -31,6 +31,8 @@ mediaExtensions = ['avi', 'mkv', 'mpg', 'mpeg', 'wmv', 'mov', 'rmvb', 'vob', 'dvr-ms', 'wtv', 'ogv', '3gp'] +subtitleExtensions = ['srt', 'sub', 'ass', 'idx', 'ssa'] + ### Other constants 
MULTI_EP_RESULT = -1 SEASON_RESULT = -2 @@ -38,10 +40,12 @@ SEASON_RESULT = -2 ### Notification Types NOTIFY_SNATCH = 1 NOTIFY_DOWNLOAD = 2 +NOTIFY_SUBTITLE_DOWNLOAD = 3 notifyStrings = {} notifyStrings[NOTIFY_SNATCH] = "Started Download" notifyStrings[NOTIFY_DOWNLOAD] = "Download Finished" +notifyStrings[NOTIFY_SUBTITLE_DOWNLOAD] = "Subtitle Download Finished" ### Episode statuses UNKNOWN = -1 # should never happen @@ -53,6 +57,7 @@ SKIPPED = 5 # episodes we don't want ARCHIVED = 6 # episodes that you don't have locally (counts toward download completion stats) IGNORED = 7 # episodes that you don't want included in your download stats SNATCHED_PROPER = 9 # qualified with quality +SUBTITLED = 10 # qualified with quality NAMING_REPEAT = 1 NAMING_EXTEND = 2 @@ -239,7 +244,8 @@ class StatusStrings: SNATCHED_PROPER: "Snatched (Proper)", WANTED: "Wanted", ARCHIVED: "Archived", - IGNORED: "Ignored"} + IGNORED: "Ignored", + SUBTITLED: "Subtitled"} def __getitem__(self, name): if name in Quality.DOWNLOADED + Quality.SNATCHED + Quality.SNATCHED_PROPER: @@ -256,7 +262,6 @@ class StatusStrings: statusStrings = StatusStrings() - class Overview: UNAIRED = UNAIRED # 1 QUAL = 2 diff --git a/sickbeard/databases/mainDB.py b/sickbeard/databases/mainDB.py index 01f8b032f5e44d78a7d501d58446aeeaddd49154..5ac1124bd89877f94106fada0856c72f12acebbc 100644 --- a/sickbeard/databases/mainDB.py +++ b/sickbeard/databases/mainDB.py @@ -18,7 +18,7 @@ import sickbeard import os.path - +import datetime from sickbeard import db, common, helpers, logger from sickbeard.providers.generic import GenericProvider @@ -560,8 +560,20 @@ class RenameSeasonFolders(AddSizeAndSceneNameFields): self.incDBVersion() +class AddSubtitlesSupport(RenameSeasonFolders): + def test(self): + return self.checkDBVersion() >= 12 + + def execute(self): + + self.addColumn("tv_shows", "subtitles") + self.addColumn("tv_episodes", "subtitles", "TEXT", "") + self.addColumn("tv_episodes", "subtitles_searchcount") + 
self.addColumn("tv_episodes", "subtitles_lastsearch", "TIMESTAMP", str(datetime.datetime.min)) + self.incDBVersion() + -class Add1080pAndRawHDQualities(RenameSeasonFolders): +class Add1080pAndRawHDQualities(AddSubtitlesSupport): """Add support for 1080p related qualities along with RawHD Quick overview of what the upgrade needs to do: diff --git a/sickbeard/helpers.py b/sickbeard/helpers.py index 15ba0c38c7c1662a7a8fa3e1404af4a3e6ab5c85..f8dfbb6f055f4b5f32f7c204866a44ed69af169b 100644 --- a/sickbeard/helpers.py +++ b/sickbeard/helpers.py @@ -43,6 +43,9 @@ from lib.tvdb_api import tvdb_api, tvdb_exceptions import xml.etree.cElementTree as etree +from lib import subliminal +#from sickbeard.subtitles import EXTENSIONS + urllib._urlopener = classes.SickBeardURLopener() def indentXML(elem, level=0): @@ -487,6 +490,17 @@ def rename_ep_file(cur_path, new_path): new_dest_dir, new_dest_name = os.path.split(new_path) #@UnusedVariable cur_file_name, cur_file_ext = os.path.splitext(cur_path) #@UnusedVariable + if cur_file_ext[1:] in subtitleExtensions: + #Extract subtitle language from filename + sublang = os.path.splitext(cur_file_name)[1][1:] + + #Check if the language extracted from filename is a valid language + try: + language = subliminal.language.Language(sublang, strict=True) + cur_file_ext = '.'+sublang+cur_file_ext + except ValueError: + pass + # put the extension on the incoming file new_path += cur_file_ext diff --git a/sickbeard/history.py b/sickbeard/history.py index 047d4583ba628c9e866b544a1883af07a0e6be28..4c15f0b102b41f5e098c085f455dbcf101703c5b 100644 --- a/sickbeard/history.py +++ b/sickbeard/history.py @@ -19,7 +19,7 @@ import db import datetime -from sickbeard.common import SNATCHED, Quality +from sickbeard.common import SNATCHED, SUBTITLED, Quality dateFormat = "%Y%m%d%H%M%S" @@ -71,3 +71,11 @@ def logDownload(episode, filename, new_ep_quality, release_group=None): _logHistoryItem(action, showid, season, epNum, quality, filename, provider) +def 
logSubtitle(showid, season, episode, status, subtitleResult): + + resource = subtitleResult.path + provider = subtitleResult.service + status, quality = Quality.splitCompositeStatus(status) + action = Quality.compositeStatus(SUBTITLED, quality) + + _logHistoryItem(action, showid, season, episode, quality, resource, provider) \ No newline at end of file diff --git a/sickbeard/logger.py b/sickbeard/logger.py index 28bdc5e8985c54987efc4c3bf8634a8b402220d5..6c2953e6dce903b671f2014f910db98478361fcc 100644 --- a/sickbeard/logger.py +++ b/sickbeard/logger.py @@ -64,7 +64,8 @@ class SBRotatingLogHandler(object): self.cur_handler = self._config_handler() logging.getLogger('sickbeard').addHandler(self.cur_handler) - + logging.getLogger('subliminal').addHandler(self.cur_handler) + # define a Handler which writes INFO messages or higher to the sys.stderr if consoleLogging: console = logging.StreamHandler() @@ -76,9 +77,11 @@ class SBRotatingLogHandler(object): # add the handler to the root logger logging.getLogger('sickbeard').addHandler(console) - + logging.getLogger('subliminal').addHandler(console) + logging.getLogger('sickbeard').setLevel(logging.DEBUG) - + logging.getLogger('subliminal').setLevel(logging.ERROR) + def _config_handler(self): """ Configure a file handler to log at file_name and return it. 
@@ -112,12 +115,14 @@ class SBRotatingLogHandler(object): def _rotate_logs(self): sb_logger = logging.getLogger('sickbeard') + subli_logger = logging.getLogger('subliminal') # delete the old handler if self.cur_handler: self.cur_handler.flush() self.cur_handler.close() sb_logger.removeHandler(self.cur_handler) + subli_logger.removeHandler(self.cur_handler) # rename or delete all the old log files for i in range(self._num_logs(), -1, -1): @@ -136,6 +141,7 @@ class SBRotatingLogHandler(object): self.cur_handler = new_file_handler sb_logger.addHandler(new_file_handler) + subli_logger.addHandler(new_file_handler) def log(self, toLog, logLevel=MESSAGE): diff --git a/sickbeard/notifiers/__init__.py b/sickbeard/notifiers/__init__.py index fb28b7ddcd52eb9d364ba90277f35b4b2a355ffa..be6ff54861f2657fa10b94e275c5b5ca725ec1b9 100755 --- a/sickbeard/notifiers/__init__.py +++ b/sickbeard/notifiers/__init__.py @@ -80,6 +80,9 @@ def notify_download(ep_name): for n in notifiers: n.notify_download(ep_name) +def notify_subtitle_download(ep_name, lang): + for n in notifiers: + n.notify_subtitle_download(ep_name, lang) def notify_snatch(ep_name): for n in notifiers: diff --git a/sickbeard/notifiers/boxcar.py b/sickbeard/notifiers/boxcar.py index 35a86942de89b5786e9e574138ce8b8f35a2d14b..633245f1dfb159a49c8c31b8ac7e8a6c28c87225 100644 --- a/sickbeard/notifiers/boxcar.py +++ b/sickbeard/notifiers/boxcar.py @@ -23,7 +23,7 @@ import time import sickbeard from sickbeard import logger -from sickbeard.common import notifyStrings, NOTIFY_SNATCH, NOTIFY_DOWNLOAD +from sickbeard.common import notifyStrings, NOTIFY_SNATCH, NOTIFY_DOWNLOAD, NOTIFY_SUBTITLE_DOWNLOAD from sickbeard.exceptions import ex API_URL = "https://boxcar.io/devices/providers/fWc4sgSmpcN6JujtBmR6/notifications" @@ -119,6 +119,10 @@ class BoxcarNotifier: if sickbeard.BOXCAR_NOTIFY_ONDOWNLOAD: self._notifyBoxcar(title, ep_name) + def notify_subtitle_download(self, ep_name, lang, title=notifyStrings[NOTIFY_SUBTITLE_DOWNLOAD]): + 
if sickbeard.BOXCAR_NOTIFY_ONSUBTITLEDOWNLOAD: + self._notifyBoxcar(title, ep_name + ": " + lang) + def _notifyBoxcar(self, title, message, username=None, force=False): """ Sends a boxcar notification based on the provided info or SB config diff --git a/sickbeard/notifiers/growl.py b/sickbeard/notifiers/growl.py index 72577489b91119b0b1b76efbc065e7191c666698..3a7932fbe280146908f76f9838af8a914ebbd9cd 100644 --- a/sickbeard/notifiers/growl.py +++ b/sickbeard/notifiers/growl.py @@ -39,6 +39,10 @@ class GrowlNotifier: if sickbeard.GROWL_NOTIFY_ONDOWNLOAD: self._sendGrowl(common.notifyStrings[common.NOTIFY_DOWNLOAD], ep_name) + def notify_subtitle_download(self, ep_name, lang): + if sickbeard.GROWL_NOTIFY_ONSUBTITLEDOWNLOAD: + self._sendGrowl(common.notifyStrings[common.NOTIFY_SUBTITLE_DOWNLOAD], ep_name + ": " + lang) + def _send_growl(self, options,message=None): #Send Notification diff --git a/sickbeard/notifiers/libnotify.py b/sickbeard/notifiers/libnotify.py index a95f7d9deef758e00f58849cbd695039359e4e85..2dfac95a1d235d2d9cd244a53fa7ae47d0adb7fa 100644 --- a/sickbeard/notifiers/libnotify.py +++ b/sickbeard/notifiers/libnotify.py @@ -88,6 +88,10 @@ class LibnotifyNotifier: if sickbeard.LIBNOTIFY_NOTIFY_ONDOWNLOAD: self._notify(common.notifyStrings[common.NOTIFY_DOWNLOAD], ep_name) + def notify_subtitle_download(self, ep_name, lang): + if sickbeard.LIBNOTIFY_NOTIFY_ONSUBTITLEDOWNLOAD: + self._notify(common.notifyStrings[common.NOTIFY_SUBTITLE_DOWNLOAD], ep_name + ": " + lang) + def test_notify(self): return self._notify('Test notification', "This is a test notification from Sick Beard", force=True) diff --git a/sickbeard/notifiers/nma.py b/sickbeard/notifiers/nma.py index a85212715bcebf86d5d953c2767e941a0b7ce1a5..fed5f828d8cf96384bc37ca34a07acd4c4f8eb78 100644 --- a/sickbeard/notifiers/nma.py +++ b/sickbeard/notifiers/nma.py @@ -15,6 +15,10 @@ class NMA_Notifier: def notify_download(self, ep_name): if sickbeard.NMA_NOTIFY_ONDOWNLOAD: self._sendNMA(nma_api=None, 
nma_priority=None, event=common.notifyStrings[common.NOTIFY_DOWNLOAD], message=ep_name) + + def notify_subtitle_download(self, ep_name, lang): + if sickbeard.NMA_NOTIFY_ONSUBTITLEDOWNLOAD: + self._sendNMA(nma_api=None, nma_priority=None, event=common.notifyStrings[common.NOTIFY_DOWNLOAD], message=ep_name + ": " + lang) def _sendNMA(self, nma_api=None, nma_priority=None, event=None, message=None, force=False): diff --git a/sickbeard/notifiers/nmj.py b/sickbeard/notifiers/nmj.py index 4a05815592eaf3f2d08a1a3844e3c469b90eb524..2a2d8dc2c9e3779d73cdf2d2f028cc09b02a453c 100644 --- a/sickbeard/notifiers/nmj.py +++ b/sickbeard/notifiers/nmj.py @@ -91,6 +91,10 @@ class NMJNotifier: if sickbeard.USE_NMJ: self._notifyNMJ() + def notify_subtitle_download(self, ep_name, lang): + if sickbeard.USE_NMJ: + self._notifyNMJ() + def test_notify(self, host, database, mount): return self._sendNMJ(host, database, mount) diff --git a/sickbeard/notifiers/nmjv2.py b/sickbeard/notifiers/nmjv2.py index c5f096f54a0103b746bf77fdad2a0948ada6c0fd..e187777321ab6a053adf88372bca2440a9453b4d 100644 --- a/sickbeard/notifiers/nmjv2.py +++ b/sickbeard/notifiers/nmjv2.py @@ -85,6 +85,10 @@ class NMJv2Notifier: logger.log(u"Warning: Couldn't contact popcorn hour on host %s: %s" % (host, e)) return False return False + def notify_subtitle_download(self, ep_name, lang): + if sickbeard.USE_NMJv2: + self._sendNMJ() + def _sendNMJ(self, host): """ diff --git a/sickbeard/notifiers/notifo.py b/sickbeard/notifiers/notifo.py index 0fba8e67f06a40c0696cda07c818a80a65853c18..5a7ce142d3ac6688d76689dfa56a90a30ac203cc 100644 --- a/sickbeard/notifiers/notifo.py +++ b/sickbeard/notifiers/notifo.py @@ -100,7 +100,18 @@ class NotifoNotifier: title: The title of the notification (optional) """ if sickbeard.NOTIFO_NOTIFY_ONDOWNLOAD: - self._notifyNotifo(title, ep_name) + self._notifyNotifo(title, ep_name) + + def notify_subtitle_download(self, ep_name, lang, title="Completed:"): + """ + Send a notification that a subtitle was 
downloaded + + ep_name: The name of the episode + lang: The language of subtitle that was downloaded + title: The title of the notification (optional) + """ + if sickbeard.NOTIFO_NOTIFY_ONSUBTITLEDOWNLOAD: + self._notifyNotifo(title, ep_name + ": " + lang) def _notifyNotifo(self, title, message, username=None, apisecret=None, force=False): """ diff --git a/sickbeard/notifiers/plex.py b/sickbeard/notifiers/plex.py index 4d1b4362ef9efbb09226db2adb2b936ef691d7fa..5f991769f1251e8e86ecfd322d382ea25cba2b10 100644 --- a/sickbeard/notifiers/plex.py +++ b/sickbeard/notifiers/plex.py @@ -35,6 +35,10 @@ class PLEXNotifier: def _send_to_plex(self, command, host, username=None, password=None): """Handles communication to Plex hosts via HTTP API + + def notify_subtitle_download(self, ep_name, lang): + if sickbeard.PLEX_NOTIFY_ONSUBTITLEDOWNLOAD: + self._notifyXBMC(ep_name + ": " + lang, common.notifyStrings[common.NOTIFY_SUBTITLE_DOWNLOAD]) Args: command: Dictionary of field/data pairs, encoded via urllib and passed to the legacy xbmcCmds HTTP API @@ -142,6 +146,10 @@ class PLEXNotifier: if sickbeard.PLEX_NOTIFY_ONDOWNLOAD: self._notify_pmc(ep_name, common.notifyStrings[common.NOTIFY_DOWNLOAD]) + def notify_subtitle_download(self, ep_name, lang): + if sickbeard.PLEX_NOTIFY_ONSUBTITLEDOWNLOAD: + self._notify_pmc(ep_name + ": " + lang, common.notifyStrings[common.NOTIFY_SUBTITLE_DOWNLOAD]) + def test_notify(self, host, username, password): return self._notify_pmc("Testing Plex notifications from Sick Beard", "Test Notification", host, username, password, force=True) diff --git a/sickbeard/notifiers/prowl.py b/sickbeard/notifiers/prowl.py index 709ea95ca5c3a297242ed2238cffc5152d461d76..4f0e932c82a0892ebba5a519bc06d29ba976dcef 100644 --- a/sickbeard/notifiers/prowl.py +++ b/sickbeard/notifiers/prowl.py @@ -43,6 +43,10 @@ class ProwlNotifier: def notify_download(self, ep_name): if sickbeard.PROWL_NOTIFY_ONDOWNLOAD: self._sendProwl(prowl_api=None, prowl_priority=None, 
event=common.notifyStrings[common.NOTIFY_DOWNLOAD], message=ep_name) + + def notify_subtitle_download(self, ep_name, lang): + if sickbeard.PROWL_NOTIFY_ONSUBTITLEDOWNLOAD: + self._sendProwl(prowl_api=None, prowl_priority=None, event=common.notifyStrings[common.NOTIFY_SUBTITLE_DOWNLOAD], message=ep_name + ": " + lang) def _sendProwl(self, prowl_api=None, prowl_priority=None, event=None, message=None, force=False): diff --git a/sickbeard/notifiers/pushover.py b/sickbeard/notifiers/pushover.py index 4e6cd5d6bac9e5d7d3c0a32a23ce56b83d455720..74b07c44930dfa1b86d228d211d973fd8849b380 100644 --- a/sickbeard/notifiers/pushover.py +++ b/sickbeard/notifiers/pushover.py @@ -24,7 +24,7 @@ import time import sickbeard from sickbeard import logger -from sickbeard.common import notifyStrings, NOTIFY_SNATCH, NOTIFY_DOWNLOAD +from sickbeard.common import notifyStrings, NOTIFY_SNATCH, NOTIFY_DOWNLOAD, NOTIFY_SUBTITLE_DOWNLOAD from sickbeard.exceptions import ex API_URL = "https://api.pushover.net/1/messages.json" @@ -109,6 +109,10 @@ class PushoverNotifier: def notify_download(self, ep_name, title=notifyStrings[NOTIFY_DOWNLOAD]): if sickbeard.PUSHOVER_NOTIFY_ONDOWNLOAD: self._notifyPushover(title, ep_name) + + def notify_subtitle_download(self, ep_name, lang, title=notifyStrings[NOTIFY_SUBTITLE_DOWNLOAD]): + if sickbeard.PUSHOVER_NOTIFY_ONSUBTITLEDOWNLOAD: + self._notifyPushover(title, ep_name + ": " + lang) def _notifyPushover(self, title, message, userKey=None ): """ diff --git a/sickbeard/notifiers/pytivo.py b/sickbeard/notifiers/pytivo.py index 3f4aa30b482d62c4c8e58ced9db45a410fd0d319..4f39080e58309837f91d890e2f4f0318f919d32a 100644 --- a/sickbeard/notifiers/pytivo.py +++ b/sickbeard/notifiers/pytivo.py @@ -32,6 +32,9 @@ class pyTivoNotifier: def notify_download(self, ep_name): pass + + def notify_subtitle_download(self, ep_name, lang): + pass def update_library(self, ep_obj): diff --git a/sickbeard/notifiers/synoindex.py b/sickbeard/notifiers/synoindex.py index 
2eaeedbfb76d91a26bed0c9dabe1cd15e5be6db6..71703f24b9be5d2ed44f73bd26f458c7545aed67 100755 --- a/sickbeard/notifiers/synoindex.py +++ b/sickbeard/notifiers/synoindex.py @@ -34,6 +34,9 @@ class synoIndexNotifier: def notify_download(self, ep_name): pass + + def notify_subtitle_download(self, ep_name, lang): + pass def moveFolder(self, old_path, new_path): self.moveObject(old_path, new_path) diff --git a/sickbeard/notifiers/trakt.py b/sickbeard/notifiers/trakt.py index d5bcb6515071b12bc900690bea5748b66d22b53f..7c0d9039a9a60549a0617beb478563084e5d94f5 100644 --- a/sickbeard/notifiers/trakt.py +++ b/sickbeard/notifiers/trakt.py @@ -40,6 +40,9 @@ class TraktNotifier: def notify_download(self, ep_name): pass + + def notify_subtitle_download(self, ep_name, lang): + pass def update_library(self, ep_obj): """ diff --git a/sickbeard/notifiers/tweet.py b/sickbeard/notifiers/tweet.py index c56288038b60d84f66a2f6bae040534662567c0e..fc3611e087b12cebbe2e0d8ae0e693a68a04d094 100644 --- a/sickbeard/notifiers/tweet.py +++ b/sickbeard/notifiers/tweet.py @@ -47,6 +47,10 @@ class TwitterNotifier: def notify_download(self, ep_name): if sickbeard.TWITTER_NOTIFY_ONDOWNLOAD: self._notifyTwitter(common.notifyStrings[common.NOTIFY_DOWNLOAD]+': '+ep_name) + + def notify_subtitle_download(self, ep_name, lang): + if sickbeard.TWITTER_NOTIFY_ONSUBTITLEDOWNLOAD: + self._notifyTwitter(common.notifyStrings[common.NOTIFY_SUBTITLE_DOWNLOAD], ep_name + ": " + lang) def test_notify(self): return self._notifyTwitter("This is a test notification from Sick Beard", force=True) diff --git a/sickbeard/notifiers/xbmc.py b/sickbeard/notifiers/xbmc.py index 63528c644244aacb4946cae8804d4d8908ba8461..6774191140779066f2ccfe576933f69d25564edf 100644 --- a/sickbeard/notifiers/xbmc.py +++ b/sickbeard/notifiers/xbmc.py @@ -450,6 +450,10 @@ class XBMCNotifier: if sickbeard.XBMC_NOTIFY_ONDOWNLOAD: self._notify_xbmc(ep_name, common.notifyStrings[common.NOTIFY_DOWNLOAD]) + def notify_subtitle_download(self, ep_name, lang): 
+ if sickbeard.XBMC_NOTIFY_ONSUBTITLEDOWNLOAD: + self._notify_xbmc(ep_name + ": " + lang, common.notifyStrings[common.NOTIFY_SUBTITLE_DOWNLOAD]) + def test_notify(self, host, username, password): return self._notify_xbmc("Testing XBMC notifications from Sick Beard", "Test Notification", host, username, password, force=True) diff --git a/sickbeard/postProcessor.py b/sickbeard/postProcessor.py index d73bd2c0d37128691df2816baa6ddcb292811a81..7501766377dca6cbdd6f84ff472ce4a9c033c77b 100755 --- a/sickbeard/postProcessor.py +++ b/sickbeard/postProcessor.py @@ -140,7 +140,7 @@ class PostProcessor(object): self._log(u"File "+existing_file+" doesn't exist so there's no worries about replacing it", logger.DEBUG) return PostProcessor.DOESNT_EXIST - def _list_associated_files(self, file_path): + def _list_associated_files(self, file_path, subtitles_only=False): """ For a given file path searches for files with the same name but different extension and returns their absolute paths @@ -167,9 +167,12 @@ class PostProcessor(object): # only add associated to list if associated_file_path == file_path: continue - # only list it if the only non-shared part is the extension + # only list it if the only non-shared part is the extension or if it is a subtitle + if '.' in associated_file_path[len(base_name):]: continue + if subtitles_only and not associated_file_path[len(associated_file_path)-3:] in common.subtitleExtensions: + continue file_path_list.append(associated_file_path) @@ -203,7 +206,7 @@ class PostProcessor(object): # do the library update for synoindex notifiers.synoindex_notifier.deleteFile(cur_file) - def _combined_file_operation (self, file_path, new_path, new_base_name, associated_files=False, action=None): + def _combined_file_operation (self, file_path, new_path, new_base_name, associated_files=False, action=None, subtitles=False): """ Performs a generic operation (move or copy) on a file. 
Can rename the file as well as change its location, and optionally move associated files too. @@ -222,6 +225,8 @@ class PostProcessor(object): file_list = [file_path] if associated_files: file_list = file_list + self._list_associated_files(file_path) + elif subtitles: + file_list = file_list + self._list_associated_files(file_path, True) if not file_list: self._log(u"There were no files associated with " + file_path + ", not moving anything", logger.DEBUG) @@ -234,6 +239,12 @@ class PostProcessor(object): # get the extension cur_extension = cur_file_path.rpartition('.')[-1] + + # check if file have language of subtitles + if cur_extension in common.subtitleExtensions: + cur_lang = cur_file_path.rpartition('.')[0].rpartition('.')[-1] + if cur_lang in sickbeard.SUBTITLES_LANGUAGES: + cur_extension = cur_lang + '.' + cur_extension # replace .nfo with .nfo-orig to avoid conflicts if cur_extension == 'nfo': @@ -246,11 +257,20 @@ class PostProcessor(object): else: new_file_name = helpers.replaceExtension(cur_file_name, cur_extension) - new_file_path = ek.ek(os.path.join, new_path, new_file_name) - + if sickbeard.SUBTITLES_DIR and cur_extension in common.subtitleExtensions: + subs_new_path = ek.ek(os.path.join, new_path, sickbeard.SUBTITLES_DIR) + dir_exists = helpers.makeDir(subs_new_path) + if not dir_exists: + logger.log(u"Unable to create subtitles folder "+subs_new_path, logger.ERROR) + else: + helpers.chmodAsParent(subs_new_path) + new_file_path = ek.ek(os.path.join, subs_new_path, new_file_name) + else: + new_file_path = ek.ek(os.path.join, new_path, new_file_name) + action(cur_file_path, new_file_path) - def _move(self, file_path, new_path, new_base_name, associated_files=False): + def _move(self, file_path, new_path, new_base_name, associated_files=False, subtitles=False): """ file_path: The full path of the media file to move new_path: Destination path where we want to move the file to @@ -268,9 +288,9 @@ class PostProcessor(object): self._log("Unable to move 
file "+cur_file_path+" to "+new_file_path+": "+ex(e), logger.ERROR) raise e - self._combined_file_operation(file_path, new_path, new_base_name, associated_files, action=_int_move) + self._combined_file_operation(file_path, new_path, new_base_name, associated_files, action=_int_move, subtitles=subtitles) - def _copy(self, file_path, new_path, new_base_name, associated_files=False): + def _copy(self, file_path, new_path, new_base_name, associated_files=False, subtitles=False): """ file_path: The full path of the media file to copy new_path: Destination path where we want to copy the file to @@ -288,7 +308,7 @@ class PostProcessor(object): logger.log("Unable to copy file "+cur_file_path+" to "+new_file_path+": "+ex(e), logger.ERROR) raise e - self._combined_file_operation(file_path, new_path, new_base_name, associated_files, action=_int_copy) + self._combined_file_operation(file_path, new_path, new_base_name, associated_files, action=_int_copy, subtitles=subtitles) def _history_lookup(self): """ @@ -818,6 +838,11 @@ class PostProcessor(object): # create any folders we need helpers.make_dirs(dest_path) + # download subtitles + if sickbeard.USE_SUBTITLES and ep_obj.show.subtitles: + cur_ep.location = self.file_path + cur_ep.downloadSubtitles() + # figure out the base name of the resulting episode file if sickbeard.RENAME_EPISODES: orig_extension = self.file_name.rpartition('.')[-1] @@ -832,9 +857,9 @@ class PostProcessor(object): try: # move the episode and associated files to the show dir if sickbeard.KEEP_PROCESSED_DIR: - self._copy(self.file_path, dest_path, new_base_name, sickbeard.MOVE_ASSOCIATED_FILES) + self._copy(self.file_path, dest_path, new_base_name, sickbeard.MOVE_ASSOCIATED_FILES, sickbeard.USE_SUBTITLES and ep_obj.show.subtitles) else: - self._move(self.file_path, dest_path, new_base_name, sickbeard.MOVE_ASSOCIATED_FILES) + self._move(self.file_path, dest_path, new_base_name, sickbeard.MOVE_ASSOCIATED_FILES, sickbeard.USE_SUBTITLES and 
ep_obj.show.subtitles) except (OSError, IOError): raise exceptions.PostProcessingFailed("Unable to move the files to their new home") diff --git a/sickbeard/show_queue.py b/sickbeard/show_queue.py index 6cc667e5ed03cfa824b4729b2dbb69daffe68960..adb9ae86bb82648540c79dea431e426783e40c58 100644 --- a/sickbeard/show_queue.py +++ b/sickbeard/show_queue.py @@ -54,6 +54,9 @@ class ShowQueue(generic_queue.GenericQueue): def isInRenameQueue(self, show): return self._isInQueue(show, (ShowQueueActions.RENAME,)) + + def isInSubtitleQueue(self, show): + return self._isInQueue(show, (ShowQueueActions.SUBTITLE,)) def isBeingAdded(self, show): return self._isBeingSomethinged(show, (ShowQueueActions.ADD,)) @@ -66,6 +69,9 @@ class ShowQueue(generic_queue.GenericQueue): def isBeingRenamed(self, show): return self._isBeingSomethinged(show, (ShowQueueActions.RENAME,)) + + def isBeingSubtitled(self, show): + return self._isBeingSomethinged(show, (ShowQueueActions.SUBTITLE,)) def _getLoadingShowList(self): return [x for x in self.queue + [self.currentItem] if x != None and x.isLoading] @@ -114,9 +120,17 @@ class ShowQueue(generic_queue.GenericQueue): self.add_item(queueItemObj) return queueItemObj + + def downloadSubtitles(self, show, force=False): - def addShow(self, tvdb_id, showDir, default_status=None, quality=None, flatten_folders=None, lang="fr", audio_lang=None): - queueItemObj = QueueItemAdd(tvdb_id, showDir, default_status, quality, flatten_folders, lang, audio_lang) + queueItemObj = QueueItemSubtitle(show) + + self.add_item(queueItemObj) + + return queueItemObj + + def addShow(self, tvdb_id, showDir, default_status=None, quality=None, flatten_folders=None, lang="fr", subtitles=None, audio_lang=None): + queueItemObj = QueueItemAdd(tvdb_id, showDir, default_status, quality, flatten_folders, lang, subtitles, audio_lang) self.add_item(queueItemObj) @@ -129,12 +143,14 @@ class ShowQueueActions: UPDATE = 3 FORCEUPDATE = 4 RENAME = 5 + SUBTITLE=6 names = {REFRESH: 'Refresh', ADD: 
'Add', UPDATE: 'Update', FORCEUPDATE: 'Force Update', RENAME: 'Rename', + SUBTITLE: 'Subtitle', } @@ -147,6 +163,7 @@ class ShowQueueItem(generic_queue.QueueItem): - show being refreshed - show being updated - show being force updated + - show being subtitled """ def __init__(self, action_id, show): generic_queue.QueueItem.__init__(self, ShowQueueActions.names[action_id], action_id) @@ -167,7 +184,7 @@ class ShowQueueItem(generic_queue.QueueItem): class QueueItemAdd(ShowQueueItem): - def __init__(self, tvdb_id, showDir, default_status, quality, flatten_folders, lang, audio_lang): + def __init__(self, tvdb_id, showDir, default_status, quality, flatten_folders, subtitles, lang, audio_lang): self.tvdb_id = tvdb_id self.showDir = showDir @@ -176,6 +193,7 @@ class QueueItemAdd(ShowQueueItem): self.flatten_folders = flatten_folders self.lang = lang self.audio_lang = audio_lang + self.subtitles = subtitles self.show = None @@ -250,6 +268,7 @@ class QueueItemAdd(ShowQueueItem): # set up initial values self.show.location = self.showDir + self.show.subtitles = self.subtitles if self.subtitles != None else sickbeard.SUBTITLES_DEFAULT self.show.quality = self.quality if self.quality else sickbeard.QUALITY_DEFAULT self.show.flatten_folders = self.flatten_folders if self.flatten_folders != None else sickbeard.FLATTEN_FOLDERS_DEFAULT self.show.paused = 0 @@ -387,6 +406,20 @@ class QueueItemRename(ShowQueueItem): self.inProgress = False +class QueueItemSubtitle(ShowQueueItem): + def __init__(self, show=None): + ShowQueueItem.__init__(self, ShowQueueActions.SUBTITLE, show) + + def execute(self): + + ShowQueueItem.execute(self) + + logger.log(u"Downloading subtitles for "+self.show.name) + + self.show.downloadSubtitles() + + self.inProgress = False + class QueueItemUpdate(ShowQueueItem): def __init__(self, show=None): diff --git a/sickbeard/subtitles.py b/sickbeard/subtitles.py new file mode 100644 index 
0000000000000000000000000000000000000000..de9dc286f630b11201b50aa84916e884d8cf2c70 --- /dev/null +++ b/sickbeard/subtitles.py @@ -0,0 +1,135 @@ +# Author: Nyaran <nyayukko@gmail.com>, based on Antoine Bertin <diaoulael@gmail.com> work +# URL: http://code.google.com/p/sickbeard/ +# +# This file is part of Sick Beard. +# +# Sick Beard is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Sick Beard is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Sick Beard. If not, see <http://www.gnu.org/licenses/>. + +import datetime +import sickbeard +from sickbeard.common import * +from sickbeard import notifiers +from sickbeard import logger +from sickbeard import helpers +from sickbeard import encodingKludge as ek +from sickbeard import db +from sickbeard import history +from lib import subliminal + +SINGLE = 'und' +def sortedServiceList(): + servicesMapping = dict([(x.lower(), x) for x in subliminal.core.SERVICES]) + + newList = [] + + # add all services in the priority list, in order + curIndex = 0 + for curService in sickbeard.SUBTITLES_SERVICES_LIST: + if curService in servicesMapping: + curServiceDict = {'id': curService, 'image': curService+'.png', 'name': servicesMapping[curService], 'enabled': sickbeard.SUBTITLES_SERVICES_ENABLED[curIndex] == 1, 'api_based': __import__('lib.subliminal.services.' + curService, globals=globals(), locals=locals(), fromlist=['Service'], level=-1).Service.api_based, 'url': __import__('lib.subliminal.services.' 
+ curService, globals=globals(), locals=locals(), fromlist=['Service'], level=-1).Service.site_url} + newList.append(curServiceDict) + curIndex += 1 + + # add any services that are missing from that list + for curService in servicesMapping.keys(): + if curService not in [x['id'] for x in newList]: + curServiceDict = {'id': curService, 'image': curService+'.png', 'name': servicesMapping[curService], 'enabled': False, 'api_based': servicesMapping[curService] in subliminal.SERVICES, 'url': ''} + newList.append(curServiceDict) + + return newList + +def getEnabledServiceList(): + return [x['name'] for x in sortedServiceList() if x['enabled']] + +def isValidLanguage(language): + return subliminal.language.language_list(language) + +def getLanguageName(selectLang): + return subliminal.language.Language(selectLang).name + +def wantedLanguages(sqlLike = False): + wantedLanguages = sorted(sickbeard.SUBTITLES_LANGUAGES) + if sqlLike: + return '%' + ','.join(wantedLanguages) + '%' + return wantedLanguages + +def subtitlesLanguages(video_path): + """Return a list detected subtitles for the given video file""" + video = subliminal.videos.Video.from_path(video_path) + subtitles = video.scan() + languages = set() + for subtitle in subtitles: + if subtitle.language: + languages.add(subtitle.language.alpha2) + else: + languages.add(SINGLE) + return list(languages) + +# Return a list with languages that have alpha2 code +def subtitleLanguageFilter(): + return [language for language in subliminal.language.LANGUAGES if language[2] != ""] + +class SubtitlesFinder(): + """ + The SubtitlesFinder will be executed every hour but will not necessarly search + and download subtitles. Only if the defined rule is true + """ + def run(self): + # TODO: Put that in the __init__ before starting the thread? 
+ if not sickbeard.USE_SUBTITLES: + logger.log(u'Subtitles support disabled', logger.DEBUG) + return + if len(sickbeard.subtitles.getEnabledServiceList()) < 1: + logger.log(u'Not enough services selected. At least 1 service is required to search subtitles in the background', logger.ERROR) + return + + logger.log(u'Checking for subtitles', logger.MESSAGE) + + # get episodes on which we want subtitles + # criteria is: + # - show subtitles = 1 + # - episode subtitles != config wanted languages or SINGLE (depends on config multi) + # - search count < 2 and diff(airdate, now) > 1 week : now -> 1d + # - search count < 7 and diff(airdate, now) <= 1 week : now -> 4h -> 8h -> 16h -> 1d -> 1d -> 1d + + myDB = db.DBConnection() + today = datetime.date.today().toordinal() + # you have 5 minutes to understand that one. Good luck + sqlResults = myDB.select('SELECT s.show_name, e.showid, e.season, e.episode, e.status, e.subtitles, e.subtitles_searchcount AS searchcount, e.subtitles_lastsearch AS lastsearch, e.location, (? - e.airdate) AS airdate_daydiff FROM tv_episodes AS e INNER JOIN tv_shows AS s ON (e.showid = s.tvdb_id) WHERE s.subtitles = 1 AND e.subtitles NOT LIKE (?) AND ((e.subtitles_searchcount <= 2 AND (? - e.airdate) > 7) OR (e.subtitles_searchcount <= 7 AND (? 
- e.airdate) <= 7)) AND (e.status IN ('+','.join([str(x) for x in Quality.DOWNLOADED + [ARCHIVED]])+') OR (e.status IN ('+','.join([str(x) for x in Quality.SNATCHED + Quality.SNATCHED_PROPER])+') AND e.location != ""))', [today, wantedLanguages(True), today, today]) + if len(sqlResults) == 0: + logger.log('No subtitles to download', logger.MESSAGE) + return + + rules = self._getRules() + now = datetime.datetime.now(); + for epToSub in sqlResults: + if not ek.ek(os.path.isfile, epToSub['location']): + logger.log('Episode file does not exist, cannot download subtitles for episode %dx%d of show %s' % (epToSub['season'], epToSub['episode'], epToSub['show_name']), logger.DEBUG) + continue + + # Old shows rule + if ((epToSub['airdate_daydiff'] > 7 and epToSub['searchcount'] < 2 and now - datetime.datetime.strptime(epToSub['lastsearch'], '%Y-%m-%d %H:%M:%S') > datetime.timedelta(hours=rules['old'][epToSub['searchcount']])) or + # Recent shows rule + (epToSub['airdate_daydiff'] <= 7 and epToSub['searchcount'] < 7 and now - datetime.datetime.strptime(epToSub['lastsearch'], '%Y-%m-%d %H:%M:%S') > datetime.timedelta(hours=rules['new'][epToSub['searchcount']]))): + logger.log('Downloading subtitles for episode %dx%d of show %s' % (epToSub['season'], epToSub['episode'], epToSub['show_name']), logger.DEBUG) + helpers.findCertainShow(sickbeard.showList, int(epToSub['showid'])).getEpisode(int(epToSub["season"]), int(epToSub["episode"])).downloadSubtitles() + + + def _getRules(self): + """ + Define the hours to wait between 2 subtitles search depending on: + - the episode: new or old + - the number of searches done so far (searchcount), represented by the index of the list + """ + return {'old': [0, 24], 'new': [0, 4, 8, 4, 16, 24, 24]} diff --git a/sickbeard/tv.py b/sickbeard/tv.py index 892ea95e4f7e35180d1f101240c65dd865b7ab56..ffa3fd607474ab91427f0160a652dff4dffcdeb8 100644 --- a/sickbeard/tv.py +++ b/sickbeard/tv.py @@ -30,6 +30,8 @@ import xml.etree.cElementTree as etree from 
name_parser.parser import NameParser, InvalidNameException +from lib import subliminal + from lib.tvdb_api import tvdb_api, tvdb_exceptions from sickbeard import db @@ -37,7 +39,10 @@ from sickbeard import helpers, exceptions, logger from sickbeard.exceptions import ex from sickbeard import tvrage from sickbeard import image_cache +from sickbeard import notifiers from sickbeard import postProcessor +from sickbeard import subtitles +from sickbeard import history from sickbeard import encodingKludge as ek @@ -66,6 +71,7 @@ class TVShow(object): self.startyear = 0 self.paused = 0 self.air_by_date = 0 + self.subtitles = int(sickbeard.SUBTITLES_DEFAULT) self.lang = lang self.audio_lang = audio_lang self.custom_search_names = "" @@ -270,6 +276,12 @@ class TVShow(object): # store the reference in the show if curEpisode != None: + if self.subtitles: + try: + curEpisode.refreshSubtitles() + except: + logger.log(str(self.tvdbid) + ": Could not refresh subtitles", logger.ERROR) + logger.log(traceback.format_exc(), logger.DEBUG) curEpisode.saveToDB() @@ -603,6 +615,12 @@ class TVShow(object): self.air_by_date = sqlResults[0]["air_by_date"] if self.air_by_date == None: self.air_by_date = 0 + + self.subtitles = sqlResults[0]["subtitles"] + if self.subtitles: + self.subtitles = 1 + else: + self.subtitles = 0 self.quality = int(sqlResults[0]["quality"]) self.flatten_folders = int(sqlResults[0]["flatten_folders"]) @@ -804,14 +822,53 @@ class TVShow(object): if curEp.location and curEp.status in Quality.DOWNLOADED: logger.log(str(self.tvdbid) + ": Location for " + str(season) + "x" + str(episode) + " doesn't exist, removing it and changing our status to IGNORED", logger.DEBUG) curEp.status = IGNORED + curEp.subtitles = list() + curEp.subtitles_searchcount = 0 + curEp.subtitles_lastsearch = str(datetime.datetime.min) curEp.location = '' curEp.hasnfo = False curEp.hastbn = False curEp.release_name = '' curEp.saveToDB() - def saveToDB(self): + def downloadSubtitles(self): + #TODO: Add 
support for force option + if not ek.ek(os.path.isdir, self._location): + logger.log(str(self.tvdbid) + ": Show dir doesn't exist, can't download subtitles", logger.DEBUG) + return + logger.log(str(self.tvdbid) + ": Downloading subtitles", logger.DEBUG) + + try: + episodes = db.DBConnection().select("SELECT location FROM tv_episodes WHERE showid = ? AND location NOT LIKE '' ORDER BY season DESC, episode DESC", [self.tvdbid]) + for episodeLoc in episodes: + episode = self.makeEpFromFile(episodeLoc['location']); + subtitles = episode.downloadSubtitles() + + if sickbeard.SUBTITLES_DIR: + for video in subtitles: + subs_new_path = ek.ek(os.path.join, os.path.dirname(video.path), sickbeard.SUBTITLES_DIR) + dir_exists = helpers.makeDir(subs_new_path) + if not dir_exists: + logger.log(u"Unable to create subtitles folder "+subs_new_path, logger.ERROR) + else: + helpers.chmodAsParent(subs_new_path) + + for subtitle in subtitles.get(video): + new_file_path = ek.ek(os.path.join, subs_new_path, os.path.basename(subtitle.path)) + helpers.moveFile(subtitle.path, new_file_path) + helpers.chmodAsParent(new_file_path) + else: + for video in subtitles: + for subtitle in subtitles.get(video): + helpers.chmodAsParent(subtitle.path) + + except Exception as e: + logger.log("Error occurred when downloading subtitles: " + str(e), logger.DEBUG) + return + + + def saveToDB(self): logger.log(str(self.tvdbid) + ": Saving show info to database", logger.DEBUG) myDB = db.DBConnection() @@ -829,6 +886,7 @@ class TVShow(object): "flatten_folders": self.flatten_folders, "paused": self.paused, "air_by_date": self.air_by_date, + "subtitles": self.subtitles, "startyear": self.startyear, "tvr_name": self.tvrname, "lang": self.lang, @@ -954,6 +1012,9 @@ class TVEpisode(object): self._season = season self._episode = episode self._description = "" + self._subtitles = list() + self._subtitles_searchcount = 0 + self._subtitles_lastsearch = str(datetime.datetime.min) self._airdate = 
datetime.date.fromordinal(1) self._hasnfo = False self._hastbn = False @@ -981,6 +1042,9 @@ class TVEpisode(object): season = property(lambda self: self._season, dirty_setter("_season")) episode = property(lambda self: self._episode, dirty_setter("_episode")) description = property(lambda self: self._description, dirty_setter("_description")) + subtitles = property(lambda self: self._subtitles, dirty_setter("_subtitles")) + subtitles_searchcount = property(lambda self: self._subtitles_searchcount, dirty_setter("_subtitles_searchcount")) + subtitles_lastsearch = property(lambda self: self._subtitles_lastsearch, dirty_setter("_subtitles_lastsearch")) airdate = property(lambda self: self._airdate, dirty_setter("_airdate")) hasnfo = property(lambda self: self._hasnfo, dirty_setter("_hasnfo")) hastbn = property(lambda self: self._hastbn, dirty_setter("_hastbn")) @@ -1003,6 +1067,51 @@ class TVEpisode(object): self.file_size = 0 location = property(lambda self: self._location, _set_location) + def refreshSubtitles(self): + """Look for subtitles files and refresh the subtitles property""" + self.subtitles = subtitles.subtitlesLanguages(self.location) + + def downloadSubtitles(self): + #TODO: Add support for force option + if not ek.ek(os.path.isfile, self.location): + logger.log(str(self.show.tvdbid) + ": Episode file doesn't exist, can't download subtitles for episode " + str(self.season) + "x" + str(self.episode), logger.DEBUG) + return + logger.log(str(self.show.tvdbid) + ": Downloading subtitles for episode " + str(self.season) + "x" + str(self.episode), logger.DEBUG) + + previous_subtitles = self.subtitles + + try: + need_languages = set(sickbeard.SUBTITLES_LANGUAGES) - set(self.subtitles) + subtitles = subliminal.download_subtitles([self.location], languages=need_languages, services=sickbeard.subtitles.getEnabledServiceList(), force=False, multi=True, cache_dir=sickbeard.CACHE_DIR) + + except Exception as e: + logger.log("Error occurred when downloading subtitles: " 
+ str(e), logger.DEBUG) + return + + self.refreshSubtitles() + self.subtitles_searchcount = self.subtitles_searchcount + 1 + self.subtitles_lastsearch = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + self.saveToDB() + + newsubtitles = set(self.subtitles).difference(set(previous_subtitles)) + + if newsubtitles: + subtitleList = (subliminal.language.Language(x).name for x in newsubtitles) + logger.log(str(self.show.tvdbid) + ": Downloaded " + ", ".join(subtitleList) + " subtitles for episode " + str(self.season) + "x" + str(self.episode), logger.DEBUG) + + notifiers.notify_subtitle_download(self.prettyName(), ", ".join(subtitleList)) + + else: + logger.log(str(self.show.tvdbid) + ": No subtitles downloaded for episode " + str(self.season) + "x" + str(self.episode), logger.DEBUG) + + if sickbeard.SUBTITLES_HISTORY: + for video in subtitles: + for subtitle in subtitles.get(video): + history.logSubtitle(self.show.tvdbid, self.season, self.episode, self.status, subtitle) + + + return subtitles + def checkForMetaFiles(self): @@ -1082,6 +1191,10 @@ class TVEpisode(object): self.description = sqlResults[0]["description"] if self.description == None: self.description = "" + if sqlResults[0]["subtitles"] != None and sqlResults[0]["subtitles"] != '': + self.subtitles = sqlResults[0]["subtitles"].split(",") + self.subtitles_searchcount = sqlResults[0]["subtitles_searchcount"] + self.subtitles_lastsearch = sqlResults[0]["subtitles_lastsearch"] self.airdate = datetime.date.fromordinal(int(sqlResults[0]["airdate"])) #logger.log(u"1 Status changes from " + str(self.status) + " to " + str(sqlResults[0]["status"]), logger.DEBUG) self.status = int(sqlResults[0]["status"]) @@ -1303,6 +1416,9 @@ class TVEpisode(object): toReturn += str(self.show.name) + " - " + str(self.season) + "x" + str(self.episode) + " - " + str(self.name) + "\n" toReturn += "location: " + str(self.location) + "\n" toReturn += "description: " + str(self.description) + "\n" + toReturn += "subtitles: " + 
str(",".join(self.subtitles)) + "\n" + toReturn += "subtitles_searchcount: " + str(self.subtitles_searchcount) + "\n" + toReturn += "subtitles_lastsearch: " + str(self.subtitles_lastsearch) + "\n" toReturn += "airdate: " + str(self.airdate.toordinal()) + " (" + str(self.airdate) + ")\n" toReturn += "hasnfo: " + str(self.hasnfo) + "\n" toReturn += "hastbn: " + str(self.hastbn) + "\n" @@ -1374,9 +1490,13 @@ class TVEpisode(object): logger.log(u"STATUS IS " + str(self.status), logger.DEBUG) myDB = db.DBConnection() + newValueDict = {"tvdbid": self.tvdbid, "name": self.name, "description": self.description, + "subtitles": ",".join([sub for sub in self.subtitles]), + "subtitles_searchcount": self.subtitles_searchcount, + "subtitles_lastsearch": self.subtitles_lastsearch, "airdate": self.airdate.toordinal(), "hasnfo": self.hasnfo, "hastbn": self.hastbn, @@ -1720,6 +1840,8 @@ class TVEpisode(object): proper_path = self.proper_path() absolute_proper_path = ek.ek(os.path.join, self.show.location, proper_path) absolute_current_path_no_ext, file_ext = os.path.splitext(self.location) + + related_subs = [] current_path = absolute_current_path_no_ext @@ -1734,6 +1856,11 @@ class TVEpisode(object): return related_files = postProcessor.PostProcessor(self.location)._list_associated_files(self.location) + + if self.show.subtitles and sickbeard.SUBTITLES_DIR != '': + related_subs = postProcessor.PostProcessor(self.location)._list_associated_files(sickbeard.SUBTITLES_DIR, subtitles_only=True) + absolute_proper_subs_path = ek.ek(os.path.join, sickbeard.SUBTITLES_DIR, self.formatted_filename()) + logger.log(u"Files associated to " + self.location + ": " + str(related_files), logger.DEBUG) # move the ep file @@ -1745,6 +1872,11 @@ class TVEpisode(object): if cur_result == False: logger.log(str(self.tvdbid) + ": Unable to rename file " + cur_related_file, logger.ERROR) + for cur_related_sub in related_subs: + cur_result = helpers.rename_ep_file(cur_related_sub, absolute_proper_subs_path) 
+ if cur_result == False: + logger.log(str(self.tvdbid) + ": Unable to rename file " + cur_related_sub, logger.ERROR) + # save the ep with self.lock: if result != False: diff --git a/sickbeard/version.py b/sickbeard/version.py index 4df29c1bb34eb3599fb4069415d94a14f525cd04..d020973c3ffed94d17460456d0379efab8dabc7b 100644 --- a/sickbeard/version.py +++ b/sickbeard/version.py @@ -1 +1 @@ -SICKBEARD_VERSION = "master" \ No newline at end of file +SICKBEARD_VERSION = "LANG VERSION" \ No newline at end of file diff --git a/sickbeard/webserve.py b/sickbeard/webserve.py index 37c6c5b02dfec7f1feb5f10ab153c44195ba07fe..90fe6a94261dc187aaa99ca2bdf1d29b1860b1ac 100644 --- a/sickbeard/webserve.py +++ b/sickbeard/webserve.py @@ -41,6 +41,7 @@ from sickbeard import encodingKludge as ek from sickbeard import search_queue from sickbeard import image_cache from sickbeard import naming +from sickbeard import subtitles from sickbeard.providers import newznab from sickbeard.common import Quality, Overview, statusStrings @@ -50,6 +51,8 @@ from sickbeard.webapi import Api from lib.tvdb_api import tvdb_api +import subliminal + try: import json except ImportError: @@ -151,7 +154,10 @@ ManageMenu = [ { 'title': 'Backlog Overview', 'path': 'manage/backlogOverview' }, { 'title': 'Manage Searches', 'path': 'manage/manageSearches' }, { 'title': 'Episode Status Management', 'path': 'manage/episodeStatuses' }, + { 'title': 'Manage Missed Subtitles', 'path': 'manage/subtitleMissed' }, ] +if sickbeard.USE_SUBTITLES: + ManageMenu.append({ 'title': 'Missed Subtitle Management', 'path': 'manage/subtitleMissed' }) class ManageSearches: @@ -309,10 +315,120 @@ class Manage: redirect('/manage/episodeStatuses') @cherrypy.expose - def backlogShow(self, tvdb_id): + def showSubtitleMissed(self, tvdb_id, whichSubs): + myDB = db.DBConnection() - show_obj = helpers.findCertainShow(sickbeard.showList, int(tvdb_id)) + cur_show_results = myDB.select("SELECT season, episode, name, subtitles FROM tv_episodes WHERE 
showid = ? AND season != 0 AND status LIKE '%4'", [int(tvdb_id)]) + + result = {} + for cur_result in cur_show_results: + if whichSubs == 'all': + if len(set(cur_result["subtitles"].split(',')).intersection(set(subtitles.wantedLanguages()))) >= len(subtitles.wantedLanguages()): + continue + elif whichSubs in cur_result["subtitles"].split(','): + continue + + cur_season = int(cur_result["season"]) + cur_episode = int(cur_result["episode"]) + + if cur_season not in result: + result[cur_season] = {} + + if cur_episode not in result[cur_season]: + result[cur_season][cur_episode] = {} + + result[cur_season][cur_episode]["name"] = cur_result["name"] + + result[cur_season][cur_episode]["subtitles"] = ",".join(subliminal.language.Language(subtitle).alpha2 for subtitle in cur_result["subtitles"].split(',')) if not cur_result["subtitles"] == '' else '' + + return json.dumps(result) + + @cherrypy.expose + def subtitleMissed(self, whichSubs=None): + + t = PageTemplate(file="manage_subtitleMissed.tmpl") + t.submenu = ManageMenu + t.whichSubs = whichSubs + + if not whichSubs: + return _munge(t) + + myDB = db.DBConnection() + status_results = myDB.select("SELECT show_name, tv_shows.tvdb_id as tvdb_id, tv_episodes.subtitles subtitles FROM tv_episodes, tv_shows WHERE tv_shows.subtitles = 1 AND tv_episodes.status LIKE '%4' AND tv_episodes.season != 0 AND tv_episodes.showid = tv_shows.tvdb_id ORDER BY show_name") + + ep_counts = {} + show_names = {} + sorted_show_ids = [] + for cur_status_result in status_results: + if whichSubs == 'all': + if len(set(cur_status_result["subtitles"].split(',')).intersection(set(subtitles.wantedLanguages()))) >= len(subtitles.wantedLanguages()): + continue + elif whichSubs in cur_status_result["subtitles"].split(','): + continue + + cur_tvdb_id = int(cur_status_result["tvdb_id"]) + if cur_tvdb_id not in ep_counts: + ep_counts[cur_tvdb_id] = 1 + else: + ep_counts[cur_tvdb_id] += 1 + + show_names[cur_tvdb_id] = cur_status_result["show_name"] + if 
cur_tvdb_id not in sorted_show_ids: + sorted_show_ids.append(cur_tvdb_id) + + t.show_names = show_names + t.ep_counts = ep_counts + t.sorted_show_ids = sorted_show_ids + return _munge(t) + + @cherrypy.expose + def downloadSubtitleMissed(self, *args, **kwargs): + + to_download = {} + + # make a list of all shows and their associated args + for arg in kwargs: + tvdb_id, what = arg.split('-') + + # we don't care about unchecked checkboxes + if kwargs[arg] != 'on': + continue + + if tvdb_id not in to_download: + to_download[tvdb_id] = [] + + to_download[tvdb_id].append(what) + + for cur_tvdb_id in to_download: + # get a list of all the eps we want to download subtitles if they just said "all" + if 'all' in to_download[cur_tvdb_id]: + myDB = db.DBConnection() + all_eps_results = myDB.select("SELECT season, episode FROM tv_episodes WHERE status LIKE '%4' AND season != 0 AND showid = ?", [cur_tvdb_id]) + to_download[cur_tvdb_id] = [str(x["season"])+'x'+str(x["episode"]) for x in all_eps_results] + + for epResult in to_download[cur_tvdb_id]: + season, episode = epResult.split('x'); + + show = sickbeard.helpers.findCertainShow(sickbeard.showList, int(cur_tvdb_id)) + subtitles = show.getEpisode(int(season), int(episode)).downloadSubtitles() + + if sickbeard.SUBTITLES_DIR: + for video in subtitles: + subs_new_path = ek.ek(os.path.join, os.path.dirname(video.path), sickbeard.SUBTITLES_DIR) + if not ek.ek(os.path.isdir, subs_new_path): + ek.ek(os.mkdir, subs_new_path) + + for subtitle in subtitles.get(video): + new_file_path = ek.ek(os.path.join, subs_new_path, os.path.basename(subtitle.path)) + helpers.moveFile(subtitle.path, new_file_path) + + redirect('/manage/subtitleMissed') + @cherrypy.expose + def backlogShow(self, tvdb_id): + + show_obj = helpers.findCertainShow(sickbeard.showList, int(tvdb_id)) + if show_obj: sickbeard.backlogSearchScheduler.action.searchBacklog([show_obj]) #@UndefinedVariable @@ -384,6 +500,9 @@ class Manage: quality_all_same = True last_quality = 
None + + subtitles_all_same = True + last_subtitles = None root_dir_list = [] @@ -413,16 +532,23 @@ class Manage: else: last_quality = curShow.quality + if subtitles_all_same: + if last_subtitles not in (None, curShow.subtitles): + subtitles_all_same = False + else: + last_subtitles = curShow.subtitles + t.showList = toEdit t.paused_value = last_paused if paused_all_same else None t.flatten_folders_value = last_flatten_folders if flatten_folders_all_same else None t.quality_value = last_quality if quality_all_same else None + t.subtitles_value = last_subtitles if subtitles_all_same else None t.root_dir_list = root_dir_list return _munge(t) @cherrypy.expose - def massEditSubmit(self, paused=None, flatten_folders=None, quality_preset=False, + def massEditSubmit(self, paused=None, flatten_folders=None, quality_preset=False, subtitles=None, anyQualities=[], bestQualities=[], toEdit=None, *args, **kwargs): dir_map = {} @@ -461,10 +587,19 @@ class Manage: new_flatten_folders = True if flatten_folders == 'enable' else False new_flatten_folders = 'on' if new_flatten_folders else 'off' + if subtitles == 'keep': + new_subtitles = showObj.subtitles + else: + new_subtitles = True if subtitles == 'enable' else False + + new_subtitles = 'on' if new_subtitles else 'off' + if quality_preset == 'keep': anyQualities, bestQualities = Quality.splitQuality(showObj.quality) - curErrors += Home().editShow(curShow, new_show_dir, anyQualities, bestQualities, new_flatten_folders, new_paused, directCall=True) + exceptions_list = [] + + curErrors += Home().editShow(curShow, new_show_dir, anyQualities, bestQualities, exceptions_list, new_flatten_folders, new_paused, subtitles=new_subtitles, directCall=True) if curErrors: logger.log(u"Errors: "+str(curErrors), logger.ERROR) @@ -477,7 +612,7 @@ class Manage: redirect("/manage") @cherrypy.expose - def massUpdate(self, toUpdate=None, toRefresh=None, toRename=None, toDelete=None, toMetadata=None): + def massUpdate(self, toUpdate=None, 
toRefresh=None, toRename=None, toDelete=None, toMetadata=None, toSubtitle=None): if toUpdate != None: toUpdate = toUpdate.split('|') @@ -493,6 +628,11 @@ class Manage: toRename = toRename.split('|') else: toRename = [] + + if toSubtitle != None: + toSubtitle = toSubtitle.split('|') + else: + toSubtitle = [] if toDelete != None: toDelete = toDelete.split('|') @@ -508,8 +648,9 @@ class Manage: refreshes = [] updates = [] renames = [] + subtitles = [] - for curShowID in set(toUpdate+toRefresh+toRename+toDelete+toMetadata): + for curShowID in set(toUpdate+toRefresh+toRename+toSubtitle+toDelete+toMetadata): if curShowID == '': continue @@ -542,6 +683,10 @@ class Manage: if curShowID in toRename: sickbeard.showQueueScheduler.action.renameShowEpisodes(showObj) #@UndefinedVariable renames.append(showObj.name) + + if curShowID in toSubtitle: + sickbeard.showQueueScheduler.action.downloadSubtitles(showObj) #@UndefinedVariable + subtitles.append(showObj.name) if len(errors) > 0: ui.notifications.error("Errors encountered", @@ -563,8 +708,13 @@ class Manage: messageDetail += "<br /><b>Renames</b><br /><ul><li>" messageDetail += "</li><li>".join(renames) messageDetail += "</li></ul>" + + if len(subtitles) > 0: + messageDetail += "<br /><b>Subtitles</b><br /><ul><li>" + messageDetail += "</li><li>".join(subtitles) + messageDetail += "</li></ul>" - if len(updates+refreshes+renames) > 0: + if len(updates+refreshes+renames+subtitles) > 0: ui.notifications.message("The following actions were queued:", messageDetail) @@ -617,6 +767,7 @@ ConfigMenu = [ { 'title': 'General', 'path': 'config/general/' }, { 'title': 'Search Settings', 'path': 'config/search/' }, { 'title': 'Search Providers', 'path': 'config/providers/' }, + { 'title': 'Subtitles Settings','path': 'config/subtitles/' }, { 'title': 'Post Processing', 'path': 'config/postProcessing/' }, { 'title': 'Notifications', 'path': 'config/notifications/' }, ] @@ -635,7 +786,7 @@ class ConfigGeneral: sickbeard.ROOT_DIRS = 
rootDirString @cherrypy.expose - def saveAddShowDefaults(self, defaultFlattenFolders, defaultStatus, anyQualities, bestQualities, audio_langs ): + def saveAddShowDefaults(self, defaultFlattenFolders, defaultStatus, anyQualities, bestQualities, audio_langs, subtitles): if anyQualities: anyQualities = anyQualities.split(',') @@ -660,6 +811,12 @@ class ConfigGeneral: sickbeard.FLATTEN_FOLDERS_DEFAULT = int(defaultFlattenFolders) + if subtitles == "true": + subtitles = 1 + else: + subtitles = 0 + sickbeard.SUBTITLES_DEFAULT = int(subtitles) + @cherrypy.expose def generateKey(self): """ Return a new randomized API_KEY @@ -1185,23 +1342,23 @@ class ConfigNotifications: return _munge(t) @cherrypy.expose - def saveNotifications(self, use_xbmc=None, xbmc_notify_onsnatch=None, xbmc_notify_ondownload=None, xbmc_update_onlyfirst=None, + def saveNotifications(self, use_xbmc=None, xbmc_notify_onsnatch=None, xbmc_notify_ondownload=None, xbmc_update_onlyfirst=None, xbmc_notify_onsubtitledownload=None, xbmc_update_library=None, xbmc_update_full=None, xbmc_host=None, xbmc_username=None, xbmc_password=None, - use_plex=None, plex_notify_onsnatch=None, plex_notify_ondownload=None, plex_update_library=None, + use_plex=None, plex_notify_onsnatch=None, plex_notify_ondownload=None, plex_notify_onsubtitledownload=None, plex_update_library=None, plex_server_host=None, plex_host=None, plex_username=None, plex_password=None, - use_growl=None, growl_notify_onsnatch=None, growl_notify_ondownload=None, growl_host=None, growl_password=None, - use_prowl=None, prowl_notify_onsnatch=None, prowl_notify_ondownload=None, prowl_api=None, prowl_priority=0, - use_twitter=None, twitter_notify_onsnatch=None, twitter_notify_ondownload=None, - use_notifo=None, notifo_notify_onsnatch=None, notifo_notify_ondownload=None, notifo_username=None, notifo_apisecret=None, - use_boxcar=None, boxcar_notify_onsnatch=None, boxcar_notify_ondownload=None, boxcar_username=None, - use_pushover=None, 
pushover_notify_onsnatch=None, pushover_notify_ondownload=None, pushover_userkey=None, - use_libnotify=None, libnotify_notify_onsnatch=None, libnotify_notify_ondownload=None, + use_growl=None, growl_notify_onsnatch=None, growl_notify_ondownload=None, growl_notify_onsubtitledownload=None, growl_host=None, growl_password=None, + use_prowl=None, prowl_notify_onsnatch=None, prowl_notify_ondownload=None, prowl_notify_onsubtitledownload=None, prowl_api=None, prowl_priority=0, + use_twitter=None, twitter_notify_onsnatch=None, twitter_notify_ondownload=None, twitter_notify_onsubtitledownload=None, + use_notifo=None, notifo_notify_onsnatch=None, notifo_notify_ondownload=None, notifo_notify_onsubtitledownload=None, notifo_username=None, notifo_apisecret=None, + use_boxcar=None, boxcar_notify_onsnatch=None, boxcar_notify_ondownload=None, boxcar_notify_onsubtitledownload=None, boxcar_username=None, + use_pushover=None, pushover_notify_onsnatch=None, pushover_notify_ondownload=None, pushover_notify_onsubtitledownload=None, pushover_userkey=None, + use_libnotify=None, libnotify_notify_onsnatch=None, libnotify_notify_ondownload=None, libnotify_notify_onsubtitledownload=None, use_nmj=None, nmj_host=None, nmj_database=None, nmj_mount=None, use_synoindex=None, use_nmjv2=None, nmjv2_host=None, nmjv2_dbloc=None, nmjv2_database=None, use_trakt=None, trakt_username=None, trakt_password=None, trakt_api=None, - use_pytivo=None, pytivo_notify_onsnatch=None, pytivo_notify_ondownload=None, pytivo_update_library=None, + use_pytivo=None, pytivo_notify_onsnatch=None, pytivo_notify_ondownload=None, pytivo_notify_onsubtitledownload=None, pytivo_update_library=None, pytivo_host=None, pytivo_share_name=None, pytivo_tivo_name=None, - use_nma=None, nma_notify_onsnatch=None, nma_notify_ondownload=None, nma_api=None, nma_priority=0 ): + use_nma=None, nma_notify_onsnatch=None, nma_notify_ondownload=None, nma_notify_onsubtitledownload=None, nma_api=None, nma_priority=0 ): results = [] @@ -1215,6 +1372,11 
@@ class ConfigNotifications: else: xbmc_notify_ondownload = 0 + if xbmc_notify_onsubtitledownload == "on": + xbmc_notify_onsubtitledownload = 1 + else: + xbmc_notify_onsubtitledownload = 0 + if xbmc_update_library == "on": xbmc_update_library = 1 else: @@ -1250,6 +1412,11 @@ class ConfigNotifications: else: plex_notify_ondownload = 0 + if plex_notify_onsubtitledownload == "on": + plex_notify_onsubtitledownload = 1 + else: + plex_notify_onsubtitledownload = 0 + if use_plex == "on": use_plex = 1 else: @@ -1265,6 +1432,11 @@ class ConfigNotifications: else: growl_notify_ondownload = 0 + if growl_notify_onsubtitledownload == "on": + growl_notify_onsubtitledownload = 1 + else: + growl_notify_onsubtitledownload = 0 + if use_growl == "on": use_growl = 1 else: @@ -1279,6 +1451,12 @@ class ConfigNotifications: prowl_notify_ondownload = 1 else: prowl_notify_ondownload = 0 + + if prowl_notify_onsubtitledownload == "on": + prowl_notify_onsubtitledownload = 1 + else: + prowl_notify_onsubtitledownload = 0 + if use_prowl == "on": use_prowl = 1 else: @@ -1293,6 +1471,12 @@ class ConfigNotifications: twitter_notify_ondownload = 1 else: twitter_notify_ondownload = 0 + + if twitter_notify_onsubtitledownload == "on": + twitter_notify_onsubtitledownload = 1 + else: + twitter_notify_onsubtitledownload = 0 + if use_twitter == "on": use_twitter = 1 else: @@ -1307,6 +1491,12 @@ class ConfigNotifications: notifo_notify_ondownload = 1 else: notifo_notify_ondownload = 0 + + if notifo_notify_onsubtitledownload == "on": + notifo_notify_onsubtitledownload = 1 + else: + notifo_notify_onsubtitledownload = 0 + if use_notifo == "on": use_notifo = 1 else: @@ -1321,6 +1511,12 @@ class ConfigNotifications: boxcar_notify_ondownload = 1 else: boxcar_notify_ondownload = 0 + + if boxcar_notify_onsubtitledownload == "on": + boxcar_notify_onsubtitledownload = 1 + else: + boxcar_notify_onsubtitledownload = 0 + if use_boxcar == "on": use_boxcar = 1 else: @@ -1335,6 +1531,12 @@ class ConfigNotifications: 
pushover_notify_ondownload = 1 else: pushover_notify_ondownload = 0 + + if pushover_notify_onsubtitledownload == "on": + pushover_notify_onsubtitledownload = 1 + else: + pushover_notify_onsubtitledownload = 0 + if use_pushover == "on": use_pushover = 1 else: @@ -1375,6 +1577,11 @@ class ConfigNotifications: else: pytivo_notify_ondownload = 0 + if pytivo_notify_onsubtitledownload == "on": + pytivo_notify_onsubtitledownload = 1 + else: + pytivo_notify_onsubtitledownload = 0 + if pytivo_update_library == "on": pytivo_update_library = 1 else: @@ -1395,9 +1602,15 @@ class ConfigNotifications: else: nma_notify_ondownload = 0 + if nma_notify_onsubtitledownload == "on": + nma_notify_onsubtitledownload = 1 + else: + nma_notify_onsubtitledownload = 0 + sickbeard.USE_XBMC = use_xbmc sickbeard.XBMC_NOTIFY_ONSNATCH = xbmc_notify_onsnatch sickbeard.XBMC_NOTIFY_ONDOWNLOAD = xbmc_notify_ondownload + sickbeard.XBMC_NOTIFY_ONSUBTITLEDOWNLOAD = xbmc_notify_onsubtitledownload sickbeard.XBMC_UPDATE_LIBRARY = xbmc_update_library sickbeard.XBMC_UPDATE_FULL = xbmc_update_full sickbeard.XBMC_UPDATE_ONLYFIRST = xbmc_update_onlyfirst @@ -1408,6 +1621,7 @@ class ConfigNotifications: sickbeard.USE_PLEX = use_plex sickbeard.PLEX_NOTIFY_ONSNATCH = plex_notify_onsnatch sickbeard.PLEX_NOTIFY_ONDOWNLOAD = plex_notify_ondownload + sickbeard.PLEX_NOTIFY_ONSUBTITLEDOWNLOAD = plex_notify_onsubtitledownload sickbeard.PLEX_UPDATE_LIBRARY = plex_update_library sickbeard.PLEX_HOST = plex_host sickbeard.PLEX_SERVER_HOST = plex_server_host @@ -1417,38 +1631,45 @@ class ConfigNotifications: sickbeard.USE_GROWL = use_growl sickbeard.GROWL_NOTIFY_ONSNATCH = growl_notify_onsnatch sickbeard.GROWL_NOTIFY_ONDOWNLOAD = growl_notify_ondownload + sickbeard.GROWL_NOTIFY_ONSUBTITLEDOWNLOAD = growl_notify_onsubtitledownload sickbeard.GROWL_HOST = growl_host sickbeard.GROWL_PASSWORD = growl_password sickbeard.USE_PROWL = use_prowl sickbeard.PROWL_NOTIFY_ONSNATCH = prowl_notify_onsnatch sickbeard.PROWL_NOTIFY_ONDOWNLOAD = 
prowl_notify_ondownload + sickbeard.PROWL_NOTIFY_ONSUBTITLEDOWNLOAD = prowl_notify_onsubtitledownload sickbeard.PROWL_API = prowl_api sickbeard.PROWL_PRIORITY = prowl_priority sickbeard.USE_TWITTER = use_twitter sickbeard.TWITTER_NOTIFY_ONSNATCH = twitter_notify_onsnatch sickbeard.TWITTER_NOTIFY_ONDOWNLOAD = twitter_notify_ondownload + sickbeard.TWITTER_NOTIFY_ONSUBTITLEDOWNLOAD = twitter_notify_onsubtitledownload sickbeard.USE_NOTIFO = use_notifo sickbeard.NOTIFO_NOTIFY_ONSNATCH = notifo_notify_onsnatch sickbeard.NOTIFO_NOTIFY_ONDOWNLOAD = notifo_notify_ondownload + sickbeard.NOTIFO_NOTIFY_ONSUBTITLEDOWNLOAD = notifo_notify_onsubtitledownload sickbeard.NOTIFO_USERNAME = notifo_username sickbeard.NOTIFO_APISECRET = notifo_apisecret sickbeard.USE_BOXCAR = use_boxcar sickbeard.BOXCAR_NOTIFY_ONSNATCH = boxcar_notify_onsnatch sickbeard.BOXCAR_NOTIFY_ONDOWNLOAD = boxcar_notify_ondownload + sickbeard.BOXCAR_NOTIFY_ONSUBTITLEDOWNLOAD = boxcar_notify_onsubtitledownload sickbeard.BOXCAR_USERNAME = boxcar_username sickbeard.USE_PUSHOVER = use_pushover sickbeard.PUSHOVER_NOTIFY_ONSNATCH = pushover_notify_onsnatch sickbeard.PUSHOVER_NOTIFY_ONDOWNLOAD = pushover_notify_ondownload + sickbeard.PUSHOVER_NOTIFY_ONSUBTITLEDOWNLOAD = pushover_notify_onsubtitledownload sickbeard.PUSHOVER_USERKEY = pushover_userkey sickbeard.USE_LIBNOTIFY = use_libnotify == "on" sickbeard.LIBNOTIFY_NOTIFY_ONSNATCH = libnotify_notify_onsnatch == "on" sickbeard.LIBNOTIFY_NOTIFY_ONDOWNLOAD = libnotify_notify_ondownload == "on" + sickbeard.LIBNOTIFY_NOTIFY_ONSUBTITLEDOWNLOAD = libnotify_notify_onsubtitledownload == "on" sickbeard.USE_NMJ = use_nmj sickbeard.NMJ_HOST = nmj_host @@ -1469,7 +1690,8 @@ class ConfigNotifications: sickbeard.USE_PYTIVO = use_pytivo sickbeard.PYTIVO_NOTIFY_ONSNATCH = pytivo_notify_onsnatch == "off" - sickbeard.PYTIVO_NOTIFY_ONDOWNLOAD = pytivo_notify_ondownload == "off" + sickbeard.PYTIVO_NOTIFY_ONDOWNLOAD = pytivo_notify_ondownload == "off" + 
sickbeard.PYTIVO_NOTIFY_ONSUBTITLEDOWNLOAD = pytivo_notify_onsubtitledownload == "off" sickbeard.PYTIVO_UPDATE_LIBRARY = pytivo_update_library sickbeard.PYTIVO_HOST = pytivo_host sickbeard.PYTIVO_SHARE_NAME = pytivo_share_name @@ -1478,6 +1700,7 @@ class ConfigNotifications: sickbeard.USE_NMA = use_nma sickbeard.NMA_NOTIFY_ONSNATCH = nma_notify_onsnatch sickbeard.NMA_NOTIFY_ONDOWNLOAD = nma_notify_ondownload + sickbeard.NMA_NOTIFY_ONSUBTITLEDOWNLOAD = nma_notify_onsubtitledownload sickbeard.NMA_API = nma_api sickbeard.NMA_PRIORITY = nma_priority @@ -1493,6 +1716,64 @@ class ConfigNotifications: redirect("/config/notifications/") +class ConfigSubtitles: + + @cherrypy.expose + def index(self): + t = PageTemplate(file="config_subtitles.tmpl") + t.submenu = ConfigMenu + return _munge(t) + + @cherrypy.expose + def saveSubtitles(self, use_subtitles=None, subtitles_plugins=None, subtitles_languages=None, subtitles_dir=None, service_order=None, subtitles_history=None): + results = [] + + if use_subtitles == "on": + use_subtitles = 1 + if sickbeard.subtitlesFinderScheduler.thread == None or not sickbeard.subtitlesFinderScheduler.thread.isAlive(): + sickbeard.subtitlesFinderScheduler.initThread() + else: + use_subtitles = 0 + sickbeard.subtitlesFinderScheduler.abort = True + logger.log(u"Waiting for the SUBTITLESFINDER thread to exit") + try: + sickbeard.subtitlesFinderScheduler.thread.join(5) + except: + pass + + if subtitles_history == "on": + subtitles_history = 1 + else: + subtitles_history = 0 + + sickbeard.USE_SUBTITLES = use_subtitles + sickbeard.SUBTITLES_LANGUAGES = [lang.alpha2 for lang in subtitles.isValidLanguage(subtitles_languages.replace(' ', '').split(','))] if subtitles_languages != '' else '' + sickbeard.SUBTITLES_DIR = subtitles_dir + sickbeard.SUBTITLES_HISTORY = subtitles_history + + # Subtitles services + services_str_list = service_order.split() + subtitles_services_list = [] + subtitles_services_enabled = [] + for curServiceStr in services_str_list: + 
curService, curEnabled = curServiceStr.split(':') + subtitles_services_list.append(curService) + subtitles_services_enabled.append(int(curEnabled)) + + sickbeard.SUBTITLES_SERVICES_LIST = subtitles_services_list + sickbeard.SUBTITLES_SERVICES_ENABLED = subtitles_services_enabled + + sickbeard.save_config() + + if len(results) > 0: + for x in results: + logger.log(x, logger.ERROR) + ui.notifications.error('Error(s) Saving Configuration', + '<br />\n'.join(results)) + else: + ui.notifications.message('Configuration Saved', ek.ek(os.path.join, sickbeard.CONFIG_FILE) ) + + redirect("/config/subtitles/") class Config: @@ -1513,6 +1794,8 @@ class Config: notifications = ConfigNotifications() + subtitles = ConfigSubtitles() + def haveXBMC(): return sickbeard.USE_XBMC and sickbeard.XBMC_UPDATE_LIBRARY @@ -1747,7 +2030,7 @@ class NewHomeAddShows: @cherrypy.expose def addNewShow(self, whichSeries=None, tvdbLang="fr", rootDir=None, defaultStatus=None, - anyQualities=None, bestQualities=None, flatten_folders=None, fullShowPath=None, + anyQualities=None, bestQualities=None, flatten_folders=None, subtitles=None, fullShowPath=None, other_shows=None, skipShow=None, audio_lang=None): """ Receive tvdb id, dir, and other options and create a show from them. 
If extra show dirs are @@ -1816,7 +2099,12 @@ class NewHomeAddShows: flatten_folders = 1 else: flatten_folders = 0 - + + if subtitles == "on": + subtitles = 1 + else: + subtitles = 0 + if not anyQualities: anyQualities = [] if not bestQualities: @@ -1828,7 +2116,7 @@ class NewHomeAddShows: newQuality = Quality.combineQualities(map(int, anyQualities), map(int, bestQualities)) # add the show - sickbeard.showQueueScheduler.action.addShow(tvdb_id, show_dir, int(defaultStatus), newQuality, flatten_folders, tvdbLang, audio_lang) #@UndefinedVariable + sickbeard.showQueueScheduler.action.addShow(tvdb_id, show_dir, int(defaultStatus), newQuality, flatten_folders, subtitles, tvdbLang, audio_lang) #@UndefinedVariable ui.notifications.message('Show added', 'Adding the specified show into '+show_dir) return finishAddShow() @@ -1899,7 +2187,7 @@ class NewHomeAddShows: show_dir, tvdb_id, show_name = cur_show # add the show - sickbeard.showQueueScheduler.action.addShow(tvdb_id, show_dir, SKIPPED, sickbeard.QUALITY_DEFAULT, sickbeard.FLATTEN_FOLDERS_DEFAULT) #@UndefinedVariable + sickbeard.showQueueScheduler.action.addShow(tvdb_id, show_dir, SKIPPED, sickbeard.QUALITY_DEFAULT, sickbeard.FLATTEN_FOLDERS_DEFAULT, sickbeard.SUBTITLES_DEFAULT) #@UndefinedVariable num_added += 1 if num_added: @@ -2311,12 +2599,18 @@ class Home: elif sickbeard.showQueueScheduler.action.isBeingRefreshed(showObj): #@UndefinedVariable show_message = 'The episodes below are currently being refreshed from disk' + + elif sickbeard.showQueueScheduler.action.isBeingSubtitled(showObj): #@UndefinedVariable + show_message = 'Currently downloading subtitles for this show' elif sickbeard.showQueueScheduler.action.isInRefreshQueue(showObj): #@UndefinedVariable show_message = 'This show is queued to be refreshed.' elif sickbeard.showQueueScheduler.action.isInUpdateQueue(showObj): #@UndefinedVariable show_message = 'This show is queued and awaiting an update.' 
+ + elif sickbeard.showQueueScheduler.action.isInSubtitleQueue(showObj): #@UndefinedVariable + show_message = 'This show is queued and awaiting subtitles download.' if not sickbeard.showQueueScheduler.action.isBeingAdded(showObj): #@UndefinedVariable if not sickbeard.showQueueScheduler.action.isBeingUpdated(showObj): #@UndefinedVariable @@ -2325,6 +2619,8 @@ class Home: t.submenu.append({ 'title': 'Force Full Update', 'path': 'home/updateShow?show=%d&force=1'%showObj.tvdbid }) t.submenu.append({ 'title': 'Update show in XBMC', 'path': 'home/updateXBMC?showName=%s'%urllib.quote_plus(showObj.name.encode('utf-8')), 'requires': haveXBMC }) t.submenu.append({ 'title': 'Preview Rename', 'path': 'home/testRename?show=%d'%showObj.tvdbid }) + if sickbeard.USE_SUBTITLES and not sickbeard.showQueueScheduler.action.isBeingSubtitled(showObj) and showObj.subtitles: + t.submenu.append({ 'title': 'Download Subtitles', 'path': 'home/subtitleShow?show=%d'%showObj.tvdbid }) t.show = showObj t.sqlResults = sqlResults @@ -2367,7 +2663,7 @@ class Home: return result['description'] if result else 'Episode not found.' 
@cherrypy.expose - def editShow(self, show=None, location=None, anyQualities=[], bestQualities=[], flatten_folders=None, paused=None, directCall=False, air_by_date=None, tvdbLang=None, audio_lang=None, custom_search_names=None): + def editShow(self, show=None, location=None, anyQualities=[], bestQualities=[], flatten_folders=None, paused=None, directCall=False, air_by_date=None, tvdbLang=None, audio_lang=None, custom_search_names=None, subtitles=None): if show == None: errString = "Invalid show ID: "+str(show) @@ -2410,6 +2706,12 @@ class Home: air_by_date = 1 else: air_by_date = 0 + + if subtitles == "on": + subtitles = 1 + else: + subtitles = 0 + if tvdbLang and tvdbLang in tvdb_api.Tvdb().config['valid_languages']: tvdb_lang = tvdbLang @@ -2443,6 +2745,7 @@ class Home: showObj.paused = paused showObj.air_by_date = air_by_date + showObj.subtitles = subtitles showObj.lang = tvdb_lang showObj.audio_lang = audio_lang showObj.custom_search_names = custom_search_names @@ -2552,6 +2855,25 @@ class Home: redirect("/home/displayShow?show=" + str(showObj.tvdbid)) + @cherrypy.expose + def subtitleShow(self, show=None, force=0): + + if show == None: + return _genericMessage("Error", "Invalid show ID") + + showObj = sickbeard.helpers.findCertainShow(sickbeard.showList, int(show)) + + if showObj == None: + return _genericMessage("Error", "Unable to find the specified show") + + # search and download subtitles + sickbeard.showQueueScheduler.action.downloadSubtitles(showObj, bool(force)) #@UndefinedVariable + + time.sleep(3) + + redirect("/home/displayShow?show="+str(showObj.tvdbid)) + + @cherrypy.expose def updateXBMC(self, showName=None): if sickbeard.XBMC_UPDATE_ONLYFIRST: @@ -2808,12 +3130,72 @@ class Home: return json.dumps({'result': statusStrings[ep_obj.status]}) return json.dumps({'result': 'failure'}) + + @cherrypy.expose + def searchEpisodeSubtitles(self, show=None, season=None, episode=None): -class UI: + # retrieve the episode object and fail if we can't get one + 
ep_obj = _getEpisode(show, season, episode) + if isinstance(ep_obj, str): + return json.dumps({'result': 'failure'}) + + # try do download subtitles for that episode + previous_subtitles = ep_obj.subtitles + try: + subtitles = ep_obj.downloadSubtitles() + + if sickbeard.SUBTITLES_DIR: + for video in subtitles: + subs_new_path = ek.ek(os.path.join, os.path.dirname(video.path), sickbeard.SUBTITLES_DIR) + dir_exists = helpers.makeDir(subs_new_path) + if not dir_exists: + logger.log(u"Unable to create subtitles folder "+subs_new_path, logger.ERROR) + else: + helpers.chmodAsParent(subs_new_path) + + for subtitle in subtitles.get(video): + new_file_path = ek.ek(os.path.join, subs_new_path, os.path.basename(subtitle.path)) + helpers.moveFile(subtitle.path, new_file_path) + helpers.chmodAsParent(new_file_path) + else: + for video in subtitles: + for subtitle in subtitles.get(video): + helpers.chmodAsParent(subtitle.path) + except: + return json.dumps({'result': 'failure'}) + + # return the correct json value + if previous_subtitles != ep_obj.subtitles: + status = 'New subtitles downloaded: %s' % ' '.join(["<img src='"+sickbeard.WEB_ROOT+"/images/flags/"+subliminal.language.Language(x).alpha2+".png' alt='"+subliminal.language.Language(x).name+"'/>" for x in sorted(list(set(ep_obj.subtitles).difference(previous_subtitles)))]) + else: + status = 'No subtitles downloaded' + ui.notifications.message('Subtitles Search', status) + return json.dumps({'result': status, 'subtitles': ','.join([x for x in ep_obj.subtitles])}) @cherrypy.expose - def add_message(self): + def mergeEpisodeSubtitles(self, show=None, season=None, episode=None): + # retrieve the episode object and fail if we can't get one + ep_obj = _getEpisode(show, season, episode) + if isinstance(ep_obj, str): + return json.dumps({'result': 'failure'}) + + # try do merge subtitles for that episode + try: + ep_obj.mergeSubtitles() + except Exception as e: + return json.dumps({'result': 'failure', 'exception': str(e)}) + + 
# return the correct json value + status = 'Subtitles merged successfully ' + ui.notifications.message('Merge Subtitles', status) + return json.dumps({'result': 'ok'}) + +class UI: + + @cherrypy.expose + def add_message(self): + ui.notifications.message('Test 1', 'This is test number 1') ui.notifications.error('Test 2', 'This is test number 2')