Private GIT

Skip to content
Snippets Groups Projects
Commit 062f6511 authored by miigotu
Browse files

Merge pull request #594 from SickRage/no-xmltodict

Rework torrentz and ettv to use bs4 instead of xmltodict
parents b9236a60 a9a6460a
Branches
Tags
No related merge requests found
...@@ -19,13 +19,11 @@ ...@@ -19,13 +19,11 @@
import re import re
import traceback import traceback
import xmltodict
from xml.parsers.expat import ExpatError
from sickbeard import logger from sickbeard import logger
from sickbeard import tvcache from sickbeard import tvcache
from sickbeard.common import USER_AGENT from sickbeard.common import USER_AGENT
from sickrage.helper.common import try_int from sickrage.helper.common import try_int
from sickbeard.bs4_parser import BS4Parser
from sickrage.providers.torrent.TorrentProvider import TorrentProvider from sickrage.providers.torrent.TorrentProvider import TorrentProvider
...@@ -58,17 +56,15 @@ class ExtraTorrentProvider(TorrentProvider): ...@@ -58,17 +56,15 @@ class ExtraTorrentProvider(TorrentProvider):
for mode in search_strings.keys(): for mode in search_strings.keys():
logger.log(u"Search Mode: %s" % mode, logger.DEBUG) logger.log(u"Search Mode: %s" % mode, logger.DEBUG)
for search_string in search_strings[mode]: for search_string in search_strings[mode]:
if mode != 'RSS': if mode != 'RSS':
logger.log(u"Search string: %s " % search_string, logger.DEBUG) logger.log(u"Search string: %s " % search_string, logger.DEBUG)
try: try:
self.search_params.update({'type': ('search', 'rss')[mode == 'RSS'], 'search': search_string}) self.search_params.update({'type': ('search', 'rss')[mode == 'RSS'], 'search': search_string})
if self.custom_url:
url = self.custom_url + '/rss.xml' url = self.urls['rss'] if not self.custom_url else self.urls['rss'].replace(self.urls['index'], self.custom_url)
data = self.get_url(url, params=self.search_params) data = self.get_url(url, params=self.search_params)
else:
data = self.get_url(self.urls['rss'], params=self.search_params)
if not data: if not data:
logger.log(u"No data returned from provider", logger.DEBUG) logger.log(u"No data returned from provider", logger.DEBUG)
continue continue
...@@ -77,27 +73,15 @@ class ExtraTorrentProvider(TorrentProvider): ...@@ -77,27 +73,15 @@ class ExtraTorrentProvider(TorrentProvider):
logger.log(u'Expected xml but got something else, is your mirror failing?', logger.INFO) logger.log(u'Expected xml but got something else, is your mirror failing?', logger.INFO)
continue continue
try: with BS4Parser(data, 'html5lib') as parser:
data = xmltodict.parse(data) for item in parser.findAll('item'):
except ExpatError: title = re.sub(r'^<!\[CDATA\[|\]\]>$', '', item.find('title').text)
logger.log(u"Failed parsing provider. Traceback: %r\n%r" % (traceback.format_exc(), data), logger.ERROR) # info_hash = item.get('info_hash', '')
continue size = try_int(item.find('size').text, -1)
seeders = try_int(item.find('seeders').text)
if not all([data, 'rss' in data, 'channel' in data['rss'], 'item' in data['rss']['channel']]): leechers = try_int(item.find('leechers').text)
logger.log(u"Malformed rss returned, skipping", logger.DEBUG) enclosure = item.find('enclosure')
continue download_url = enclosure['url'] if enclosure else self._magnet_from_details(item.find('link').text)
# https://github.com/martinblech/xmltodict/issues/111
entries = data['rss']['channel']['item']
entries = entries if isinstance(entries, list) else [entries]
for item in entries:
title = item['title'].decode('utf-8')
# info_hash = item['info_hash']
size = int(item['size'])
seeders = try_int(item['seeders'], 0)
leechers = try_int(item['leechers'], 0)
download_url = item['enclosure']['@url'] if 'enclosure' in item else self._magnet_from_details(item['link'])
if not all([title, download_url]): if not all([title, download_url]):
continue continue
...@@ -115,7 +99,7 @@ class ExtraTorrentProvider(TorrentProvider): ...@@ -115,7 +99,7 @@ class ExtraTorrentProvider(TorrentProvider):
items[mode].append(item) items[mode].append(item)
except (AttributeError, TypeError, KeyError, ValueError): except (AttributeError, TypeError, KeyError, ValueError):
logger.log(u"Failed parsing provider. Traceback: %r" % traceback.format_exc(), logger.ERROR) logger.log(u"Failed parsing provider. Traceback: %r" % traceback.format_exc(), logger.WARNING)
# For each search mode sort all the items by seeders if available # For each search mode sort all the items by seeders if available
items[mode].sort(key=lambda tup: tup[3], reverse=True) items[mode].sort(key=lambda tup: tup[3], reverse=True)
......
...@@ -18,17 +18,13 @@ ...@@ -18,17 +18,13 @@
# along with SickRage. If not, see <http://www.gnu.org/licenses/>. # along with SickRage. If not, see <http://www.gnu.org/licenses/>.
import re import re
import time
import traceback import traceback
import xmltodict
from six.moves import urllib from six.moves import urllib
from xml.parsers.expat import ExpatError
import sickbeard
from sickbeard import logger from sickbeard import logger
from sickbeard import tvcache from sickbeard import tvcache
from sickbeard.common import cpu_presets
from sickbeard.common import USER_AGENT from sickbeard.common import USER_AGENT
from sickbeard.bs4_parser import BS4Parser
from sickrage.providers.torrent.TorrentProvider import TorrentProvider from sickrage.providers.torrent.TorrentProvider import TorrentProvider
...@@ -77,39 +73,21 @@ class TORRENTZProvider(TorrentProvider): ...@@ -77,39 +73,21 @@ class TORRENTZProvider(TorrentProvider):
logger.log(u'Wrong data returned from: ' + search_url, logger.DEBUG) logger.log(u'Wrong data returned from: ' + search_url, logger.DEBUG)
continue continue
if not data.startswith('<?xml'):
logger.log(u'Expected xml but got something else, is your mirror failing?', logger.INFO)
continue
try: try:
data = xmltodict.parse(data) with BS4Parser(data, 'html5lib') as parser:
except ExpatError: for item in parser.findAll('item'):
logger.log(u"Failed parsing provider. Traceback: %r\n%r" % (traceback.format_exc(), data), logger.ERROR) if item.category and 'tv' not in item.category.text:
continue continue
if not all([data, 'rss' in data, 'channel' in data['rss'], 'item' in data['rss']['channel']]): title = item.title.text.rsplit(' ', 1)[0].replace(' ', '.')
logger.log(u"Malformed rss returned or no results, skipping", logger.DEBUG) t_hash = item.guid.text.rsplit('/', 1)[-1]
continue
time.sleep(cpu_presets[sickbeard.CPU_PRESET])
# https://github.com/martinblech/xmltodict/issues/111
entries = data['rss']['channel']['item']
entries = entries if isinstance(entries, list) else [entries]
for item in entries:
if item.get('category', None) and 'tv' not in item.get('category', ''):
continue
title = item.get('title', '').rsplit(' ', 1)[0].replace(' ', '.')
t_hash = item.get('guid', '').rsplit('/', 1)[-1]
if not all([title, t_hash]): if not all([title, t_hash]):
continue continue
# TODO: Add method to generic provider for building magnet from hash. # TODO: Add method to generic provider for building magnet from hash.
download_url = "magnet:?xt=urn:btih:" + t_hash + "&dn=" + title + self._custom_trackers download_url = "magnet:?xt=urn:btih:" + t_hash + "&dn=" + title + self._custom_trackers
size, seeders, leechers = self._split_description(item.get('description', '')) size, seeders, leechers = self._split_description(item.find('description').text)
# Filter unseeded torrent # Filter unseeded torrent
if seeders < self.minseed or leechers < self.minleech: if seeders < self.minseed or leechers < self.minleech:
...@@ -119,6 +97,9 @@ class TORRENTZProvider(TorrentProvider): ...@@ -119,6 +97,9 @@ class TORRENTZProvider(TorrentProvider):
items[mode].append((title, download_url, size, seeders, leechers)) items[mode].append((title, download_url, size, seeders, leechers))
except (AttributeError, TypeError, KeyError, ValueError):
logger.log(u"Failed parsing provider. Traceback: %r" % traceback.format_exc(), logger.WARNING)
# For each search mode sort all the items by seeders if available # For each search mode sort all the items by seeders if available
items[mode].sort(key=lambda tup: tup[3], reverse=True) items[mode].sort(key=lambda tup: tup[3], reverse=True)
results += items[mode] results += items[mode]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment