diff --git a/sickbeard/providers/extratorrent.py b/sickbeard/providers/extratorrent.py
index ba8a2e3d40c42fe24cea8c1c63db1390541e1e73..9445bdcdcee39cf2348035ab99a6d1f2f5b60bdf 100644
--- a/sickbeard/providers/extratorrent.py
+++ b/sickbeard/providers/extratorrent.py
@@ -19,13 +19,11 @@
 import re
 import traceback
 
-import xmltodict
-from xml.parsers.expat import ExpatError
-
 from sickbeard import logger
 from sickbeard import tvcache
 from sickbeard.common import USER_AGENT
 from sickrage.helper.common import try_int
+from sickbeard.bs4_parser import BS4Parser
 
 from sickrage.providers.torrent.TorrentProvider import TorrentProvider
 
@@ -58,17 +56,15 @@ class ExtraTorrentProvider(TorrentProvider):
         for mode in search_strings.keys():
             logger.log(u"Search Mode: %s" % mode, logger.DEBUG)
             for search_string in search_strings[mode]:
-
                 if mode != 'RSS':
                     logger.log(u"Search string: %s " % search_string, logger.DEBUG)
 
                 try:
                     self.search_params.update({'type': ('search', 'rss')[mode == 'RSS'], 'search': search_string})
-                    if self.custom_url:
-                        url = self.custom_url + '/rss.xml'
-                        data = self.get_url(url, params=self.search_params)
-                    else:
-                        data = self.get_url(self.urls['rss'], params=self.search_params)
+
+                    url = self.urls['rss'] if not self.custom_url else self.urls['rss'].replace(self.urls['index'], self.custom_url)
+
+                    data = self.get_url(url, params=self.search_params)
                     if not data:
                         logger.log(u"No data returned from provider", logger.DEBUG)
                         continue
@@ -77,45 +73,33 @@ class ExtraTorrentProvider(TorrentProvider):
                         logger.log(u'Expected xml but got something else, is your mirror failing?', logger.INFO)
                         continue
 
-                    try:
-                        data = xmltodict.parse(data)
-                    except ExpatError:
-                        logger.log(u"Failed parsing provider. Traceback: %r\n%r" % (traceback.format_exc(), data), logger.ERROR)
-                        continue
-
-                    if not all([data, 'rss' in data, 'channel' in data['rss'], 'item' in data['rss']['channel']]):
-                        logger.log(u"Malformed rss returned, skipping", logger.DEBUG)
-                        continue
-
-                    # https://github.com/martinblech/xmltodict/issues/111
-                    entries = data['rss']['channel']['item']
-                    entries = entries if isinstance(entries, list) else [entries]
-
-                    for item in entries:
-                        title = item['title'].decode('utf-8')
-                        # info_hash = item['info_hash']
-                        size = int(item['size'])
-                        seeders = try_int(item['seeders'], 0)
-                        leechers = try_int(item['leechers'], 0)
-                        download_url = item['enclosure']['@url'] if 'enclosure' in item else self._magnet_from_details(item['link'])
-
-                        if not all([title, download_url]):
-                            continue
-
-                        # Filter unseeded torrent
-                        if seeders < self.minseed or leechers < self.minleech:
+                    with BS4Parser(data, 'html5lib') as parser:
+                        for item in parser.findAll('item'):
+                            title = re.sub(r'^<!\[CDATA\[|\]\]>$', '', item.find('title').text)
+                            # info_hash = item.get('info_hash', '')
+                            size = try_int(item.find('size').text, -1)
+                            seeders = try_int(item.find('seeders').text)
+                            leechers = try_int(item.find('leechers').text)
+                            enclosure = item.find('enclosure')
+                            download_url = enclosure['url'] if enclosure else self._magnet_from_details(item.find('link').text)
+
+                            if not all([title, download_url]):
+                                continue
+
+                            # Filter unseeded torrent
+                            if seeders < self.minseed or leechers < self.minleech:
+                                if mode != 'RSS':
+                                    logger.log(u"Discarding torrent because it doesn't meet the minimum seeders or leechers: {0} (S:{1} L:{2})".format(title, seeders, leechers), logger.DEBUG)
+                                continue
+
+                            item = title, download_url, size, seeders, leechers
                             if mode != 'RSS':
-                                logger.log(u"Discarding torrent because it doesn't meet the minimum seeders or leechers: {0} (S:{1} L:{2})".format(title, seeders, leechers), logger.DEBUG)
-                            continue
-
-                        item = title, download_url, size, seeders, leechers
-                        if mode != 'RSS':
-                            logger.log(u"Found result: %s " % title, logger.DEBUG)
+                                logger.log(u"Found result: %s " % title, logger.DEBUG)
 
-                        items[mode].append(item)
+                            items[mode].append(item)
 
                 except (AttributeError, TypeError, KeyError, ValueError):
-                    logger.log(u"Failed parsing provider. Traceback: %r" % traceback.format_exc(), logger.ERROR)
+                    logger.log(u"Failed parsing provider. Traceback: %r" % traceback.format_exc(), logger.WARNING)
 
             # For each search mode sort all the items by seeders if available
             items[mode].sort(key=lambda tup: tup[3], reverse=True)
diff --git a/sickbeard/providers/torrentz.py b/sickbeard/providers/torrentz.py
index 662e8058464d25d799a3c50b4d09fdd9ac71db0e..817c6bf933ca8c3de3da53fbc88013abb881ce4e 100644
--- a/sickbeard/providers/torrentz.py
+++ b/sickbeard/providers/torrentz.py
@@ -18,17 +18,13 @@
 # along with SickRage. If not, see <http://www.gnu.org/licenses/>.
 
 import re
-import time
 import traceback
-import xmltodict
 from six.moves import urllib
-from xml.parsers.expat import ExpatError
 
-import sickbeard
 from sickbeard import logger
 from sickbeard import tvcache
-from sickbeard.common import cpu_presets
 from sickbeard.common import USER_AGENT
+from sickbeard.bs4_parser import BS4Parser
 
 from sickrage.providers.torrent.TorrentProvider import TorrentProvider
 
@@ -77,47 +73,32 @@ class TORRENTZProvider(TorrentProvider):
                     logger.log(u'Wrong data returned from: ' + search_url, logger.DEBUG)
                     continue
 
-                if not data.startswith('<?xml'):
-                    logger.log(u'Expected xml but got something else, is your mirror failing?', logger.INFO)
-                    continue
-
                 try:
-                    data = xmltodict.parse(data)
-                except ExpatError:
-                    logger.log(u"Failed parsing provider. Traceback: %r\n%r" % (traceback.format_exc(), data), logger.ERROR)
-                    continue
-
-                if not all([data, 'rss' in data, 'channel' in data['rss'], 'item' in data['rss']['channel']]):
-                    logger.log(u"Malformed rss returned or no results, skipping", logger.DEBUG)
-                    continue
-
-                time.sleep(cpu_presets[sickbeard.CPU_PRESET])
-
-                # https://github.com/martinblech/xmltodict/issues/111
-                entries = data['rss']['channel']['item']
-                entries = entries if isinstance(entries, list) else [entries]
+                    with BS4Parser(data, 'html5lib') as parser:
+                        for item in parser.findAll('item'):
+                            if item.category and 'tv' not in item.category.text:
+                                continue
 
-                for item in entries:
-                    if item.get('category', None) and 'tv' not in item.get('category', ''):
-                        continue
+                            title = item.title.text.rsplit(' ', 1)[0].replace(' ', '.')
+                            t_hash = item.guid.text.rsplit('/', 1)[-1]
 
-                    title = item.get('title', '').rsplit(' ', 1)[0].replace(' ', '.')
-                    t_hash = item.get('guid', '').rsplit('/', 1)[-1]
+                            if not all([title, t_hash]):
+                                continue
 
-                    if not all([title, t_hash]):
-                        continue
+                            # TODO: Add method to generic provider for building magnet from hash.
+                            download_url = "magnet:?xt=urn:btih:" + t_hash + "&dn=" + title + self._custom_trackers
+                            size, seeders, leechers = self._split_description(item.find('description').text)
 
-                    # TODO: Add method to generic provider for building magnet from hash.
-                    download_url = "magnet:?xt=urn:btih:" + t_hash + "&dn=" + title + self._custom_trackers
-                    size, seeders, leechers = self._split_description(item.get('description', ''))
+                            # Filter unseeded torrent
+                            if seeders < self.minseed or leechers < self.minleech:
+                                if mode != 'RSS':
+                                    logger.log(u"Discarding torrent because it doesn't meet the minimum seeders or leechers: {0} (S:{1} L:{2})".format(title, seeders, leechers), logger.DEBUG)
+                                continue
 
-                    # Filter unseeded torrent
-                    if seeders < self.minseed or leechers < self.minleech:
-                        if mode != 'RSS':
-                            logger.log(u"Discarding torrent because it doesn't meet the minimum seeders or leechers: {0} (S:{1} L:{2})".format(title, seeders, leechers), logger.DEBUG)
-                        continue
+                            items[mode].append((title, download_url, size, seeders, leechers))
 
-                    items[mode].append((title, download_url, size, seeders, leechers))
+                except (AttributeError, TypeError, KeyError, ValueError):
+                    logger.log(u"Failed parsing provider. Traceback: %r" % traceback.format_exc(), logger.WARNING)
 
             # For each search mode sort all the items by seeders if available
             items[mode].sort(key=lambda tup: tup[3], reverse=True)