Private GIT

Skip to content
Snippets Groups Projects
Commit 062f6511 authored by miigotu
Browse files

Merge pull request #594 from SickRage/no-xmltodict

Rework torrentz and ettv to use bs4 instead of xmltodict
parents b9236a60 a9a6460a
Branches
Tags
No related merge requests found
......@@ -19,13 +19,11 @@
import re
import traceback
import xmltodict
from xml.parsers.expat import ExpatError
from sickbeard import logger
from sickbeard import tvcache
from sickbeard.common import USER_AGENT
from sickrage.helper.common import try_int
from sickbeard.bs4_parser import BS4Parser
from sickrage.providers.torrent.TorrentProvider import TorrentProvider
......@@ -58,17 +56,15 @@ class ExtraTorrentProvider(TorrentProvider):
for mode in search_strings.keys():
logger.log(u"Search Mode: %s" % mode, logger.DEBUG)
for search_string in search_strings[mode]:
if mode != 'RSS':
logger.log(u"Search string: %s " % search_string, logger.DEBUG)
try:
self.search_params.update({'type': ('search', 'rss')[mode == 'RSS'], 'search': search_string})
if self.custom_url:
url = self.custom_url + '/rss.xml'
url = self.urls['rss'] if not self.custom_url else self.urls['rss'].replace(self.urls['index'], self.custom_url)
data = self.get_url(url, params=self.search_params)
else:
data = self.get_url(self.urls['rss'], params=self.search_params)
if not data:
logger.log(u"No data returned from provider", logger.DEBUG)
continue
......@@ -77,27 +73,15 @@ class ExtraTorrentProvider(TorrentProvider):
logger.log(u'Expected xml but got something else, is your mirror failing?', logger.INFO)
continue
try:
data = xmltodict.parse(data)
except ExpatError:
logger.log(u"Failed parsing provider. Traceback: %r\n%r" % (traceback.format_exc(), data), logger.ERROR)
continue
if not all([data, 'rss' in data, 'channel' in data['rss'], 'item' in data['rss']['channel']]):
logger.log(u"Malformed rss returned, skipping", logger.DEBUG)
continue
# https://github.com/martinblech/xmltodict/issues/111
entries = data['rss']['channel']['item']
entries = entries if isinstance(entries, list) else [entries]
for item in entries:
title = item['title'].decode('utf-8')
# info_hash = item['info_hash']
size = int(item['size'])
seeders = try_int(item['seeders'], 0)
leechers = try_int(item['leechers'], 0)
download_url = item['enclosure']['@url'] if 'enclosure' in item else self._magnet_from_details(item['link'])
with BS4Parser(data, 'html5lib') as parser:
for item in parser.findAll('item'):
title = re.sub(r'^<!\[CDATA\[|\]\]>$', '', item.find('title').text)
# info_hash = item.get('info_hash', '')
size = try_int(item.find('size').text, -1)
seeders = try_int(item.find('seeders').text)
leechers = try_int(item.find('leechers').text)
enclosure = item.find('enclosure')
download_url = enclosure['url'] if enclosure else self._magnet_from_details(item.find('link').text)
if not all([title, download_url]):
continue
......@@ -115,7 +99,7 @@ class ExtraTorrentProvider(TorrentProvider):
items[mode].append(item)
except (AttributeError, TypeError, KeyError, ValueError):
logger.log(u"Failed parsing provider. Traceback: %r" % traceback.format_exc(), logger.ERROR)
logger.log(u"Failed parsing provider. Traceback: %r" % traceback.format_exc(), logger.WARNING)
# For each search mode sort all the items by seeders if available
items[mode].sort(key=lambda tup: tup[3], reverse=True)
......
......@@ -18,17 +18,13 @@
# along with SickRage. If not, see <http://www.gnu.org/licenses/>.
import re
import time
import traceback
import xmltodict
from six.moves import urllib
from xml.parsers.expat import ExpatError
import sickbeard
from sickbeard import logger
from sickbeard import tvcache
from sickbeard.common import cpu_presets
from sickbeard.common import USER_AGENT
from sickbeard.bs4_parser import BS4Parser
from sickrage.providers.torrent.TorrentProvider import TorrentProvider
......@@ -77,39 +73,21 @@ class TORRENTZProvider(TorrentProvider):
logger.log(u'Wrong data returned from: ' + search_url, logger.DEBUG)
continue
if not data.startswith('<?xml'):
logger.log(u'Expected xml but got something else, is your mirror failing?', logger.INFO)
continue
try:
data = xmltodict.parse(data)
except ExpatError:
logger.log(u"Failed parsing provider. Traceback: %r\n%r" % (traceback.format_exc(), data), logger.ERROR)
with BS4Parser(data, 'html5lib') as parser:
for item in parser.findAll('item'):
if item.category and 'tv' not in item.category.text:
continue
if not all([data, 'rss' in data, 'channel' in data['rss'], 'item' in data['rss']['channel']]):
logger.log(u"Malformed rss returned or no results, skipping", logger.DEBUG)
continue
time.sleep(cpu_presets[sickbeard.CPU_PRESET])
# https://github.com/martinblech/xmltodict/issues/111
entries = data['rss']['channel']['item']
entries = entries if isinstance(entries, list) else [entries]
for item in entries:
if item.get('category', None) and 'tv' not in item.get('category', ''):
continue
title = item.get('title', '').rsplit(' ', 1)[0].replace(' ', '.')
t_hash = item.get('guid', '').rsplit('/', 1)[-1]
title = item.title.text.rsplit(' ', 1)[0].replace(' ', '.')
t_hash = item.guid.text.rsplit('/', 1)[-1]
if not all([title, t_hash]):
continue
# TODO: Add method to generic provider for building magnet from hash.
download_url = "magnet:?xt=urn:btih:" + t_hash + "&dn=" + title + self._custom_trackers
size, seeders, leechers = self._split_description(item.get('description', ''))
size, seeders, leechers = self._split_description(item.find('description').text)
# Filter unseeded torrents
if seeders < self.minseed or leechers < self.minleech:
......@@ -119,6 +97,9 @@ class TORRENTZProvider(TorrentProvider):
items[mode].append((title, download_url, size, seeders, leechers))
except (AttributeError, TypeError, KeyError, ValueError):
logger.log(u"Failed parsing provider. Traceback: %r" % traceback.format_exc(), logger.WARNING)
# For each search mode sort all the items by seeders if available
items[mode].sort(key=lambda tup: tup[3], reverse=True)
results += items[mode]
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment