Private GIT

Skip to content
Snippets Groups Projects
Commit 42257994 authored by miigotu's avatar miigotu
Browse files

Rewrite speed.cd to use page parsing

Some touches in tpb, alpharatio, and mtv
parent 6739e03f
Branches
Tags
No related merge requests found
# coding=utf-8
# Author: Bill Nasty
# URL: https://github.com/SickRage/SickRage
# Author: Dustyn Gibson <miigotu@gmail.com>
# URL: https://sickrage.github.io
#
# This file is part of SickRage.
#
......@@ -20,6 +19,7 @@
import re
from urllib import urlencode
from requests.utils import dict_from_cookiejar
from sickbeard import logger
from sickbeard import tvcache
......@@ -51,6 +51,9 @@ class AlphaRatioProvider(TorrentProvider): # pylint: disable=too-many-instance-
self.cache = AlphaRatioCache(self)
def login(self):
if any(dict_from_cookiejar(self.session.cookies).values()):
return True
login_params = {
'username': self.username,
'password': self.password,
......
# coding=utf-8
# Author: Seamus Wassman
# URL: http://code.google.com/p/sickbeard/
# Author: Dustyn Gibson <miigotu@gmail.com>
# URL: https://sickrage.github.io
#
# This file is part of SickRage.
#
......@@ -17,12 +17,8 @@
# You should have received a copy of the GNU General Public License
# along with SickRage. If not, see <http://www.gnu.org/licenses/>.
# This file was adapted for MoreThanTV from the freshontv scraper by
# Sparhawk76, this is my first foray into python, so there most likely
# are some mistakes or things I could have done better.
import re
import requests
from requests.utils import dict_from_cookiejar
from urllib import urlencode
from sickbeard import logger
......@@ -65,7 +61,7 @@ class MoreThanTVProvider(TorrentProvider): # pylint: disable=too-many-instance-
return True
def login(self):
if any(requests.utils.dict_from_cookiejar(self.session.cookies).values()):
if any(dict_from_cookiejar(self.session.cookies).values()):
return True
login_params = {
......
# coding=utf-8
# Author: Mr_Orange
# URL: https://github.com/mr-orange/Sick-Beard
# Author: Dustyn Gibson <miigotu@gmail.com>
# URL: https://sickrage.github.io
#
# This file is part of SickRage.
#
......@@ -18,10 +18,13 @@
# along with SickRage. If not, see <http://www.gnu.org/licenses/>.
import re
from urllib import urlencode
from requests.utils import dict_from_cookiejar
from sickbeard import logger
from sickbeard import tvcache
from sickrage.helper.common import convert_size
from sickbeard.bs4_parser import BS4Parser
from sickrage.helper.common import try_int, convert_size
from sickrage.providers.torrent.TorrentProvider import TorrentProvider
......@@ -38,24 +41,24 @@ class SpeedCDProvider(TorrentProvider): # pylint: disable=too-many-instance-att
self.minseed = None
self.minleech = None
self.urls = {'base_url': 'http://speed.cd/',
'login': 'http://speed.cd/take.login.php',
'detail': 'http://speed.cd/t/%s',
'search': 'http://speed.cd/V3/API/API.php',
'download': 'http://speed.cd/download.php?torrent=%s'}
self.url = self.urls['base_url']
self.categories = {'Season': {'c14': 1}, 'Episode': {'c2': 1, 'c49': 1}, 'RSS': {'c14': 1, 'c2': 1, 'c49': 1}}
self.url = 'http://speed.cd/'
self.urls = {
'login': self.url + 'take.login.php',
'search': self.url + 'browse.php',
}
self.proper_strings = ['PROPER', 'REPACK']
self.cache = SpeedCDCache(self)
def login(self):
if any(dict_from_cookiejar(self.session.cookies).values()):
return True
login_params = {'username': self.username,
'password': self.password}
login_params = {
'username': self.username,
'password': self.password
}
response = self.get_url(self.urls['login'], post_data=login_params, timeout=30)
if not response:
......@@ -68,59 +71,91 @@ class SpeedCDProvider(TorrentProvider): # pylint: disable=too-many-instance-att
return True
def search(self, search_params, age=0, ep_obj=None): # pylint: disable=too-many-locals
def search(self, search_strings, age=0, ep_obj=None): # pylint: disable=too-many-locals, too-many-branches
results = []
if not self.login():
return results
for mode in search_params:
# http://speed.cd/browse.php?c49=1&c50=1&c52=1&c41=1&c55=1&c2=1&c30=1&freeleech=on&search=arrow&d=on
search_params = {
'c2': 1, # TV/Episodes
'c30': 1, # Anime
'c41': 1, # TV/Packs
'c49': 1, # TV/HD
'c50': 1, # TV/Sports
'c52': 1, # TV/B-Ray
'c55': 1, # TV/Kids
'search': '',
}
if self.freeleech:
search_params['freeleech'] = 'on'
for mode in search_strings:
items = []
logger.log(u"Search Mode: %s" % mode, logger.DEBUG)
for search_string in search_params[mode]:
for search_string in search_strings[mode]:
if mode != 'RSS':
logger.log(u"Search string: %s " % search_string, logger.DEBUG)
search_string = '+'.join(search_string.split())
search_params['search'] = search_string
post_data = dict({'/browse.php?': None, 'cata': 'yes', 'jxt': 4, 'jxw': 'b', 'search': search_string},
**self.categories[mode])
search_url = "%s?%s" % (self.urls['search'], urlencode(search_params))
logger.log(u"Search URL: %s" % search_url, logger.DEBUG)
parsedJSON = self.get_url(self.urls['search'], post_data=post_data, json=True)
if not parsedJSON:
continue
try:
torrents = parsedJSON.get('Fs', [])[0].get('Cn', {}).get('torrents', [])
except Exception:
# returns top 15 results by default, expandable in user profile to 100
data = self.get_url(search_url)
if not data:
continue
for torrent in torrents:
with BS4Parser(data, 'html5lib') as html:
torrent_table = html.find('div', class_='boxContent').find('table')
torrent_rows = torrent_table.find_all('tr') if torrent_table else []
if self.freeleech and not torrent['free']:
# Continue only if one Release is found
if len(torrent_rows) < 2:
logger.log(u"Data returned from provider does not contain any torrents", logger.DEBUG)
continue
title = re.sub('<[^>]*>', '', torrent['name'])
download_url = self.urls['download'] % (torrent['id'])
seeders = int(torrent['seed'])
leechers = int(torrent['leech'])
torrent_size = torrent['size']
def process_column_header(td):
    """Return the label for a table-header cell.

    Prefers the 'alt' text of an image inside a link, then the 'alt' text
    of a bare image, and finally falls back to the cell's visible text.
    """
    label = ''
    anchor = td.a
    if anchor and anchor.img:
        # Image wrapped in a link: use its alt text, defaulting to the link text.
        label = anchor.img.get('alt', anchor.get_text(strip=True))
    if not label and td.img:
        # Bare image header cell.
        label = td.img.get('alt', '')
    if not label:
        # Plain text header cell.
        label = td.get_text(strip=True)
    return label
size = convert_size(torrent_size) or -1
labels = [process_column_header(label) for label in torrent_rows[0].find_all('th')]
# skip colheader
for result in torrent_rows[1:]:
try:
cells = result.find_all('td')
title = cells[labels.index('Title')].find('a', class_='torrent').get_text()
download_url = self.url + cells[labels.index('Download')].find(title='Download').parent['href']
if not all([title, download_url]):
continue
# Filter unseeded torrent
seeders = try_int(cells[labels.index('Seeders')].get_text(strip=True))
leechers = try_int(cells[labels.index('Leechers')].get_text(strip=True))
if seeders < self.minseed or leechers < self.minleech:
if mode != 'RSS':
logger.log(u"Discarding torrent because it doesn't meet the minimum seeders or leechers: {0} (S:{1} L:{2})".format(title, seeders, leechers), logger.DEBUG)
continue
torrent_size = cells[labels.index('Size')].get_text()
# TODO: Make convert_size work with 123.12GB
torrent_size = torrent_size[:-2] + ' ' + torrent_size[-2:]
size = convert_size(torrent_size) or -1
item = title, download_url, size, seeders, leechers
if mode != 'RSS':
logger.log(u"Found result: %s " % title, logger.DEBUG)
items.append(item)
except StandardError:
continue
# For each search mode sort all the items by seeders if available
items.sort(key=lambda tup: tup[3], reverse=True)
......@@ -141,7 +176,8 @@ class SpeedCDCache(tvcache.TVCache):
self.minTime = 20
def _getRSSData(self):
search_params = {'RSS': ['']}
return {'entries': self.provider.search(search_params)}
search_strings = {'RSS': ['']}
return {'entries': self.provider.search(search_strings)}
provider = SpeedCDProvider()
# coding=utf-8
# Author: Mr_Orange <mr_orange@hotmail.it>
# URL: http://code.google.com/p/sickbeard/
# Author: Dustyn Gibson <miigotu@gmail.com>
# URL: https://sickrage.github.io
#
# This file is part of SickRage.
#
......@@ -125,7 +125,7 @@ class ThePirateBayProvider(TorrentProvider): # pylint: disable=too-many-instanc
# Convert size after all possible skip scenarios
torrent_size = cells[labels.index('Name')].find(class_='detDesc').get_text(strip=True).split(', ')[1]
torrent_size = re.sub(r'Size ([\d.]+).+([KMG]iB)', r'\1 \2', torrent_size)
torrent_size = re.sub(r'Size ([\d.]+).+([KMGT]iB)', r'\1 \2', torrent_size)
size = convert_size(torrent_size) or -1
item = title, download_url, size, seeders, leechers
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please sign in to comment