Source code for couchpotato.core.media.movie.providers.trailer.hdtrailers

from string import digits, ascii_letters
import re

from bs4 import SoupStrainer, BeautifulSoup
from couchpotato.core.helpers.encoding import tryUrlencode
from couchpotato.core.helpers.variable import mergeDicts, getTitle, getIdentifier
from couchpotato.core.logger import CPLog
from couchpotato.core.media.movie.providers.trailer.base import TrailerProvider
from requests import HTTPError


log = CPLog(__name__)

autoload = 'HDTrailers'


[docs]class HDTrailers(TrailerProvider):
urls = { 'api': 'http://www.hd-trailers.net/movie/%s/', 'backup': 'http://www.hd-trailers.net/blog/', } providers = ['apple.ico', 'yahoo.ico', 'moviefone.ico', 'myspace.ico', 'favicon.ico'] only_tables_tags = SoupStrainer('table') def search(self, group): movie_name = getTitle(group) url = self.urls['api'] % self.movieUrlName(movie_name) try: data = self.getCache('hdtrailers.%s' % getIdentifier(group), url, show_error = False) except HTTPError: log.debug('No page found for: %s', movie_name) data = None result_data = {'480p': [], '720p': [], '1080p': []} if not data: return result_data did_alternative = False for provider in self.providers: results = self.findByProvider(data, provider) # Find alternative if results.get('404') and not did_alternative: results = self.findViaAlternative(group) did_alternative = True result_data = mergeDicts(result_data, results) return result_data def findViaAlternative(self, group): results = {'480p': [], '720p': [], '1080p': []} movie_name = getTitle(group) url = "%s?%s" % (self.urls['backup'], tryUrlencode({'s':movie_name})) try: data = self.getCache('hdtrailers.alt.%s' % getIdentifier(group), url, show_error = False) except HTTPError: log.debug('No alternative page found for: %s', movie_name) data = None if not data: return results try: html = BeautifulSoup(data, parse_only = self.only_tables_tags) result_table = html.find_all('h2', text = re.compile(movie_name)) for h2 in result_table: if 'trailer' in h2.lower(): parent = h2.parent.parent.parent trailerLinks = parent.find_all('a', text = re.compile('480p|720p|1080p')) try: for trailer in trailerLinks: results[trailer].insert(0, trailer.parent['href']) except: pass except AttributeError: log.debug('No trailers found in via alternative.') return results def findByProvider(self, data, provider): results = {'480p':[], '720p':[], '1080p':[]} try: html = BeautifulSoup(data, parse_only = self.only_tables_tags) result_table = html.find('table', attrs = {'class':'bottomTable'}) for tr in result_table.find_all('tr'): trtext = str(tr).lower() if 'clips' in trtext: break if 'trailer' in trtext and not 'clip' in trtext and provider in trtext and not '3d' in trtext: if 'trailer' not in tr.find('span', 'standardTrailerName').text.lower(): continue resolutions = tr.find_all('td', attrs = {'class':'bottomTableResolution'}) for res in resolutions: if res.a and str(res.a.contents[0]) in results: results[str(res.a.contents[0])].insert(0, res.a['href']) except AttributeError: log.debug('No trailers found in provider %s.', provider) results['404'] = True return results def movieUrlName(self, string): safe_chars = ascii_letters + digits + ' ' r = ''.join([char if char in safe_chars else ' ' for char in string]) name = re.sub('\s+' , '-', r).lower() try: int(name) return '-' + name except: return name