Source code for couchpotato.core.media.movie.providers.automation.imdb

import traceback
import re

from bs4 import BeautifulSoup
from couchpotato import fireEvent
from couchpotato.core.helpers.encoding import ss
from couchpotato.core.helpers.rss import RSS
from couchpotato.core.helpers.variable import getImdb, splitString, tryInt
from couchpotato.core.logger import CPLog
from couchpotato.core.media._base.providers.base import MultiProvider
from couchpotato.core.media.movie.providers.automation.base import Automation


log = CPLog(__name__)

autoload = 'IMDB'


[docs]class IMDB(MultiProvider):
def getTypes(self): return [IMDBWatchlist, IMDBAutomation, IMDBCharts]
[docs]class IMDBBase(Automation, RSS):
interval = 1800 charts = { 'theater': { 'order': 1, 'name': 'IMDB - Movies in Theaters', 'url': 'http://www.imdb.com/movies-in-theaters/', }, 'boxoffice': { 'order': 2, 'name': 'IMDB - Box Office', 'url': 'http://www.imdb.com/boxoffice/', }, 'top250': { 'order': 3, 'name': 'IMDB - Top 250 Movies', 'url': 'http://www.imdb.com/chart/top', }, } def getInfo(self, imdb_id): return fireEvent('movie.info', identifier = imdb_id, extended = False, adding = False, merge = True) def getFromURL(self, url): log.debug('Getting IMDBs from: %s', url) html = self.getHTMLData(url) try: split = splitString(html, split_on = "<div class=\"list compact\">")[1] html = splitString(split, split_on = "<div class=\"pages\">")[0] except: try: split = splitString(html, split_on = "<div id=\"main\">") if len(split) < 2: log.error('Failed parsing IMDB page "%s", unexpected html.', url) return [] html = BeautifulSoup(split[1]) for x in ['list compact', 'lister', 'list detail sub-list']: html2 = html.find('div', attrs = { 'class': x }) if html2: html = html2.contents html = ''.join([str(x) for x in html]) break except: log.error('Failed parsing IMDB page "%s": %s', (url, traceback.format_exc())) html = ss(html) imdbs = getImdb(html, multiple = True) if html else [] return imdbs
[docs]class IMDBWatchlist(IMDBBase):
enabled_option = 'automation_enabled' def getIMDBids(self): movies = [] watchlist_enablers = [tryInt(x) for x in splitString(self.conf('automation_urls_use'))] watchlist_urls = splitString(self.conf('automation_urls')) index = -1 for watchlist_url in watchlist_urls: try: # Get list ID ids = re.findall('(?:list/|list_id=)([a-zA-Z0-9\-_]{11})', watchlist_url) if len(ids) == 1: watchlist_url = 'http://www.imdb.com/list/%s/?view=compact&sort=created:asc' % ids[0] # Try find user id with watchlist else: userids = re.findall('(ur\d{7,9})', watchlist_url) if len(userids) == 1: watchlist_url = 'http://www.imdb.com/user/%s/watchlist?view=compact&sort=created:asc' % userids[0] except: log.error('Failed getting id from watchlist: %s', traceback.format_exc()) index += 1 if not watchlist_enablers[index]: continue start = 0 while True: try: w_url = '%s&start=%s' % (watchlist_url, start) imdbs = self.getFromURL(w_url) for imdb in imdbs: if imdb not in movies: movies.append(imdb) if self.shuttingDown(): break log.debug('Found %s movies on %s', (len(imdbs), w_url)) if len(imdbs) < 225: break start = len(movies) except: log.error('Failed loading IMDB watchlist: %s %s', (watchlist_url, traceback.format_exc())) break return movies
[docs]class IMDBAutomation(IMDBBase):
enabled_option = 'automation_providers_enabled' def getIMDBids(self): movies = [] for name in self.charts: chart = self.charts[name] url = chart.get('url') if self.conf('automation_charts_%s' % name): imdb_ids = self.getFromURL(url) try: for imdb_id in imdb_ids: info = self.getInfo(imdb_id) if info and self.isMinimalMovie(info): movies.append(imdb_id) if self.shuttingDown(): break except: log.error('Failed loading IMDB chart results from %s: %s', (url, traceback.format_exc())) return movies
[docs]class IMDBCharts(IMDBBase):
def getChartList(self): # Nearly identical to 'getIMDBids', but we don't care about minimalMovie and return all movie data (not just id) movie_lists = [] max_items = 10 for name in self.charts: chart = self.charts[name].copy() cache_key = 'imdb.chart_display_%s' % name if self.conf('chart_display_%s' % name): cached = self.getCache(cache_key) if cached: chart['list'] = cached movie_lists.append(chart) continue url = chart.get('url') chart['list'] = [] imdb_ids = self.getFromURL(url) try: for imdb_id in imdb_ids[0:max_items]: is_movie = fireEvent('movie.is_movie', identifier = imdb_id, adding = False, single = True) if not is_movie: continue info = self.getInfo(imdb_id) chart['list'].append(info) if self.shuttingDown(): break except: log.error('Failed loading IMDB chart results from %s: %s', (url, traceback.format_exc())) self.setCache(cache_key, chart['list'], timeout = 259200) if chart['list']: movie_lists.append(chart) return movie_lists config = [{ 'name': 'imdb', 'groups': [ { 'tab': 'automation', 'list': 'watchlist_providers', 'name': 'imdb_automation_watchlist', 'label': 'IMDB', 'description': 'From any <strong>public</strong> IMDB watchlists.', 'options': [ { 'name': 'automation_enabled', 'default': False, 'type': 'enabler', }, { 'name': 'automation_urls_use', 'label': 'Use', }, { 'name': 'automation_urls', 'label': 'url', 'type': 'combined', 'combine': ['automation_urls_use', 'automation_urls'], }, ], }, { 'tab': 'automation', 'list': 'automation_providers', 'name': 'imdb_automation_charts', 'label': 'IMDB', 'description': 'Import movies from IMDB Charts', 'options': [ { 'name': 'automation_providers_enabled', 'default': False, 'type': 'enabler', }, { 'name': 'automation_charts_theater', 'type': 'bool', 'label': 'In Theaters', 'description': 'New Movies <a href="http://www.imdb.com/movies-in-theaters/" target="_blank">In-Theaters</a> chart', 'default': True, }, { 'name': 'automation_charts_top250', 'type': 'bool', 'label': 'TOP 250', 'description': 'IMDB <a href="http://www.imdb.com/chart/top/" target="_blank">TOP 250</a> chart', 'default': False, }, { 'name': 'automation_charts_boxoffice', 'type': 'bool', 'label': 'Box office TOP 10', 'description': 'IMDB Box office <a href="http://www.imdb.com/chart/" target="_blank">TOP 10</a> chart', 'default': True, }, ], }, { 'tab': 'display', 'list': 'charts_providers', 'name': 'imdb_charts_display', 'label': 'IMDB', 'description': 'Display movies from IMDB Charts', 'options': [ { 'name': 'chart_display_enabled', 'default': True, 'type': 'enabler', }, { 'name': 'chart_display_theater', 'type': 'bool', 'label': 'In Theaters', 'description': 'New Movies <a href="http://www.imdb.com/movies-in-theaters/" target="_blank">In-Theaters</a> chart', 'default': False, }, { 'name': 'chart_display_top250', 'type': 'bool', 'label': 'TOP 250', 'description': 'IMDB <a href="http://www.imdb.com/chart/top/" target="_blank">TOP 250</a> chart', 'default': False, }, { 'name': 'chart_display_boxoffice', 'type': 'bool', 'label': 'Box office TOP 10', 'description': 'IMDB Box office <a href="http://www.imdb.com/chart/" target="_blank">TOP 10</a> chart', 'default': True, }, ], }, ], }]