[cbc] add new extractor for olympics.cbc.ca (closes #15535)
This commit is contained in:
parent
7d2b4aa047
commit
b12cf31bb1
3 changed files with 64 additions and 1 deletions
|
@ -1,6 +1,7 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
@ -13,6 +14,7 @@ from ..utils import (
|
||||||
xpath_element,
|
xpath_element,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
parse_age_limit,
|
parse_age_limit,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
@ -359,3 +361,63 @@ class CBCWatchIE(CBCWatchBaseIE):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
rss = self._call_api('web/browse/' + video_id, video_id)
|
rss = self._call_api('web/browse/' + video_id, video_id)
|
||||||
return self._parse_rss_feed(rss)
|
return self._parse_rss_feed(rss)
|
||||||
|
|
||||||
|
|
||||||
|
class CBCOlympicsIE(InfoExtractor):
    """Information extractor for video pages on olympics.cbc.ca.

    The video page exposes a ``videoId`` hidden input; that id selects an
    XML metadata document which lists one or more video sources.  Each
    source URI is exchanged for a playable URL through the site's Akamai
    tokenize endpoint before the formats are extracted.
    """
    IE_NAME = 'cbc.ca:olympics'
    _VALID_URL = r'https?://olympics\.cbc\.ca/video/[^/]+/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://olympics.cbc.ca/video/whats-on-tv/olympic-morning-featuring-the-opening-ceremony/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video page at ``url``.

        Returns a dict with ``id``, ``display_id``, ``title``,
        ``description``, ``thumbnail``, ``duration``, ``formats`` and
        ``is_live``.  Sources whose tokenization fails or yields no
        content URL are skipped rather than aborting the extraction.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # The numeric/opaque video id is only available from the page's
        # hidden form inputs, not from the URL itself.
        video_id = self._hidden_inputs(webpage)['videoId']
        video_doc = self._download_xml(
            'https://olympics.cbc.ca/videodata/%s.xml' % video_id, video_id)
        title = xpath_text(video_doc, 'title', fatal=True)
        is_live = xpath_text(video_doc, 'kind') == 'Live'
        if is_live:
            title = self._live_title(title)

        formats = []
        for video_source in video_doc.findall('videoSources/videoSource'):
            uri = xpath_text(video_source, 'uri')
            if not uri:
                continue
            # Best-effort request (fatal=False): a failure for one source
            # must not prevent the remaining sources from being tried.
            tokenize = self._download_json(
                'https://olympics.cbc.ca/api/api-akamai/tokenize',
                video_id, data=json.dumps({
                    'VideoSource': uri,
                }).encode(), headers={
                    'Content-Type': 'application/json',
                    'Referer': url,
                    # d3.VideoPlayer._init in https://olympics.cbc.ca/components/script/base.js
                    'Cookie': '_dvp=TK:C0ObxjerU',  # AKAMAI CDN cookie
                }, fatal=False)
            # Guard the lookup: a response that is not a JSON object or
            # that lacks 'ContentUrl' is treated like a failed request.
            content_url = (
                tokenize.get('ContentUrl')
                if isinstance(tokenize, dict) else None)
            if not content_url:
                continue
            video_source_format = video_source.get('format')
            if video_source_format == 'IIS':
                formats.extend(self._extract_ism_formats(
                    content_url, video_id, ism_id=video_source_format, fatal=False))
            else:
                formats.extend(self._extract_m3u8_formats(
                    content_url, video_id, 'mp4',
                    'm3u8' if is_live else 'm3u8_native',
                    m3u8_id=video_source_format, fatal=False))
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': xpath_text(video_doc, 'description'),
            'thumbnail': xpath_text(video_doc, 'thumbnailUrl'),
            'duration': parse_duration(xpath_text(video_doc, 'duration')),
            'formats': formats,
            'is_live': is_live,
        }
|
||||||
|
|
|
@ -162,6 +162,7 @@ from .cbc import (
|
||||||
CBCPlayerIE,
|
CBCPlayerIE,
|
||||||
CBCWatchVideoIE,
|
CBCWatchVideoIE,
|
||||||
CBCWatchIE,
|
CBCWatchIE,
|
||||||
|
CBCOlympicsIE,
|
||||||
)
|
)
|
||||||
from .cbs import CBSIE
|
from .cbs import CBSIE
|
||||||
from .cbslocal import CBSLocalIE
|
from .cbslocal import CBSLocalIE
|
||||||
|
|
|
@ -82,7 +82,7 @@ def register_socks_protocols():
|
||||||
compiled_regex_type = type(re.compile(''))
|
compiled_regex_type = type(re.compile(''))
|
||||||
|
|
||||||
std_headers = {
|
std_headers = {
|
||||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/47.0 (Chrome)',
|
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0 (Chrome)',
|
||||||
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
|
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
|
||||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
||||||
'Accept-Encoding': 'gzip, deflate',
|
'Accept-Encoding': 'gzip, deflate',
|
||||||
|
|
Loading…
Reference in a new issue