[ie/CBCPlayer] Extract HLS formats and subtitles (#7484)
Authored by: trainman261
This commit is contained in:
parent
dab87ca236
commit
339c339fec
3 changed files with 48 additions and 3 deletions
|
@ -161,7 +161,7 @@ class CBCPlayerIE(InfoExtractor):
|
||||||
'upload_date': '20160210',
|
'upload_date': '20160210',
|
||||||
'uploader': 'CBCC-NEW',
|
'uploader': 'CBCC-NEW',
|
||||||
},
|
},
|
||||||
'skip': 'Geo-restricted to Canada',
|
'skip': 'Geo-restricted to Canada and no longer available',
|
||||||
}, {
|
}, {
|
||||||
# Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
|
# Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
|
||||||
'url': 'http://www.cbc.ca/player/play/2657631896',
|
'url': 'http://www.cbc.ca/player/play/2657631896',
|
||||||
|
@ -174,6 +174,9 @@ class CBCPlayerIE(InfoExtractor):
|
||||||
'timestamp': 1425704400,
|
'timestamp': 1425704400,
|
||||||
'upload_date': '20150307',
|
'upload_date': '20150307',
|
||||||
'uploader': 'CBCC-NEW',
|
'uploader': 'CBCC-NEW',
|
||||||
|
'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
|
||||||
|
'chapters': [],
|
||||||
|
'duration': 494.811,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.cbc.ca/player/play/2164402062',
|
'url': 'http://www.cbc.ca/player/play/2164402062',
|
||||||
|
@ -186,6 +189,28 @@ class CBCPlayerIE(InfoExtractor):
|
||||||
'timestamp': 1320410746,
|
'timestamp': 1320410746,
|
||||||
'upload_date': '20111104',
|
'upload_date': '20111104',
|
||||||
'uploader': 'CBCC-NEW',
|
'uploader': 'CBCC-NEW',
|
||||||
|
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
|
||||||
|
'chapters': [],
|
||||||
|
'duration': 186.867,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# Has subtitles
|
||||||
|
# These broadcasts expire after ~1 month; a new test URL can be found here:
|
||||||
|
# https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast
|
||||||
|
'url': 'http://www.cbc.ca/player/play/2249992771553',
|
||||||
|
'md5': '2f2fb675dd4f0f8a5bb7588d1b13bacd',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2249992771553',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'The National | Women’s soccer pay, Florida seawater, Swift quake',
|
||||||
|
'description': 'md5:adba28011a56cfa47a080ff198dad27a',
|
||||||
|
'timestamp': 1690596000,
|
||||||
|
'duration': 2716.333,
|
||||||
|
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
|
||||||
|
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/481/326/thumbnail.jpeg',
|
||||||
|
'uploader': 'CBCC-NEW',
|
||||||
|
'chapters': 'count:5',
|
||||||
|
'upload_date': '20230729',
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@ -199,6 +224,7 @@ class CBCPlayerIE(InfoExtractor):
|
||||||
'force_smil_url': True
|
'force_smil_url': True
|
||||||
}),
|
}),
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
'_format_sort_fields': ('res', 'proto') # Prioritize direct http formats over HLS
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -115,6 +115,7 @@ class ScrippsNetworksIE(InfoExtractor):
|
||||||
'uploader': 'SCNI-SCND',
|
'uploader': 'SCNI-SCND',
|
||||||
},
|
},
|
||||||
'add_ie': ['ThePlatform'],
|
'add_ie': ['ThePlatform'],
|
||||||
|
'expected_warnings': ['No HLS formats found'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.diynetwork.com/videos/diy-barnwood-tablet-stand-0265790',
|
'url': 'https://www.diynetwork.com/videos/diy-barnwood-tablet-stand-0265790',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
|
|
@ -19,7 +19,11 @@ from ..utils import (
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
|
traverse_obj,
|
||||||
|
update_url,
|
||||||
|
urlhandle_detect_ext,
|
||||||
)
|
)
|
||||||
|
from ..networking import HEADRequest
|
||||||
|
|
||||||
default_ns = 'http://www.w3.org/2005/SMIL21/Language'
|
default_ns = 'http://www.w3.org/2005/SMIL21/Language'
|
||||||
_x = lambda p: xpath_with_ns(p, {'smil': default_ns})
|
_x = lambda p: xpath_with_ns(p, {'smil': default_ns})
|
||||||
|
@ -162,7 +166,8 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
|
||||||
'params': {
|
'params': {
|
||||||
# rtmp download
|
# rtmp download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
},
|
||||||
|
'skip': '404 Not Found',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://player.theplatform.com/p/D6x-PC/pulse_preview/embed/select/media/yMBg9E8KFxZD',
|
'url': 'https://player.theplatform.com/p/D6x-PC/pulse_preview/embed/select/media/yMBg9E8KFxZD',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -171,7 +176,8 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
|
||||||
'description': 'md5:644ad9188d655b742f942bf2e06b002d',
|
'description': 'md5:644ad9188d655b742f942bf2e06b002d',
|
||||||
'title': 'HIGHLIGHTS: USA bag first ever series Cup win',
|
'title': 'HIGHLIGHTS: USA bag first ever series Cup win',
|
||||||
'uploader': 'EGSM',
|
'uploader': 'EGSM',
|
||||||
}
|
},
|
||||||
|
'skip': '404 Not Found',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7',
|
'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -189,6 +195,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
|
||||||
'upload_date': '20150701',
|
'upload_date': '20150701',
|
||||||
'uploader': 'NBCU-NEWS',
|
'uploader': 'NBCU-NEWS',
|
||||||
},
|
},
|
||||||
|
'skip': '404 Not Found',
|
||||||
}, {
|
}, {
|
||||||
# From http://www.nbc.com/the-blacklist/video/sir-crispin-crandall/2928790?onid=137781#vc137781=1
|
# From http://www.nbc.com/the-blacklist/video/sir-crispin-crandall/2928790?onid=137781#vc137781=1
|
||||||
# geo-restricted (US), HLS encrypted with AES-128
|
# geo-restricted (US), HLS encrypted with AES-128
|
||||||
|
@ -295,6 +302,17 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
|
||||||
|
|
||||||
formats, subtitles = self._extract_theplatform_smil(smil_url, video_id)
|
formats, subtitles = self._extract_theplatform_smil(smil_url, video_id)
|
||||||
|
|
||||||
|
# For some sites, the manifest URL must be forced in order to extract HLS formats
|
||||||
|
if not traverse_obj(formats, lambda _, v: v['format_id'].startswith('hls')):
|
||||||
|
m3u8_url = update_url(url, query='mbr=true&manifest=m3u', fragment=None)
|
||||||
|
urlh = self._request_webpage(
|
||||||
|
HEADRequest(m3u8_url), video_id, 'Checking for HLS formats', 'No HLS formats found', fatal=False)
|
||||||
|
if urlh and urlhandle_detect_ext(urlh) == 'm3u8':
|
||||||
|
m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
|
||||||
|
m3u8_url, video_id, m3u8_id='hls', fatal=False)
|
||||||
|
formats.extend(m3u8_fmts)
|
||||||
|
self._merge_subtitles(m3u8_subs, target=subtitles)
|
||||||
|
|
||||||
ret = self._extract_theplatform_metadata(path, video_id)
|
ret = self._extract_theplatform_metadata(path, video_id)
|
||||||
combined_subtitles = self._merge_subtitles(ret.get('subtitles', {}), subtitles)
|
combined_subtitles = self._merge_subtitles(ret.get('subtitles', {}), subtitles)
|
||||||
ret.update({
|
ret.update({
|
||||||
|
|
Loading…
Reference in a new issue