[extractor/generic] Add extractor-args hls_key, variant_query (#6567)
Authored by: bashonly
This commit is contained in:
parent 06966cb896
commit c2e0fc40a7
2 changed files with 23 additions and 11 deletions
|
@@ -1798,6 +1798,8 @@ The following extractors use this feature:
|
||||||
|
|
||||||
#### generic
|
#### generic
|
||||||
* `fragment_query`: Passthrough any query in mpd/m3u8 manifest URLs to their fragments. Does not apply to ffmpeg
|
* `fragment_query`: Passthrough any query in mpd/m3u8 manifest URLs to their fragments. Does not apply to ffmpeg
|
||||||
|
* `variant_query`: Passthrough the master m3u8 URL query to its variant playlist URLs
|
||||||
|
* `hls_key`: An HLS AES-128 key URI *or* key (as hex), and optionally the IV (as hex), in the form of `(URI|KEY)[,IV]`; e.g. `generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321`. Passing any of these values will force usage of the native HLS downloader and override the corresponding values found in the m3u8 playlist
|
||||||
|
|
||||||
#### funimation
|
#### funimation
|
||||||
* `language`: Audio languages to extract, e.g. `funimation:language=english,japanese`
|
* `language`: Audio languages to extract, e.g. `funimation:language=english,japanese`
|
||||||
|
|
|
@@ -24,6 +24,7 @@ from ..utils import (
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
|
parse_qs,
|
||||||
parse_resolution,
|
parse_resolution,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
|
@@ -32,6 +33,7 @@ from ..utils import (
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
|
update_url_query,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urljoin,
|
urljoin,
|
||||||
variadic,
|
variadic,
|
||||||
|
@@ -2184,12 +2186,21 @@ class GenericIE(InfoExtractor):
|
||||||
|
|
||||||
self._downloader.write_debug(f'Identified {num} {name}{format_field(note, None, "; %s")}')
|
self._downloader.write_debug(f'Identified {num} {name}{format_field(note, None, "; %s")}')
|
||||||
|
|
||||||
def _fragment_query(self, url):
|
def _extra_manifest_info(self, info, manifest_url):
|
||||||
if self._configuration_arg('fragment_query'):
|
if self._configuration_arg('fragment_query'):
|
||||||
query_string = urllib.parse.urlparse(url).query
|
query_string = urllib.parse.urlparse(manifest_url).query
|
||||||
if query_string:
|
if query_string:
|
||||||
return {'extra_param_to_segment_url': query_string}
|
info['extra_param_to_segment_url'] = query_string
|
||||||
return {}
|
|
||||||
|
hex_or_none = lambda x: x if re.fullmatch(r'(0x)?[\da-f]+', x, re.IGNORECASE) else None
|
||||||
|
info['hls_aes'] = traverse_obj(self._configuration_arg('hls_key'), {
|
||||||
|
'uri': (0, {url_or_none}), 'key': (0, {hex_or_none}), 'iv': (1, {hex_or_none}),
|
||||||
|
}) or None
|
||||||
|
|
||||||
|
if self._configuration_arg('variant_query'):
|
||||||
|
query = parse_qs(manifest_url)
|
||||||
|
for fmt in self._downloader._get_formats(info):
|
||||||
|
fmt['url'] = update_url_query(fmt['url'], query)
|
||||||
|
|
||||||
def _extract_rss(self, url, video_id, doc):
|
def _extract_rss(self, url, video_id, doc):
|
||||||
NS_MAP = {
|
NS_MAP = {
|
||||||
|
@@ -2397,10 +2408,8 @@ class GenericIE(InfoExtractor):
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
if format_id.endswith('mpegurl') or ext == 'm3u8':
|
if format_id.endswith('mpegurl') or ext == 'm3u8':
|
||||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4', headers=headers)
|
formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4', headers=headers)
|
||||||
info_dict.update(self._fragment_query(url))
|
|
||||||
elif format_id.endswith('mpd') or format_id.endswith('dash+xml') or ext == 'mpd':
|
elif format_id.endswith('mpd') or format_id.endswith('dash+xml') or ext == 'mpd':
|
||||||
formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id, headers=headers)
|
formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id, headers=headers)
|
||||||
info_dict.update(self._fragment_query(url))
|
|
||||||
elif format_id == 'f4m' or ext == 'f4m':
|
elif format_id == 'f4m' or ext == 'f4m':
|
||||||
formats = self._extract_f4m_formats(url, video_id, headers=headers)
|
formats = self._extract_f4m_formats(url, video_id, headers=headers)
|
||||||
else:
|
else:
|
||||||
|
@@ -2415,6 +2424,7 @@ class GenericIE(InfoExtractor):
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'http_headers': headers or None,
|
'http_headers': headers or None,
|
||||||
})
|
})
|
||||||
|
self._extra_manifest_info(info_dict, url)
|
||||||
return info_dict
|
return info_dict
|
||||||
|
|
||||||
if not self.get_param('test', False) and not is_intentional:
|
if not self.get_param('test', False) and not is_intentional:
|
||||||
|
@@ -2427,7 +2437,7 @@ class GenericIE(InfoExtractor):
|
||||||
if first_bytes.startswith(b'#EXTM3U'):
|
if first_bytes.startswith(b'#EXTM3U'):
|
||||||
self.report_detected('M3U playlist')
|
self.report_detected('M3U playlist')
|
||||||
info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
|
info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
|
||||||
info_dict.update(self._fragment_query(url))
|
self._extra_manifest_info(info_dict, url)
|
||||||
return info_dict
|
return info_dict
|
||||||
|
|
||||||
# Maybe it's a direct link to a video?
|
# Maybe it's a direct link to a video?
|
||||||
|
@@ -2478,7 +2488,7 @@ class GenericIE(InfoExtractor):
|
||||||
doc,
|
doc,
|
||||||
mpd_base_url=full_response.geturl().rpartition('/')[0],
|
mpd_base_url=full_response.geturl().rpartition('/')[0],
|
||||||
mpd_url=url)
|
mpd_url=url)
|
||||||
info_dict.update(self._fragment_query(url))
|
self._extra_manifest_info(info_dict, url)
|
||||||
self.report_detected('DASH manifest')
|
self.report_detected('DASH manifest')
|
||||||
return info_dict
|
return info_dict
|
||||||
elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
|
elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
|
||||||
|
@@ -2592,7 +2602,7 @@ class GenericIE(InfoExtractor):
|
||||||
formats.extend(fmts)
|
formats.extend(fmts)
|
||||||
self._merge_subtitles(subs, target=subtitles)
|
self._merge_subtitles(subs, target=subtitles)
|
||||||
for fmt in formats:
|
for fmt in formats:
|
||||||
fmt.update(self._fragment_query(src))
|
self._extra_manifest_info(fmt, src)
|
||||||
|
|
||||||
if not formats:
|
if not formats:
|
||||||
formats.append({
|
formats.append({
|
||||||
|
@@ -2795,10 +2805,10 @@ class GenericIE(InfoExtractor):
|
||||||
return [self._extract_xspf_playlist(video_url, video_id)]
|
return [self._extract_xspf_playlist(video_url, video_id)]
|
||||||
elif ext == 'm3u8':
|
elif ext == 'm3u8':
|
||||||
entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4', headers=headers)
|
entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4', headers=headers)
|
||||||
entry_info_dict.update(self._fragment_query(video_url))
|
self._extra_manifest_info(entry_info_dict, video_url)
|
||||||
elif ext == 'mpd':
|
elif ext == 'mpd':
|
||||||
entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id, headers=headers)
|
entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id, headers=headers)
|
||||||
entry_info_dict.update(self._fragment_query(video_url))
|
self._extra_manifest_info(entry_info_dict, video_url)
|
||||||
elif ext == 'f4m':
|
elif ext == 'f4m':
|
||||||
entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id, headers=headers)
|
entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id, headers=headers)
|
||||||
elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
|
elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
|
||||||
|
|
Loading…
Reference in a new issue