[extractor/generic] Add `fragment_query` extractor arg for DASH and HLS (#5528)
* `fragment_query`: Pass through any query in generic mpd/m3u8 manifest URLs to their fragments
* Add support for `extra_param_to_segment_url` to the DASH downloader

Authored by: bashonly, pukkandan
Parent: f352a09778
Commit: 3b021eacef

3 changed files with 31 additions and 4 deletions
README.md
@@ -1736,6 +1736,9 @@ The following extractors use this feature:
 * `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details)
 * `approximate_date`: Extract approximate `upload_date` and `timestamp` in flat-playlist. This may cause date-based filters to be slightly off
 
+#### generic
+* `fragment_query`: Passthrough any query in mpd/m3u8 manifest URLs to their fragments. Does not apply to ffmpeg
+
 #### funimation
 * `language`: Audio languages to extract, e.g. `funimation:language=english,japanese`
 * `version`: The video version to extract - `uncut` or `simulcast`
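For orientation (not part of the diff): the new argument is toggled like any other extractor arg. A minimal usage sketch follows, with a placeholder manifest URL; the `['']` value in the Python variant is an assumption about what the flag form of `--extractor-args` expands to.

```python
# CLI sketch (hypothetical URL): keep the manifest's query on every fragment request
#   yt-dlp --extractor-args "generic:fragment_query" \
#       "https://cdn.example.com/stream/master.m3u8?token=abc123"

# Python API sketch; the [''] flag-style value is an assumption
import yt_dlp

ydl_opts = {'extractor_args': {'generic': {'fragment_query': ['']}}}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://cdn.example.com/stream/master.m3u8?token=abc123'])
```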
yt_dlp/downloader/dash.py
@@ -1,8 +1,9 @@
 import time
+import urllib.parse
 
 from . import get_suitable_downloader
 from .fragment import FragmentFD
-from ..utils import urljoin
+from ..utils import update_url_query, urljoin
 
 
 class DashSegmentsFD(FragmentFD):
@@ -40,7 +41,12 @@ class DashSegmentsFD(FragmentFD):
                 self._prepare_and_start_frag_download(ctx, fmt)
             ctx['start'] = real_start
 
-            fragments_to_download = self._get_fragments(fmt, ctx)
+            extra_query = None
+            extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
+            if extra_param_to_segment_url:
+                extra_query = urllib.parse.parse_qs(extra_param_to_segment_url)
+
+            fragments_to_download = self._get_fragments(fmt, ctx, extra_query)
 
             if real_downloader:
                 self.to_screen(
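As a reading aid for the hunk above: `urllib.parse.parse_qs` turns the raw query string stored in `extra_param_to_segment_url` into a dict of value lists, which is the shape `update_url_query` accepts further down. A standalone sketch with a made-up query string:

```python
import urllib.parse

# Hypothetical value of extra_param_to_segment_url (the manifest URL's raw query)
extra_param_to_segment_url = 'token=abc123&expires=1700000000'

extra_query = urllib.parse.parse_qs(extra_param_to_segment_url)
print(extra_query)  # {'token': ['abc123'], 'expires': ['1700000000']}
```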
@@ -57,7 +63,7 @@ class DashSegmentsFD(FragmentFD):
         fragments = fragments(ctx) if callable(fragments) else fragments
         return [next(iter(fragments))] if self.params.get('test') else fragments
 
-    def _get_fragments(self, fmt, ctx):
+    def _get_fragments(self, fmt, ctx, extra_query):
         fragment_base_url = fmt.get('fragment_base_url')
         fragments = self._resolve_fragments(fmt['fragments'], ctx)
 
@@ -70,6 +76,8 @@ class DashSegmentsFD(FragmentFD):
             if not fragment_url:
                 assert fragment_base_url
                 fragment_url = urljoin(fragment_base_url, fragment['path'])
+            if extra_query:
+                fragment_url = update_url_query(fragment_url, extra_query)
 
             yield {
                 'frag_index': frag_index,
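To show what the two added lines do to a single fragment, here is a standalone sketch using `update_url_query` from `yt_dlp.utils` on a placeholder fragment URL:

```python
import urllib.parse

from yt_dlp.utils import update_url_query

extra_query = urllib.parse.parse_qs('token=abc123&expires=1700000000')
fragment_url = 'https://cdn.example.com/video/seg-00001.m4s'  # hypothetical fragment

# The parsed query is re-encoded onto the fragment URL
print(update_url_query(fragment_url, extra_query))
# https://cdn.example.com/video/seg-00001.m4s?token=abc123&expires=1700000000
```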
yt_dlp/extractor/generic.py
@@ -2189,6 +2189,13 @@ class GenericIE(InfoExtractor):
         self._downloader.write_debug(f'Identified {num} {name}{format_field(note, None, "; %s")}')
 
+    def _fragment_query(self, url):
+        if self._configuration_arg('fragment_query'):
+            query_string = urllib.parse.urlparse(url).query
+            if query_string:
+                return {'extra_param_to_segment_url': query_string}
+        return {}
+
     def _extract_rss(self, url, video_id, doc):
         NS_MAP = {
             'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
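A quick sketch of what the new helper returns, with the `fragment_query` extractor-arg check dropped so it runs outside an extractor (URLs are placeholders):

```python
import urllib.parse

def fragment_query(url):
    # Mirrors GenericIE._fragment_query minus the self._configuration_arg gate
    query_string = urllib.parse.urlparse(url).query
    return {'extra_param_to_segment_url': query_string} if query_string else {}

print(fragment_query('https://cdn.example.com/manifest.mpd?token=abc123'))
# {'extra_param_to_segment_url': 'token=abc123'}
print(fragment_query('https://cdn.example.com/manifest.mpd'))
# {}
```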
@@ -2351,8 +2358,10 @@ class GenericIE(InfoExtractor):
             subtitles = {}
             if format_id.endswith('mpegurl'):
                 formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4', headers=headers)
+                info_dict.update(self._fragment_query(url))
             elif format_id.endswith('mpd') or format_id.endswith('dash+xml'):
                 formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id, headers=headers)
+                info_dict.update(self._fragment_query(url))
             elif format_id == 'f4m':
                 formats = self._extract_f4m_formats(url, video_id, headers=headers)
             else:
@@ -2379,6 +2388,7 @@ class GenericIE(InfoExtractor):
         if first_bytes.startswith(b'#EXTM3U'):
             self.report_detected('M3U playlist')
             info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
+            info_dict.update(self._fragment_query(url))
             return info_dict
 
         # Maybe it's a direct link to a video?
@@ -2429,6 +2439,7 @@ class GenericIE(InfoExtractor):
                     doc,
                     mpd_base_url=full_response.geturl().rpartition('/')[0],
                     mpd_url=url)
+                info_dict.update(self._fragment_query(url))
                 self.report_detected('DASH manifest')
                 return info_dict
             elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
@@ -2541,7 +2552,10 @@ class GenericIE(InfoExtractor):
                         m3u8_id='hls', fatal=False)
                     formats.extend(fmts)
                     self._merge_subtitles(subs, target=subtitles)
-                else:
+                    for fmt in formats:
+                        fmt.update(self._fragment_query(src))
+
+                if not formats:
                     formats.append({
                         'url': src,
                         'ext': (mimetype2ext(src_type)
@@ -2776,8 +2790,10 @@ class GenericIE(InfoExtractor):
             return [self._extract_xspf_playlist(video_url, video_id)]
         elif ext == 'm3u8':
             entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4', headers=headers)
+            entry_info_dict.update(self._fragment_query(video_url))
         elif ext == 'mpd':
             entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id, headers=headers)
+            entry_info_dict.update(self._fragment_query(video_url))
         elif ext == 'f4m':
             entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id, headers=headers)
         elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url: