[extractor/common] Extract DASH subtitle tracks
_extract_mpd_formats and _parse_mpd_formats were extended into _…_formats_and_subtitles; wrappers with old names are provided for compatibility.
This commit is contained in:
parent
a0c3b2d5cf
commit
171e59edd4
1 changed files with 54 additions and 27 deletions
|
@ -2426,23 +2426,44 @@ class InfoExtractor(object):
|
||||||
})
|
})
|
||||||
return entries
|
return entries
|
||||||
|
|
||||||
def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
|
def _extract_mpd_formats(self, *args, **kwargs):
|
||||||
|
fmts, subs = self._extract_mpd_formats_and_subtitles(*args, **kwargs)
|
||||||
|
if subs:
|
||||||
|
self.report_warning(bug_reports_message(
|
||||||
|
"Ignoring subtitle tracks found in the DASH manifest; "
|
||||||
|
"if any subtitle tracks are missing,"
|
||||||
|
))
|
||||||
|
return fmts
|
||||||
|
|
||||||
|
def _extract_mpd_formats_and_subtitles(
|
||||||
|
self, mpd_url, video_id, mpd_id=None, note=None, errnote=None,
|
||||||
|
fatal=True, data=None, headers={}, query={}):
|
||||||
res = self._download_xml_handle(
|
res = self._download_xml_handle(
|
||||||
mpd_url, video_id,
|
mpd_url, video_id,
|
||||||
note=note or 'Downloading MPD manifest',
|
note=note or 'Downloading MPD manifest',
|
||||||
errnote=errnote or 'Failed to download MPD manifest',
|
errnote=errnote or 'Failed to download MPD manifest',
|
||||||
fatal=fatal, data=data, headers=headers, query=query)
|
fatal=fatal, data=data, headers=headers, query=query)
|
||||||
if res is False:
|
if res is False:
|
||||||
return []
|
return [], {}
|
||||||
mpd_doc, urlh = res
|
mpd_doc, urlh = res
|
||||||
if mpd_doc is None:
|
if mpd_doc is None:
|
||||||
return []
|
return [], {}
|
||||||
mpd_base_url = base_url(urlh.geturl())
|
mpd_base_url = base_url(urlh.geturl())
|
||||||
|
|
||||||
return self._parse_mpd_formats(
|
return self._parse_mpd_formats_and_subtitles(
|
||||||
mpd_doc, mpd_id, mpd_base_url, mpd_url)
|
mpd_doc, mpd_id, mpd_base_url, mpd_url)
|
||||||
|
|
||||||
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
|
def _parse_mpd_formats(self, *args, **kwargs):
|
||||||
|
fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs)
|
||||||
|
if subs:
|
||||||
|
self.report_warning(bug_reports_message(
|
||||||
|
"Ignoring subtitle tracks found in the DASH manifest; "
|
||||||
|
"if any subtitle tracks are missing,"
|
||||||
|
))
|
||||||
|
return fmts
|
||||||
|
|
||||||
|
def _parse_mpd_formats_and_subtitles(
|
||||||
|
self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
|
||||||
"""
|
"""
|
||||||
Parse formats from MPD manifest.
|
Parse formats from MPD manifest.
|
||||||
References:
|
References:
|
||||||
|
@ -2452,7 +2473,7 @@ class InfoExtractor(object):
|
||||||
"""
|
"""
|
||||||
if not self._downloader.params.get('dynamic_mpd', True):
|
if not self._downloader.params.get('dynamic_mpd', True):
|
||||||
if mpd_doc.get('type') == 'dynamic':
|
if mpd_doc.get('type') == 'dynamic':
|
||||||
return []
|
return [], {}
|
||||||
|
|
||||||
namespace = self._search_regex(r'(?i)^{([^}]+)?}MPD$', mpd_doc.tag, 'namespace', default=None)
|
namespace = self._search_regex(r'(?i)^{([^}]+)?}MPD$', mpd_doc.tag, 'namespace', default=None)
|
||||||
|
|
||||||
|
@ -2524,6 +2545,7 @@ class InfoExtractor(object):
|
||||||
|
|
||||||
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
|
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
|
||||||
formats = []
|
formats = []
|
||||||
|
subtitles = {}
|
||||||
for period in mpd_doc.findall(_add_ns('Period')):
|
for period in mpd_doc.findall(_add_ns('Period')):
|
||||||
period_duration = parse_duration(period.get('duration')) or mpd_duration
|
period_duration = parse_duration(period.get('duration')) or mpd_duration
|
||||||
period_ms_info = extract_multisegment_info(period, {
|
period_ms_info = extract_multisegment_info(period, {
|
||||||
|
@ -2541,11 +2563,9 @@ class InfoExtractor(object):
|
||||||
representation_attrib.update(representation.attrib)
|
representation_attrib.update(representation.attrib)
|
||||||
# According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
|
# According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
|
||||||
mime_type = representation_attrib['mimeType']
|
mime_type = representation_attrib['mimeType']
|
||||||
content_type = mime_type.split('/')[0]
|
content_type = representation_attrib.get('contentType', mime_type.split('/')[0])
|
||||||
if content_type == 'text':
|
|
||||||
# TODO implement WebVTT downloading
|
if content_type in ('video', 'audio', 'text'):
|
||||||
pass
|
|
||||||
elif content_type in ('video', 'audio'):
|
|
||||||
base_url = ''
|
base_url = ''
|
||||||
for element in (representation, adaptation_set, period, mpd_doc):
|
for element in (representation, adaptation_set, period, mpd_doc):
|
||||||
base_url_e = element.find(_add_ns('BaseURL'))
|
base_url_e = element.find(_add_ns('BaseURL'))
|
||||||
|
@ -2562,21 +2582,28 @@ class InfoExtractor(object):
|
||||||
url_el = representation.find(_add_ns('BaseURL'))
|
url_el = representation.find(_add_ns('BaseURL'))
|
||||||
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
|
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
|
||||||
bandwidth = int_or_none(representation_attrib.get('bandwidth'))
|
bandwidth = int_or_none(representation_attrib.get('bandwidth'))
|
||||||
f = {
|
if content_type in ('video', 'audio'):
|
||||||
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
|
f = {
|
||||||
'manifest_url': mpd_url,
|
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
|
||||||
'ext': mimetype2ext(mime_type),
|
'manifest_url': mpd_url,
|
||||||
'width': int_or_none(representation_attrib.get('width')),
|
'ext': mimetype2ext(mime_type),
|
||||||
'height': int_or_none(representation_attrib.get('height')),
|
'width': int_or_none(representation_attrib.get('width')),
|
||||||
'tbr': float_or_none(bandwidth, 1000),
|
'height': int_or_none(representation_attrib.get('height')),
|
||||||
'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
|
'tbr': float_or_none(bandwidth, 1000),
|
||||||
'fps': int_or_none(representation_attrib.get('frameRate')),
|
'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
|
||||||
'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
|
'fps': int_or_none(representation_attrib.get('frameRate')),
|
||||||
'format_note': 'DASH %s' % content_type,
|
'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
|
||||||
'filesize': filesize,
|
'format_note': 'DASH %s' % content_type,
|
||||||
'container': mimetype2ext(mime_type) + '_dash',
|
'filesize': filesize,
|
||||||
}
|
'container': mimetype2ext(mime_type) + '_dash',
|
||||||
f.update(parse_codecs(representation_attrib.get('codecs')))
|
}
|
||||||
|
f.update(parse_codecs(representation_attrib.get('codecs')))
|
||||||
|
elif content_type == 'text':
|
||||||
|
f = {
|
||||||
|
'ext': mimetype2ext(mime_type),
|
||||||
|
'manifest_url': mpd_url,
|
||||||
|
'filesize': filesize,
|
||||||
|
}
|
||||||
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
||||||
|
|
||||||
def prepare_template(template_name, identifiers):
|
def prepare_template(template_name, identifiers):
|
||||||
|
@ -2726,7 +2753,7 @@ class InfoExtractor(object):
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
else:
|
else:
|
||||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||||
return formats
|
return formats, subtitles
|
||||||
|
|
||||||
def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
|
def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
|
||||||
res = self._download_xml_handle(
|
res = self._download_xml_handle(
|
||||||
|
|
Loading…
Reference in a new issue