[extractor/common, downloader/ism] Extract SSTR subtitle tracks
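_parse_ism_formats was extended into _parse_ism_formats_and_subtitles, and _extract_ism_formats into _extract_ism_formats_and_subtitles; both new methods return a (formats, subtitles) pair. All direct users were updated, though _extract_ism_formats was left as a compatibility wrapper that returns only the formats and emits a warning when the manifest contains subtitle tracks that are being ignored. The SSTR downloader was also modified in order to prepare for muxing subtitle streams: write_piff_header now takes an explicit stream_type download parameter (instead of inferring audio from a zero width and height) and writes the subtitle handler and media header boxes for text streams. No support for any subtitle codecs was added in this commit, so writing the sample entry for a text stream still fails an assertion.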
This commit is contained in:
parent
171e59edd4
commit
fd76a14259
2 changed files with 98 additions and 45 deletions
|
@ -48,7 +48,7 @@ def write_piff_header(stream, params):
|
||||||
language = params.get('language', 'und')
|
language = params.get('language', 'und')
|
||||||
height = params.get('height', 0)
|
height = params.get('height', 0)
|
||||||
width = params.get('width', 0)
|
width = params.get('width', 0)
|
||||||
is_audio = width == 0 and height == 0
|
stream_type = params['stream_type']
|
||||||
creation_time = modification_time = int(time.time())
|
creation_time = modification_time = int(time.time())
|
||||||
|
|
||||||
ftyp_payload = b'isml' # major brand
|
ftyp_payload = b'isml' # major brand
|
||||||
|
@ -77,7 +77,7 @@ def write_piff_header(stream, params):
|
||||||
tkhd_payload += u32.pack(0) * 2 # reserved
|
tkhd_payload += u32.pack(0) * 2 # reserved
|
||||||
tkhd_payload += s16.pack(0) # layer
|
tkhd_payload += s16.pack(0) # layer
|
||||||
tkhd_payload += s16.pack(0) # alternate group
|
tkhd_payload += s16.pack(0) # alternate group
|
||||||
tkhd_payload += s88.pack(1 if is_audio else 0) # volume
|
tkhd_payload += s88.pack(1 if stream_type == 'audio' else 0) # volume
|
||||||
tkhd_payload += u16.pack(0) # reserved
|
tkhd_payload += u16.pack(0) # reserved
|
||||||
tkhd_payload += unity_matrix
|
tkhd_payload += unity_matrix
|
||||||
tkhd_payload += u1616.pack(width)
|
tkhd_payload += u1616.pack(width)
|
||||||
|
@ -93,19 +93,35 @@ def write_piff_header(stream, params):
|
||||||
mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload) # Media Header Box
|
mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload) # Media Header Box
|
||||||
|
|
||||||
hdlr_payload = u32.pack(0) # pre defined
|
hdlr_payload = u32.pack(0) # pre defined
|
||||||
hdlr_payload += b'soun' if is_audio else b'vide' # handler type
|
if stream_type == 'audio': # handler type
|
||||||
hdlr_payload += u32.pack(0) * 3 # reserved
|
hdlr_payload += b'soun'
|
||||||
hdlr_payload += (b'Sound' if is_audio else b'Video') + b'Handler\0' # name
|
hdlr_payload += u32.pack(0) * 3 # reserved
|
||||||
|
hdlr_payload += b'SoundHandler\0' # name
|
||||||
|
elif stream_type == 'video':
|
||||||
|
hdlr_payload += b'vide'
|
||||||
|
hdlr_payload += u32.pack(0) * 3 # reserved
|
||||||
|
hdlr_payload += b'VideoHandler\0' # name
|
||||||
|
elif stream_type == 'text':
|
||||||
|
hdlr_payload += b'subt'
|
||||||
|
hdlr_payload += u32.pack(0) * 3 # reserved
|
||||||
|
hdlr_payload += b'SubtitleHandler\0' # name
|
||||||
|
else:
|
||||||
|
assert False
|
||||||
mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload) # Handler Reference Box
|
mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload) # Handler Reference Box
|
||||||
|
|
||||||
if is_audio:
|
if stream_type == 'audio':
|
||||||
smhd_payload = s88.pack(0) # balance
|
smhd_payload = s88.pack(0) # balance
|
||||||
smhd_payload += u16.pack(0) # reserved
|
smhd_payload += u16.pack(0) # reserved
|
||||||
media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header
|
media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header
|
||||||
else:
|
elif stream_type == 'video':
|
||||||
vmhd_payload = u16.pack(0) # graphics mode
|
vmhd_payload = u16.pack(0) # graphics mode
|
||||||
vmhd_payload += u16.pack(0) * 3 # opcolor
|
vmhd_payload += u16.pack(0) * 3 # opcolor
|
||||||
media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload) # Video Media Header
|
media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload) # Video Media Header
|
||||||
|
elif stream_type == 'text':
|
||||||
|
sthd_payload = u16.pack(0) * 2
|
||||||
|
media_header_box = full_box(b'sthd', 0, 1, sthd_payload) # Subtitle Media Header
|
||||||
|
else:
|
||||||
|
assert False
|
||||||
minf_payload = media_header_box
|
minf_payload = media_header_box
|
||||||
|
|
||||||
dref_payload = u32.pack(1) # entry count
|
dref_payload = u32.pack(1) # entry count
|
||||||
|
@ -117,7 +133,7 @@ def write_piff_header(stream, params):
|
||||||
|
|
||||||
sample_entry_payload = u8.pack(0) * 6 # reserved
|
sample_entry_payload = u8.pack(0) * 6 # reserved
|
||||||
sample_entry_payload += u16.pack(1) # data reference index
|
sample_entry_payload += u16.pack(1) # data reference index
|
||||||
if is_audio:
|
if stream_type == 'audio':
|
||||||
sample_entry_payload += u32.pack(0) * 2 # reserved
|
sample_entry_payload += u32.pack(0) * 2 # reserved
|
||||||
sample_entry_payload += u16.pack(params.get('channels', 2))
|
sample_entry_payload += u16.pack(params.get('channels', 2))
|
||||||
sample_entry_payload += u16.pack(params.get('bits_per_sample', 16))
|
sample_entry_payload += u16.pack(params.get('bits_per_sample', 16))
|
||||||
|
@ -127,7 +143,7 @@ def write_piff_header(stream, params):
|
||||||
|
|
||||||
if fourcc == 'AACL':
|
if fourcc == 'AACL':
|
||||||
sample_entry_box = box(b'mp4a', sample_entry_payload)
|
sample_entry_box = box(b'mp4a', sample_entry_payload)
|
||||||
else:
|
elif stream_type == 'video':
|
||||||
sample_entry_payload += u16.pack(0) # pre defined
|
sample_entry_payload += u16.pack(0) # pre defined
|
||||||
sample_entry_payload += u16.pack(0) # reserved
|
sample_entry_payload += u16.pack(0) # reserved
|
||||||
sample_entry_payload += u32.pack(0) * 3 # pre defined
|
sample_entry_payload += u32.pack(0) * 3 # pre defined
|
||||||
|
@ -155,6 +171,10 @@ def write_piff_header(stream, params):
|
||||||
avcc_payload += pps
|
avcc_payload += pps
|
||||||
sample_entry_payload += box(b'avcC', avcc_payload) # AVC Decoder Configuration Record
|
sample_entry_payload += box(b'avcC', avcc_payload) # AVC Decoder Configuration Record
|
||||||
sample_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry
|
sample_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry
|
||||||
|
else:
|
||||||
|
assert False
|
||||||
|
else:
|
||||||
|
assert False
|
||||||
stsd_payload += sample_entry_box
|
stsd_payload += sample_entry_box
|
||||||
|
|
||||||
stbl_payload = full_box(b'stsd', 0, 0, stsd_payload) # Sample Description Box
|
stbl_payload = full_box(b'stsd', 0, 0, stsd_payload) # Sample Description Box
|
||||||
|
|
|
@ -2750,26 +2750,38 @@ class InfoExtractor(object):
|
||||||
else:
|
else:
|
||||||
# Assuming direct URL to unfragmented media.
|
# Assuming direct URL to unfragmented media.
|
||||||
f['url'] = base_url
|
f['url'] = base_url
|
||||||
formats.append(f)
|
if content_type in ('video', 'audio'):
|
||||||
|
formats.append(f)
|
||||||
|
elif content_type == 'text':
|
||||||
|
subtitles.setdefault(lang or 'und', []).append(f)
|
||||||
else:
|
else:
|
||||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||||
return formats, subtitles
|
return formats, subtitles
|
||||||
|
|
||||||
def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
|
def _extract_ism_formats(self, *args, **kwargs):
|
||||||
|
fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs)
|
||||||
|
if subs:
|
||||||
|
self.report_warning(bug_reports_message(
|
||||||
|
"Ignoring subtitle tracks found in the ISM manifest; "
|
||||||
|
"if any subtitle tracks are missing,"
|
||||||
|
))
|
||||||
|
return fmts
|
||||||
|
|
||||||
|
def _extract_ism_formats_and_subtitles(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
|
||||||
res = self._download_xml_handle(
|
res = self._download_xml_handle(
|
||||||
ism_url, video_id,
|
ism_url, video_id,
|
||||||
note=note or 'Downloading ISM manifest',
|
note=note or 'Downloading ISM manifest',
|
||||||
errnote=errnote or 'Failed to download ISM manifest',
|
errnote=errnote or 'Failed to download ISM manifest',
|
||||||
fatal=fatal, data=data, headers=headers, query=query)
|
fatal=fatal, data=data, headers=headers, query=query)
|
||||||
if res is False:
|
if res is False:
|
||||||
return []
|
return [], {}
|
||||||
ism_doc, urlh = res
|
ism_doc, urlh = res
|
||||||
if ism_doc is None:
|
if ism_doc is None:
|
||||||
return []
|
return [], {}
|
||||||
|
|
||||||
return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)
|
return self._parse_ism_formats_and_subtitles(ism_doc, urlh.geturl(), ism_id)
|
||||||
|
|
||||||
def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
|
def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
|
||||||
"""
|
"""
|
||||||
Parse formats from ISM manifest.
|
Parse formats from ISM manifest.
|
||||||
References:
|
References:
|
||||||
|
@ -2777,22 +2789,24 @@ class InfoExtractor(object):
|
||||||
https://msdn.microsoft.com/en-us/library/ff469518.aspx
|
https://msdn.microsoft.com/en-us/library/ff469518.aspx
|
||||||
"""
|
"""
|
||||||
if ism_doc.get('IsLive') == 'TRUE':
|
if ism_doc.get('IsLive') == 'TRUE':
|
||||||
return []
|
return [], {}
|
||||||
if (not self._downloader.params.get('allow_unplayable_formats')
|
if (not self._downloader.params.get('allow_unplayable_formats')
|
||||||
and ism_doc.find('Protection') is not None):
|
and ism_doc.find('Protection') is not None):
|
||||||
return []
|
return [], {}
|
||||||
|
|
||||||
duration = int(ism_doc.attrib['Duration'])
|
duration = int(ism_doc.attrib['Duration'])
|
||||||
timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000
|
timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
subtitles = {}
|
||||||
for stream in ism_doc.findall('StreamIndex'):
|
for stream in ism_doc.findall('StreamIndex'):
|
||||||
stream_type = stream.get('Type')
|
stream_type = stream.get('Type')
|
||||||
if stream_type not in ('video', 'audio'):
|
if stream_type not in ('video', 'audio', 'text'):
|
||||||
continue
|
continue
|
||||||
url_pattern = stream.attrib['Url']
|
url_pattern = stream.attrib['Url']
|
||||||
stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
|
stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
|
||||||
stream_name = stream.get('Name')
|
stream_name = stream.get('Name')
|
||||||
|
stream_language = stream.get('Language', 'und')
|
||||||
for track in stream.findall('QualityLevel'):
|
for track in stream.findall('QualityLevel'):
|
||||||
fourcc = track.get('FourCC', 'AACL' if track.get('AudioTag') == '255' else None)
|
fourcc = track.get('FourCC', 'AACL' if track.get('AudioTag') == '255' else None)
|
||||||
# TODO: add support for WVC1 and WMAP
|
# TODO: add support for WVC1 and WMAP
|
||||||
|
@ -2839,33 +2853,52 @@ class InfoExtractor(object):
|
||||||
format_id.append(stream_name)
|
format_id.append(stream_name)
|
||||||
format_id.append(compat_str(tbr))
|
format_id.append(compat_str(tbr))
|
||||||
|
|
||||||
formats.append({
|
if stream_type == 'text':
|
||||||
'format_id': '-'.join(format_id),
|
subtitles.setdefault(stream_language, []).append({
|
||||||
'url': ism_url,
|
'ext': 'ismt',
|
||||||
'manifest_url': ism_url,
|
'protocol': 'ism',
|
||||||
'ext': 'ismv' if stream_type == 'video' else 'isma',
|
'url': ism_url,
|
||||||
'width': width,
|
'manifest_url': ism_url,
|
||||||
'height': height,
|
'fragments': fragments,
|
||||||
'tbr': tbr,
|
'_download_params': {
|
||||||
'asr': sampling_rate,
|
'stream_type': stream_type,
|
||||||
'vcodec': 'none' if stream_type == 'audio' else fourcc,
|
'duration': duration,
|
||||||
'acodec': 'none' if stream_type == 'video' else fourcc,
|
'timescale': stream_timescale,
|
||||||
'protocol': 'ism',
|
'fourcc': fourcc,
|
||||||
'fragments': fragments,
|
'language': stream_language,
|
||||||
'_download_params': {
|
'codec_private_data': track.get('CodecPrivateData'),
|
||||||
'duration': duration,
|
}
|
||||||
'timescale': stream_timescale,
|
})
|
||||||
'width': width or 0,
|
elif stream_type in ('video', 'audio'):
|
||||||
'height': height or 0,
|
formats.append({
|
||||||
'fourcc': fourcc,
|
'format_id': '-'.join(format_id),
|
||||||
'codec_private_data': track.get('CodecPrivateData'),
|
'url': ism_url,
|
||||||
'sampling_rate': sampling_rate,
|
'manifest_url': ism_url,
|
||||||
'channels': int_or_none(track.get('Channels', 2)),
|
'ext': 'ismv' if stream_type == 'video' else 'isma',
|
||||||
'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
|
'width': width,
|
||||||
'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
|
'height': height,
|
||||||
},
|
'tbr': tbr,
|
||||||
})
|
'asr': sampling_rate,
|
||||||
return formats
|
'vcodec': 'none' if stream_type == 'audio' else fourcc,
|
||||||
|
'acodec': 'none' if stream_type == 'video' else fourcc,
|
||||||
|
'protocol': 'ism',
|
||||||
|
'fragments': fragments,
|
||||||
|
'_download_params': {
|
||||||
|
'stream_type': stream_type,
|
||||||
|
'duration': duration,
|
||||||
|
'timescale': stream_timescale,
|
||||||
|
'width': width or 0,
|
||||||
|
'height': height or 0,
|
||||||
|
'fourcc': fourcc,
|
||||||
|
'language': stream_language,
|
||||||
|
'codec_private_data': track.get('CodecPrivateData'),
|
||||||
|
'sampling_rate': sampling_rate,
|
||||||
|
'channels': int_or_none(track.get('Channels', 2)),
|
||||||
|
'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
|
||||||
|
'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
|
||||||
|
},
|
||||||
|
})
|
||||||
|
return formats, subtitles
|
||||||
|
|
||||||
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None, quality=None):
|
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None, quality=None):
|
||||||
def absolute_url(item_url):
|
def absolute_url(item_url):
|
||||||
|
|
Loading…
Reference in a new issue