[extractor/youtube] Consider language in format de-duplication
This commit is contained in:
parent
d7b460d0e5
commit
a4894d3e25
1 changed files with 42 additions and 11 deletions
|
@ -1,5 +1,6 @@
|
||||||
import base64
|
import base64
|
||||||
import calendar
|
import calendar
|
||||||
|
import collections
|
||||||
import copy
|
import copy
|
||||||
import datetime
|
import datetime
|
||||||
import enum
|
import enum
|
||||||
|
@ -2480,6 +2481,34 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
'note': '6 channel audio',
|
'note': '6 channel audio',
|
||||||
'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
|
'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'note': 'Multiple HLS formats with same itag',
|
||||||
|
'url': 'https://www.youtube.com/watch?v=kX3nB4PpJko',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'kX3nB4PpJko',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'categories': ['Entertainment'],
|
||||||
|
'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',
|
||||||
|
'uploader_url': 'http://www.youtube.com/user/MrBeast6000',
|
||||||
|
'live_status': 'not_live',
|
||||||
|
'duration': 937,
|
||||||
|
'channel_follower_count': int,
|
||||||
|
'thumbnail': 'https://i.ytimg.com/vi_webp/kX3nB4PpJko/maxresdefault.webp',
|
||||||
|
'title': 'Last To Take Hand Off Jet, Keeps It!',
|
||||||
|
'channel': 'MrBeast',
|
||||||
|
'playable_in_embed': True,
|
||||||
|
'view_count': int,
|
||||||
|
'upload_date': '20221112',
|
||||||
|
'uploader': 'MrBeast',
|
||||||
|
'uploader_id': 'MrBeast6000',
|
||||||
|
'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',
|
||||||
|
'age_limit': 0,
|
||||||
|
'availability': 'public',
|
||||||
|
'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',
|
||||||
|
'like_count': int,
|
||||||
|
'tags': [],
|
||||||
|
},
|
||||||
|
'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -3472,7 +3501,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
return live_status
|
return live_status
|
||||||
|
|
||||||
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
|
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
|
||||||
itags, stream_ids = {}, []
|
itags, stream_ids = collections.defaultdict(set), []
|
||||||
itag_qualities, res_qualities = {}, {0: None}
|
itag_qualities, res_qualities = {}, {0: None}
|
||||||
q = qualities([
|
q = qualities([
|
||||||
# Normally tiny is the smallest video-only formats. But
|
# Normally tiny is the smallest video-only formats. But
|
||||||
|
@ -3554,10 +3583,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
video_id=video_id, only_once=True)
|
video_id=video_id, only_once=True)
|
||||||
throttled = True
|
throttled = True
|
||||||
|
|
||||||
if itag:
|
|
||||||
itags[itag] = 'https'
|
|
||||||
stream_ids.append(stream_id)
|
|
||||||
|
|
||||||
tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
|
tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
|
||||||
language_preference = (
|
language_preference = (
|
||||||
10 if audio_track.get('audioIsDefault') and 10
|
10 if audio_track.get('audioIsDefault') and 10
|
||||||
|
@ -3616,6 +3641,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
}
|
}
|
||||||
if dct.get('ext'):
|
if dct.get('ext'):
|
||||||
dct['container'] = dct['ext'] + '_dash'
|
dct['container'] = dct['ext'] + '_dash'
|
||||||
|
|
||||||
|
if itag:
|
||||||
|
itags[itag].add(('https', dct.get('language')))
|
||||||
|
stream_ids.append(stream_id)
|
||||||
yield dct
|
yield dct
|
||||||
|
|
||||||
needs_live_processing = self._needs_live_processing(live_status, duration)
|
needs_live_processing = self._needs_live_processing(live_status, duration)
|
||||||
|
@ -3636,13 +3665,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
skip_manifests.add('dash')
|
skip_manifests.add('dash')
|
||||||
|
|
||||||
def process_manifest_format(f, proto, itag):
|
def process_manifest_format(f, proto, itag):
|
||||||
if itag in itags:
|
key = (proto, f.get('language'))
|
||||||
if itags[itag] == proto or f'{itag}-{proto}' in itags:
|
if key in itags[itag]:
|
||||||
return False
|
return False
|
||||||
itag = f'{itag}-{proto}'
|
itags[itag].add(key)
|
||||||
if itag:
|
|
||||||
|
if any(p != proto for p, _ in itags[itag]):
|
||||||
|
f['format_id'] = f'{itag}-{proto}'
|
||||||
|
elif itag:
|
||||||
f['format_id'] = itag
|
f['format_id'] = itag
|
||||||
itags[itag] = proto
|
|
||||||
|
|
||||||
f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
|
f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
|
||||||
if f['quality'] == -1 and f.get('height'):
|
if f['quality'] == -1 and f.get('height'):
|
||||||
|
|
Loading…
Reference in a new issue