[YouTube] Age-gate bypass implementation (#575)

* Calling the API with `clientScreen=EMBED` allows access to most age-gated videos - discovered by @ccdffddfddfdsfedeee (https://github.com/yt-dlp/yt-dlp/issues/574#issuecomment-887171136)
* Adds clients: (web/android/ios)_(embedded/agegate), mweb_agegate
* Renamed mobile_web to mweb

Closes #574

Authored by pukkandan, MinePlayersPE
This commit is contained in:
MinePlayersPE 2021-07-27 16:40:44 +07:00 committed by GitHub
parent 2a9c6dcd22
commit c0bc527bca
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 105 additions and 85 deletions

View file

@ -1354,7 +1354,7 @@ Some extractors accept additional arguments which can be passed using `--extract
The following extractors use this feature:
* **youtube**
* `skip`: `hls` or `dash` (or both) to skip download of the respective manifests
* `player_client`: Clients to extract video data from - one or more of `web`, `android`, `ios`, `mobile_web`, `web_music`, `android_music`, `ios_music` or `all`. By default, `android,web` is used. If the URL is from `music.youtube.com`, `android,web,android_music,web_music` is used
* `player_client`: Clients to extract video data from - one or more of `web`, `android`, `ios`, `mweb`, `web_music`, `android_music`, `ios_music`, `web_embedded`, `android_embedded`, `ios_embedded`, `web_agegate`, `android_agegate`, `ios_agegate`, `mweb_agegate` or `all`. By default, `android,web` is used. If the URL is from `music.youtube.com`, `android,web,android_music,web_music` is used. If age-gate is detected, the `_agegate` variants are automatically added.
* `player_skip`: `configs` - skip any requests for client configs and use defaults
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side).
* `max_comments`: maximum number of comments to download (default all).

View file

@ -327,6 +327,21 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 1
},
'WEB_AGEGATE': {
'INNERTUBE_API_VERSION': 'v1',
'INNERTUBE_CLIENT_NAME': 'WEB',
'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'WEB',
'clientVersion': '2.20210622.10.00',
'clientScreen': 'EMBED',
'hl': 'en',
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 1
},
'WEB_REMIX': {
'INNERTUBE_API_VERSION': 'v1',
'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
@ -369,6 +384,21 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 3
},
'ANDROID_AGEGATE': {
'INNERTUBE_API_VERSION': 'v1',
'INNERTUBE_CLIENT_NAME': 'ANDROID',
'INNERTUBE_CLIENT_VERSION': '16.20',
'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'ANDROID',
'clientVersion': '16.20',
'clientScreen': 'EMBED',
'hl': 'en',
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 3
},
'ANDROID_EMBEDDED_PLAYER': {
'INNERTUBE_API_VERSION': 'v1',
'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
@ -410,7 +440,21 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 5
},
'IOS_AGEGATE': {
'INNERTUBE_API_VERSION': 'v1',
'INNERTUBE_CLIENT_NAME': 'IOS',
'INNERTUBE_CLIENT_VERSION': '16.20',
'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'IOS',
'clientVersion': '16.20',
'clientScreen': 'EMBED',
'hl': 'en',
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 5
},
'IOS_MUSIC': {
'INNERTUBE_API_VERSION': 'v1',
@ -454,6 +498,21 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 2
},
'MWEB_AGEGATE': {
'INNERTUBE_API_VERSION': 'v1',
'INNERTUBE_CLIENT_NAME': 'MWEB',
'INNERTUBE_CLIENT_VERSION': '2.20210721.07.00',
'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'MWEB',
'clientVersion': '2.20210721.07.00',
'clientScreen': 'EMBED',
'hl': 'en',
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 2
},
}
_YT_DEFAULT_INNERTUBE_HOSTS = {
@ -467,17 +526,18 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
# Maps each player client name to the identifier of the InnerTube client it
# uses (e.g. 'web_agegate' -> 'WEB_AGEGATE').
# Names that start with '_' are excluded from the user-selectable list built
# in `_get_requested_clients`.
# NOTE(review): the '_'-prefixed entries and 'mobile_web' look like the
# pre-rename names of the entries that follow them — confirm which set is
# current before relying on this listing.
_YT_CLIENTS = {
'android': 'ANDROID',
'android_music': 'ANDROID_MUSIC',
'_android_embedded': 'ANDROID_EMBEDDED_PLAYER',
'_android_agegate': 'ANDROID',
'android_embedded': 'ANDROID_EMBEDDED_PLAYER',
'android_agegate': 'ANDROID_AGEGATE',
'ios': 'IOS',
'ios_music': 'IOS_MUSIC',
'_ios_embedded': 'IOS_MESSAGES_EXTENSION',
'_ios_agegate': 'IOS',
'ios_embedded': 'IOS_MESSAGES_EXTENSION',
'ios_agegate': 'IOS_AGEGATE',
'web': 'WEB',
'web_music': 'WEB_REMIX',
'_web_embedded': 'WEB_EMBEDDED_PLAYER',
'_web_agegate': 'TVHTML5',
'mobile_web': 'MWEB',
'web_embedded': 'WEB_EMBEDDED_PLAYER',
'web_agegate': 'WEB_AGEGATE',
'mweb': 'MWEB',
'mweb_agegate': 'MWEB_AGEGATE',
}
def _get_default_ytcfg(self, client='WEB'):
@ -2366,30 +2426,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'racyCheckOk': True
}
@staticmethod
def _get_video_info_params(video_id, client='TVHTML5'):
GVI_CLIENTS = {
'ANDROID': {
'c': 'ANDROID',
'cver': '16.20',
},
'TVHTML5': {
'c': 'TVHTML5',
'cver': '6.20180913',
},
'IOS': {
'c': 'IOS',
'cver': '16.20'
}
}
query = {
'video_id': video_id,
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
'html5': '1'
}
query.update(GVI_CLIENTS.get(client))
return query
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
@ -2408,42 +2444,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
) or None
def _extract_age_gated_player_response(self, client, video_id, ytcfg, identity_token, player_url, initial_pr):
# Tries to obtain a player response for an age-gated video on behalf of `client`.
# Returns the player response, or None when no workaround is available
# (no '_<client>_embedded' entry in _YT_CLIENTS, or the embed page itself
# reports an age-gate reason).
# get_video_info endpoint seems to be completely dead
# gvi_client is hard-coded to None, so the get_video_info branch below is never taken
gvi_client = None # self._YT_CLIENTS.get(f'_{client}_agegate')
if gvi_client:
pr = self._parse_json(traverse_obj(
compat_parse_qs(self._download_webpage(
self.http_scheme() + '//www.youtube.com/get_video_info', video_id,
'Refetching age-gated %s info webpage' % gvi_client.lower(),
'unable to download video info webpage', fatal=False,
query=self._get_video_info_params(video_id, client=gvi_client))),
('player_response', 0), expected_type=str) or '{}', video_id)
if pr:
return pr
self.report_warning('Falling back to embedded-only age-gate workaround')
# The embedded workaround needs a matching '_<client>_embedded' client
if not self._YT_CLIENTS.get(f'_{client}_embedded'):
return
embed_webpage = None
# The embed page is only fetched for the 'web' client, and only when the
# user has not asked to skip config requests via player_skip=configs
if client == 'web' and 'configs' not in self._configuration_arg('player_skip'):
embed_webpage = self._download_webpage(
'https://www.youtube.com/embed/%s?html5=1' % video_id,
video_id=video_id, note=f'Downloading age-gated {client} embed config')
# NOTE(review): presumably evaluated even when no embed page was downloaded
# (embed_webpage stays None) — nesting is not visible here, confirm
ytcfg_age = self.extract_ytcfg(video_id, embed_webpage) or {}
# If we extracted the embed webpage, it'll tell us if we can view the video
embedded_pr = self._parse_json(
traverse_obj(ytcfg_age, ('PLAYER_VARS', 'embedded_player_response'), expected_type=str) or '{}',
video_id=video_id)
embedded_ps_reason = traverse_obj(embedded_pr, ('playabilityStatus', 'reason'), expected_type=str) or ''
# Give up when the embedded player response carries an age-gate reason
if embedded_ps_reason in self._AGE_GATE_REASONS:
return
# Retry through the '_<client>_embedded' client; the embed ytcfg is preferred
# over the caller's ytcfg, and is passed as player ytcfg only for 'web'
return self._extract_player_response(
f'_{client}_embedded', video_id,
ytcfg_age or ytcfg, ytcfg_age if client == 'web' else {},
identity_token, player_url, initial_pr)
def _get_requested_clients(self, url, smuggled_data):
requested_clients = []
allowed_clients = [client for client in self._YT_CLIENTS.keys() if client[:1] != '_']
@ -2463,6 +2463,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return orderedSet(requested_clients)
def _extract_player_ytcfg(self, client, video_id):
url = {
'web_music': 'https://music.youtube.com',
'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
}.get(client)
if not url:
return {}
webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
return self.extract_ytcfg(video_id, webpage) or {}
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
initial_pr = None
if webpage:
@ -2470,30 +2480,40 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
video_id, 'initial player response')
for client in clients:
original_clients = clients
clients = clients[::-1]
while clients:
client = clients.pop()
player_ytcfg = master_ytcfg if client == 'web' else {}
if client == 'web' and initial_pr:
pr = initial_pr
else:
if client == 'web_music' and 'configs' not in self._configuration_arg('player_skip'):
ytm_webpage = self._download_webpage(
'https://music.youtube.com',
video_id, fatal=False, note='Downloading remix client config')
player_ytcfg = self.extract_ytcfg(video_id, ytm_webpage) or {}
pr = self._extract_player_response(
client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr)
if 'configs' not in self._configuration_arg('player_skip'):
player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg
if client == 'web_embedded':
# If we extracted the embed webpage, it'll tell us if we can view the video
embedded_pr = self._parse_json(
traverse_obj(player_ytcfg, ('PLAYER_VARS', 'embedded_player_response'), expected_type=str) or '{}',
video_id=video_id)
embedded_ps_reason = traverse_obj(embedded_pr, ('playabilityStatus', 'reason'), expected_type=str) or ''
if embedded_ps_reason in self._AGE_GATE_REASONS:
self.report_warning(f'Youtube said: {embedded_ps_reason}')
continue
pr = (
initial_pr if client == 'web' and initial_pr
else self._extract_player_response(
client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr))
if pr:
yield pr
if traverse_obj(pr, ('playabilityStatus', 'reason')) in self._AGE_GATE_REASONS:
pr = self._extract_age_gated_player_response(
client, video_id, player_ytcfg or master_ytcfg, identity_token, player_url, initial_pr)
if pr:
yield pr
client = f'{client}_agegate'
if client in self._YT_CLIENTS and client not in original_clients:
clients.append(client)
# Android player_response does not have microFormats which are needed for
# extraction of some data. So we return the initial_pr with formats
# stripped out even if not requested by the user
# See: https://github.com/yt-dlp/yt-dlp/issues/501
if initial_pr and 'web' not in clients:
if initial_pr and 'web' not in original_clients:
initial_pr['streamingData'] = None
yield initial_pr