From 2ae778b8fc56087462e48d1e31208c2a398409c1 Mon Sep 17 00:00:00 2001 From: coletdev Date: Thu, 9 Jun 2022 10:18:01 +1200 Subject: [PATCH] [extractor/youtube] Add `innertube_host` and `innertube_key` extractor args (#3916) Allows user to override Innertube API host or key for all requests Authored by: coletdjnz --- README.md | 4 ++++ yt_dlp/extractor/youtube.py | 12 +++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 9424f67a0..5fd7880ff 100644 --- a/README.md +++ b/README.md @@ -1716,6 +1716,10 @@ The following extractors use this feature: * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) * `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all` * E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total +* `innertube_host`: Innertube API host to use for all API requests + * e.g. `studio.youtube.com`, `youtubei.googleapis.com` + * Note: Cookies exported from `www.youtube.com` will not work with hosts other than `*.youtube.com` +* `innertube_key`: Innertube API key to use for all API requests #### youtubetab (YouTube playlists, channels, feeds, etc.) * `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index d44f16bc0..9921c8394 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -421,6 +421,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor): ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'], lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client) + def _select_api_hostname(self, req_api_hostname, default_client=None): + return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0] + or req_api_hostname or self._get_innertube_host(default_client or 'web')) + def _extract_api_key(self, ytcfg=None, default_client='web'): return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client) @@ -469,11 +473,13 @@ class YoutubeBaseInfoExtractor(InfoExtractor): real_headers.update({'content-type': 'application/json'}) if headers: real_headers.update(headers) + api_key = (self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0] + or api_key or self._extract_api_key(default_client=default_client)) return self._download_json( - f'https://{api_hostname or self._get_innertube_host(default_client)}/youtubei/v1/{ep}', + f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}', video_id=video_id, fatal=fatal, note=note, errnote=errnote, data=json.dumps(data).encode('utf8'), headers=real_headers, - query={'key': api_key or self._extract_api_key(), 'prettyPrint': 'false'}) + query={'key': api_key, 'prettyPrint': 'false'}) def extract_yt_initial_data(self, item_id, webpage, fatal=True): return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal) @@ -545,7 +551,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): self, *, ytcfg=None, account_syncid=None, session_index=None, visitor_data=None, identity_token=None, api_hostname=None, default_client='web'): - origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client)) + origin = 'https://' + (self._select_api_hostname(api_hostname, default_client)) headers = { 'X-YouTube-Client-Name': compat_str( self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),