[extractor/youtube] Add innertube_host and innertube_key extractor args (#3916)

Allows the user to override the Innertube API host or key for all requests.
Authored by: coletdjnz
This commit is contained in:
coletdev 2022-06-09 10:18:01 +12:00 committed by GitHub
parent c82a4a8fce
commit 2ae778b8fc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 13 additions and 3 deletions

View file

@ -1716,6 +1716,10 @@ The following extractors use this feature:
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all` * `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all`
* E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total * E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total
* `innertube_host`: Innertube API host to use for all API requests
* E.g. `studio.youtube.com`, `youtubei.googleapis.com`
* Note: Cookies exported from `www.youtube.com` will not work with hosts other than `*.youtube.com`
* `innertube_key`: Innertube API key to use for all API requests
#### youtubetab (YouTube playlists, channels, feeds, etc.) #### youtubetab (YouTube playlists, channels, feeds, etc.)
* `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details) * `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details)

View file

@ -421,6 +421,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'], ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client) lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """Choose the Innertube API hostname to use for a request.

    Candidates are tried in order and the first non-empty one is returned:

    1. the `innertube_host` extractor argument (looked up under the
       YoutubeIE key),
    2. `req_api_hostname`, the host requested by the caller,
    3. the result of `_get_innertube_host` for `default_client`
       (or `'web'` when no default client is given).
    """
    # A user-supplied override takes precedence over everything else.
    user_host = self._configuration_arg(
        'innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if user_host:
        return user_host

    if req_api_hostname:
        return req_api_hostname

    # Only fall back to the client's host when nothing else was given.
    return self._get_innertube_host(default_client or 'web')
def _extract_api_key(self, ytcfg=None, default_client='web'): def _extract_api_key(self, ytcfg=None, default_client='web'):
return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client) return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
@ -469,11 +473,13 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
real_headers.update({'content-type': 'application/json'}) real_headers.update({'content-type': 'application/json'})
if headers: if headers:
real_headers.update(headers) real_headers.update(headers)
api_key = (self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
or api_key or self._extract_api_key(default_client=default_client))
return self._download_json( return self._download_json(
f'https://{api_hostname or self._get_innertube_host(default_client)}/youtubei/v1/{ep}', f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}',
video_id=video_id, fatal=fatal, note=note, errnote=errnote, video_id=video_id, fatal=fatal, note=note, errnote=errnote,
data=json.dumps(data).encode('utf8'), headers=real_headers, data=json.dumps(data).encode('utf8'), headers=real_headers,
query={'key': api_key or self._extract_api_key(), 'prettyPrint': 'false'}) query={'key': api_key, 'prettyPrint': 'false'})
def extract_yt_initial_data(self, item_id, webpage, fatal=True): def extract_yt_initial_data(self, item_id, webpage, fatal=True):
return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal) return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)
@ -545,7 +551,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
self, *, ytcfg=None, account_syncid=None, session_index=None, self, *, ytcfg=None, account_syncid=None, session_index=None,
visitor_data=None, identity_token=None, api_hostname=None, default_client='web'): visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client)) origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))
headers = { headers = {
'X-YouTube-Client-Name': compat_str( 'X-YouTube-Client-Name': compat_str(
self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)), self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),