Youtube api improvements (#2277)

* Put youtube API functions under the YoutubeAPI namespace

* Implement the following endpoints:
  - `next`
  - `player`
  - `resolve_url`

* Allow a ClientConfig to be passed to YoutubeAPI endpoint handlers.

* Add constants for many new clients

* Fix documentation of YoutubeAPI.browse(): comments and search
  results aren't returned by the browse() endpoint but by the next()
  and search() endpoints, respectively.

* Accept gzip compressed data, to help save on bandwidth

* Add debug/trace logging

* Other minor fixes
This commit is contained in:
Samantaz Fox 2021-08-03 02:58:27 +02:00 committed by GitHub
parent c76bd7b45b
commit 5b020e81ca
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 440 additions and 108 deletions

View file

@ -1,6 +1,6 @@
def fetch_channel_playlists(ucid, author, continuation, sort_by)
if continuation
response_json = request_youtube_api_browse(continuation)
response_json = YoutubeAPI.browse(continuation)
continuationItems = response_json["onResponseReceivedActions"]?
.try &.[0]["appendContinuationItemsAction"]["continuationItems"]

View file

@ -61,7 +61,7 @@ def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by =
continuation = produce_channel_videos_continuation(ucid, page,
auto_generated: auto_generated, sort_by: sort_by, v2: true)
return request_youtube_api_browse(continuation)
return YoutubeAPI.browse(continuation)
end
def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")

View file

@ -2,120 +2,450 @@
# This file contains youtube API wrappers
#
# Hard-coded constants required by the API
HARDCODED_API_KEY = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8"
HARDCODED_CLIENT_VERS = "2.20210330.08.00"
module YoutubeAPI
extend self
####################################################################
# make_youtube_api_context(region)
#
# Return, as a Hash, the "context" data required to request the
# youtube API endpoints.
#
def make_youtube_api_context(region : String | Nil) : Hash
return {
"client" => {
"hl" => "en",
"gl" => region || "US", # Can't be empty!
"clientName" => "WEB",
"clientVersion" => HARDCODED_CLIENT_VERS,
# Enumeration used to select one of the clients supported by the API.
#
# Each member is a key of `HARDCODED_CLIENTS`, which holds the client
# name, version, API key and screen strings associated with it.
# The two `*AgeBypass` members reuse the "WEB" / "ANDROID" client names
# of `Web` / `Android`, combined with the "EMBED" screen.
enum ClientType
Web
WebEmbed
WebMobile
WebAgeBypass
Android
AndroidEmbed
AndroidAgeBypass
end
# List of hard-coded values used by the different clients.
#
# Each `ClientType` maps to a NamedTuple of four strings, which are read
# through the `ClientConfig` getters of the same names:
#
#  - name:    sent as `clientName` in the request context
#  - version: sent as `clientVersion` in the request context
#  - api_key: appended to the endpoint URL as the `key` query parameter
#  - screen:  sent as `clientScreen` when not empty. The value "EMBED"
#             additionally makes the context carry a `thirdParty`
#             embed URL.
#
# NOTE(review): the numbers in the trailing comments (56, 55, 3) look
# like YouTube's numeric client IDs -- confirm before relying on them.
HARDCODED_CLIENTS = {
ClientType::Web => {
name: "WEB",
version: "2.20210721.00.00",
api_key: "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8",
screen: "WATCH_FULL_SCREEN",
},
ClientType::WebEmbed => {
name: "WEB_EMBEDDED_PLAYER", # 56
version: "1.20210721.1.0",
api_key: "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8",
screen: "EMBED",
},
ClientType::WebMobile => {
name: "MWEB",
version: "2.20210726.08.00",
api_key: "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8",
screen: "", # None
},
# Same name, version and key as `Web`; only the screen differs.
ClientType::WebAgeBypass => {
name: "WEB",
version: "2.20210721.00.00",
api_key: "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8",
screen: "EMBED",
},
# The only entry that uses a different API key.
ClientType::Android => {
name: "ANDROID",
version: "16.20",
api_key: "AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w",
screen: "", # ??
},
# NOTE(review): this entry and `AndroidAgeBypass` below carry the same
# key as the web clients, not the `Android` one -- confirm it is intended.
ClientType::AndroidEmbed => {
name: "ANDROID_EMBEDDED_PLAYER", # 55
version: "16.20",
api_key: "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8",
screen: "", # None?
},
ClientType::AndroidAgeBypass => {
name: "ANDROID", # 3
version: "16.20",
api_key: "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8",
screen: "EMBED",
},
}
end
####################################################################
# request_youtube_api_browse(continuation)
# request_youtube_api_browse(browse_id, params, region)
#
# Requests the youtubei/v1/browse endpoint with the required headers
# and POST data in order to get a JSON reply in english that can
# be easily parsed.
#
# The region can be provided, default is US.
#
# The requested data can either be:
#
# - A continuation token (ctoken). Depending on this token's
# contents, the returned data can be comments, playlist videos,
# search results, channel community tab, ...
#
# - A playlist ID (parameters MUST be an empty string)
#
def request_youtube_api_browse(continuation : String)
# JSON Request data, required by the API
data = {
"context" => make_youtube_api_context("US"),
"continuation" => continuation,
}
####################################################################
# struct ClientConfig
#
# Data structure used to pass a client configuration to the different
# API endpoints handlers.
#
# Use case examples:
#
# ```
# # Get Norwegian search results
# conf_1 = ClientConfig.new(region: "NO")
# YoutubeAPI::search("Kollektivet", params: "", client_config: conf_1)
#
# # Use the Android client to request video streams URLs
# conf_2 = ClientConfig.new(client_type: ClientType::Android)
# YoutubeAPI::player(video_id: "dQw4w9WgXcQ", params: "", client_config: conf_2)
#
# # Proxy request through a Russian proxy
# conf_3 = ClientConfig.new(proxy_region: "RU")
# YoutubeAPI::next({"videoId" => "dQw4w9WgXcQ"}, client_config: conf_3)
# ```
#
struct ClientConfig
# Type of client to emulate.
# See `enum ClientType` and `HARDCODED_CLIENTS`.
property client_type : ClientType
return _youtube_api_post_json("/youtubei/v1/browse", data)
end
# Region to provide to youtube, e.g. to alter search results
# (this is passed as the `gl` parameter).
property region : String | Nil
def request_youtube_api_browse(browse_id : String, params : String, region : String = "US")
# JSON Request data, required by the API
data = {
"browseId" => browse_id,
"context" => make_youtube_api_context(region),
}
# ISO code of country where the proxy is located.
# Used in case of geo-restricted videos.
property proxy_region : String | Nil
# Append the additionnal parameters if those were provided
# (this is required for channel info, playlist and community, e.g)
if params != ""
data["params"] = params
# Initialization function
def initialize(
*,
@client_type = ClientType::Web,
@region = "US",
@proxy_region = nil
)
end
# Getter functions that provides easy access to hardcoded clients
# parameters (name/version strings and related API key)
def name : String
HARDCODED_CLIENTS[@client_type][:name]
end
# :ditto:
def version : String
HARDCODED_CLIENTS[@client_type][:version]
end
# :ditto:
def api_key : String
HARDCODED_CLIENTS[@client_type][:api_key]
end
# :ditto:
def screen : String
HARDCODED_CLIENTS[@client_type][:screen]
end
# Convert to string, for logging purposes
def to_s
return {
client_type: self.name,
region: @region,
proxy_region: @proxy_region,
}.to_s
end
end
return _youtube_api_post_json("/youtubei/v1/browse", data)
end
# Default client config, used if nothing is passed
DEFAULT_CLIENT_CONFIG = ClientConfig.new
####################################################################
# request_youtube_api_search(search_query, params, region)
#
# Requests the youtubei/v1/search endpoint with the required headers
# and POST data in order to get a JSON reply. As the search results
# vary depending on the region, a region code can be specified in
# order to get non-US results.
#
# The requested data is a search string, with some additional
# paramters, formatted as a base64 string.
#
def request_youtube_api_search(search_query : String, params : String, region = nil)
# JSON Request data, required by the API
data = {
"query" => search_query,
"context" => make_youtube_api_context(region),
"params" => params,
}
####################################################################
# make_context(client_config)
#
# Return, as a Hash, the "context" data required to request the
# youtube API endpoints.
#
private def make_context(client_config : ClientConfig | Nil) : Hash
# Use the default client config if nil is passed
client_config ||= DEFAULT_CLIENT_CONFIG
return _youtube_api_post_json("/youtubei/v1/search", data)
end
client_context = {
"client" => {
"hl" => "en",
"gl" => client_config.region || "US", # Can't be empty!
"clientName" => client_config.name,
"clientVersion" => client_config.version,
"thirdParty" => {
"embedUrl" => "", # Placeholder
},
},
}
####################################################################
# _youtube_api_post_json(endpoint, data)
#
# Internal function that does the actual request to youtube servers
# and handles errors.
#
# The requested data is an endpoint (URL without the domain part)
# and the data as a Hash object.
#
def _youtube_api_post_json(endpoint, data)
# Send the POST request and parse result
response = YT_POOL.client &.post(
"#{endpoint}?key=#{HARDCODED_API_KEY}",
headers: HTTP::Headers{"content-type" => "application/json; charset=UTF-8"},
body: data.to_json
# Add some more context if it exists in the client definitions
if !client_config.screen.empty?
client_context["client"]["clientScreen"] = client_config.screen
end
# Replacing/removing the placeholder is easier than trying to
# merge two different Hash structures.
if client_config.screen == "EMBED"
client_context["client"]["thirdParty"] = {
"embedUrl" => "https://www.youtube.com/embed/dQw4w9WgXcQ",
}
else
client_context["client"].delete("thirdParty")
end
return client_context
end
####################################################################
# browse(continuation, client_config?)
# browse(browse_id, params, client_config?)
#
# Requests the youtubei/v1/browse endpoint with the required headers
# and POST data in order to get a JSON reply in english that can
# be easily parsed.
#
# Both forms can take an optional ClientConfig parameter (see
# `struct ClientConfig` above for more details).
#
# The requested data can either be:
#
# - A continuation token (ctoken). Depending on this token's
# contents, the returned data can be playlist videos, channel
# community tab content, channel info, ...
#
# - A playlist ID (parameters MUST be an empty string)
#
def browse(continuation : String, client_config : ClientConfig | Nil = nil)
  # Continuation form: the POST body only carries the client context
  # and the continuation token, in that order.
  payload = {
    "context"      => make_context(client_config),
    "continuation" => continuation,
  }

  # The same (possibly nil) config is forwarded so that the API key
  # and proxy selection match the context built above.
  _post_json("/youtubei/v1/browse", payload, client_config)
end
# :ditto:
def browse(
browse_id : String,
*, # Force the following paramters to be passed by name
params : String,
client_config : ClientConfig | Nil = nil
)
# JSON Request data, required by the API
data = {
"browseId" => browse_id,
"context" => self.make_context(client_config),
}
initial_data = JSON.parse(response.body).as_h
# Append the additional parameters if any were provided
# (this is required for channel info, playlists and community, e.g.)
if params != ""
data["params"] = params
end
# Error handling
if initial_data.has_key?("error")
code = initial_data["error"]["code"]
message = initial_data["error"]["message"].to_s.sub(/(\\n)+\^$/, "")
raise InfoException.new("Could not extract JSON. Youtube API returned \
error #{code} with message:<br>\"#{message}\"")
return self._post_json("/youtubei/v1/browse", data, client_config)
end
return initial_data
end
####################################################################
# next(continuation, client_config?)
# next(data, client_config?)
#
# Requests the youtubei/v1/next endpoint with the required headers
# and POST data in order to get a JSON reply in english that can
# be easily parsed.
#
# Both forms can take an optional ClientConfig parameter (see
# `struct ClientConfig` above for more details).
#
# The requested data can be:
#
# - A continuation token (ctoken). Depending on this token's
# contents, the returned data can be videos comments,
# their replies, ... In this case, the string must be passed
# directly to the function. E.g:
#
# ```
# YoutubeAPI::next("ABCDEFGH_abcdefgh==")
# ```
#
# - Arbitrary parameters, in Hash form. See examples below for
# known examples of arbitrary data that can be passed to YouTube:
#
# ```
# # Get the videos related to a specific video ID
# YoutubeAPI::next({"videoId" => "dQw4w9WgXcQ"})
#
# # Get a playlist video's details
# YoutubeAPI::next({
# "videoId" => "9bZkp7q19f0",
# "playlistId" => "PL_oFlvgqkrjUVQwiiE3F3k3voF4tjXeP0",
# })
# ```
#
def next(continuation : String, *, client_config : ClientConfig | Nil = nil)
  # Continuation form: send the client context along with the token
  # that was passed in.
  payload = {
    "context"      => make_context(client_config),
    "continuation" => continuation,
  }

  _post_json("/youtubei/v1/next", payload, client_config)
end
# :ditto:
def next(data : Hash, *, client_config : ClientConfig | Nil = nil)
  # Arbitrary-data form: the caller's hash is left untouched, a merged
  # copy carrying the "context" entry is what gets posted.
  context_entry = {"context" => make_context(client_config)}
  payload = data.merge(context_entry)

  _post_json("/youtubei/v1/next", payload, client_config)
end
# Allow a NamedTuple to be passed, too.
def next(data : NamedTuple, *, client_config : ClientConfig | Nil = nil)
  # Convert to a Hash and delegate to the Hash overload. The explicit
  # `self.` receiver is required: a bare `next` is a language keyword.
  as_hash = data.to_h
  self.next(as_hash, client_config: client_config)
end
####################################################################
# player(video_id, params, client_config?)
#
# Requests the youtubei/v1/player endpoint with the required headers
# and POST data in order to get a JSON reply.
#
# The requested data is a video ID (`v=` parameter), with some
# additional parameters, formatted as a base64 string.
#
# An optional ClientConfig parameter can be passed, too (see
# `struct ClientConfig` above for more details).
#
def player(
  video_id : String,
  *, # Everything after the splat has to be passed by name
  params : String,
  client_config : ClientConfig | Nil = nil
)
  # Base POST body: the video ID and the client context
  payload = {
    "videoId" => video_id,
    "context" => make_context(client_config),
  }

  # "params" is only sent when the caller provided a non-empty value
  payload["params"] = params unless params.empty?

  _post_json("/youtubei/v1/player", payload, client_config)
end
####################################################################
# resolve_url(url, client_config?)
#
# Requests the youtubei/v1/navigation/resolve_url endpoint with the
# required headers and POST data in order to get a JSON reply.
#
# An optional ClientConfig parameter can be passed, too (see
# `struct ClientConfig` above for more details). When it is omitted,
# the default client configuration is used.
#
# Output:
#
# ```
# # Valid channel "brand URL" gives the related UCID and browse ID
# channel_a = YoutubeAPI.resolve_url("https://youtube.com/c/google")
# channel_a # => {
#   "endpoint": {
#     "browseEndpoint": {
#       "params": "EgC4AQA%3D",
#       "browseId":"UCK8sQmJBp8GCxrOtXWBpyEA"
#     },
#     ...
#   }
# }
#
# # An invalid URL raises an InfoException
# channel_b = YoutubeAPI.resolve_url("https://youtube.com/c/invalid")
# ```
#
def resolve_url(url : String, client_config : ClientConfig | Nil = nil)
  data = {
    "context" => self.make_context(client_config),
    "url"     => url,
  }

  # `_post_json` declares `client_config` without a default value, so it
  # must always be passed explicitly (nil selects the default client).
  return self._post_json("/youtubei/v1/navigation/resolve_url", data, client_config)
end
####################################################################
# search(search_query, params, client_config?)
#
# Requests the youtubei/v1/search endpoint with the required headers
# and POST data in order to get a JSON reply. As the search results
# vary depending on the region, a region code can be specified in
# order to get non-US results.
#
# The requested data is a search string, with some additional
# parameters, formatted as a base64 string.
#
# An optional ClientConfig parameter can be passed, too (see
# `struct ClientConfig` above for more details).
#
def search(
  search_query : String,
  params : String,
  client_config : ClientConfig | Nil = nil
)
  # POST body: the query string, the client context and the search
  # parameters. Unlike browse/player, "params" is always sent here,
  # even when it is empty.
  payload = {
    "query"   => search_query,
    "context" => make_context(client_config),
    "params"  => params,
  }

  _post_json("/youtubei/v1/search", payload, client_config)
end
####################################################################
# _post_json(endpoint, data, client_config?)
#
# Internal function that does the actual request to youtube servers
# and handles errors.
#
# The requested data is an endpoint (URL without the domain part)
# and the data as a Hash object.
#
def _post_json(
  endpoint : String,
  data : Hash,
  client_config : ClientConfig | Nil
) : Hash(String, JSON::Any)
  # Fall back to the default client when the caller passed nil
  config = client_config || DEFAULT_CLIENT_CONFIG

  # The API key of the selected client goes in the query string
  request_url = "#{endpoint}?key=#{config.api_key}"

  # NOTE(review): Accept-Encoding is set by hand here; confirm that the
  # client behind YT_POOL hands back an already-decompressed body.
  request_headers = HTTP::Headers{
    "Content-Type"    => "application/json; charset=UTF-8",
    "Accept-Encoding" => "gzip",
  }

  # Logging. `to_s` is called explicitly on the config: ClientConfig only
  # overrides the zero-argument `to_s`, which plain interpolation bypasses.
  LOGGER.debug("YoutubeAPI: Using endpoint: \"#{endpoint}\"")
  LOGGER.trace("YoutubeAPI: ClientConfig: #{config.to_s}")
  LOGGER.trace("YoutubeAPI: POST data: #{data.to_s}")

  # Send the POST request, through a region-specific client when a
  # proxy region was requested.
  response =
    if config.proxy_region
      YT_POOL.client(
        config.proxy_region,
        &.post(request_url, headers: request_headers, body: data.to_json)
      )
    else
      YT_POOL.client &.post(
        request_url, headers: request_headers, body: data.to_json
      )
    end

  # Convert result to Hash
  parsed = JSON.parse(response.body).as_h

  # Nominal case: no "error" object in the reply
  return parsed unless parsed.has_key?("error")

  # Error case: log the details, then raise
  api_error = parsed["error"]
  code = api_error["code"]
  message = api_error["message"].to_s.sub(/(\\n)+\^$/, "")

  LOGGER.error("YoutubeAPI: Got error #{code} when requesting #{endpoint}")
  LOGGER.error("YoutubeAPI: #{message}")
  LOGGER.info("YoutubeAPI: POST data was: #{data.to_s}")

  raise InfoException.new("Could not extract JSON. Youtube API returned error #{code} with message:<br>\"#{message}\"")
end
end # End of module

View file

@ -361,7 +361,7 @@ def fetch_playlist(plid, locale)
plid = "UU#{plid.lchop("UC")}"
end
initial_data = request_youtube_api_browse("VL" + plid, params: "")
initial_data = YoutubeAPI.browse("VL" + plid, params: "")
playlist_sidebar_renderer = initial_data["sidebar"]?.try &.["playlistSidebarRenderer"]?.try &.["items"]?
raise InfoException.new("Could not extract playlistSidebarRenderer.") if !playlist_sidebar_renderer
@ -442,9 +442,9 @@ def get_playlist_videos(db, playlist, offset, locale = nil, continuation = nil)
offset = (offset / 100).to_i64 * 100_i64
ctoken = produce_playlist_continuation(playlist.id, offset)
initial_data = request_youtube_api_browse(ctoken)
initial_data = YoutubeAPI.browse(ctoken)
else
initial_data = request_youtube_api_browse("VL" + playlist.id, params: "")
initial_data = YoutubeAPI.browse("VL" + playlist.id, params: "")
end
return extract_playlist_videos(initial_data)

View file

@ -244,7 +244,7 @@ def channel_search(query, page, channel)
end
continuation = produce_channel_search_continuation(ucid, query, page)
response_json = request_youtube_api_browse(continuation)
response_json = YoutubeAPI.browse(continuation)
continuationItems = response_json["onResponseReceivedActions"]?
.try &.[0]["appendContinuationItemsAction"]["continuationItems"]
@ -263,7 +263,8 @@ end
def search(query, search_params = produce_search_params(content_type: "all"), region = nil)
return 0, [] of SearchItem if query.empty?
initial_data = request_youtube_api_search(query, search_params, region)
client_config = YoutubeAPI::ClientConfig.new(region: region)
initial_data = YoutubeAPI.search(query, search_params, client_config: client_config)
items = extract_items(initial_data)
return items.size, items

View file

@ -14,7 +14,8 @@ def fetch_trending(trending_type, region, locale)
params = ""
end
initial_data = request_youtube_api_browse("FEtrending", params: params, region: region)
client_config = YoutubeAPI::ClientConfig.new(region: region)
initial_data = YoutubeAPI.browse("FEtrending", params: params, client_config: client_config)
trending = extract_videos(initial_data)
return {trending, plid}