Add parser for categories (shelfRenderer)

This commit adds a new parser for YT's shelfRenderers which are
typically used to denote different categories.The code for featured
channels parsing has also been moved to use the new parser but some
additional refactoring are needed there.

The ContinuationExtractor has also been improved and is now capable of
extraction continuation data that is packaged under
"appendContinuationItemsAction"

In additional this commit adds some useful helper functions to extract
the current selected tab the continuation token. This is to mainly
reduce code size and repetition.
--
This cherry-picked commit also removes the code for parsing featured
channels present on the original.

(cherry picked from commit 8000d538dbbf1eb9c78e000b1449926ba3b24da9)
This commit is contained in:
syeopite 2021-05-07 05:13:53 -07:00
parent 1323b94b7a
commit a50f64f6e9
No known key found for this signature in database
GPG key ID: 6FA616E5A5294A82
5 changed files with 389 additions and 244 deletions

View file

@ -13,6 +13,7 @@ private ITEM_PARSERS = {
ChannelParser.new,
GridPlaylistParser.new,
PlaylistParser.new,
CategoryParser.new,
}
private struct AuthorFallback
@ -95,7 +96,7 @@ end
private class ChannelParser < ItemParser
def process(item, author_fallback)
if item_contents = item["channelRenderer"]?
if item_contents = (item["channelRenderer"]? || item["gridChannelRenderer"]?)
return self.parse(item_contents, author_fallback)
end
end
@ -194,6 +195,88 @@ private class PlaylistParser < ItemParser
end
end
private class CategoryParser < ItemParser
def process(item, author_fallback)
if item_contents = item["shelfRenderer"]?
return self.parse(item_contents, author_fallback)
end
end
def parse(item_contents, author_fallback)
# Title extraction is a bit complicated. There are two possible routes for it
# as well as times when the title attribute just isn't sent by YT.
title_container = item_contents["title"]? || ""
if !title_container.is_a? String
if title = title_container["simpleText"]?
title = title.as_s
else
title = title_container["runs"][0]["text"].as_s
end
else
title = ""
end
browse_endpoint = item_contents["endpoint"]?.try &.["browseEndpoint"] || nil
browse_endpoint_data = ""
category_type = 0 # 0: Video, 1: Channels, 2: Playlist/feed, 3: trending
# There's no endpoint data for video and trending category
if !item_contents["endpoint"]?
if !item_contents["videoId"]?
category_type = 3
end
end
if !browse_endpoint.nil?
# Playlist/feed categories doesn't need the params value (nor is it even included in yt response)
# instead it uses the browseId parameter. So if there isn't a params value we can assume the
# category is a playlist/feed
if browse_endpoint["params"]?
browse_endpoint_data = browse_endpoint["params"].as_s
category_type = 1
else
browse_endpoint_data = browse_endpoint["browseId"].as_s
category_type = 2
end
end
# Sometimes a category can have badges.
badges = [] of Tuple(String, String) # (Badge style, label)
item_contents["badges"]?.try &.as_a.each do |badge|
badge = badge["metadataBadgeRenderer"]
badges << {badge["style"].as_s, badge["label"].as_s}
end
# Content parsing
contents = [] of SearchItem
# Content could be in three locations.
if content_container = item_contents["content"]["horizontalListRenderer"]?
elsif content_container = item_contents["content"]["expandedShelfContentsRenderer"]
elsif content_container = item_contents["content"]["verticalListRenderer"]
else
content_container = item_contents["contents"]
end
raw_contents = content_container["items"].as_a
raw_contents.each do |item|
result = extract_item(item)
if !result.nil?
contents << result
end
end
Category.new({
title: title,
contents: contents,
browse_endpoint_data: browse_endpoint_data,
continuation_token: nil,
badges: badges,
})
end
end
# The following are the extractors for extracting an array of items from
# the internal Youtube API's JSON response. The result is then packaged into
# a structure we can more easily use via the parsers above. Their internals are
@ -217,19 +300,16 @@ private class YoutubeTabsExtractor < ItemsContainerExtractor
private def extract(target)
raw_items = [] of JSON::Any
selected_tab = extract_selected_tab(target["tabs"])
content = selected_tab["tabRenderer"]["content"]
content = selected_tab["content"]
content["sectionListRenderer"]["contents"].as_a.each do |renderer_container|
renderer_container = renderer_container["itemSectionRenderer"]
renderer_container_contents = renderer_container["contents"].as_a[0]
# Shelf renderer usually refer to a category and would need special handling once
# An extractor for categories are added. But for now it is just used to
# extract items for the trending page
# Category extraction
if items_container = renderer_container_contents["shelfRenderer"]?
if items_container["content"]["expandedShelfContentsRenderer"]?
items_container = items_container["content"]["expandedShelfContentsRenderer"]
end
raw_items << renderer_container_contents
next
elsif items_container = renderer_container_contents["gridRenderer"]?
else
items_container = renderer_container_contents
@ -265,6 +345,8 @@ private class ContinuationExtractor < ItemsContainerExtractor
def process(initial_data)
if target = initial_data["continuationContents"]?
self.extract(target)
elsif target = initial_data["appendContinuationItemsAction"]?
self.extract(target)
end
end
@ -272,13 +354,16 @@ private class ContinuationExtractor < ItemsContainerExtractor
raw_items = [] of JSON::Any
if content = target["gridContinuation"]?
raw_items = content["items"].as_a
elsif content = target["continuationItems"]?
raw_items = content.as_a
end
return raw_items
end
end
def extract_item(item : JSON::Any, author_fallback : String? = nil, author_id_fallback : String? = nil)
def extract_item(item : JSON::Any, author_fallback : String? = nil,
author_id_fallback : String? = nil)
# Parses an item from Youtube's JSON response into a more usable structure.
# The end result can either be a SearchVideo, SearchPlaylist or SearchChannel.
author_fallback = AuthorFallback.new(author_fallback, author_id_fallback)
@ -295,13 +380,20 @@ def extract_item(item : JSON::Any, author_fallback : String? = nil, author_id_fa
# TODO radioRenderer, showRenderer, shelfRenderer, horizontalCardListRenderer, searchPyvRenderer
end
def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil)
def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil,
author_id_fallback : String? = nil)
items = [] of SearchItem
initial_data = initial_data["contents"]?.try &.as_h || initial_data["response"]?.try &.as_h || initial_data
if unpackaged_data = initial_data["contents"]?.try &.as_h
elsif unpackaged_data = initial_data["response"]?.try &.as_h
elsif unpackaged_data = initial_data["onResponseReceivedActions"]?.try &.as_a.[0].as_h
else
unpackaged_data = initial_data
end
# This is identicial to the parser cyling of extract_item().
ITEM_CONTAINER_EXTRACTOR.each do |extractor|
results = extractor.process(initial_data)
results = extractor.process(unpackaged_data)
if !results.nil?
results.each do |item|
parsed_result = extract_item(item, author_fallback, author_id_fallback)
@ -310,6 +402,7 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
items << parsed_result
end
end
return items
end
end

View file

@ -248,12 +248,37 @@ def html_to_content(description_html : String)
end
def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil)
extract_items(initial_data, author_fallback, author_id_fallback).select(&.is_a?(SearchVideo)).map(&.as(SearchVideo))
extracted = extract_items(initial_data, author_fallback, author_id_fallback)
if extracted.is_a?(Category)
target = extracted.contents
else
target = extracted
end
return target.select(&.is_a?(SearchVideo)).map(&.as(SearchVideo))
end
def extract_selected_tab(tabs)
# Extract the selected tab from the array of tabs Youtube returns
return selected_target = tabs.as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]
return selected_target = tabs.as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]["tabRenderer"]
end
def fetch_continuation_token(items : Array(JSON::Any))
# Fetches the continuation token from an array of items
return items.last["continuationItemRenderer"]?
.try &.["continuationEndpoint"]["continuationCommand"]["token"].as_s
end
def fetch_continuation_token(initial_data : Hash(String, JSON::Any))
# Fetches the continuation token from initial data
if initial_data["onResponseReceivedActions"]?
continuation_items = initial_data["onResponseReceivedActions"][0]["appendContinuationItemsAction"]["continuationItems"]
else
tab = extract_selected_tab(initial_data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"])
continuation_items = tab["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"]["items"]
end
return fetch_continuation_token(continuation_items.as_a)
end
def check_enum(db, enum_name, struct_type = nil)

View file

@ -0,0 +1,256 @@
struct SearchVideo
include DB::Serializable
property title : String
property id : String
property author : String
property ucid : String
property published : Time
property views : Int64
property description_html : String
property length_seconds : Int32
property live_now : Bool
property premium : Bool
property premiere_timestamp : Time?
def to_xml(auto_generated, query_params, xml : XML::Builder)
query_params["v"] = self.id
xml.element("entry") do
xml.element("id") { xml.text "yt:video:#{self.id}" }
xml.element("yt:videoId") { xml.text self.id }
xml.element("yt:channelId") { xml.text self.ucid }
xml.element("title") { xml.text self.title }
xml.element("link", rel: "alternate", href: "#{HOST_URL}/watch?#{query_params}")
xml.element("author") do
if auto_generated
xml.element("name") { xml.text self.author }
xml.element("uri") { xml.text "#{HOST_URL}/channel/#{self.ucid}" }
else
xml.element("name") { xml.text author }
xml.element("uri") { xml.text "#{HOST_URL}/channel/#{ucid}" }
end
end
xml.element("content", type: "xhtml") do
xml.element("div", xmlns: "http://www.w3.org/1999/xhtml") do
xml.element("a", href: "#{HOST_URL}/watch?#{query_params}") do
xml.element("img", src: "#{HOST_URL}/vi/#{self.id}/mqdefault.jpg")
end
xml.element("p", style: "word-break:break-word;white-space:pre-wrap") { xml.text html_to_content(self.description_html) }
end
end
xml.element("published") { xml.text self.published.to_s("%Y-%m-%dT%H:%M:%S%:z") }
xml.element("media:group") do
xml.element("media:title") { xml.text self.title }
xml.element("media:thumbnail", url: "#{HOST_URL}/vi/#{self.id}/mqdefault.jpg",
width: "320", height: "180")
xml.element("media:description") { xml.text html_to_content(self.description_html) }
end
xml.element("media:community") do
xml.element("media:statistics", views: self.views)
end
end
end
def to_xml(auto_generated, query_params, xml : XML::Builder | Nil = nil)
if xml
to_xml(HOST_URL, auto_generated, query_params, xml)
else
XML.build do |json|
to_xml(HOST_URL, auto_generated, query_params, xml)
end
end
end
def to_json(locale : Hash(String, JSON::Any), json : JSON::Builder)
json.object do
json.field "type", "video"
json.field "title", self.title
json.field "videoId", self.id
json.field "author", self.author
json.field "authorId", self.ucid
json.field "authorUrl", "/channel/#{self.ucid}"
json.field "videoThumbnails" do
generate_thumbnails(json, self.id)
end
json.field "description", html_to_content(self.description_html)
json.field "descriptionHtml", self.description_html
json.field "viewCount", self.views
json.field "published", self.published.to_unix
json.field "publishedText", translate(locale, "`x` ago", recode_date(self.published, locale))
json.field "lengthSeconds", self.length_seconds
json.field "liveNow", self.live_now
json.field "premium", self.premium
json.field "isUpcoming", self.is_upcoming
if self.premiere_timestamp
json.field "premiereTimestamp", self.premiere_timestamp.try &.to_unix
end
end
end
def to_json(locale, json : JSON::Builder | Nil = nil)
if json
to_json(locale, json)
else
JSON.build do |json|
to_json(locale, json)
end
end
end
def is_upcoming
premiere_timestamp ? true : false
end
end
struct SearchPlaylistVideo
include DB::Serializable
property title : String
property id : String
property length_seconds : Int32
end
struct SearchPlaylist
include DB::Serializable
property title : String
property id : String
property author : String
property ucid : String
property video_count : Int32
property videos : Array(SearchPlaylistVideo)
property thumbnail : String?
def to_json(locale, json : JSON::Builder)
json.object do
json.field "type", "playlist"
json.field "title", self.title
json.field "playlistId", self.id
json.field "playlistThumbnail", self.thumbnail
json.field "author", self.author
json.field "authorId", self.ucid
json.field "authorUrl", "/channel/#{self.ucid}"
json.field "videoCount", self.video_count
json.field "videos" do
json.array do
self.videos.each do |video|
json.object do
json.field "title", video.title
json.field "videoId", video.id
json.field "lengthSeconds", video.length_seconds
json.field "videoThumbnails" do
generate_thumbnails(json, video.id)
end
end
end
end
end
end
end
def to_json(locale, json : JSON::Builder | Nil = nil)
if json
to_json(locale, json)
else
JSON.build do |json|
to_json(locale, json)
end
end
end
end
struct SearchChannel
include DB::Serializable
property author : String
property ucid : String
property author_thumbnail : String
property subscriber_count : Int32
property video_count : Int32
property description_html : String
property auto_generated : Bool
def to_json(locale, json : JSON::Builder)
json.object do
json.field "type", "channel"
json.field "author", self.author
json.field "authorId", self.ucid
json.field "authorUrl", "/channel/#{self.ucid}"
json.field "authorThumbnails" do
json.array do
qualities = {32, 48, 76, 100, 176, 512}
qualities.each do |quality|
json.object do
json.field "url", self.author_thumbnail.gsub(/=\d+/, "=s#{quality}")
json.field "width", quality
json.field "height", quality
end
end
end
end
json.field "autoGenerated", self.auto_generated
json.field "subCount", self.subscriber_count
json.field "videoCount", self.video_count
json.field "description", html_to_content(self.description_html)
json.field "descriptionHtml", self.description_html
end
end
def to_json(locale, json : JSON::Builder | Nil = nil)
if json
to_json(locale, json)
else
JSON.build do |json|
to_json(locale, json)
end
end
end
end
class Category
include DB::Serializable
property title : String
property contents : Array(SearchItem) | SearchItem
property browse_endpoint_data : String?
property continuation_token : String?
property badges : Array(Tuple(String, String))?
def to_json(locale, json : JSON::Builder)
json.object do
json.field "title", self.title
json.field "contents", self.contents
end
end
def to_json(locale, json : JSON::Builder | Nil = nil)
if json
to_json(locale, json)
else
JSON.build do |json|
to_json(locale, json)
end
end
end
end
alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist | Category

View file

@ -1,233 +1,3 @@
struct SearchVideo
include DB::Serializable
property title : String
property id : String
property author : String
property ucid : String
property published : Time
property views : Int64
property description_html : String
property length_seconds : Int32
property live_now : Bool
property premium : Bool
property premiere_timestamp : Time?
def to_xml(auto_generated, query_params, xml : XML::Builder)
query_params["v"] = self.id
xml.element("entry") do
xml.element("id") { xml.text "yt:video:#{self.id}" }
xml.element("yt:videoId") { xml.text self.id }
xml.element("yt:channelId") { xml.text self.ucid }
xml.element("title") { xml.text self.title }
xml.element("link", rel: "alternate", href: "#{HOST_URL}/watch?#{query_params}")
xml.element("author") do
if auto_generated
xml.element("name") { xml.text self.author }
xml.element("uri") { xml.text "#{HOST_URL}/channel/#{self.ucid}" }
else
xml.element("name") { xml.text author }
xml.element("uri") { xml.text "#{HOST_URL}/channel/#{ucid}" }
end
end
xml.element("content", type: "xhtml") do
xml.element("div", xmlns: "http://www.w3.org/1999/xhtml") do
xml.element("a", href: "#{HOST_URL}/watch?#{query_params}") do
xml.element("img", src: "#{HOST_URL}/vi/#{self.id}/mqdefault.jpg")
end
xml.element("p", style: "word-break:break-word;white-space:pre-wrap") { xml.text html_to_content(self.description_html) }
end
end
xml.element("published") { xml.text self.published.to_s("%Y-%m-%dT%H:%M:%S%:z") }
xml.element("media:group") do
xml.element("media:title") { xml.text self.title }
xml.element("media:thumbnail", url: "#{HOST_URL}/vi/#{self.id}/mqdefault.jpg",
width: "320", height: "180")
xml.element("media:description") { xml.text html_to_content(self.description_html) }
end
xml.element("media:community") do
xml.element("media:statistics", views: self.views)
end
end
end
def to_xml(auto_generated, query_params, xml : XML::Builder | Nil = nil)
if xml
to_xml(HOST_URL, auto_generated, query_params, xml)
else
XML.build do |json|
to_xml(HOST_URL, auto_generated, query_params, xml)
end
end
end
def to_json(locale, json : JSON::Builder)
json.object do
json.field "type", "video"
json.field "title", self.title
json.field "videoId", self.id
json.field "author", self.author
json.field "authorId", self.ucid
json.field "authorUrl", "/channel/#{self.ucid}"
json.field "videoThumbnails" do
generate_thumbnails(json, self.id)
end
json.field "description", html_to_content(self.description_html)
json.field "descriptionHtml", self.description_html
json.field "viewCount", self.views
json.field "published", self.published.to_unix
json.field "publishedText", translate(locale, "`x` ago", recode_date(self.published, locale))
json.field "lengthSeconds", self.length_seconds
json.field "liveNow", self.live_now
json.field "premium", self.premium
json.field "isUpcoming", self.is_upcoming
if self.premiere_timestamp
json.field "premiereTimestamp", self.premiere_timestamp.try &.to_unix
end
end
end
def to_json(locale, json : JSON::Builder | Nil = nil)
if json
to_json(locale, json)
else
JSON.build do |json|
to_json(locale, json)
end
end
end
def is_upcoming
premiere_timestamp ? true : false
end
end
struct SearchPlaylistVideo
include DB::Serializable
property title : String
property id : String
property length_seconds : Int32
end
struct SearchPlaylist
include DB::Serializable
property title : String
property id : String
property author : String
property ucid : String
property video_count : Int32
property videos : Array(SearchPlaylistVideo)
property thumbnail : String?
def to_json(locale, json : JSON::Builder)
json.object do
json.field "type", "playlist"
json.field "title", self.title
json.field "playlistId", self.id
json.field "playlistThumbnail", self.thumbnail
json.field "author", self.author
json.field "authorId", self.ucid
json.field "authorUrl", "/channel/#{self.ucid}"
json.field "videoCount", self.video_count
json.field "videos" do
json.array do
self.videos.each do |video|
json.object do
json.field "title", video.title
json.field "videoId", video.id
json.field "lengthSeconds", video.length_seconds
json.field "videoThumbnails" do
generate_thumbnails(json, video.id)
end
end
end
end
end
end
end
def to_json(locale, json : JSON::Builder | Nil = nil)
if json
to_json(locale, json)
else
JSON.build do |json|
to_json(locale, json)
end
end
end
end
struct SearchChannel
include DB::Serializable
property author : String
property ucid : String
property author_thumbnail : String
property subscriber_count : Int32
property video_count : Int32
property description_html : String
property auto_generated : Bool
def to_json(locale, json : JSON::Builder)
json.object do
json.field "type", "channel"
json.field "author", self.author
json.field "authorId", self.ucid
json.field "authorUrl", "/channel/#{self.ucid}"
json.field "authorThumbnails" do
json.array do
qualities = {32, 48, 76, 100, 176, 512}
qualities.each do |quality|
json.object do
json.field "url", self.author_thumbnail.gsub(/=\d+/, "=s#{quality}")
json.field "width", quality
json.field "height", quality
end
end
end
end
json.field "autoGenerated", self.auto_generated
json.field "subCount", self.subscriber_count
json.field "videoCount", self.video_count
json.field "description", html_to_content(self.description_html)
json.field "descriptionHtml", self.description_html
end
end
def to_json(locale, json : JSON::Builder | Nil = nil)
if json
to_json(locale, json)
else
JSON.build do |json|
to_json(locale, json)
end
end
end
end
alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist
def channel_search(query, page, channel)
response = YT_POOL.client &.get("/channel/#{channel}")

View file

@ -96,6 +96,7 @@
</div>
<% end %>
</div>
<% when Category %>
<% else %>
<a style="width:100%" href="/watch?v=<%= item.id %>">
<% if !env.get("preferences").as(Preferences).thin_mode %>