Merge pull request #3137 from SamantazFox/add-hashtags

Add hashtags
This commit is contained in:
Samantaz Fox 2022-06-09 00:35:09 +02:00
commit b5c54b4e41
No known key found for this signature in database
GPG key ID: F42821059186176E
14 changed files with 355 additions and 38 deletions

View file

@ -47,6 +47,8 @@ jobs:
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
with:
submodules: true
- name: Install Crystal - name: Install Crystal
uses: crystal-lang/install-crystal@v1.6.0 uses: crystal-lang/install-crystal@v1.6.0

3
.gitmodules vendored Normal file
View file

@ -0,0 +1,3 @@
[submodule "mocks"]
path = mocks
url = ../mocks

1
mocks Submodule

@ -0,0 +1 @@
Subproject commit 020337194dd482c47ee2d53cd111d0ebf2831e52

View file

@ -0,0 +1,109 @@
require "../parsers_helper.cr"
# Parser specs for hashtag result pages.
#
# Each example loads a recorded response through `load_mock`, runs it through
# `extract_items`, then spot-checks two entries of the resulting array against
# hard-coded expected values.
Spectator.describe Invidious::Hashtag do
it "parses richItemRenderer containers (test 1)" do
# Load the recorded response for the first hashtag page
test_content = load_mock("hashtag/martingarrix_page1")
videos = extract_items(test_content)
# The whole page must be parsed: 60 items are expected
expect(typeof(videos)).to eq(Array(SearchItem))
expect(videos.size).to eq(60)
#
# Random video check 1
#
expect(typeof(videos[11])).to eq(SearchItem)
video_11 = videos[11].as(SearchVideo)
expect(video_11.id).to eq("06eSsOWcKYA")
expect(video_11.title).to eq("Martin Garrix - Live @ Tomorrowland 2018")
expect(video_11.ucid).to eq("UC5H_KXkPbEsGs0tFt8R35mA")
expect(video_11.author).to eq("Martin Garrix")
expect(video_11.author_verified).to be_true
# NOTE(review): `published` is compared against the current time with a
# 1 second tolerance; presumably it is computed at parse time from a
# relative date ("3 years ago") - confirm against the video parser.
expect(video_11.published).to be_close(Time.utc - 3.years, 1.second)
expect(video_11.length_seconds).to eq((56.minutes + 41.seconds).total_seconds.to_i32)
expect(video_11.views).to eq(40_504_893)
expect(video_11.live_now).to be_false
expect(video_11.premium).to be_false
expect(video_11.premiere_timestamp).to be_nil
#
# Random video check 2
#
expect(typeof(videos[35])).to eq(SearchItem)
video_35 = videos[35].as(SearchVideo)
expect(video_35.id).to eq("b9HpOAYjY9I")
expect(video_35.title).to eq("Martin Garrix feat. Mike Yung - Dreamer (Official Video)")
expect(video_35.ucid).to eq("UC5H_KXkPbEsGs0tFt8R35mA")
expect(video_35.author).to eq("Martin Garrix")
expect(video_35.author_verified).to be_true
expect(video_35.published).to be_close(Time.utc - 3.years, 1.second)
expect(video_35.length_seconds).to eq((3.minutes + 14.seconds).total_seconds.to_i32)
expect(video_35.views).to eq(30_790_049)
expect(video_35.live_now).to be_false
expect(video_35.premium).to be_false
expect(video_35.premiere_timestamp).to be_nil
end
it "parses richItemRenderer containers (test 2)" do
# Load the recorded response for the second hashtag page
test_content = load_mock("hashtag/martingarrix_page2")
videos = extract_items(test_content)
# The whole page must be parsed: 60 items are expected
expect(typeof(videos)).to eq(Array(SearchItem))
expect(videos.size).to eq(60)
#
# Random video check 1
#
expect(typeof(videos[41])).to eq(SearchItem)
video_41 = videos[41].as(SearchVideo)
expect(video_41.id).to eq("qhstH17zAjs")
expect(video_41.title).to eq("Martin Garrix Radio - Episode 391")
expect(video_41.ucid).to eq("UC5H_KXkPbEsGs0tFt8R35mA")
expect(video_41.author).to eq("Martin Garrix")
expect(video_41.author_verified).to be_true
expect(video_41.published).to be_close(Time.utc - 2.months, 1.second)
expect(video_41.length_seconds).to eq((1.hour).total_seconds.to_i32)
expect(video_41.views).to eq(63_240)
expect(video_41.live_now).to be_false
expect(video_41.premium).to be_false
expect(video_41.premiere_timestamp).to be_nil
#
# Random video check 2 (an entry whose author is expected to be unverified)
#
expect(typeof(videos[48])).to eq(SearchItem)
video_48 = videos[48].as(SearchVideo)
expect(video_48.id).to eq("lqGvW0NIfdc")
expect(video_48.title).to eq("Martin Garrix SENTIO Full Album Mix by Sakul")
expect(video_48.ucid).to eq("UC3833PXeLTS6yRpwGMQpp4Q")
expect(video_48.author).to eq("SAKUL")
expect(video_48.author_verified).to be_false
expect(video_48.published).to be_close(Time.utc - 3.weeks, 1.second)
expect(video_48.length_seconds).to eq((35.minutes + 46.seconds).total_seconds.to_i32)
expect(video_48.views).to eq(68_704)
expect(video_48.live_now).to be_false
expect(video_48.premium).to be_false
expect(video_48.premiere_timestamp).to be_nil
end
end

33
spec/parsers_helper.cr Normal file
View file

@ -0,0 +1,33 @@
require "db"
require "json"
require "kemal"
require "protodec/utils"
require "spectator"
require "../src/invidious/helpers/macros"
require "../src/invidious/helpers/logger"
require "../src/invidious/helpers/utils"
require "../src/invidious/videos"
require "../src/invidious/comments"
require "../src/invidious/helpers/serialized_yt_data"
require "../src/invidious/yt_backend/extractors"
require "../src/invidious/yt_backend/extractors_utils"
# The parsers log through a global `LOGGER`. For the specs, it is bound to the
# null device with logging turned off, so nothing is printed while they run.
OUTPUT = File.open(File::NULL, "w")
LOGGER = Invidious::LogHandler.new(OUTPUT, LogLevel::Off)
# Loads a mocked API response.
#
# `file` is the mock's path relative to the "mocks" directory (located next
# to the directory containing this helper), without the ".json" extension.
# Returns the parsed document as a hash (the top-level JSON value has to be
# an object).
def load_mock(file) : Hash(String, JSON::Any)
  mock_path = File.join(__DIR__, "..", "mocks", file + ".json")
  raw_json = File.read(mock_path)

  JSON.parse(raw_json).as_h
end
# Global Spectator settings shared by every spec requiring this helper.
Spectator.configure do |config|
# NOTE(review): presumably makes a run without any example count as a
# failure - confirm against the Spectator documentation.
config.fail_blank
# NOTE(review): presumably runs the examples in a random order, so that
# order-dependent tests are detected - confirm against the Spectator docs.
config.randomize
end

View file

@ -385,6 +385,7 @@ end
Invidious::Routing.get "/opensearch.xml", Invidious::Routes::Search, :opensearch Invidious::Routing.get "/opensearch.xml", Invidious::Routes::Search, :opensearch
Invidious::Routing.get "/results", Invidious::Routes::Search, :results Invidious::Routing.get "/results", Invidious::Routes::Search, :results
Invidious::Routing.get "/search", Invidious::Routes::Search, :search Invidious::Routing.get "/search", Invidious::Routes::Search, :search
Invidious::Routing.get "/hashtag/:hashtag", Invidious::Routes::Search, :hashtag
# User routes # User routes
define_user_routes() define_user_routes()

View file

@ -61,6 +61,7 @@ def get_about_info(ucid, locale) : AboutChannel
author = initdata["metadata"]["channelMetadataRenderer"]["title"].as_s author = initdata["metadata"]["channelMetadataRenderer"]["title"].as_s
author_url = initdata["metadata"]["channelMetadataRenderer"]["channelUrl"].as_s author_url = initdata["metadata"]["channelMetadataRenderer"]["channelUrl"].as_s
author_thumbnail = initdata["metadata"]["channelMetadataRenderer"]["avatar"]["thumbnails"][0]["url"].as_s author_thumbnail = initdata["metadata"]["channelMetadataRenderer"]["avatar"]["thumbnails"][0]["url"].as_s
author_verified = has_verified_badge?(initdata.dig?("header", "c4TabbedHeaderRenderer", "badges"))
ucid = initdata["metadata"]["channelMetadataRenderer"]["externalId"].as_s ucid = initdata["metadata"]["channelMetadataRenderer"]["externalId"].as_s
@ -71,9 +72,6 @@ def get_about_info(ucid, locale) : AboutChannel
# if banner.includes? "channels/c4/default_banner" # if banner.includes? "channels/c4/default_banner"
# banner = nil # banner = nil
# end # end
# author_verified_badges = initdata["header"]?.try &.["c4TabbedHeaderRenderer"]?.try &.["badges"]?
author_verified_badge = initdata["header"].dig?("c4TabbedHeaderRenderer", "badges", 0, "metadataBadgeRenderer", "tooltip")
author_verified = (author_verified_badge && author_verified_badge == "Verified")
description_node = initdata["metadata"]["channelMetadataRenderer"]?.try &.["description"]? description_node = initdata["metadata"]["channelMetadataRenderer"]?.try &.["description"]?

44
src/invidious/hashtag.cr Normal file
View file

@ -0,0 +1,44 @@
# Retrieval of the videos listed on a hashtag page.
module Invidious::Hashtag
  extend self

  # Fetches one page of items for the given *hashtag* (passed without the
  # leading '#', which is added by `generate_continuation`).
  #
  # *page* is converted to an item offset assuming 60 items per page, so
  # page 1 maps to offset 0. *region* is forwarded to the client config.
  def fetch(hashtag : String, page : Int, region : String? = nil) : Array(SearchItem)
    continuation = generate_continuation(hashtag, (page - 1) * 60)
    config = YoutubeAPI::ClientConfig.new(region: region)

    browse_data = YoutubeAPI.browse(continuation: continuation, client_config: config)
    extract_items(browse_data)
  end

  # Builds the continuation token requesting the items of *hashtag*,
  # starting at offset *cursor*.
  #
  # The token is the structure below, passed through Protodec, then
  # URL-safe base64, then form-encoding.
  def generate_continuation(hashtag : String, cursor : Int)
    payload = {
      "80226972:embedded" => {
        "2:string" => "FEhashtag",
        "3:base64" => {
          "1:varint" => cursor.to_i64,
        },
        "7:base64" => {
          "325477796:embedded" => {
            "1:embedded" => {
              "2:0:embedded" => {
                "2:string"  => '#' + hashtag,
                "4:varint"  => 0_i64,
                "11:string" => "",
              },
              "4:string" => "browse-feedFEhashtag",
            },
            "2:string" => hashtag,
          },
        },
      },
    }

    proto_json = Protodec::Any.cast_json(payload)
    proto_data = Protodec::Any.from_json(proto_json)
    b64_token = Base64.urlsafe_encode(proto_data)

    URI.encode_www_form(b64_token)
  end
end

View file

@ -182,7 +182,7 @@ module Invidious::Routes::Feeds
paid: false, paid: false,
premium: false, premium: false,
premiere_timestamp: nil, premiere_timestamp: nil,
author_verified: false, # ¯\_(ツ)_/¯ author_verified: false,
}) })
end end

View file

@ -63,4 +63,35 @@ module Invidious::Routes::Search
templated "search" templated "search"
end end
end end
# Handler for `GET /hashtag/:hashtag`: renders one page of the videos listed
# under the given hashtag.
#
# Query parameters:
#  - `page`: 1-based page number. A missing, non-numeric or out-of-range
#    value falls back to the first page, and values below 1 are clamped to 1.
#
# Responds with a 400 error page when the hashtag is missing or empty, and
# with a 500 error page when fetching the results fails.
#
# The locals `locale`, `hashtag`, `page`, `videos`, `url_prev_page` and
# `url_next_page` are read by the "hashtag" template and must keep their names.
def self.hashtag(env : HTTP::Server::Context)
  locale = env.get("preferences").as(Preferences).locale

  hashtag = env.params.url["hashtag"]?
  if hashtag.nil? || hashtag.empty?
    return error_template(400, "Invalid request")
  end

  # `to_i?` returns nil on invalid input where `to_i` would raise, so a
  # malformed `?page=` no longer raises outside of any rescue block.
  page = env.params.query["page"]?.try(&.to_i?) || 1
  page = Math.max(1, page)

  # "page" is dropped from the query so that the remaining parameters can be
  # appended unchanged to the pagination links built below.
  env.params.query.delete_all("page")

  begin
    videos = Invidious::Hashtag.fetch(hashtag, page)
  rescue ex
    return error_template(500, ex)
  end

  params = env.params.query.empty? ? "" : "&#{env.params.query}"

  hashtag_encoded = URI.encode_www_form(hashtag, space_to_plus: false)
  url_prev_page = "/hashtag/#{hashtag_encoded}?page=#{page - 1}#{params}"
  url_next_page = "/hashtag/#{hashtag_encoded}?page=#{page + 1}#{params}"

  templated "hashtag"
end
end end

View file

@ -853,6 +853,7 @@ end
# the same 11 first entries as the compact rendered. # the same 11 first entries as the compact rendered.
# #
# TODO: "compactRadioRenderer" (Mix) and # TODO: "compactRadioRenderer" (Mix) and
# TODO: Use a proper struct/class instead of a hacky JSON object
def parse_related_video(related : JSON::Any) : Hash(String, JSON::Any)? def parse_related_video(related : JSON::Any) : Hash(String, JSON::Any)?
return nil if !related["videoId"]? return nil if !related["videoId"]?
@ -868,11 +869,7 @@ def parse_related_video(related : JSON::Any) : Hash(String, JSON::Any)?
.try &.dig?("runs", 0) .try &.dig?("runs", 0)
author = channel_info.try &.dig?("text") author = channel_info.try &.dig?("text")
author_verified_badge = related["ownerBadges"]?.try do |badges_array| author_verified = has_verified_badge?(related["ownerBadges"]?).to_s
badges_array.as_a.find(&.dig("metadataBadgeRenderer", "tooltip").as_s.== "Verified")
end
author_verified = (author_verified_badge && author_verified_badge.size > 0).to_s
ucid = channel_info.try { |ci| HelperExtractors.get_browse_id(ci) } ucid = channel_info.try { |ci| HelperExtractors.get_browse_id(ci) }
@ -1089,17 +1086,19 @@ def extract_video_info(video_id : String, proxy_region : String? = nil, context_
# Author infos # Author infos
author_info = video_secondary_renderer.try &.dig?("owner", "videoOwnerRenderer") if author_info = video_secondary_renderer.try &.dig?("owner", "videoOwnerRenderer")
author_thumbnail = author_info.try &.dig?("thumbnail", "thumbnails", 0, "url") author_thumbnail = author_info.dig?("thumbnail", "thumbnails", 0, "url")
params["authorThumbnail"] = JSON::Any.new(author_thumbnail.try &.as_s || "")
author_verified_badge = author_info.try &.dig?("badges", 0, "metadataBadgeRenderer", "tooltip") author_verified = has_verified_badge?(author_info["badges"]?)
author_verified = (!author_verified_badge.nil? && author_verified_badge == "Verified") params["authorVerified"] = JSON::Any.new(author_verified)
params["authorVerified"] = JSON::Any.new(author_verified)
params["authorThumbnail"] = JSON::Any.new(author_thumbnail.try &.as_s || "") subs_text = author_info["subscriberCountText"]?
.try { |t| t["simpleText"]? || t.dig?("runs", 0, "text") }
.try &.as_s.split(" ", 2)[0]
params["subCountText"] = JSON::Any.new(author_info.try &.["subscriberCountText"]? params["subCountText"] = JSON::Any.new(subs_text || "-")
.try { |t| t["simpleText"]? || t.dig?("runs", 0, "text") }.try &.as_s.split(" ", 2)[0] || "-") end
# Return data # Return data

View file

@ -0,0 +1,39 @@
<%# Result page for a hashtag. Reads the locals `hashtag`, `page`, `videos`, `locale`, `url_prev_page` and `url_next_page`. %>
<% content_for "header" do %>
<title><%= HTML.escape(hashtag) %> - Invidious</title>
<% end %>
<hr/>
<%# Top navigation: "previous" is hidden on the first page, "next" is hidden when fewer than 60 items were returned %>
<div class="pure-g h-box v-box">
<div class="pure-u-1 pure-u-lg-1-5">
<%- if page > 1 -%>
<a href="<%= url_prev_page %>"><%= translate(locale, "Previous page") %></a>
<%- end -%>
</div>
<div class="pure-u-1 pure-u-lg-3-5"></div>
<div class="pure-u-1 pure-u-lg-1-5" style="text-align:right">
<%- if videos.size >= 60 -%>
<a href="<%= url_next_page %>"><%= translate(locale, "Next page") %></a>
<%- end -%>
</div>
</div>
<%# Item list: the "components/item" partial is rendered once per entry (NOTE(review): presumably it reads the `item` block variable - confirm) %>
<div class="pure-g">
<%- videos.each do |item| -%>
<%= rendered "components/item" %>
<%- end -%>
</div>
<%# Bottom navigation: same links and conditions as the top navigation %>
<div class="pure-g h-box">
<div class="pure-u-1 pure-u-lg-1-5">
<%- if page > 1 -%>
<a href="<%= url_prev_page %>"><%= translate(locale, "Previous page") %></a>
<%- end -%>
</div>
<div class="pure-u-1 pure-u-lg-3-5"></div>
<div class="pure-u-1 pure-u-lg-1-5" style="text-align:right">
<%- if videos.size >= 60 -%>
<a href="<%= url_next_page %>"><%= translate(locale, "Next page") %></a>
<%- end -%>
</div>
</div>

View file

@ -1,3 +1,5 @@
require "../helpers/serialized_yt_data"
# This file contains helper methods to parse the Youtube API json data into # This file contains helper methods to parse the Youtube API json data into
# neat little packages we can use # neat little packages we can use
@ -14,6 +16,7 @@ private ITEM_PARSERS = {
Parsers::GridPlaylistRendererParser, Parsers::GridPlaylistRendererParser,
Parsers::PlaylistRendererParser, Parsers::PlaylistRendererParser,
Parsers::CategoryRendererParser, Parsers::CategoryRendererParser,
Parsers::RichItemRendererParser,
} }
record AuthorFallback, name : String, id : String record AuthorFallback, name : String, id : String
@ -57,6 +60,8 @@ private module Parsers
author_id = author_fallback.id author_id = author_fallback.id
end end
author_verified = has_verified_badge?(item_contents["ownerBadges"]?)
# For live videos (and possibly recently premiered videos) there is no published information. # For live videos (and possibly recently premiered videos) there is no published information.
# Instead, in its place is the amount of people currently watching. This behavior should be replicated # Instead, in its place is the amount of people currently watching. This behavior should be replicated
# on Invidious once all features of livestreams are supported. On an unrelated note, defaulting to the current # on Invidious once all features of livestreams are supported. On an unrelated note, defaulting to the current
@ -102,11 +107,7 @@ private module Parsers
premium = false premium = false
premiere_timestamp = item_contents.dig?("upcomingEventData", "startTime").try { |t| Time.unix(t.as_s.to_i64) } premiere_timestamp = item_contents.dig?("upcomingEventData", "startTime").try { |t| Time.unix(t.as_s.to_i64) }
author_verified_badge = item_contents["ownerBadges"]?.try do |badges_array|
badges_array.as_a.find(&.dig("metadataBadgeRenderer", "tooltip").as_s.== "Verified")
end
author_verified = (author_verified_badge && author_verified_badge.size > 0)
item_contents["badges"]?.try &.as_a.each do |badge| item_contents["badges"]?.try &.as_a.each do |badge|
b = badge["metadataBadgeRenderer"] b = badge["metadataBadgeRenderer"]
case b["label"].as_s case b["label"].as_s
@ -133,7 +134,7 @@ private module Parsers
live_now: live_now, live_now: live_now,
premium: premium, premium: premium,
premiere_timestamp: premiere_timestamp, premiere_timestamp: premiere_timestamp,
author_verified: author_verified || false, author_verified: author_verified,
}) })
end end
@ -161,12 +162,9 @@ private module Parsers
private def self.parse(item_contents, author_fallback) private def self.parse(item_contents, author_fallback)
author = extract_text(item_contents["title"]) || author_fallback.name author = extract_text(item_contents["title"]) || author_fallback.name
author_id = item_contents["channelId"]?.try &.as_s || author_fallback.id author_id = item_contents["channelId"]?.try &.as_s || author_fallback.id
author_verified_badge = item_contents["ownerBadges"]?.try do |badges_array| author_verified = has_verified_badge?(item_contents["ownerBadges"]?)
badges_array.as_a.find(&.dig("metadataBadgeRenderer", "tooltip").as_s.== "Verified")
end
author_verified = (author_verified_badge && author_verified_badge.size > 0)
author_thumbnail = HelperExtractors.get_thumbnails(item_contents) author_thumbnail = HelperExtractors.get_thumbnails(item_contents)
# When public subscriber count is disabled, the subscriberCountText isn't sent by InnerTube. # When public subscriber count is disabled, the subscriberCountText isn't sent by InnerTube.
# Always simpleText # Always simpleText
# TODO change default value to nil # TODO change default value to nil
@ -188,7 +186,7 @@ private module Parsers
video_count: video_count, video_count: video_count,
description_html: description_html, description_html: description_html,
auto_generated: auto_generated, auto_generated: auto_generated,
author_verified: author_verified || false, author_verified: author_verified,
}) })
end end
@ -216,11 +214,9 @@ private module Parsers
private def self.parse(item_contents, author_fallback) private def self.parse(item_contents, author_fallback)
title = extract_text(item_contents["title"]) || "" title = extract_text(item_contents["title"]) || ""
plid = item_contents["playlistId"]?.try &.as_s || "" plid = item_contents["playlistId"]?.try &.as_s || ""
author_verified_badge = item_contents["ownerBadges"]?.try do |badges_array|
badges_array.as_a.find(&.dig("metadataBadgeRenderer", "tooltip").as_s.== "Verified")
end
author_verified = (author_verified_badge && author_verified_badge.size > 0) author_verified = has_verified_badge?(item_contents["ownerBadges"]?)
video_count = HelperExtractors.get_video_count(item_contents) video_count = HelperExtractors.get_video_count(item_contents)
playlist_thumbnail = HelperExtractors.get_thumbnails(item_contents) playlist_thumbnail = HelperExtractors.get_thumbnails(item_contents)
@ -232,7 +228,7 @@ private module Parsers
video_count: video_count, video_count: video_count,
videos: [] of SearchPlaylistVideo, videos: [] of SearchPlaylistVideo,
thumbnail: playlist_thumbnail, thumbnail: playlist_thumbnail,
author_verified: author_verified || false, author_verified: author_verified,
}) })
end end
@ -266,11 +262,8 @@ private module Parsers
author_info = item_contents.dig?("shortBylineText", "runs", 0) author_info = item_contents.dig?("shortBylineText", "runs", 0)
author = author_info.try &.["text"].as_s || author_fallback.name author = author_info.try &.["text"].as_s || author_fallback.name
author_id = author_info.try { |x| HelperExtractors.get_browse_id(x) } || author_fallback.id author_id = author_info.try { |x| HelperExtractors.get_browse_id(x) } || author_fallback.id
author_verified_badge = item_contents["ownerBadges"]?.try do |badges_array| author_verified = has_verified_badge?(item_contents["ownerBadges"]?)
badges_array.as_a.find(&.dig("metadataBadgeRenderer", "tooltip").as_s.== "Verified")
end
author_verified = (author_verified_badge && author_verified_badge.size > 0)
videos = item_contents["videos"]?.try &.as_a.map do |v| videos = item_contents["videos"]?.try &.as_a.map do |v|
v = v["childVideoRenderer"] v = v["childVideoRenderer"]
v_title = v.dig?("title", "simpleText").try &.as_s || "" v_title = v.dig?("title", "simpleText").try &.as_s || ""
@ -293,7 +286,7 @@ private module Parsers
video_count: video_count, video_count: video_count,
videos: videos, videos: videos,
thumbnail: playlist_thumbnail, thumbnail: playlist_thumbnail,
author_verified: author_verified || false, author_verified: author_verified,
}) })
end end
@ -374,6 +367,29 @@ private module Parsers
return {{@type.name}} return {{@type.name}}
end end
end end
# Parses an InnerTube richItemRenderer by handing its content over to the
# VideoRendererParser.
# Returns nil when the given object isn't a richItemRenderer.
#
# A richItemRenderer seems to be a simple wrapper for a videoRenderer, used
# by the result page for hashtags. It is located inside a continuationItems
# container.
#
module RichItemRendererParser
  def self.process(item : JSON::Any, author_fallback : AuthorFallback)
    wrapped_item = item.dig?("richItemRenderer", "content")
    return nil if wrapped_item.nil?

    self.parse(wrapped_item, author_fallback)
  end

  # The wrapped content is parsed like any other video renderer
  private def self.parse(item_contents, author_fallback)
    VideoRendererParser.process(item_contents, author_fallback)
  end

  def self.parser_name
    {{@type.name}}
  end
end
end end
# The following are the extractors for extracting an array of items from # The following are the extractors for extracting an array of items from
@ -501,6 +517,8 @@ private module Extractors
self.extract(target) self.extract(target)
elsif target = initial_data["appendContinuationItemsAction"]? elsif target = initial_data["appendContinuationItemsAction"]?
self.extract(target) self.extract(target)
elsif target = initial_data["reloadContinuationItemsCommand"]?
self.extract(target)
end end
end end

View file

@ -29,6 +29,45 @@ def extract_text(item : JSON::Any?) : String?
end end
end end
# Checks whether an "ownerBadges" or a "badges" element contains a verified
# badge. There are currently two known types of verified badges:
#
# "ownerBadges": [{
#   "metadataBadgeRenderer": {
#     "icon": { "iconType": "CHECK_CIRCLE_THICK" },
#     "style": "BADGE_STYLE_TYPE_VERIFIED",
#     "tooltip": "Verified",
#     "accessibilityData": { "label": "Verified" }
#   }
# }],
#
# "ownerBadges": [{
#   "metadataBadgeRenderer": {
#     "icon": { "iconType": "OFFICIAL_ARTIST_BADGE" },
#     "style": "BADGE_STYLE_TYPE_VERIFIED_ARTIST",
#     "tooltip": "Official Artist Channel",
#     "accessibilityData": { "label": "Official Artist Channel" }
#   }
# }],
#
# Badges are matched on their "style". Returns false when `badges` is nil,
# when no badge matches, or when the data can't be parsed (in which case
# the failure is logged).
def has_verified_badge?(badges : JSON::Any?)
  return false if badges.nil?

  # Stops at the first matching badge. A malformed badge located before it
  # raises, which is handled by the rescue clause below.
  verified = badges.as_a.any? do |badge|
    style = badge.dig("metadataBadgeRenderer", "style").as_s
    style == "BADGE_STYLE_TYPE_VERIFIED" || style == "BADGE_STYLE_TYPE_VERIFIED_ARTIST"
  end

  return verified
rescue ex
  LOGGER.debug("Unable to parse owner badges. Got exception: #{ex.message}")
  LOGGER.trace("Owner badges data: #{badges.to_json}")

  return false
end
def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil) def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil)
extracted = extract_items(initial_data, author_fallback, author_id_fallback) extracted = extract_items(initial_data, author_fallback, author_id_fallback)