Update channel playlists to use polymer
parent 074497b0f6
commit 056e7432bd

5 changed files with 47 additions and 386 deletions
@@ -41,7 +41,7 @@ describe "Helper" do
   describe "#extract_channel_playlists_cursor" do
     it "correctly extracts a playlists cursor from the given URL" do
-      extract_channel_playlists_cursor("/browse_ajax?continuation=4qmFsgLRARIYVUNDajk1NklGNjJGYlQ3R291c3phajl3GrQBRWdsd2JHRjViR2x6ZEhNWUF5QUJNQUk0QVdBQmFnQjZabEZWYkZCaE1XczFVbFpHZDJGV09XNWxWelI0V0RGR2VWSnVWbUZOV0Vwc1ZHcG5lRmd3TVU1aVZXdDRWMWN4YzFGdFNuTmtlbWh4VGpCd1NWTllVa1pTYTJNeFlVUmtlRmt3Y0ZWVWJWRXdWbnBzTkU1V1JqRmhNVGxFVm14dmQwMXFhRzVXZDdnQkFBJTNEJTNE&gl=US&hl=en", false).should eq("AIOkY9EQpi_gyn1_QrFuZ1reN81_MMmI1YmlBblw8j7JHItEFG5h7qcJTNd4W9x5Quk_CVZ028gW")
+      extract_channel_playlists_cursor("4qmFsgLRARIYVUNDajk1NklGNjJGYlQ3R291c3phajl3GrQBRWdsd2JHRjViR2x6ZEhNWUF5QUJNQUk0QVdBQmFnQjZabEZWYkZCaE1XczFVbFpHZDJGV09XNWxWelI0V0RGR2VWSnVWbUZOV0Vwc1ZHcG5lRmd3TVU1aVZXdDRWMWN4YzFGdFNuTmtlbWh4VGpCd1NWTllVa1pTYTJNeFlVUmtlRmt3Y0ZWVWJWRXdWbnBzTkU1V1JqRmhNVGxFVm14dmQwMXFhRzVXZDdnQkFBJTNEJTNE", false).should eq("AIOkY9EQpi_gyn1_QrFuZ1reN81_MMmI1YmlBblw8j7JHItEFG5h7qcJTNd4W9x5Quk_CVZ028gW")
     end
   end
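Note: the spec now passes the bare continuation token instead of the full /browse_ajax URL, matching the reworked extract_channel_playlists_cursor further down. A minimal caller-side sketch, assuming the Crystal stdlib URI API, of how an old-style URL reduces to the new argument (the URL value here is abbreviated and invented):

    require "uri"

    # Abbreviated old-style URL; only the continuation parameter matters.
    url = "/browse_ajax?continuation=4qmFsgLRARIY...&gl=US&hl=en"
    cursor = URI.parse(url).query_params["continuation"]
    # cursor can now be handed to extract_channel_playlists_cursor(cursor, false)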
@@ -216,30 +216,18 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
   url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated)
   response = YT_POOL.client &.get(url)

+  videos = [] of SearchVideo
   begin
-    json = JSON.parse(response.body)
+    initial_data = JSON.parse(response.body).as_a.find &.["response"]?
+    raise "Could not extract JSON" if !initial_data
+    videos = extract_videos(initial_data.as_h, author, ucid)
   rescue ex
     if response.body.includes?("To continue with your YouTube experience, please fill out the form below.") ||
        response.body.includes?("https://www.google.com/sorry/index")
       raise "Could not extract channel info. Instance is likely blocked."
     end
-
-    raise "Could not extract JSON"
   end
-
-  if json["content_html"]? && !json["content_html"].as_s.empty?
-    document = XML.parse_html(json["content_html"].as_s)
-    nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))
-
-    if auto_generated
-      videos = extract_videos_html(nodeset)
-    else
-      videos = extract_videos_html(nodeset, ucid, author)
-    end
-  end
-
-  videos ||= [] of ChannelVideo

   rss.xpath_nodes("//feed/entry").each do |entry|
     video_id = entry.xpath_node("videoid").not_nil!.content
     title = entry.xpath_node("title").not_nil!.content
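Note: without disable_polymer, /browse_ajax answers with a JSON array in which one element carries a "response" object; the find call above selects it. A minimal sketch of that shape (the body below is invented for illustration):

    require "json"

    body = %([{"page": "browse"}, {"response": {"contents": {}}}])
    initial_data = JSON.parse(body).as_a.find &.["response"]?
    raise "Could not extract JSON" if !initial_data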
@@ -305,24 +293,11 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
   loop do
     url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated)
     response = YT_POOL.client &.get(url)
-    json = JSON.parse(response.body)
+    initial_data = JSON.parse(response.body).as_a.find &.["response"]?
+    raise "Could not extract JSON" if !initial_data
+    videos = extract_videos(initial_data.as_h, author, ucid)

-    if json["content_html"]? && !json["content_html"].as_s.empty?
-      document = XML.parse_html(json["content_html"].as_s)
-      nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))
-    else
-      break
-    end
-
-    nodeset = nodeset.not_nil!
-
-    if auto_generated
-      videos = extract_videos_html(nodeset)
-    else
-      videos = extract_videos_html(nodeset, ucid, author)
-    end
-
-    count = nodeset.size
+    count = videos.size
     videos = videos.map { |video| ChannelVideo.new(
       id: video.id,
       title: video.title,
@@ -387,23 +362,11 @@ def fetch_channel_playlists(ucid, author, auto_generated, continuation, sort_by)
     url = produce_channel_playlists_url(ucid, continuation, sort_by, auto_generated)

     response = YT_POOL.client &.get(url)
-    json = JSON.parse(response.body)

-    if json["load_more_widget_html"].as_s.empty?
-      continuation = nil
+    continuation = response.body.match(/"continuation":"(?<continuation>[^"]+)"/).try &.["continuation"]?
+    initial_data = JSON.parse(response.body).as_a.find(&.["response"]?).try &.as_h
   else
-      continuation = XML.parse_html(json["load_more_widget_html"].as_s)
-      continuation = continuation.xpath_node(%q(//button[@data-uix-load-more-href]))
-
-      if continuation
-        continuation = extract_channel_playlists_cursor(continuation["data-uix-load-more-href"], auto_generated)
-      end
-    end
-
-    html = XML.parse_html(json["content_html"].as_s)
-    nodeset = html.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))
-  else
-    url = "/channel/#{ucid}/playlists?disable_polymer=1&flow=list&view=1"
+    url = "/channel/#{ucid}/playlists?flow=list&view=1"

     case sort_by
     when "last", "last_added"
@@ -416,21 +379,13 @@ def fetch_channel_playlists(ucid, author, auto_generated, continuation, sort_by)
     end

     response = YT_POOL.client &.get(url)
-    html = XML.parse_html(response.body)
-
-    continuation = html.xpath_node(%q(//button[@data-uix-load-more-href]))
-    if continuation
-      continuation = extract_channel_playlists_cursor(continuation["data-uix-load-more-href"], auto_generated)
-    end
-
-    nodeset = html.xpath_nodes(%q(//ul[@id="browse-items-primary"]/li[contains(@class, "feed-item-container")]))
+    continuation = response.body.match(/"continuation":"(?<continuation>[^"]+)"/).try &.["continuation"]?
+    initial_data = extract_initial_data(response.body)
   end

-  if auto_generated
-    items = extract_shelf_items(nodeset, ucid, author)
-  else
-    items = extract_items_html(nodeset, ucid, author)
-  end
+  return [] of SearchItem, nil if !initial_data
+  items = extract_items(initial_data)
+  continuation = extract_channel_playlists_cursor(continuation, auto_generated) if continuation

   return items, continuation
 end
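Note: both branches now pull the next-page token with a plain regex over the response body instead of walking a load-more button in scraped HTML. The match in isolation, run against an invented body fragment:

    body = %({"continuation":"4qmFsgLRARIYVUNDajk1..."})
    continuation = body.match(/"continuation":"(?<continuation>[^"]+)"/)
      .try &.["continuation"]?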
@@ -530,9 +485,8 @@ def produce_channel_playlists_url(ucid, cursor, sort = "newest", auto_generated
   return "/browse_ajax?continuation=#{continuation}&gl=US&hl=en"
 end

-def extract_channel_playlists_cursor(url, auto_generated)
-  cursor = URI.parse(url).query_params
-    .try { |i| URI.decode_www_form(i["continuation"]) }
+def extract_channel_playlists_cursor(cursor, auto_generated)
+  cursor = URI.decode_www_form(cursor)
     .try { |i| Base64.decode(i) }
     .try { |i| IO::Memory.new(i) }
     .try { |i| Protodec::Any.parse(i) }
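Note: the cursor is a www-form-encoded, base64-encoded protobuf blob, so the pipeline is decode the form encoding, decode the base64, wrap the bytes in an IO, and let Protodec parse the protobuf. A standalone sketch of the same chain, assuming the protodec shard's usual require path (token abbreviated; a real token decodes to a full message):

    require "uri"
    require "base64"
    require "protodec/utils"

    token = "4qmFsgLRARIY..." # abbreviated continuation token
    cursor = URI.decode_www_form(token)
      .try { |i| Base64.decode(i) }
      .try { |i| IO::Memory.new(i) }
      .try { |i| Protodec::Any.parse(i) }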
@@ -949,25 +903,19 @@ def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
     response = YT_POOL.client &.get(url)
     initial_data = JSON.parse(response.body).as_a.find &.["response"]?
     break if !initial_data
-    videos.concat extract_videos(initial_data.as_h)
+    videos.concat extract_videos(initial_data.as_h, author, ucid)
   end

   return videos.size, videos
 end

 def get_latest_videos(ucid)
-  videos = [] of SearchVideo
-
   url = produce_channel_videos_url(ucid, 0)
   response = YT_POOL.client &.get(url)
-  json = JSON.parse(response.body)
+  initial_data = JSON.parse(response.body).as_a.find &.["response"]?
+  return [] of SearchVideo if !initial_data
+  author = initial_data["response"]?.try &.["metadata"]?.try &.["channelMetadataRenderer"]?.try &.["title"]?.try &.as_s
+  items = extract_videos(initial_data.as_h, author, ucid)

-  if json["content_html"]? && !json["content_html"].as_s.empty?
-    document = XML.parse_html(json["content_html"].as_s)
-    nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))
-
-    videos = extract_videos_html(nodeset, ucid)
-  end
-
-  return videos
+  return items
 end
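Note: the author now comes from the channel's own metadata in the polymer response rather than from scraped HTML. The path in isolation, walked over an invented miniature of that JSON:

    require "json"

    initial_data = JSON.parse(%({"response": {"metadata": {"channelMetadataRenderer": {"title": "Example Channel"}}}}))
    author = initial_data["response"]?.try &.["metadata"]?
      .try &.["channelMetadataRenderer"]?.try &.["title"]?.try &.as_s
    puts author # => Example Channel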
@@ -313,32 +313,30 @@ def html_to_content(description_html : String)
   return description
 end

-def extract_videos(initial_data : Hash(String, JSON::Any))
-  extract_items(initial_data).select(&.is_a?(SearchVideo)).map(&.as(SearchVideo))
+def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil)
+  extract_items(initial_data, author_fallback, author_id_fallback).select(&.is_a?(SearchVideo)).map(&.as(SearchVideo))
 end

-def extract_items(initial_data : Hash(String, JSON::Any))
+def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil)
   items = [] of SearchItem

-  initial_data.try { |t|
-    t["contents"]? || t["response"]?
-  }.try { |t|
-    t["twoColumnBrowseResultsRenderer"]?.try &.["tabs"].as_a[0]?.try &.["tabRenderer"]["content"] ||
-      t["twoColumnSearchResultsRenderer"]?.try &.["primaryContents"] ||
-      t["continuationContents"]?
-  }.try { |t| t["sectionListRenderer"]? || t["sectionListContinuation"]? }
-    .try &.["contents"]
-    .as_a.each { |c|
-      c.try &.["itemSectionRenderer"]["contents"].as_a
-        .try { |t| t[0]?.try &.["shelfRenderer"]?.try &.["content"]["expandedShelfContentsRenderer"]?.try &.["items"].as_a || t }
+  initial_data.try { |t| t["contents"]? || t["response"]? }
+    .try { |t| t["twoColumnBrowseResultsRenderer"]?.try &.["tabs"].as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]?.try &.["tabRenderer"]["content"] ||
+               t["twoColumnSearchResultsRenderer"]?.try &.["primaryContents"] ||
+               t["continuationContents"]? }
+    .try { |t| t["sectionListRenderer"]? || t["sectionListContinuation"]? }
+    .try &.["contents"].as_a
+    .each { |c| c.try &.["itemSectionRenderer"]["contents"].as_a
+      .try { |t| t[0]?.try &.["shelfRenderer"]?.try &.["content"]["expandedShelfContentsRenderer"]?.try &.["items"].as_a ||
+                 t[0]?.try &.["gridRenderer"]?.try &.["items"].as_a || t }
       .each { |item|
         if i = item["videoRenderer"]?
           video_id = i["videoId"].as_s
           title = i["title"].try { |t| t["simpleText"]?.try &.as_s || t["runs"]?.try &.as_a.map(&.["text"].as_s).join("") } || ""

           author_info = i["ownerText"]?.try &.["runs"].as_a[0]?
-          author = author_info.try &.["text"].as_s || ""
-          author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || ""
+          author = author_info.try &.["text"].as_s || author_fallback || ""
+          author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_id_fallback || ""

           published = i["publishedTimeText"]?.try &.["simpleText"]?.try { |t| decode_date(t.as_s) } || Time.local
           view_count = i["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64
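Note: two behavioural changes hide in this navigation chain: the browse tabs are now filtered for the one flagged selected instead of always taking tabs[0], and gridRenderer items (polymer's layout for channel playlists) are accepted alongside shelf contents. The tab selection in isolation, over an invented tabs array:

    require "json"

    tabs = JSON.parse(%([{"tabRenderer": {"selected": false}}, {"tabRenderer": {"selected": true}}])).as_a
    selected = tabs.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]?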
@@ -382,8 +380,8 @@ def extract_items(initial_data : Hash(String, JSON::Any))
           premiere_timestamp: premiere_timestamp
         )
       elsif i = item["channelRenderer"]?
-        author = i["title"]["simpleText"]?.try &.as_s || ""
-        author_id = i["channelId"]?.try &.as_s || ""
+        author = i["title"]["simpleText"]?.try &.as_s || author_fallback || ""
+        author_id = i["channelId"]?.try &.as_s || author_id_fallback || ""

         author_thumbnail = i["thumbnail"]["thumbnails"]?.try &.as_a[0]?.try { |u| "https:#{u["url"]}" } || ""
         subscriber_count = i["subscriberCountText"]?.try &.["simpleText"]?.try &.as_s.try { |s| short_text_to_number(s.split(" ")[0]) } || 0
|
||||||
video_count = i["videoCount"]?.try &.as_s.to_i || 0
|
video_count = i["videoCount"]?.try &.as_s.to_i || 0
|
||||||
playlist_thumbnail = i["thumbnails"].as_a[0]?.try &.["thumbnails"]?.try &.as_a[0]?.try &.["url"].as_s || ""
|
playlist_thumbnail = i["thumbnails"].as_a[0]?.try &.["thumbnails"]?.try &.as_a[0]?.try &.["url"].as_s || ""
|
||||||
|
|
||||||
author_info = i["shortBylineText"]["runs"].as_a[0]?
|
author_info = i["shortBylineText"]?.try &.["runs"].as_a[0]?
|
||||||
author = author_info.try &.["text"].as_s || ""
|
author = author_info.try &.["text"].as_s || author_fallback || ""
|
||||||
author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || ""
|
author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_id_fallback || ""
|
||||||
|
|
||||||
videos = i["videos"]?.try &.as_a.map do |v|
|
videos = i["videos"]?.try &.as_a.map do |v|
|
||||||
v = v["childVideoRenderer"]
|
v = v["childVideoRenderer"]
|
||||||
|
@@ -444,297 +442,11 @@ def extract_items(initial_data : Hash(String, JSON::Any))
       elsif i = item["horizontalCardListRenderer"]?
       elsif i = item["searchPyvRenderer"]? # Ad
       end
-      }
-    }
+    } }

   items
 end

-def extract_videos_html(nodeset, ucid = nil, author_name = nil)
-  extract_items_html(nodeset, ucid, author_name).select(&.is_a?(SearchVideo)).map(&.as(SearchVideo))
-end
-
-def extract_items_html(nodeset, ucid = nil, author_name = nil)
-  # TODO: Make this a 'CommonItem', so it makes more sense to be used here
-  items = [] of SearchItem
-
-  nodeset.each do |node|
-    anchor = node.xpath_node(%q(.//h3[contains(@class, "yt-lockup-title")]/a))
-    if !anchor
-      next
-    end
-    title = anchor.content.strip
-    id = anchor["href"]
-
-    if anchor["href"].starts_with? "https://www.googleadservices.com"
-      next
-    end
-
-    author_id = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-byline")]/a)).try &.["href"].split("/")[-1] || ucid || ""
-    author = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-byline")]/a)).try &.content.strip || author_name || ""
-    description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")])).try &.to_s || ""
-
-    tile = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-tile")]))
-    if !tile
-      next
-    end
-
-    case tile["class"]
-    when .includes? "yt-lockup-playlist"
-      plid = HTTP::Params.parse(URI.parse(id).query.not_nil!)["list"]
-
-      anchor = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-meta")]/a))
-
-      if !anchor
-        anchor = node.xpath_node(%q(.//ul[@class="yt-lockup-meta-info"]/li/a))
-      end
-
-      video_count = node.xpath_node(%q(.//span[@class="formatted-video-count-label"]/b)) ||
-                    node.xpath_node(%q(.//span[@class="formatted-video-count-label"]))
-      if video_count
-        video_count = video_count.content
-
-        if video_count == "50+"
-          author = "YouTube"
-          author_id = "UC-9-kyTW8ZkZNDHQJ6FgpwQ"
-        end
-
-        video_count = video_count.gsub(/\D/, "").to_i?
-      end
-      video_count ||= 0
-
-      videos = [] of SearchPlaylistVideo
-      node.xpath_nodes(%q(.//*[contains(@class, "yt-lockup-playlist-items")]/li)).each do |video|
-        anchor = video.xpath_node(%q(.//a))
-        if anchor
-          video_title = anchor.content.strip
-          id = HTTP::Params.parse(URI.parse(anchor["href"]).query.not_nil!)["v"]
-        end
-        video_title ||= ""
-        id ||= ""
-
-        anchor = video.xpath_node(%q(.//span/span))
-        if anchor
-          length_seconds = decode_length_seconds(anchor.content)
-        end
-        length_seconds ||= 0
-
-        videos << SearchPlaylistVideo.new(
-          video_title,
-          id,
-          length_seconds
-        )
-      end
-
-      playlist_thumbnail = node.xpath_node(%q(.//span/img)).try &.["data-thumb"]?
-      playlist_thumbnail ||= node.xpath_node(%q(.//span/img)).try &.["src"]
-
-      items << SearchPlaylist.new(
-        title: title,
-        id: plid,
-        author: author,
-        ucid: author_id,
-        video_count: video_count,
-        videos: videos,
-        thumbnail: playlist_thumbnail
-      )
-    when .includes? "yt-lockup-channel"
-      author = title.strip
-
-      ucid = node.xpath_node(%q(.//button[contains(@class, "yt-uix-subscription-button")])).try &.["data-channel-external-id"]?
-      ucid ||= id.split("/")[-1]
-
-      author_thumbnail = node.xpath_node(%q(.//div/span/img)).try &.["data-thumb"]?
-      author_thumbnail ||= node.xpath_node(%q(.//div/span/img)).try &.["src"]
-      if author_thumbnail
-        author_thumbnail = URI.parse(author_thumbnail)
-        author_thumbnail.scheme = "https"
-        author_thumbnail = author_thumbnail.to_s
-      end
-
-      author_thumbnail ||= ""
-
-      subscriber_count = node.xpath_node(%q(.//span[contains(@class, "subscriber-count")]))
-        .try &.["title"].try { |text| short_text_to_number(text) } || 0
-
-      video_count = node.xpath_node(%q(.//ul[@class="yt-lockup-meta-info"]/li)).try &.content.split(" ")[0].gsub(/\D/, "").to_i?
-
-      items << SearchChannel.new(
-        author: author,
-        ucid: ucid,
-        author_thumbnail: author_thumbnail,
-        subscriber_count: subscriber_count,
-        video_count: video_count || 0,
-        description_html: description_html,
-        auto_generated: video_count ? false : true,
-      )
-    else
-      id = id.lchop("/watch?v=")
-
-      metadata = node.xpath_node(%q(.//div[contains(@class,"yt-lockup-meta")]/ul))
-
-      published = metadata.try &.xpath_node(%q(.//li[contains(text(), " ago")])).try { |node| decode_date(node.content.sub(/^[a-zA-Z]+ /, "")) }
-      published ||= metadata.try &.xpath_node(%q(.//span[@data-timestamp])).try { |node| Time.unix(node["data-timestamp"].to_i64) }
-      published ||= Time.utc
-
-      view_count = metadata.try &.xpath_node(%q(.//li[contains(text(), " views")])).try &.content.gsub(/\D/, "").to_i64?
-      view_count ||= 0_i64
-
-      length_seconds = node.xpath_node(%q(.//span[@class="video-time"])).try { |node| decode_length_seconds(node.content) }
-      length_seconds ||= -1
-
-      live_now = node.xpath_node(%q(.//span[contains(@class, "yt-badge-live")])) ? true : false
-      premium = node.xpath_node(%q(.//span[text()="Premium"])) ? true : false
-
-      if !premium || node.xpath_node(%q(.//span[contains(text(), "Free episode")]))
-        paid = false
-      else
-        paid = true
-      end
-
-      premiere_timestamp = node.xpath_node(%q(.//ul[@class="yt-lockup-meta-info"]/li/span[@class="localized-date"])).try &.["data-timestamp"]?.try &.to_i64?
-      if premiere_timestamp
-        premiere_timestamp = Time.unix(premiere_timestamp)
-      end
-
-      items << SearchVideo.new(
-        title: title,
-        id: id,
-        author: author,
-        ucid: author_id,
-        published: published,
-        views: view_count,
-        description_html: description_html,
-        length_seconds: length_seconds,
-        live_now: live_now,
-        paid: paid,
-        premium: premium,
-        premiere_timestamp: premiere_timestamp
-      )
-    end
-  end
-
-  return items
-end
-
-def extract_shelf_items(nodeset, ucid = nil, author_name = nil)
-  items = [] of SearchPlaylist
-
-  nodeset.each do |shelf|
-    shelf_anchor = shelf.xpath_node(%q(.//h2[contains(@class, "branded-page-module-title")]))
-    next if !shelf_anchor
-
-    title = shelf_anchor.xpath_node(%q(.//span[contains(@class, "branded-page-module-title-text")])).try &.content.strip
-    title ||= ""
-
-    id = shelf_anchor.xpath_node(%q(.//a)).try &.["href"]
-    next if !id
-
-    shelf_is_playlist = false
-    videos = [] of SearchPlaylistVideo
-
-    shelf.xpath_nodes(%q(.//ul[contains(@class, "yt-uix-shelfslider-list") or contains(@class, "expanded-shelf-content-list")]/li)).each do |child_node|
-      type = child_node.xpath_node(%q(./div))
-      next if !type
-
-      case type["class"]
-      when .includes? "yt-lockup-video"
-        shelf_is_playlist = true
-
-        anchor = child_node.xpath_node(%q(.//h3[contains(@class, "yt-lockup-title")]/a))
-        if anchor
-          video_title = anchor.content.strip
-          video_id = HTTP::Params.parse(URI.parse(anchor["href"]).query.not_nil!)["v"]
-        end
-        video_title ||= ""
-        video_id ||= ""
-
-        anchor = child_node.xpath_node(%q(.//span[@class="video-time"]))
-        if anchor
-          length_seconds = decode_length_seconds(anchor.content)
-        end
-        length_seconds ||= 0
-
-        videos << SearchPlaylistVideo.new(
-          video_title,
-          video_id,
-          length_seconds
-        )
-      when .includes? "yt-lockup-playlist"
-        anchor = child_node.xpath_node(%q(.//h3[contains(@class, "yt-lockup-title")]/a))
-        if anchor
-          playlist_title = anchor.content.strip
-          params = HTTP::Params.parse(URI.parse(anchor["href"]).query.not_nil!)
-          plid = params["list"]
-        end
-        playlist_title ||= ""
-        plid ||= ""
-
-        playlist_thumbnail = child_node.xpath_node(%q(.//span/img)).try &.["data-thumb"]?
-        playlist_thumbnail ||= child_node.xpath_node(%q(.//span/img)).try &.["src"]
-
-        video_count = child_node.xpath_node(%q(.//span[@class="formatted-video-count-label"]/b)) ||
-                      child_node.xpath_node(%q(.//span[@class="formatted-video-count-label"]))
-        if video_count
-          video_count = video_count.content.gsub(/\D/, "").to_i?
-        end
-        video_count ||= 50
-
-        videos = [] of SearchPlaylistVideo
-        child_node.xpath_nodes(%q(.//*[contains(@class, "yt-lockup-playlist-items")]/li)).each do |video|
-          anchor = video.xpath_node(%q(.//a))
-          if anchor
-            video_title = anchor.content.strip
-            id = HTTP::Params.parse(URI.parse(anchor["href"]).query.not_nil!)["v"]
-          end
-          video_title ||= ""
-          id ||= ""
-
-          anchor = video.xpath_node(%q(.//span/span))
-          if anchor
-            length_seconds = decode_length_seconds(anchor.content)
-          end
-          length_seconds ||= 0
-
-          videos << SearchPlaylistVideo.new(
-            video_title,
-            id,
-            length_seconds
-          )
-        end
-
-        items << SearchPlaylist.new(
-          title: playlist_title,
-          id: plid,
-          author: author_name,
-          ucid: ucid,
-          video_count: video_count,
-          videos: videos,
-          thumbnail: playlist_thumbnail
-        )
-      else
-        next # Skip
-      end
-    end
-
-    if shelf_is_playlist
-      plid = HTTP::Params.parse(URI.parse(id).query.not_nil!)["list"]
-
-      items << SearchPlaylist.new(
-        title: title,
-        id: plid,
-        author: author_name,
-        ucid: ucid,
-        video_count: videos.size,
-        videos: videos,
-        thumbnail: "https://i.ytimg.com/vi/#{videos[0].id}/mqdefault.jpg"
-      )
-    end
-  end
-
-  return items
-end

 def check_enum(db, logger, enum_name, struct_type = nil)
   return # TODO
   if !db.query_one?("SELECT true FROM pg_type WHERE typname = $1", enum_name, as: Bool)
@@ -243,7 +243,8 @@ def channel_search(query, page, channel)
   response = YT_POOL.client &.get(url)
   initial_data = JSON.parse(response.body).as_a.find &.["response"]?
   return 0, [] of SearchItem if !initial_data
-  items = extract_items(initial_data.as_h)
+  author = initial_data["response"]?.try &.["metadata"]?.try &.["channelMetadataRenderer"]?.try &.["title"]?.try &.as_s
+  items = extract_items(initial_data.as_h, author, ucid)

   return items.size, items
 end
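Note: channel_search now feeds the channel's own title and ucid into extract_items, so results whose renderers omit a byline still get attributed through the fallback chain. The chain in isolation (values invented for illustration):

    author_from_renderer : String? = nil # what the renderer supplied, if anything
    author_fallback = "Example Channel"  # caller-supplied channel title
    author = author_from_renderer || author_fallback || ""
    puts author # => Example Channel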
@@ -286,7 +286,7 @@ end
 # headers = HTTP::Headers.new
 # headers["Cookie"] = env_headers["Cookie"]
 #
-# html = YT_POOL.client &.get("/view_all_playlists?disable_polymer=1", headers)
+# html = YT_POOL.client &.get("/view_all_playlists", headers)
 #
 # cookies = HTTP::Cookies.from_headers(headers)
 # html.cookies.each do |cookie|