Refactor search extractor
This commit is contained in:
parent
e768e1e277
commit
9f9cc1ffb5
3 changed files with 48 additions and 59 deletions
|
@ -5167,7 +5167,7 @@ get "/vi/:id/:name" do |env|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Undocumented, creates anonymous playlist with specified 'video_ids'
|
# Undocumented, creates anonymous playlist with specified 'video_ids', max 50 videos
|
||||||
get "/watch_videos" do |env|
|
get "/watch_videos" do |env|
|
||||||
client = make_client(YT_URL)
|
client = make_client(YT_URL)
|
||||||
|
|
||||||
|
|
|
@ -387,14 +387,15 @@ def fetch_channel_playlists(ucid, author, auto_generated, continuation, sort_by)
|
||||||
|
|
||||||
html = XML.parse_html(json["content_html"].as_s)
|
html = XML.parse_html(json["content_html"].as_s)
|
||||||
nodeset = html.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))
|
nodeset = html.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))
|
||||||
else
|
elsif auto_generated
|
||||||
url = "/channel/#{ucid}/playlists?disable_polymer=1&flow=list"
|
url = "/channel/#{ucid}"
|
||||||
|
|
||||||
if auto_generated
|
response = client.get(url)
|
||||||
url += "&view=50"
|
html = XML.parse_html(response.body)
|
||||||
else
|
|
||||||
url += "&view=1"
|
nodeset = html.xpath_nodes(%q(//ul[@id="browse-items-primary"]/li[contains(@class, "feed-item-container")]))
|
||||||
end
|
else
|
||||||
|
url = "/channel/#{ucid}/playlists?disable_polymer=1&flow=list&view=1"
|
||||||
|
|
||||||
case sort_by
|
case sort_by
|
||||||
when "last", "last_added"
|
when "last", "last_added"
|
||||||
|
|
|
@ -442,47 +442,20 @@ def extract_items(nodeset, ucid = nil, author_name = nil)
|
||||||
else
|
else
|
||||||
id = id.lchop("/watch?v=")
|
id = id.lchop("/watch?v=")
|
||||||
|
|
||||||
metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li))
|
metadata = node.xpath_node(%q(.//div[contains(@class,"yt-lockup-meta")]/ul))
|
||||||
|
|
||||||
begin
|
published = metadata.try &.xpath_node(%q(.//li[contains(text(), " ago")])).try { |node| decode_date(node.content.sub(/^[a-zA-Z]+ /, "")) }
|
||||||
published = decode_date(metadata[0].content.lchop("Streamed ").lchop("Starts "))
|
published ||= metadata.try &.xpath_node(%q(.//span[@data-timestamp])).try { |node| Time.unix(node["data-timestamp"].to_i64) }
|
||||||
rescue ex
|
|
||||||
end
|
|
||||||
begin
|
|
||||||
published ||= Time.unix(metadata[0].xpath_node(%q(.//span)).not_nil!["data-timestamp"].to_i64)
|
|
||||||
rescue ex
|
|
||||||
end
|
|
||||||
published ||= Time.utc
|
published ||= Time.utc
|
||||||
|
|
||||||
begin
|
view_count = metadata.try &.xpath_node(%q(.//li[contains(text(), " views")])).try &.content.gsub(/\D/, "").to_i64?
|
||||||
view_count = metadata[0].content.rchop(" watching").delete(",").try &.to_i64?
|
|
||||||
rescue ex
|
|
||||||
end
|
|
||||||
begin
|
|
||||||
view_count ||= metadata.try &.[1].content.delete("No views,").try &.to_i64?
|
|
||||||
rescue ex
|
|
||||||
end
|
|
||||||
view_count ||= 0_i64
|
view_count ||= 0_i64
|
||||||
|
|
||||||
length_seconds = node.xpath_node(%q(.//span[@class="video-time"]))
|
length_seconds = node.xpath_node(%q(.//span[@class="video-time"])).try { |node| decode_length_seconds(node.content) }
|
||||||
if length_seconds
|
length_seconds ||= -1
|
||||||
length_seconds = decode_length_seconds(length_seconds.content)
|
|
||||||
else
|
|
||||||
length_seconds = -1
|
|
||||||
end
|
|
||||||
|
|
||||||
live_now = node.xpath_node(%q(.//span[contains(@class, "yt-badge-live")]))
|
live_now = node.xpath_node(%q(.//span[contains(@class, "yt-badge-live")])) ? true : false
|
||||||
if live_now
|
premium = node.xpath_node(%q(.//span[text()="Premium"])) ? true : false
|
||||||
live_now = true
|
|
||||||
else
|
|
||||||
live_now = false
|
|
||||||
end
|
|
||||||
|
|
||||||
if node.xpath_node(%q(.//span[text()="Premium"]))
|
|
||||||
premium = true
|
|
||||||
else
|
|
||||||
premium = false
|
|
||||||
end
|
|
||||||
|
|
||||||
if !premium || node.xpath_node(%q(.//span[contains(text(), "Free episode")]))
|
if !premium || node.xpath_node(%q(.//span[contains(text(), "Free episode")]))
|
||||||
paid = false
|
paid = false
|
||||||
|
@ -520,26 +493,18 @@ def extract_shelf_items(nodeset, ucid = nil, author_name = nil)
|
||||||
|
|
||||||
nodeset.each do |shelf|
|
nodeset.each do |shelf|
|
||||||
shelf_anchor = shelf.xpath_node(%q(.//h2[contains(@class, "branded-page-module-title")]))
|
shelf_anchor = shelf.xpath_node(%q(.//h2[contains(@class, "branded-page-module-title")]))
|
||||||
|
next if !shelf_anchor
|
||||||
|
|
||||||
if !shelf_anchor
|
title = shelf_anchor.xpath_node(%q(.//span[contains(@class, "branded-page-module-title-text")])).try &.content.strip
|
||||||
next
|
|
||||||
end
|
|
||||||
|
|
||||||
title = shelf_anchor.xpath_node(%q(.//span[contains(@class, "branded-page-module-title-text")]))
|
|
||||||
if title
|
|
||||||
title = title.content.strip
|
|
||||||
end
|
|
||||||
title ||= ""
|
title ||= ""
|
||||||
|
|
||||||
id = shelf_anchor.xpath_node(%q(.//a)).try &.["href"]
|
id = shelf_anchor.xpath_node(%q(.//a)).try &.["href"]
|
||||||
if !id
|
next if !id
|
||||||
next
|
|
||||||
end
|
|
||||||
|
|
||||||
is_playlist = false
|
shelf_is_playlist = false
|
||||||
videos = [] of SearchPlaylistVideo
|
videos = [] of SearchPlaylistVideo
|
||||||
|
|
||||||
shelf.xpath_nodes(%q(.//ul[contains(@class, "yt-uix-shelfslider-list")]/li)).each do |child_node|
|
shelf.xpath_nodes(%q(.//ul[contains(@class, "yt-uix-shelfslider-list") or contains(@class, "expanded-shelf-content-list")]/li)).each do |child_node|
|
||||||
type = child_node.xpath_node(%q(./div))
|
type = child_node.xpath_node(%q(./div))
|
||||||
if !type
|
if !type
|
||||||
next
|
next
|
||||||
|
@ -547,7 +512,7 @@ def extract_shelf_items(nodeset, ucid = nil, author_name = nil)
|
||||||
|
|
||||||
case type["class"]
|
case type["class"]
|
||||||
when .includes? "yt-lockup-video"
|
when .includes? "yt-lockup-video"
|
||||||
is_playlist = true
|
shelf_is_playlist = true
|
||||||
|
|
||||||
anchor = child_node.xpath_node(%q(.//h3[contains(@class, "yt-lockup-title")]/a))
|
anchor = child_node.xpath_node(%q(.//h3[contains(@class, "yt-lockup-title")]/a))
|
||||||
if anchor
|
if anchor
|
||||||
|
@ -588,19 +553,42 @@ def extract_shelf_items(nodeset, ucid = nil, author_name = nil)
|
||||||
end
|
end
|
||||||
video_count ||= 50
|
video_count ||= 50
|
||||||
|
|
||||||
|
videos = [] of SearchPlaylistVideo
|
||||||
|
child_node.xpath_nodes(%q(.//*[contains(@class, "yt-lockup-playlist-items")]/li)).each do |video|
|
||||||
|
anchor = video.xpath_node(%q(.//a))
|
||||||
|
if anchor
|
||||||
|
video_title = anchor.content.strip
|
||||||
|
id = HTTP::Params.parse(URI.parse(anchor["href"]).query.not_nil!)["v"]
|
||||||
|
end
|
||||||
|
video_title ||= ""
|
||||||
|
id ||= ""
|
||||||
|
|
||||||
|
anchor = video.xpath_node(%q(.//span/span))
|
||||||
|
if anchor
|
||||||
|
length_seconds = decode_length_seconds(anchor.content)
|
||||||
|
end
|
||||||
|
length_seconds ||= 0
|
||||||
|
|
||||||
|
videos << SearchPlaylistVideo.new(
|
||||||
|
video_title,
|
||||||
|
id,
|
||||||
|
length_seconds
|
||||||
|
)
|
||||||
|
end
|
||||||
|
|
||||||
items << SearchPlaylist.new(
|
items << SearchPlaylist.new(
|
||||||
playlist_title,
|
playlist_title,
|
||||||
plid,
|
plid,
|
||||||
author_name,
|
author_name,
|
||||||
ucid,
|
ucid,
|
||||||
video_count,
|
video_count,
|
||||||
Array(SearchPlaylistVideo).new,
|
videos,
|
||||||
playlist_thumbnail
|
playlist_thumbnail
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
if is_playlist
|
if shelf_is_playlist
|
||||||
plid = HTTP::Params.parse(URI.parse(id).query.not_nil!)["list"]
|
plid = HTTP::Params.parse(URI.parse(id).query.not_nil!)["list"]
|
||||||
|
|
||||||
items << SearchPlaylist.new(
|
items << SearchPlaylist.new(
|
||||||
|
|
Loading…
Reference in a new issue