Fix crash in video crawler

This commit is contained in:
Omar Roth 2018-07-26 08:41:23 -05:00
parent 3f577650bc
commit 0fba601e91
2 changed files with 3 additions and 4 deletions

View file

@@ -86,6 +86,7 @@ crawl_threads.times do
end end
loop do loop do
client = make_client(YT_URL)
if ids.empty? if ids.empty?
search(random.base64(3), client) do |id| search(random.base64(3), client) do |id|
ids << id ids << id
@@ -97,7 +98,6 @@ crawl_threads.times do
video = get_video(id, client, PG_DB) video = get_video(id, client, PG_DB)
rescue ex rescue ex
STDOUT << id << " : " << ex.message << "\n" STDOUT << id << " : " << ex.message << "\n"
client = make_client(YT_URL)
next next
ensure ensure
ids.delete(id) ids.delete(id)

View file

@@ -307,9 +307,8 @@ def get_video(id, client, db, refresh = true)
return video return video
end end
def search(query, client) def search(query, client, &block)
html = client.get("https://www.youtube.com/results?q=#{query}&sp=EgIQAVAU&disable_polymer=1").body html = client.get("/results?q=#{query}&sp=EgIQAVAU&disable_polymer=1").body
html = XML.parse_html(html) html = XML.parse_html(html)
html.xpath_nodes(%q(//ol[@class="item-section"]/li)).each do |item| html.xpath_nodes(%q(//ol[@class="item-section"]/li)).each do |item|