Add table cleaning job (#3294)

This commit is contained in:
Samantaz Fox 2022-10-12 10:06:36 +02:00 committed by GitHub
parent 6707368f19
commit 3b39b8c772
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 146 additions and 9 deletions

View file

@ -304,10 +304,8 @@ https_only: false
## Number of threads to use when crawling channel videos (during ## Number of threads to use when crawling channel videos (during
## subscriptions update). ## subscriptions update).
## ##
## Notes: ## Notes: This setting is overridden if either "-c THREADS" or
## - Setting this to 0 will disable the channel videos crawl job. ## "--channel-threads=THREADS" is passed on the command line.
## - This setting is overridden if "-c THREADS" or
## "--channel-threads=THREADS" are passed on the command line.
## ##
## Accepted values: a positive integer ## Accepted values: a positive integer
## Default: 1 ## Default: 1
@ -335,10 +333,8 @@ full_refresh: false
## ##
## Number of threads to use when updating RSS feeds. ## Number of threads to use when updating RSS feeds.
## ##
## Notes: ## Notes: This setting is overridden if either "-f THREADS" or
## - Setting this to 0 will disable the channel videos crawl job. ## "--feed-threads=THREADS" is passed on the command line.
## - This setting is overridden if "-f THREADS" or
## "--feed-threads=THREADS" are passed on the command line.
## ##
## Accepted values: a positive integer ## Accepted values: a positive integer
## Default: 1 ## Default: 1
@ -361,6 +357,39 @@ feed_threads: 1
#decrypt_polling: false #decrypt_polling: false
jobs:
## Options for the database cleaning job
clear_expired_items:
## Enable/Disable job
##
## Accepted values: true, false
## Default: true
##
enable: true
## Options for the channels updater job
refresh_channels:
## Enable/Disable job
##
## Accepted values: true, false
## Default: true
##
enable: true
## Options for the RSS feeds updater job
refresh_feeds:
## Enable/Disable job
##
## Accepted values: true, false
## Default: true
##
enable: true
# ----------------------------- # -----------------------------
# Captcha API # Captcha API
# ----------------------------- # -----------------------------

View file

@ -172,6 +172,8 @@ end
CONNECTION_CHANNEL = Channel({Bool, Channel(PQ::Notification)}).new(32) CONNECTION_CHANNEL = Channel({Bool, Channel(PQ::Notification)}).new(32)
Invidious::Jobs.register Invidious::Jobs::NotificationJob.new(CONNECTION_CHANNEL, CONFIG.database_url) Invidious::Jobs.register Invidious::Jobs::NotificationJob.new(CONNECTION_CHANNEL, CONFIG.database_url)
Invidious::Jobs.register Invidious::Jobs::ClearExpiredItemsJob.new
Invidious::Jobs.start_all Invidious::Jobs.start_all
def popular_videos def popular_videos

View file

@ -78,6 +78,10 @@ class Config
property decrypt_polling : Bool = false property decrypt_polling : Bool = false
# Used for crawling channels: threads should check all videos uploaded by a channel # Used for crawling channels: threads should check all videos uploaded by a channel
property full_refresh : Bool = false property full_refresh : Bool = false
# Jobs config structure. See jobs.cr and jobs/base_job.cr
property jobs = Invidious::Jobs::JobsConfig.new
# Used to tell Invidious it is behind a proxy, so links to resources should be https:// # Used to tell Invidious it is behind a proxy, so links to resources should be https://
property https_only : Bool? property https_only : Bool?
# HMAC signing key for CSRF tokens and verifying pubsub subscriptions # HMAC signing key for CSRF tokens and verifying pubsub subscriptions

View file

@ -4,7 +4,7 @@ module Invidious::Database::Nonces
extend self extend self
# ------------------- # -------------------
# Insert # Insert / Delete
# ------------------- # -------------------
def insert(nonce : String, expire : Time) def insert(nonce : String, expire : Time)
@ -17,6 +17,15 @@ module Invidious::Database::Nonces
PG_DB.exec(request, nonce, expire) PG_DB.exec(request, nonce, expire)
end end
# Purge every row of the `nonces` table whose `expire` timestamp is
# already in the past (compared against the database's `now()`).
def delete_expired
  PG_DB.exec(<<-SQL)
    DELETE FROM nonces *
    WHERE expire < now()
  SQL
end
# ------------------- # -------------------
# Update # Update
# ------------------- # -------------------

View file

@ -22,6 +22,15 @@ module Invidious::Database::Videos
PG_DB.exec(request, id) PG_DB.exec(request, id)
end end
# Purge every row of the `videos` table whose `updated` timestamp is
# more than six hours older than the database's `now()`.
def delete_expired
  PG_DB.exec(<<-SQL)
    DELETE FROM videos *
    WHERE updated < (now() - interval '6 hours')
  SQL
end
def update(video : Video) def update(video : Video)
request = <<-SQL request = <<-SQL
UPDATE videos UPDATE videos

View file

@ -1,12 +1,39 @@
module Invidious::Jobs module Invidious::Jobs
JOBS = [] of BaseJob JOBS = [] of BaseJob
# Automatically generate a structure that wraps the various
# jobs' configs, so that the following YAML config can be used:
#
#   jobs:
#     job_name:
#       enable: true
#       some_property: "value"
#
# The struct gets one getter per `BaseJob` subclass, each defaulting to
# that subclass' own `Config.new`.
macro finished
struct JobsConfig
include YAML::Serializable
{% for sc in BaseJob.subclasses %}
# Derive the YAML key from the class name: keep the last `::` segment, strip the trailing `Job`, then underscore it (e.g. `Some::Module::CustomJob` becomes `custom`)
{% class_name = sc.id.split("::").last.id.gsub(/Job$/, "").underscore %}
getter {{ class_name }} = {{ sc.name }}::Config.new
{% end %}
# Argument-less constructor, so the struct can be built with all defaults
def initialize
end
end
end
def self.register(job : BaseJob) def self.register(job : BaseJob)
JOBS << job JOBS << job
end end
def self.start_all def self.start_all
JOBS.each do |job| JOBS.each do |job|
# Don't run the main routine if the job is disabled by config
next if job.disabled?
spawn { job.begin } spawn { job.begin }
end end
end end

View file

@ -1,3 +1,33 @@
abstract class Invidious::Jobs::BaseJob abstract class Invidious::Jobs::BaseJob
abstract def begin abstract def begin
# Hook expanded for every class that inherits from this base job.
# It gives the subclass its own basic "Config" structure (holding the
# "enable" property), a `cfg` instance property initialised with that
# structure's defaults, and two predicates reading the "enable" flag.
#
# NOTE(review): nothing in this macro copies values from the parsed
# application config into `cfg` -- confirm where that wiring happens.
macro inherited
  macro finished
    # Per-job settings; more properties can be added as required.
    struct Config
      include YAML::Serializable

      property enable = true

      def initialize
      end
    end

    property cfg = Config.new

    # True when the config allows this job to run
    protected def enabled? : Bool
      @cfg.enable
    end

    # True when the config prevents this job from running
    protected def disabled? : Bool
      !@cfg.enable
    end
  end
end
end end

View file

@ -0,0 +1,27 @@
class Invidious::Jobs::ClearExpiredItemsJob < Invidious::Jobs::BaseJob
  # Periodically remove items (videos, nonces) whose cached data is
  # outdated, which removes the need for an external cron job.
  #
  # Runs forever: after a successful pass the job sleeps for one hour;
  # when the database raises a `DB::Error` it retries after ten minutes.
  def begin
    loop do
      LOGGER.info("jobs: running ClearExpiredItems job")

      begin
        Invidious::Database::Videos.delete_expired
        Invidious::Database::Nonces.delete_expired
      rescue ex : DB::Error
        # Keep the job alive, but record why the cleanup failed instead
        # of discarding the exception.
        LOGGER.error("jobs: ClearExpiredItems: #{ex.message}")

        # Retry earlier than scheduled on DB error
        LOGGER.info("jobs: ClearExpiredItems failed. Retrying in 10 minutes.")
        sleep 10.minutes
        next
      end

      LOGGER.info("jobs: ClearExpiredItems done.")
      sleep 1.hour
    end
  end
end