Add a youtube URL sanitizer
This commit is contained in:
parent
e319c35f09
commit
eb0f651812
1 changed files with 121 additions and 0 deletions
121
src/invidious/yt_backend/url_sanitizer.cr
Normal file
121
src/invidious/yt_backend/url_sanitizer.cr
Normal file
|
@ -0,0 +1,121 @@
|
|||
require "uri"
|
||||
|
||||
# Turns user-supplied YouTube URLs into host-less, relative URIs that contain
# only path components and query parameters from a known whitelist.
module UrlSanitizer
  extend self

  # Query parameters that are copied over to the sanitized URI, grouped by
  # the kind of page (as returned by `determine_allowed`). Any parameter
  # not listed here is dropped.
  ALLOWED_QUERY_PARAMS = {
    channel:  ["u", "user", "lb"],
    playlist: ["list"],
    search:   ["q", "search_query", "sp"],
    watch:    [
      "v",                                  # Video ID
      "list", "index",                      # Playlist-related
      "playlist",                           # Unnamed playlist (id,id,id,...) (embed-only?)
      "t", "time_continue", "start", "end", # Timestamp
      "lc",                                 # Highlighted comment (watch page only)
    ],
  }

  # Returns whether the given string is an ASCII word. This is the same as
  # running the following regex in US-ASCII locale: /^[\w-]+$/
  #
  # The empty string is NOT a word (the regex requires at least one character).
  private def ascii_word?(str : String) : Bool
    return false if str.empty?

    # A multi-byte (non-ASCII) character makes the byte count exceed the
    # character count, so such strings can be rejected without scanning them.
    return false if str.bytesize != str.size

    str.each_byte do |byte|
      next if 'a'.ord <= byte <= 'z'.ord
      next if 'A'.ord <= byte <= 'Z'.ord
      next if '0'.ord <= byte <= '9'.ord
      next if byte == '-'.ord || byte == '_'.ord

      return false
    end

    true
  end

  # Returns which kind of parameters are allowed (a key of
  # `ALLOWED_QUERY_PARAMS`) based on the first path component
  # (breadcrumb 0), or `nil` if no query parameter should be kept.
  private def determine_allowed(path_root : String) : Symbol?
    case path_root
    when "watch", "w", "v", "embed", "e", "shorts", "clip"
      :watch
    when .starts_with?("@"), "c", "channel", "user", "profile", "attribution_link"
      :channel
    when "playlist", "mix"
      :playlist
    when "results", "search"
      :search
    else # hashtag, post, trending, brand URLs, etc..
      nil
    end
  end

  # Creates a new URI::Params containing only the parameters whitelisted
  # for *allowed_type* (a key of `ALLOWED_QUERY_PARAMS`).
  private def copy_params(unsafe_params : URI::Params, allowed_type) : URI::Params
    new_params = URI::Params.new

    ALLOWED_QUERY_PARAMS[allowed_type].each do |name|
      # Only copy the last value, in case the parameter is given more than once
      if value = unsafe_params.fetch_all(name).last?
        new_params[name] = value
      end
    end

    new_params
  end

  # Transforms any user-supplied youtube URL into something we can trust
  # and use across the code.
  #
  # The returned URI never has a host. It is "/" (the homepage) when the
  # input has no host, when its host is neither youtube.com (or one of its
  # subdomains) nor youtu.be, or when no path component survives filtering.
  def process(str : String) : URI
    # Because URI follows RFC3986 specifications, URL without a scheme
    # will be parsed as a relative path. So we have to add a scheme ourselves.
    str = "https://#{str}" if !str.starts_with?(/https?:\/\//)

    unsafe_uri = URI.parse(str)
    new_uri = URI.new(path: "/")

    # Redirect to homepage for bogus URLs
    unsafe_host = unsafe_uri.host
    return new_uri if unsafe_host.nil? || unsafe_uri.path.nil?

    # Host names are case-insensitive. Require an exact match or a real
    # subdomain, so that e.g. "notyoutube.com" is not mistaken for youtube.
    host = unsafe_host.downcase
    youtube = host == "youtube.com" || host.ends_with?(".youtube.com")

    breadcrumbs = [] of String

    unsafe_uri.path.split('/', remove_empty: true).each do |bc|
      # Exclude attempts at path traversal
      next if bc == "." || bc == ".."

      # Channel handles ("@name") are the one legitimate case of a non-word
      # character, and only as the root component of a youtube.com path.
      word = (youtube && breadcrumbs.empty?) ? bc.lchop('@') : bc

      # Non-alnum characters are unlikely in a genuine URL
      next if !ascii_word?(word)

      breadcrumbs << bc
    end

    # If nothing remains, it's either a legit URL to the homepage
    # (who does that!?) or because we filtered some junk earlier.
    return new_uri if breadcrumbs.empty?

    # Replace the original query parameters with the sanitized ones
    if youtube
      # Use our sanitized path (not forgetting the leading '/')
      new_uri.path = "/#{breadcrumbs.join('/')}"

      # Then determine which params are allowed, and copy them over
      if allowed = determine_allowed(breadcrumbs[0])
        new_uri.query_params = copy_params(unsafe_uri.query_params, allowed)
      end
    elsif host == "youtu.be"
      # Always redirect to the watch page
      new_uri.path = "/watch"

      # The video ID is the first path component; the watch page reads it
      # from the "v" parameter (see ALLOWED_QUERY_PARAMS).
      new_params = copy_params(unsafe_uri.query_params, :watch)
      new_params["v"] = breadcrumbs[0]

      new_uri.query_params = new_params
    end

    new_uri.host = nil # Safety measure
    new_uri
  end
end
|
Loading…
Reference in a new issue