api: rework url parsing
- TLDs are now parsed and validated correctly (e.g. ".co.uk" works now)
- URL patterns are pre-compiled instead of being compiled for every request
- aliases are computed in a safe manner using the URL object where possible
This commit is contained in:
parent
64790b9820
commit
dd563eb752
5 changed files with 116 additions and 81 deletions
|
@ -36,6 +36,7 @@
|
||||||
"hls-parser": "^0.10.7",
|
"hls-parser": "^0.10.7",
|
||||||
"nanoid": "^4.0.2",
|
"nanoid": "^4.0.2",
|
||||||
"node-cache": "^5.1.2",
|
"node-cache": "^5.1.2",
|
||||||
|
"psl": "^1.9.0",
|
||||||
"set-cookie-parser": "2.6.0",
|
"set-cookie-parser": "2.6.0",
|
||||||
"undici": "^5.19.1",
|
"undici": "^5.19.1",
|
||||||
"url-pattern": "1.0.3",
|
"url-pattern": "1.0.3",
|
||||||
|
|
|
@ -1,35 +1,32 @@
|
||||||
import UrlPattern from "url-pattern";
|
import { services } from "./config.js";
|
||||||
|
|
||||||
import { services as patterns } from "./config.js";
|
import { apiJSON } from "./sub/utils.js";
|
||||||
|
|
||||||
import { cleanURL, apiJSON } from "./sub/utils.js";
|
|
||||||
import { errorUnsupported } from "./sub/errors.js";
|
import { errorUnsupported } from "./sub/errors.js";
|
||||||
import loc from "../localization/manager.js";
|
import loc from "../localization/manager.js";
|
||||||
import match from "./processing/match.js";
|
import match from "./processing/match.js";
|
||||||
import hostOverrides from "./processing/hostOverrides.js";
|
import { hasValidHostname, normalizeURL } from "./processing/url.js";
|
||||||
|
|
||||||
export async function getJSON(originalURL, lang, obj) {
|
export async function getJSON(originalURL, lang, obj) {
|
||||||
try {
|
try {
|
||||||
let patternMatch, url = encodeURI(decodeURIComponent(originalURL)),
|
const url = normalizeURL(decodeURIComponent(originalURL));
|
||||||
hostname = new URL(url).hostname.split('.'),
|
|
||||||
host = hostname[hostname.length - 2];
|
|
||||||
|
|
||||||
if (!url.startsWith('https://')) return apiJSON(0, { t: errorUnsupported(lang) });
|
if (!hasValidHostname(url) || !services[host].enabled) {
|
||||||
|
return apiJSON(0, { t: errorUnsupported(lang) });
|
||||||
let overrides = hostOverrides(host, url);
|
|
||||||
host = overrides.host;
|
|
||||||
url = overrides.url;
|
|
||||||
|
|
||||||
if (!(host && host.length < 20 && host in patterns && patterns[host]["enabled"])) return apiJSON(0, { t: errorUnsupported(lang) });
|
|
||||||
|
|
||||||
let pathToMatch = cleanURL(url, host).split(`.${patterns[host]['tld'] ? patterns[host]['tld'] : "com"}/`)[1].replace('.', '');
|
|
||||||
for (let i in patterns[host]["patterns"]) {
|
|
||||||
patternMatch = new UrlPattern(patterns[host]["patterns"][i]).match(pathToMatch);
|
|
||||||
if (patternMatch) break
|
|
||||||
}
|
}
|
||||||
if (!patternMatch) return apiJSON(0, { t: errorUnsupported(lang) });
|
|
||||||
|
|
||||||
return await match(host, patternMatch, url, lang, obj)
|
let patternMatch;
|
||||||
|
for (const pattern of services[host].patterns) {
|
||||||
|
patternMatch = pattern.match(
|
||||||
|
url.pathname.substring(1) + url.search
|
||||||
|
);
|
||||||
|
if (patternMatch) break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!patternMatch) {
|
||||||
|
return apiJSON(0, { t: errorUnsupported(lang) });
|
||||||
|
}
|
||||||
|
|
||||||
|
return await match(host, patternMatch, url.toString(), lang, obj)
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
return apiJSON(0, { t: loc(lang, 'ErrorSomethingWentWrong') })
|
return apiJSON(0, { t: loc(lang, 'ErrorSomethingWentWrong') })
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,8 +1,17 @@
|
||||||
|
import UrlPattern from "url-pattern";
|
||||||
import { loadJSON } from "./sub/loadFromFs.js";
|
import { loadJSON } from "./sub/loadFromFs.js";
|
||||||
const config = loadJSON("./src/config.json");
|
const config = loadJSON("./src/config.json");
|
||||||
const packageJson = loadJSON("./package.json");
|
const packageJson = loadJSON("./package.json");
|
||||||
const servicesConfigJson = loadJSON("./src/modules/processing/servicesConfig.json");
|
const servicesConfigJson = loadJSON("./src/modules/processing/servicesConfig.json");
|
||||||
|
|
||||||
|
Object.values(servicesConfigJson.config).forEach(service => {
|
||||||
|
service.patterns = service.patterns.map(
|
||||||
|
pattern => new UrlPattern(pattern, {
|
||||||
|
segmentValueCharset: UrlPattern.defaultOptions.segmentValueCharset + '\\.'
|
||||||
|
})
|
||||||
|
)
|
||||||
|
})
|
||||||
|
|
||||||
export const
|
export const
|
||||||
services = servicesConfigJson.config,
|
services = servicesConfigJson.config,
|
||||||
audioIgnore = servicesConfigJson.audioIgnore,
|
audioIgnore = servicesConfigJson.audioIgnore,
|
||||||
|
|
|
@ -1,48 +1,102 @@
|
||||||
export default function (inHost, inURL) {
|
import { services } from "./config.js";
|
||||||
let host = String(inHost);
|
import { strict as assert } from "node:assert";
|
||||||
let url = String(inURL);
|
import psl from "psl";
|
||||||
|
|
||||||
switch(host) {
|
export function aliasURL(url) {
|
||||||
|
assert(url instanceof URL);
|
||||||
|
|
||||||
|
const host = psl.parse(url.hostname);
|
||||||
|
const parts = url.pathname.split('/');
|
||||||
|
|
||||||
|
switch (host.sld) {
|
||||||
case "youtube":
|
case "youtube":
|
||||||
if (url.startsWith("https://youtube.com/live/") || url.startsWith("https://www.youtube.com/live/")) {
|
if (url.pathname.startsWith('/live/') || url.pathname.startsWith('/shorts/')) {
|
||||||
url = url.split("?")[0].replace("www.", "");
|
url.pathname = '/watch';
|
||||||
url = `https://youtube.com/watch?v=${url.replace("https://youtube.com/live/", "")}`
|
// ['', 'live' || 'shorts', id, ...rest]
|
||||||
}
|
url.search = `?v=${encodeURIComponent(parts[2])}`
|
||||||
if (url.includes('youtube.com/shorts/')) {
|
|
||||||
url = url.split('?')[0].replace('shorts/', 'watch?v=');
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case "youtu":
|
case "youtu":
|
||||||
if (url.startsWith("https://youtu.be/")) {
|
if (url.hostname === 'youtu.be' && parts.length === 2) {
|
||||||
host = "youtube";
|
/* youtu.be urls can be weird, e.g. https://youtu.be/<id>//asdasd// still works
|
||||||
url = `https://youtube.com/watch?v=${url.replace("https://youtu.be/", "")}`
|
** but we only care about the 1st segment of the path */
|
||||||
|
url = new URL(`https://youtube.com/watch?v=${
|
||||||
|
encodeURIComponent(parts[1])
|
||||||
|
}`)
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case "vxtwitter":
|
case "vxtwitter":
|
||||||
case "x":
|
case "x":
|
||||||
if (url.startsWith("https://x.com/")) {
|
if (['x.com', 'vxtwitter.com'].includes(url.hostname)) {
|
||||||
host = "twitter";
|
url.hostname = 'twitter.com'
|
||||||
url = url.replace("https://x.com/", "https://twitter.com/")
|
|
||||||
}
|
|
||||||
if (url.startsWith("https://vxtwitter.com/")) {
|
|
||||||
host = "twitter";
|
|
||||||
url = url.replace("https://vxtwitter.com/", "https://twitter.com/")
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case "tumblr":
|
case "tumblr":
|
||||||
if (!url.includes("blog/view")) {
|
if (!url.pathname.includes("/blog/view")) {
|
||||||
if (url.slice(-1) === '/') url = url.slice(0, -1);
|
if (url.pathname.endsWith('/'))
|
||||||
url = url.replace(url.split('/')[5], '')
|
url.pathname = url.pathname.slice(0, -1);
|
||||||
|
url.pathname = url.pathname.replace(parts[5], '')
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case "twitch":
|
case "twitch":
|
||||||
if (url.includes('clips.twitch.tv')) {
|
if (url.hostname === 'clips.twitch.tv' && parts.length >= 2) {
|
||||||
url = url.split('?')[0].replace('clips.twitch.tv/', 'twitch.tv/_/clip/');
|
url = new URL(`https://twitch.tv/_/clip/${parts[1]}`);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
return {
|
|
||||||
host: host,
|
return { url, host: host.sld }
|
||||||
url: url
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export function cleanURL({ url, host }) {
|
||||||
|
assert(url instanceof URL);
|
||||||
|
let stripQuery = true;
|
||||||
|
|
||||||
|
if (host === 'pinterest') {
|
||||||
|
url.hostname = 'pinterest.com'
|
||||||
|
} else if (host === 'vk' && url.pathname.includes('/clip')) {
|
||||||
|
if (url.searchParams.get('z'))
|
||||||
|
url.search = '?z=' + encodeURIComponent(url.searchParams.get('z'));
|
||||||
|
stripQuery = false;
|
||||||
|
} else if (host === 'youtube' && url.searchParams.get('v')) {
|
||||||
|
url.search = '?v=' + encodeURIComponent(url.searchParams.get('v'));
|
||||||
|
stripQuery = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (stripQuery) {
|
||||||
|
url.search = url.hash = ''
|
||||||
|
}
|
||||||
|
|
||||||
|
if (url.pathname.endsWith('/'))
|
||||||
|
url.pathname = url.pathname.slice(0, -1);
|
||||||
|
|
||||||
|
return url
|
||||||
|
}
|
||||||
|
|
||||||
|
export function normalizeURL(url) {
|
||||||
|
return cleanURL(
|
||||||
|
aliasURL(
|
||||||
|
new URL(url.replace(/^https\/\//, 'https://'))
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
export function hasValidHostname(url) {
|
||||||
|
const host = psl.parse(url.hostname);
|
||||||
|
if (host.error) return false;
|
||||||
|
|
||||||
|
const service = services[host.sld];
|
||||||
|
if (!service) return false;
|
||||||
|
|
||||||
|
if ((service.tld ?? 'com') !== host.tld) return false;
|
||||||
|
|
||||||
|
const anySubdomainAllowed = service.subdomains === '*';
|
||||||
|
const validSubdomain = [null, 'www', ...(service.subdomains ?? [])].includes(host.subdomain);
|
||||||
|
if (!validSubdomain && !anySubdomainAllowed)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
|
@ -52,29 +52,7 @@ export function metadataManager(obj) {
|
||||||
for (let i in keys) { if (tags.includes(keys[i])) commands.push('-metadata', `${keys[i]}=${obj[keys[i]]}`) }
|
for (let i in keys) { if (tags.includes(keys[i])) commands.push('-metadata', `${keys[i]}=${obj[keys[i]]}`) }
|
||||||
return commands;
|
return commands;
|
||||||
}
|
}
|
||||||
export function cleanURL(url, host) {
|
|
||||||
switch (host) {
|
|
||||||
case "vk":
|
|
||||||
url = url.includes('clip') ? url.split('&')[0] : url.split('?')[0];
|
|
||||||
break;
|
|
||||||
case "youtube":
|
|
||||||
url = url.split('&')[0];
|
|
||||||
break;
|
|
||||||
case "tiktok":
|
|
||||||
url = url.replace(/@([a-zA-Z]+(\.[a-zA-Z]+)+)/, "@a")
|
|
||||||
case "pinterest":
|
|
||||||
url = url.replace(/:\/\/(?:www.)pinterest(?:\.[a-z.]+)/, "://pinterest.com")
|
|
||||||
default:
|
|
||||||
url = url.split('?')[0];
|
|
||||||
if (url.substring(url.length - 1) === "/") url = url.substring(0, url.length - 1);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
for (let i in forbiddenChars) {
|
|
||||||
url = url.replaceAll(forbiddenChars[i], '')
|
|
||||||
}
|
|
||||||
url = url.replace('https//', 'https://')
|
|
||||||
return url.slice(0, 128)
|
|
||||||
}
|
|
||||||
export function cleanString(string) {
|
export function cleanString(string) {
|
||||||
for (let i in forbiddenCharsString) {
|
for (let i in forbiddenCharsString) {
|
||||||
string = string.replaceAll("/", "_").replaceAll(forbiddenCharsString[i], '')
|
string = string.replaceAll("/", "_").replaceAll(forbiddenCharsString[i], '')
|
||||||
|
@ -121,13 +99,9 @@ export function checkJSONPost(obj) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (def.dubLang) def.dubLang = verifyLanguageCode(obj.dubLang);
|
if (def.dubLang)
|
||||||
|
def.dubLang = verifyLanguageCode(obj.dubLang);
|
||||||
obj["url"] = decodeURIComponent(String(obj["url"]));
|
def.url = obj.url;
|
||||||
let hostname = obj["url"].replace("https://", "").replace(' ', '').split('&')[0].split("/")[0].split("."),
|
|
||||||
host = hostname[hostname.length - 2];
|
|
||||||
def["url"] = encodeURIComponent(cleanURL(obj["url"], host));
|
|
||||||
|
|
||||||
return def
|
return def
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
return false
|
return false
|
||||||
|
|
Loading…
Reference in a new issue