tldraw/apps/dotcom-bookmark-extractor/lib/unfurl.ts
Mime Čuvalo 41c3b1e3df
bookmarks: account for relative urls more robustly (#4022)
Fixes up URLs that don't start with `/` or `http`.

### Change Type

<!--  Please select a 'Type' label ️ -->

- [ ] `feature` — New feature
- [x] `improvement` — Product improvement
- [ ] `api` — API change
- [ ] `bugfix` — Bug fix
- [ ] `other` — Changes that don't affect SDK users, e.g. internal or
.com changes

### Release Notes

- Bookmark extractor: account for relative urls more robustly
2024-06-26 11:36:57 +00:00

47 lines
1.4 KiB
TypeScript

import cheerio from 'cheerio'
/** Matches values that are already absolute http(s) URLs and therefore need no resolution. */
const ABSOLUTE_HTTP_URL = /^https?:\/\//i

/** Returns the first candidate that is a string with non-whitespace content, if there is one. */
function firstNonEmpty(...candidates: (string | undefined)[]): string | undefined {
	return candidates.find(
		(candidate): candidate is string => typeof candidate === 'string' && candidate.trim() !== ''
	)
}

/**
 * Turns a possibly-relative reference into an absolute URL string.
 *
 * Absolute http(s) URLs are returned untouched. Anything else (`/img.png`, `img.png`,
 * `//cdn.example.com/img.png`, ...) is resolved against `base`. A value that cannot be parsed
 * yields `undefined` so one malformed tag does not fail the whole unfurl.
 */
function resolveUrl(value: string | undefined, base: string): string | undefined {
	if (value === undefined || ABSOLUTE_HTTP_URL.test(value)) return value
	try {
		return new URL(value, base).href
	} catch {
		return undefined
	}
}

/**
 * Fetches a page and extracts bookmark metadata (title, description, image, favicon) from its
 * Open Graph tags, Twitter card tags and standard HTML elements, in that order of preference.
 *
 * @param url - The address of the page to fetch.
 * @returns The extracted fields; each one is `undefined` when the page does not provide it.
 *   `image` and `favicon` are absolute URLs.
 * @throws Error when the response status is 400 or above, or when the response is not
 *   `text/html`. Rejections from `fetch` itself are propagated unchanged.
 */
export async function unfurl(url: string): Promise<{
	title: string | undefined
	description: string | undefined
	image: string | undefined
	favicon: string | undefined
}> {
	const response = await fetch(url)
	if (response.status >= 400) {
		throw new Error(`Error fetching url: ${response.status}`)
	}

	const contentType = response.headers.get('content-type')
	// Media types are case-insensitive, so compare in lower case.
	if (!contentType?.toLowerCase().includes('text/html')) {
		throw new Error(`Content-type not right: ${contentType}`)
	}

	const $ = cheerio.load(await response.text())

	// Relative references are relative to the document that was actually served, which differs
	// from `url` when the request was redirected. Fall back to `url` if the final URL is unknown.
	const base = response.url || url

	// `og:` and `twitter:` keys cannot collide, so a single lookup table is enough.
	const meta = new Map<string, string>()
	const collect = (selector: string, keyAttr: string) => {
		$(selector).each((_, el) => {
			const key = $(el).attr(keyAttr)
			const content = $(el).attr('content')
			// The first non-empty occurrence wins: the Open Graph protocol gives preference to
			// the first tag when a property is repeated.
			if (key !== undefined && content !== undefined && content.trim() !== '' && !meta.has(key)) {
				meta.set(key, content)
			}
		})
	}
	collect('meta[property^=og:]', 'property')
	collect('meta[name^=twitter:]', 'name')

	const title = firstNonEmpty(
		meta.get('og:title'),
		meta.get('twitter:title'),
		// Only the first <title>: inline SVGs may carry their own <title> elements.
		$('title').first().text()
	)

	const description = firstNonEmpty(
		meta.get('og:description'),
		meta.get('twitter:description'),
		$('meta[name="description"]').attr('content')
	)

	const image = resolveUrl(
		firstNonEmpty(meta.get('og:image:secure_url'), meta.get('og:image'), meta.get('twitter:image')),
		base
	)

	const favicon = resolveUrl(
		firstNonEmpty(
			$('link[rel="apple-touch-icon"]').attr('href'),
			$('link[rel="icon"]').attr('href'),
			// Legacy spelling that is still widespread.
			$('link[rel="shortcut icon"]').attr('href')
		),
		base
	)

	return {
		title,
		description,
		image,
		favicon,
	}
}