tldraw/apps/dotcom/scripts/build.ts
David Sheldrick ee6aa172b2
Unfurl bookmarks in worker (#4039)
This PR adds a `GET /api/unfurl?url=blahblah` endpoint to our worker.

I tried out the existing cheerio implementation but it added 300kb to
our worker bundle in the end, due to transitive dependencies.

So I implemented the same logic with Cloudflare's sanctioned streaming
HTML parser `HTMLRewriter` and it seems to work fine.

I also made the VS Code extension do its fetching locally (from the Node
process, so it's not bound by security policies), retaining the cheerio
version for that. At the same time I fixed a bug in the RPC layer that
was preventing unfurled metadata from loading correctly.

In a few months we can retire the bookmark-extractor app by just
deleting it in the Vercel dashboard.

### Change Type


<!-- Please select a 'Type' label -->

- [ ] `feature` — New feature
- [x] `improvement` — Product improvement
- [ ] `api` — API change
- [ ] `bugfix` — Bug fix
- [ ] `other` — Changes that don't affect SDK users, e.g. internal or
.com changes


### Test Plan

1. Add a step-by-step description of how to test your PR here.
2.

- [ ] Unit Tests
- [ ] End to end tests

### Release Notes

- Do link unfurling on the same subdomain as all our other API
endpoints.
2024-07-01 14:40:03 +00:00

162 lines
4.8 KiB
TypeScript

import glob from 'fast-glob'
import { mkdirSync, readFileSync, writeFileSync } from 'fs'
import { exec } from '../../../scripts/lib/exec'
import { Config } from './vercel-output-config'
import { config } from 'dotenv'
import json5 from 'json5'
import { nicelog } from '../../../scripts/lib/nicelog'
import { T } from '@tldraw/validate'
import { getMultiplayerServerURL } from '../vite.config'
// Content-Security-Policy directives, keyed by directive name. Each value is the list of
// sources (or other directive values) allowed for that directive.
const cspDirectives: { [key: string]: string[] } = {
	'default-src': [`'self'`],
	'connect-src': [
		`'self'`,
		`ws:`,
		`wss:`,
		`https://assets.tldraw.xyz`,
		`https://*.tldraw.workers.dev`,
		`https://*.ingest.sentry.io`,
	],
	'font-src': [`'self'`, `https://fonts.googleapis.com`, `https://fonts.gstatic.com`],
	'frame-src': [`https:`],
	'img-src': [`'self'`, `http:`, `https:`, `data:`, `blob:`],
	'media-src': [`'self'`, `http:`, `https:`, `data:`, `blob:`],
	'style-src': [`'self'`, `'unsafe-inline'`, `https://fonts.googleapis.com`],
	// NOTE(review): this is read when the module is first evaluated, which is before the
	// dotenv `config()` call further down in this file runs. A value that only exists in
	// `./.env.local` is therefore not visible here — confirm whether that is intended.
	'report-uri': [process.env.SENTRY_CSP_REPORT_URI ?? ``],
}

// Serialise the directives into a single policy string: `name value value; name value ...`.
// Empty values are dropped, and a directive left with no values is omitted entirely, so an
// unset SENTRY_CSP_REPORT_URI does not produce a dangling, value-less `report-uri ` directive.
const csp = Object.entries(cspDirectives)
	.map(([directive, values]): [string, string[]] => [
		directive,
		values.filter((value) => value !== ''),
	])
	.filter(([, values]) => values.length > 0)
	.map(([directive, values]) => `${directive} ${values.join(' ')}`)
	.join('; ')

// Security headers attached to every route that serves index.html. The policy is sent via
// the `-Report-Only` header variant rather than `Content-Security-Policy`.
const commonSecurityHeaders = {
	'Strict-Transport-Security': 'max-age=63072000; includeSubDomains; preload',
	'X-Content-Type-Options': 'nosniff',
	'Referrer-Policy': 'no-referrer-when-downgrade',
	'Content-Security-Policy-Report-Only': csp,
}
/**
 * Builds the Vercel route entries that forward each SPA route to index.html.
 *
 * The route list is read from a jest snapshot file rather than derived from the
 * react-router config directly: deriving it works fine in the test environment, but is
 * tricky to get running in this build script environment for various reasons (no global
 * React, tsx being weird about decorators, etc).
 *
 * @returns One route entry per snapshot route, matching on its `vercelRouterPattern` and
 * serving `/index.html` with the common security headers.
 */
function loadSpaRoutes() {
	// eslint-disable-next-line @typescript-eslint/no-var-requires
	const serializedRoutes = require('../src/__snapshots__/routes.test.tsx.snap')['the_routes 1']

	// The snapshot value is a JSON5 string; validate its shape before trusting it.
	const routeSchema = T.object({
		reactRouterPattern: T.string,
		vercelRouterPattern: T.string,
	})
	const parsedRoutes = T.arrayOf(routeSchema).validate(json5.parse(serializedRoutes))

	return parsedRoutes.map(({ vercelRouterPattern }) => ({
		check: true,
		src: vercelRouterPattern,
		dest: '/index.html',
		headers: commonSecurityHeaders,
	}))
}
// Load environment variables from the local dotenv file before the build runs.
config({ path: './.env.local' })

// Log the MULTIPLAYER_SERVER environment variable so it is visible in the build output.
nicelog('The multiplayer server is', process.env.MULTIPLAYER_SERVER)
/**
 * Builds the app and assembles the Vercel output directory at `.vercel/output`.
 *
 * Steps, in order:
 * 1. Runs the routes test and loads the SPA routes from its snapshot.
 * 2. Runs `vite build`, then `sentry-cli sourcemaps inject` on `dist/assets`.
 * 3. Recreates `.vercel/output`, copies `dist` to `.vercel/output/static` and removes
 *    the `*.js.map` files from the copy.
 * 4. Replaces the `<!-- $PRELOADED_FONTS -->` placeholder in index.html with font
 *    preload links.
 * 5. Writes `.vercel/output/config.json` with the routing table.
 *
 * @returns A promise that resolves once config.json has been written. It rejects if any
 * of the awaited `exec` calls rejects or if a file operation throws.
 */
async function build() {
	// make sure we have the latest routes
	await exec('yarn', ['test', 'src/routes.test.tsx'])
	const spaRoutes = loadSpaRoutes()

	await exec('vite', ['build', '--emptyOutDir'])
	await exec('yarn', ['run', '-T', 'sentry-cli', 'sourcemaps', 'inject', 'dist/assets'])

	// Clear output static folder (in case we are running locally and have already built the app once before)
	await exec('rm', ['-rf', '.vercel/output'])
	mkdirSync('.vercel/output', { recursive: true })
	await exec('cp', ['-r', 'dist', '.vercel/output/static'])
	await exec('rm', ['-rf', ...glob.sync('.vercel/output/static/**/*.js.map')])

	// Add fonts to preload into index.html
	const assetsList = (await exec('ls', ['-1', 'dist/assets'])).split('\n').filter(Boolean)
	const fontsToPreload = [
		'Shantell_Sans-Tldrawish',
		'IBMPlexSerif-Medium',
		'IBMPlexSans-Medium',
		'IBMPlexMono-Medium',
	]
	const preloadLinks: string[] = []
	for (const font of fontsToPreload) {
		// Built asset names are matched by prefix against the font name.
		const asset = assetsList.find((a) => a.startsWith(font))
		if (asset === undefined) {
			// Skip rather than emit a broken `href="/assets/undefined"` preload link.
			nicelog(`Could not find a built asset for font "${font}", skipping its preload link`)
			continue
		}
		preloadLinks.push(
			[
				'<link',
				'rel="preload"',
				`href="/assets/${asset}"`,
				'as="font"',
				'type="font/woff2"',
				'crossorigin="anonymous"',
				'/>',
			].join('\n')
		)
	}
	const indexHtmlPath = '.vercel/output/static/index.html'
	const indexHtml = readFileSync(indexHtmlPath, 'utf8')
	writeFileSync(
		indexHtmlPath,
		// A replacer function is used so that `$` sequences in the generated markup are
		// inserted literally instead of being treated as replacement patterns.
		indexHtml.replace('<!-- $PRELOADED_FONTS -->', () => preloadLinks.join('\n'))
	)

	const multiplayerServerUrl = getMultiplayerServerURL() ?? 'http://localhost:8787'

	writeFileSync(
		'.vercel/output/config.json',
		JSON.stringify(
			{
				version: 3,
				routes: [
					// rewrite api calls to the multiplayer server
					{
						src: '^/api(/(.*))?$',
						dest: `${multiplayerServerUrl}$1`,
						check: true,
					},
					// cache static assets immutably
					{
						src: '^/assets/(.*)$',
						headers: {
							'Cache-Control': 'public, max-age=31536000, immutable',
							'X-Content-Type-Options': 'nosniff',
						},
					},
					// server up index.html specifically because we want to include
					// security headers. otherwise, it goes to the handle: 'miss'
					// part below (and _not_ to the spaRoutes as maybe expected!)
					{
						check: true,
						src: '/',
						dest: '/index.html',
						headers: commonSecurityHeaders,
					},
					// serve static files
					{
						handle: 'miss',
					},
					// finally handle SPA routing
					...spaRoutes,
					// react router will handle drawing the 404 page
					{
						check: true,
						src: '.*',
						dest: '/index.html',
						status: 404,
						headers: commonSecurityHeaders,
					},
				],
				overrides: {},
			} satisfies Config,
			null,
			2
		)
	)
}

// Run the build. Handle rejection explicitly so a failed step is reported and the
// process exits with a non-zero code instead of leaving a floating promise.
build().catch((err: unknown) => {
	console.error(err)
	process.exit(1)
})