Introduce a Cloudflare health worker (#2499)

This PR introduces a new Cloudflare worker for health checks.

At the moment the worker only translates Updown webhooks into Discord
webhooks. In the future we can teach this worker to check more things.

### Change Type

- [x] `internal` — Any other changes that don't affect the published
package

---------

Co-authored-by: Steve Ruiz <steveruizok@gmail.com>
This commit is contained in:
Dan Groshev 2024-01-29 09:47:50 +00:00 committed by GitHub
parent a1e242ae3a
commit 3a3248a636
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 428 additions and 7 deletions

View file

@ -63,6 +63,8 @@ jobs:
CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }} CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }}
CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
DISCORD_DEPLOY_WEBHOOK_URL: ${{ secrets.DISCORD_DEPLOY_WEBHOOK_URL }} DISCORD_DEPLOY_WEBHOOK_URL: ${{ secrets.DISCORD_DEPLOY_WEBHOOK_URL }}
DISCORD_HEALTH_WEBHOOK_URL: ${{ secrets.DISCORD_HEALTH_WEBHOOK_URL }}
HEALTH_WORKER_UPDOWN_WEBHOOK_PATH: ${{ secrets.HEALTH_WORKER_UPDOWN_WEBHOOK_PATH }}
GC_MAPS_API_KEY: ${{ secrets.GC_MAPS_API_KEY }} GC_MAPS_API_KEY: ${{ secrets.GC_MAPS_API_KEY }}
WORKER_SENTRY_DSN: ${{ secrets.WORKER_SENTRY_DSN }} WORKER_SENTRY_DSN: ${{ secrets.WORKER_SENTRY_DSN }}
SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }} SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }}

2
apps/health-worker/.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
build
.wrangler

View file

@ -0,0 +1,3 @@
# Health Worker
Accepts webhooks from [Updown](https://updown.io/) and forwards them to our Discord.

View file

@ -0,0 +1,21 @@
{
"name": "health-worker",
"version": "1.0.0",
"private": true,
"scripts": {
"deploy": "wrangler deploy",
"dev": "wrangler dev",
"start": "wrangler dev",
"lint": "yarn run -T tsx ../../scripts/lint.ts"
},
"dependencies": {
"@tldraw/utils": "workspace:*"
},
"devDependencies": {
"@cloudflare/workers-types": "^4.20230821.0",
"@types/node": "^18.7.3",
"discord-api-types": "^0.37.67",
"typescript": "^5.2.2",
"wrangler": "3.16.0"
}
}

View file

@ -0,0 +1,96 @@
import { exhaustiveSwitchError } from '@tldraw/utils'
import { type APIEmbed } from 'discord-api-types/v10'
import { Event as UpdownEvent } from './updown_types'
// Discord takes embed colours as plain integers. They are written here as hex
// RGB literals, which evaluate to the decimal values noted alongside.
const GREEN = 0x40c057 // 4243543
const RED = 0xe03131 // 14692657
const ORANGE = 0xf76707 // 16213767
// docs: https://birdie0.github.io/discord-webhooks-guide/index.html
/** Shape of the JSON body built from an Updown event for the Discord webhook. */
export type DiscordPayload = {
// display name for the webhook message (see the guide linked above)
username: string
// plain-text message body
content: string
// embeds attached to the message
embeds: APIEmbed[]
}
/**
 * Maps an Updown webhook event to the colour, title and description used to
 * build the Discord message.
 *
 * `check.still_down` yields `null`, i.e. nothing to post. Any event kind not
 * listed here is handed to `exhaustiveSwitchError`.
 *
 * @param event - a single Updown webhook event
 * @returns the message parts, or `null` when the event produces no message
 */
function formatUpdownEvent(event: UpdownEvent): {
	colour: number
	title: string
	description: string
} | null {
	switch (event.event) {
		case 'check.still_down': {
			// repeated "still down" notifications produce no message
			return null
		}
		case 'check.down': {
			const { check, downtime } = event
			return {
				colour: RED,
				title: `Check DOWN at <${check.url}>`,
				description: `<${check.url}> is down: "${downtime.error}"\n\nNext check in ${check.period} seconds`,
			}
		}
		case 'check.up': {
			const { check, downtime } = event
			return {
				colour: GREEN,
				title: `Check UP at <${check.url}>`,
				description: `<${check.url}> is up\n\nIt was down for ${downtime.duration} seconds`,
			}
		}
		case 'check.performance_drop': {
			const { check, apdex_dropped } = event
			return {
				colour: ORANGE,
				title: `PERFORMANCE DROP at <${check.url}>`,
				description: `Performance drop at <${check.url}>, apdex dropped ${apdex_dropped}`,
			}
		}
		case 'check.ssl_invalid': {
			const { check, ssl } = event
			return {
				colour: RED,
				title: `SSL INVALID at <${check.url}>`,
				description: `SSL certificate at <${check.url}> is invalid: "${ssl.error}"`,
			}
		}
		case 'check.ssl_valid': {
			const { check } = event
			return {
				colour: GREEN,
				title: `SSL VALID at <${check.url}>`,
				description: `SSL certificate at <${check.url}> is now valid`,
			}
		}
		case 'check.ssl_expiration': {
			const { check, ssl } = event
			return {
				colour: ORANGE,
				title: `SSL EXPIRATION at <${check.url}>`,
				description: `SSL certificate at <${check.url}> will expire in ${ssl.days_before_expiration} days`,
			}
		}
		case 'check.ssl_renewed': {
			const { check } = event
			return {
				colour: GREEN,
				title: `SSL RENEWED at <${check.url}>`,
				description: `SSL certificate at <${check.url}> was renewed`,
			}
		}
		default:
			exhaustiveSwitchError(event, 'event')
	}
}
/**
 * Builds the Discord webhook payload for a single Updown event.
 *
 * The title goes into the message text (prefixed with "Updown: "), while the
 * colour, description and the event's own timestamp go into a single embed.
 *
 * @param event - a single Updown webhook event
 * @returns the payload to post, or `null` when `formatUpdownEvent` produces
 * nothing for this event
 */
export function updownToDiscord(event: UpdownEvent): DiscordPayload | null {
	const summary = formatUpdownEvent(event)
	if (!summary) return null

	const embed: APIEmbed = {
		color: summary.colour,
		description: summary.description,
		timestamp: event.time,
	}
	return {
		username: 'Health Worker',
		content: `Updown: ${summary.title}`,
		embeds: [embed],
	}
}

View file

@ -0,0 +1,69 @@
import { DiscordPayload, updownToDiscord } from './discord'
import { Event as UpdownEvent } from './updown_types'
/** Values the worker reads from its environment; the fetch handler answers 500 when either is missing. */
interface Env {
// Discord webhook URL that the translated Updown events are posted to
DISCORD_HEALTH_WEBHOOK_URL: string | undefined
// request path that Updown webhooks must be sent to; any other path gets a 404.
// it needs to be passed in because it's effectively a secret, unless we want everyone to be able
// to stress us out with spurious discord alerts
HEALTH_WORKER_UPDOWN_WEBHOOK_PATH: string | undefined
}
/**
 * POSTs `discord` as a JSON body to `url`.
 *
 * @param url - Discord webhook URL
 * @param discord - payload to serialise and send
 * @returns the response from `fetch`, unmodified; callers inspect `ok`/`status`
 */
async function sendDiscordWebhook(url: string, discord: DiscordPayload): Promise<Response> {
	const headers = { 'Content-Type': 'application/json' }
	const body = JSON.stringify(discord)
	return fetch(url, { method: 'POST', headers, body })
}
/**
 * Forwards a batch of Updown webhook events to Discord, one message per event.
 *
 * The request body must be a JSON array of Updown events. Events for which
 * `updownToDiscord` returns `null` are skipped. Forwarding stops at the first
 * Discord response that is not ok, and that response's status is passed back.
 *
 * @param request - incoming webhook request
 * @param discordUrl - Discord webhook URL to post to
 * @returns 200 when every event was handled, 400 when the body is not a JSON
 * array, otherwise the status of the failing Discord response
 */
async function handleUpdown(request: Request, discordUrl: string): Promise<Response> {
	// Parse defensively: anyone who knows the path can send arbitrary bytes, and
	// an unparseable body should be a client error rather than an uncaught exception.
	let body: unknown
	try {
		body = await request.json()
	} catch (err) {
		console.error('Updown webhook body is not valid JSON', err)
		return new Response('Bad Request', { status: 400 })
	}
	if (!Array.isArray(body)) {
		console.error('Updown webhook body is not an array of events')
		return new Response('Bad Request', { status: 400 })
	}
	// only the outer array is validated; the shape of each event is trusted
	const updownEvents = body as Array<UpdownEvent>

	let status = 200
	for (const e of updownEvents) {
		const discordPayload = updownToDiscord(e)
		if (!discordPayload) {
			continue
		}

		const discordResult = await sendDiscordWebhook(discordUrl, discordPayload)
		if (!discordResult.ok) {
			console.error(`Discord error ${discordResult.status}: ${discordResult.statusText}`)
			// surface Discord's failure status to the caller and stop forwarding
			status = discordResult.status
			break
		}
	}

	return new Response(null, { status })
}
/**
 * Worker entry point.
 *
 * Responds 500 when either environment value is missing, passes requests whose
 * path equals HEALTH_WORKER_UPDOWN_WEBHOOK_PATH to `handleUpdown`, and responds
 * 404 to everything else.
 */
const handler: ExportedHandler<Env> = {
	async fetch(request: Request, env: Env): Promise<Response> {
		// log the reason, but give the caller nothing more than a generic 500
		const internalError = (reason: string): Response => {
			console.error(reason)
			return new Response('Internal error', { status: 500 })
		}

		const discordUrl = env.DISCORD_HEALTH_WEBHOOK_URL
		if (!discordUrl) return internalError('missing DISCORD_HEALTH_WEBHOOK_URL')

		const updownWebhookPath = env.HEALTH_WORKER_UPDOWN_WEBHOOK_PATH
		if (!updownWebhookPath) return internalError('missing HEALTH_WORKER_UPDOWN_WEBHOOK_PATH')

		const { pathname } = new URL(request.url)

		// timing safety COULD be an issue, but it seems that in practice it isn't:
		// https://github.com/scriptin/node-timing-attack
		// my own testing confirms those observations
		if (pathname !== updownWebhookPath) {
			return new Response('Not Found', { status: 404 })
		}

		return handleUpdown(request, discordUrl)
	},
}
export default handler

View file

@ -0,0 +1,152 @@
// docs: https://updown.io/api#webhooks
/**
 * Fields common to the `check` object of every Updown webhook event.
 * FailingCheck and SucceedingCheck extend this with the up/down state.
 */
export interface BaseCheck {
token: string
// URL being monitored; this is what the Discord messages print
url: string
// NOTE(review): typed as always null — presumably a string when an alias is set; confirm against the Updown docs
alias: null
last_status: number
uptime: number
// presumably seconds between checks — the Discord "down" message prints it as "Next check in N seconds"
period: number
apdex_t: number
string_match: string
enabled: boolean
published: boolean
// NOTE(review): element types are not pinned down here (any[]) — confirm against real payloads
disabled_locations: any[]
recipients: any[]
last_check_at: string
next_check_at: string
// NOTE(review): typed as always null — confirm against real payloads
created_at: null
mute_until: null | string
favicon_url: string
custom_headers: CustomHeaders
http_verb: string
http_body: string
}
/** A check that is currently down: `down_since` and `error` are set, `up_since` is null. */
export interface FailingCheck extends BaseCheck {
down: true
down_since: string
up_since: null
error: string
}
/** A check that is currently up: `up_since` is set, `down_since` and `error` are null. */
export interface SucceedingCheck extends BaseCheck {
	// a succeeding check is not down; `true` here was a copy-paste from FailingCheck
	down: false
	down_since: null
	up_since: string
	error: null
}
/** Fields common to the `downtime` object of up/down events. */
export interface BaseDowntime {
id: string
// error text; quoted in the Discord "down" message
error: string
started_at: string
// left untyped; nothing in this worker reads it
partial: unknown
}
/** A downtime that has not ended yet, so it has no end time or duration. */
export interface OngoingDowntime extends BaseDowntime {
ended_at: null
duration: null
}
/** A downtime that has ended; `duration` is printed in the Discord "up" message. */
export interface FinishedDowntime extends BaseDowntime {
ended_at: string
// seconds
duration: number
}
// string-to-string map used for a check's `custom_headers` field
export type CustomHeaders = Record<string, string>
/** Certificate details carried by the SSL events. */
export interface SslCert {
subject: string
issuer: string
// presumably the validity window (`from`..`to`) — TODO confirm the format against the Updown docs
from: string
to: string
algorithm: string
}
/** `check.down` event: carries a failing check and its ongoing downtime. */
export interface EventDown {
event: 'check.down'
// used as the timestamp of the Discord embed
time: string
description: string
check: FailingCheck
downtime: OngoingDowntime
}
/** `check.still_down` event: same shape as `check.down`; the worker posts nothing for it. */
export interface EventStillDown {
event: 'check.still_down'
time: string
description: string
check: FailingCheck
downtime: OngoingDowntime
}
/** `check.up` event: carries a succeeding check and the downtime that just finished. */
export interface EventUp {
event: 'check.up'
// used as the timestamp of the Discord embed
time: string
description: string
check: SucceedingCheck
downtime: FinishedDowntime
}
/** `check.ssl_invalid` event: carries the certificate and the validation error. */
export interface EventSslInvalid {
event: 'check.ssl_invalid'
// used as the timestamp of the Discord embed
time: string
description: string
check: SucceedingCheck | FailingCheck
ssl: {
cert: SslCert
// error text; quoted in the Discord message
error: string
}
}
/** `check.ssl_valid` event: carries the certificate, with no error field. */
export interface EventSslValid {
event: 'check.ssl_valid'
// used as the timestamp of the Discord embed
time: string
description: string
check: SucceedingCheck | FailingCheck
ssl: {
cert: SslCert
}
}
/** `check.ssl_expiration` event: carries the certificate and the days left before it expires. */
export interface EventSslExpiration {
event: 'check.ssl_expiration'
// used as the timestamp of the Discord embed
time: string
description: string
check: SucceedingCheck | FailingCheck
ssl: {
cert: SslCert
// printed in the Discord message as "will expire in N days"
days_before_expiration: number
}
}
/** `check.ssl_renewed` event: carries both the new and the old certificate. */
export interface EventSslRenewed {
event: 'check.ssl_renewed'
// used as the timestamp of the Discord embed
time: string
description: string
check: SucceedingCheck | FailingCheck
ssl: {
new_cert: SslCert
old_cert: SslCert
}
}
/** `check.performance_drop` event: carries the size of the apdex drop and recent metrics. */
export interface EventPerformanceDrop {
event: 'check.performance_drop'
// used as the timestamp of the Discord embed
time: string
description: string
check: SucceedingCheck | FailingCheck
// printed verbatim in the Discord message after "apdex dropped"
apdex_dropped: string
// NOTE(review): the meaning of the record keys is not visible here — confirm against the Updown docs
last_metrics: Record<string, { apdex: number }>
}
/**
 * Every Updown webhook event this worker knows about.
 * Each member has a distinct literal `event` field, which the Discord formatter switches on.
 */
export type Event =
| EventDown
| EventStillDown
| EventUp
| EventSslInvalid
| EventSslValid
| EventSslExpiration
| EventSslRenewed
| EventPerformanceDrop

View file

@ -0,0 +1,11 @@
{
"extends": "../../config/tsconfig.base.json",
"include": ["src"],
"exclude": ["node_modules", "dist", ".tsbuild*"],
"compilerOptions": {
"noEmit": true,
"emitDeclarationOnly": false,
"types": ["@cloudflare/workers-types", "@types/node"]
},
"references": []
}

View file

@ -0,0 +1,3 @@
name = "health-worker"
main = "src/index.ts"
compatibility_date = "2023-12-18"

View file

@ -12,6 +12,7 @@ import { makeEnv } from './lib/makeEnv'
import { nicelog } from './lib/nicelog' import { nicelog } from './lib/nicelog'
const worker = path.relative(process.cwd(), path.resolve(__dirname, '../apps/dotcom-worker')) const worker = path.relative(process.cwd(), path.resolve(__dirname, '../apps/dotcom-worker'))
const healthWorker = path.relative(process.cwd(), path.resolve(__dirname, '../apps/health-worker'))
const assetUpload = path.relative( const assetUpload = path.relative(
process.cwd(), process.cwd(),
path.resolve(__dirname, '../apps/dotcom-asset-upload') path.resolve(__dirname, '../apps/dotcom-asset-upload')
@ -26,6 +27,8 @@ const env = makeEnv([
'CLOUDFLARE_ACCOUNT_ID', 'CLOUDFLARE_ACCOUNT_ID',
'CLOUDFLARE_API_TOKEN', 'CLOUDFLARE_API_TOKEN',
'DISCORD_DEPLOY_WEBHOOK_URL', 'DISCORD_DEPLOY_WEBHOOK_URL',
'DISCORD_HEALTH_WEBHOOK_URL',
'HEALTH_WORKER_UPDOWN_WEBHOOK_PATH',
'GC_MAPS_API_KEY', 'GC_MAPS_API_KEY',
'RELEASE_COMMIT_HASH', 'RELEASE_COMMIT_HASH',
'SENTRY_AUTH_TOKEN', 'SENTRY_AUTH_TOKEN',
@ -73,7 +76,7 @@ async function main() {
await discordMessage(`--- **${env.TLDRAW_ENV} deploy pre-flight** ---`) await discordMessage(`--- **${env.TLDRAW_ENV} deploy pre-flight** ---`)
await discordStep('[1/6] setting up deploy', async () => { await discordStep('[1/7] setting up deploy', async () => {
// make sure the tldraw .css files are built: // make sure the tldraw .css files are built:
await exec('yarn', ['lazy', 'prebuild']) await exec('yarn', ['lazy', 'prebuild'])
@ -83,15 +86,16 @@ async function main() {
// deploy pre-flight steps: // deploy pre-flight steps:
// 1. get the dotcom app ready to go (env vars and pre-build) // 1. get the dotcom app ready to go (env vars and pre-build)
await discordStep('[2/6] building dotcom app', async () => { await discordStep('[2/7] building dotcom app', async () => {
await createSentryRelease() await createSentryRelease()
await prepareDotcomApp() await prepareDotcomApp()
await uploadSourceMaps() await uploadSourceMaps()
await coalesceWithPreviousAssets(`${dotcom}/.vercel/output/static/assets`) await coalesceWithPreviousAssets(`${dotcom}/.vercel/output/static/assets`)
}) })
await discordStep('[3/6] cloudflare deploy dry run', async () => { await discordStep('[3/7] cloudflare deploy dry run', async () => {
await deployAssetUploadWorker({ dryRun: true }) await deployAssetUploadWorker({ dryRun: true })
await deployHealthWorker({ dryRun: true })
await deployTlsyncWorker({ dryRun: true }) await deployTlsyncWorker({ dryRun: true })
}) })
@ -100,16 +104,19 @@ async function main() {
await discordMessage(`--- **pre-flight complete, starting real deploy** ---`) await discordMessage(`--- **pre-flight complete, starting real deploy** ---`)
// 2. deploy the cloudflare workers: // 2. deploy the cloudflare workers:
await discordStep('[4/6] deploying asset uploader to cloudflare', async () => { await discordStep('[4/7] deploying asset uploader to cloudflare', async () => {
await deployAssetUploadWorker({ dryRun: false }) await deployAssetUploadWorker({ dryRun: false })
}) })
await discordStep('[5/6] deploying multiplayer worker to cloudflare', async () => { await discordStep('[5/7] deploying multiplayer worker to cloudflare', async () => {
await deployTlsyncWorker({ dryRun: false }) await deployTlsyncWorker({ dryRun: false })
}) })
await discordStep('[6/7] deploying health worker to cloudflare', async () => {
await deployHealthWorker({ dryRun: false })
})
// 3. deploy the pre-build dotcom app: // 3. deploy the pre-build dotcom app:
const { deploymentUrl, inspectUrl } = await discordStep( const { deploymentUrl, inspectUrl } = await discordStep(
'[6/6] deploying dotcom app to vercel', '[7/7] deploying dotcom app to vercel',
async () => { async () => {
return await deploySpa() return await deploySpa()
} }
@ -119,7 +126,7 @@ async function main() {
if (previewId) { if (previewId) {
const aliasDomain = `${previewId}-preview-deploy.tldraw.com` const aliasDomain = `${previewId}-preview-deploy.tldraw.com`
await discordStep('[7/6] aliasing preview deployment', async () => { await discordStep('[8/7] aliasing preview deployment', async () => {
await vercelCli('alias', ['set', deploymentUrl, aliasDomain]) await vercelCli('alias', ['set', deploymentUrl, aliasDomain])
}) })
@ -217,6 +224,41 @@ name = "${previewId}-tldraw-multiplayer"`
) )
} }
// set once the preview section has been appended to wrangler.toml, so the dry
// run and the real deploy don't both append it
let didUpdateHealthWorker = false
/**
 * Runs `wrangler deploy` for the health worker in its own directory.
 *
 * When `previewId` is set, an `[env.preview]` section naming the worker after
 * the preview is appended to the worker's wrangler.toml (once per process).
 * The Discord webhook URL and the Updown webhook path are passed to wrangler
 * as `--var` values.
 *
 * @param dryRun - when true, `--dry-run` is passed to wrangler
 */
async function deployHealthWorker({ dryRun }: { dryRun: boolean }) {
	const shouldAppendPreviewSection = previewId && !didUpdateHealthWorker
	if (shouldAppendPreviewSection) {
		const previewSection = `
[env.preview]
name = "${previewId}-tldraw-health"`
		appendFileSync(join(healthWorker, 'wrangler.toml'), previewSection)
		didUpdateHealthWorker = true
	}

	const discordUrlVar = `DISCORD_HEALTH_WEBHOOK_URL:${env.DISCORD_HEALTH_WEBHOOK_URL}`
	const webhookPathVar = `HEALTH_WORKER_UPDOWN_WEBHOOK_PATH:${env.HEALTH_WORKER_UPDOWN_WEBHOOK_PATH}`

	await exec(
		'yarn',
		[
			'wrangler',
			'deploy',
			dryRun ? '--dry-run' : null,
			'--env',
			env.TLDRAW_ENV,
			'--var',
			discordUrlVar,
			'--var',
			webhookPathVar,
		],
		{
			pwd: healthWorker,
			env: {
				NODE_ENV: 'production',
				// wrangler needs CI=1 set to prevent it from trying to do interactive prompts
				CI: '1',
			},
		}
	)
}
type ExecOpts = NonNullable<Parameters<typeof exec>[2]> type ExecOpts = NonNullable<Parameters<typeof exec>[2]>
async function vercelCli(command: string, args: string[], opts?: ExecOpts) { async function vercelCli(command: string, args: string[], opts?: ExecOpts) {
return exec( return exec(

View file

@ -11324,6 +11324,13 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"discord-api-types@npm:^0.37.67":
version: 0.37.67
resolution: "discord-api-types@npm:0.37.67"
checksum: 5b474544a82148179e3d50f3092b9aa90b4142b470df397d3e6e211bd9c0b1f99724c618d7136fccdbe9cd931d8f48b1bdcd10599c7a08a92b76408c05829692
languageName: node
linkType: hard
"doctrine@npm:^2.1.0": "doctrine@npm:^2.1.0":
version: 2.1.0 version: 2.1.0
resolution: "doctrine@npm:2.1.0" resolution: "doctrine@npm:2.1.0"
@ -14567,6 +14574,19 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"health-worker@workspace:apps/health-worker":
version: 0.0.0-use.local
resolution: "health-worker@workspace:apps/health-worker"
dependencies:
"@cloudflare/workers-types": "npm:^4.20230821.0"
"@tldraw/utils": "workspace:*"
"@types/node": "npm:^18.7.3"
discord-api-types: "npm:^0.37.67"
typescript: "npm:^5.2.2"
wrangler: "npm:3.16.0"
languageName: unknown
linkType: soft
"highlight.js@npm:~11.9.0": "highlight.js@npm:~11.9.0":
version: 11.9.0 version: 11.9.0
resolution: "highlight.js@npm:11.9.0" resolution: "highlight.js@npm:11.9.0"