Introduce a Cloudflare health worker (#2499)
This PR introduces a new Cloudflare worker for health checks. At the moment the worker only translates Updown webhooks into Discord webhooks. In the future we can teach this worker to check more things. ### Change Type - [x] `internal` — Any other changes that don't affect the published package --------- Co-authored-by: Steve Ruiz <steveruizok@gmail.com>
This commit is contained in:
parent
a1e242ae3a
commit
3a3248a636
11 changed files with 428 additions and 7 deletions
2
.github/workflows/deploy.yml
vendored
2
.github/workflows/deploy.yml
vendored
|
@ -63,6 +63,8 @@ jobs:
|
|||
CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }}
|
||||
CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
|
||||
DISCORD_DEPLOY_WEBHOOK_URL: ${{ secrets.DISCORD_DEPLOY_WEBHOOK_URL }}
|
||||
DISCORD_HEALTH_WEBHOOK_URL: ${{ secrets.DISCORD_HEALTH_WEBHOOK_URL }}
|
||||
HEALTH_WORKER_UPDOWN_WEBHOOK_PATH: ${{ secrets.HEALTH_WORKER_UPDOWN_WEBHOOK_PATH }}
|
||||
GC_MAPS_API_KEY: ${{ secrets.GC_MAPS_API_KEY }}
|
||||
WORKER_SENTRY_DSN: ${{ secrets.WORKER_SENTRY_DSN }}
|
||||
SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }}
|
||||
|
|
2
apps/health-worker/.gitignore
vendored
Normal file
2
apps/health-worker/.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
build
|
||||
.wrangler
|
3
apps/health-worker/README.md
Normal file
3
apps/health-worker/README.md
Normal file
|
@ -0,0 +1,3 @@
|
|||
# Health Worker
|
||||
|
||||
Accepts webhooks from [Updown](https://updown.io/) and forwards them to our Discord.
|
21
apps/health-worker/package.json
Normal file
21
apps/health-worker/package.json
Normal file
|
@ -0,0 +1,21 @@
|
|||
{
|
||||
"name": "health-worker",
|
||||
"version": "1.0.0",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"deploy": "wrangler deploy",
|
||||
"dev": "wrangler dev",
|
||||
"start": "wrangler dev",
|
||||
"lint": "yarn run -T tsx ../../scripts/lint.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
"@tldraw/utils": "workspace:*"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@cloudflare/workers-types": "^4.20230821.0",
|
||||
"@types/node": "^18.7.3",
|
||||
"discord-api-types": "^0.37.67",
|
||||
"typescript": "^5.2.2",
|
||||
"wrangler": "3.16.0"
|
||||
}
|
||||
}
|
96
apps/health-worker/src/discord.ts
Normal file
96
apps/health-worker/src/discord.ts
Normal file
|
@ -0,0 +1,96 @@
|
|||
import { exhaustiveSwitchError } from '@tldraw/utils'
|
||||
import { type APIEmbed } from 'discord-api-types/v10'
|
||||
import { Event as UpdownEvent } from './updown_types'
|
||||
|
||||
// discord wants decimal colours; each hex literal below evaluates to the
// decimal value noted beside it
const GREEN = 0x40c057 // 4243543
const RED = 0xe03131 // 14692657
const ORANGE = 0xf76707 // 16213767
|
||||
|
||||
/**
 * JSON body this worker sends to a Discord webhook.
 *
 * docs: https://birdie0.github.io/discord-webhooks-guide/index.html
 */
export interface DiscordPayload {
	/** Name the message is posted under. */
	username: string
	/** Plain-text part of the message. */
	content: string
	/** Embeds attached to the message. */
	embeds: APIEmbed[]
}
|
||||
|
||||
/**
 * Chooses the colour, title and description used to announce an Updown event.
 *
 * @param event - a single Updown webhook event
 * @returns the message pieces, or `null` for `check.still_down`, for which no
 *   message is produced
 */
function formatUpdownEvent(event: UpdownEvent): {
	colour: number
	title: string
	description: string
} | null {
	switch (event.event) {
		// --- no message ---
		case 'check.still_down': {
			return null
		}

		// --- availability ---
		case 'check.down': {
			const title = `Check DOWN at <${event.check.url}>`
			const description = `<${event.check.url}> is down: "${event.downtime.error}"\n\nNext check in ${event.check.period} seconds`
			return { colour: RED, title, description }
		}
		case 'check.up': {
			const title = `Check UP at <${event.check.url}>`
			const description = `<${event.check.url}> is up\n\nIt was down for ${event.downtime.duration} seconds`
			return { colour: GREEN, title, description }
		}

		// --- SSL certificate ---
		case 'check.ssl_invalid': {
			const title = `SSL INVALID at <${event.check.url}>`
			const description = `SSL certificate at <${event.check.url}> is invalid: "${event.ssl.error}"`
			return { colour: RED, title, description }
		}
		case 'check.ssl_valid': {
			const title = `SSL VALID at <${event.check.url}>`
			const description = `SSL certificate at <${event.check.url}> is now valid`
			return { colour: GREEN, title, description }
		}
		case 'check.ssl_expiration': {
			const title = `SSL EXPIRATION at <${event.check.url}>`
			const description = `SSL certificate at <${event.check.url}> will expire in ${event.ssl.days_before_expiration} days`
			return { colour: ORANGE, title, description }
		}
		case 'check.ssl_renewed': {
			const title = `SSL RENEWED at <${event.check.url}>`
			const description = `SSL certificate at <${event.check.url}> was renewed`
			return { colour: GREEN, title, description }
		}

		// --- performance ---
		case 'check.performance_drop': {
			const title = `PERFORMANCE DROP at <${event.check.url}>`
			const description = `Performance drop at <${event.check.url}>, apdex dropped ${event.apdex_dropped}`
			return { colour: ORANGE, title, description }
		}

		default: {
			// every member of the UpdownEvent union has a case above
			exhaustiveSwitchError(event, 'event')
		}
	}
}
|
||||
|
||||
/**
 * Builds the Discord webhook payload that announces an Updown event.
 *
 * @param event - a single Updown webhook event
 * @returns the payload to send, or `null` when `formatUpdownEvent` produces
 *   nothing for this event
 */
export function updownToDiscord(event: UpdownEvent): DiscordPayload | null {
	const details = formatUpdownEvent(event)
	if (!details) return null

	const { title, description, colour } = details

	// the title goes into the plain-text content, the description into a coloured embed
	const embed = {
		color: colour,
		description: description,
		timestamp: event.time,
	}

	return {
		username: 'Health Worker',
		content: `Updown: ${title}`,
		embeds: [embed],
	}
}
|
69
apps/health-worker/src/index.ts
Normal file
69
apps/health-worker/src/index.ts
Normal file
|
@ -0,0 +1,69 @@
|
|||
import { DiscordPayload, updownToDiscord } from './discord'
|
||||
import { Event as UpdownEvent } from './updown_types'
|
||||
|
||||
/** Variables the worker reads from its environment; each may be undefined. */
interface Env {
	/**
	 * Request path on which Updown webhooks are accepted.
	 *
	 * it needs to be passed in because it's effectively a secret, unless we want
	 * everyone to be able to stress us out with spurious discord alerts
	 */
	HEALTH_WORKER_UPDOWN_WEBHOOK_PATH: string | undefined
	/** Discord webhook URL that messages are sent to. */
	DISCORD_HEALTH_WEBHOOK_URL: string | undefined
}
|
||||
|
||||
/**
 * POSTs `discord` as a JSON body to `url`.
 *
 * @param url - Discord webhook URL
 * @param discord - payload to serialise and send
 * @returns the response from `fetch`; its status is not inspected here
 */
async function sendDiscordWebhook(url: string, discord: DiscordPayload): Promise<Response> {
	const serialised = JSON.stringify(discord)
	const init = {
		method: 'POST',
		headers: { 'Content-Type': 'application/json' },
		body: serialised,
	}
	return fetch(url, init)
}
|
||||
|
||||
/**
 * Handles one Updown webhook delivery by forwarding its events to Discord.
 *
 * The request body is expected to be a JSON array of Updown events. Events for
 * which `updownToDiscord` returns `null` are skipped. Forwarding stops at the
 * first Discord call that does not succeed, and that call's HTTP status becomes
 * the status of the response returned here.
 *
 * @param request - incoming webhook request; its body is consumed
 * @param discordUrl - Discord webhook URL to forward to
 * @returns 200 when every event was forwarded or skipped, 400 when the body is
 *   not a JSON array, otherwise the status of the failed Discord call
 */
async function handleUpdown(request: Request, discordUrl: string): Promise<Response> {
	// the body comes from the network: reject anything that is not a JSON array
	// with a client error rather than letting the worker throw
	let body: unknown
	try {
		body = await request.json()
	} catch (err) {
		console.error('Updown webhook body is not valid JSON', err)
		return new Response('Bad Request', { status: 400 })
	}
	if (!Array.isArray(body)) {
		console.error('Updown webhook body is not an array of events')
		return new Response('Bad Request', { status: 400 })
	}

	// only the envelope is validated; individual events are assumed to match the Updown types
	const updownEvents = body as Array<UpdownEvent>

	let status = 200
	for (const e of updownEvents) {
		const discordPayload = updownToDiscord(e)
		if (!discordPayload) {
			continue
		}
		const discordResult = await sendDiscordWebhook(discordUrl, discordPayload)

		if (!discordResult.ok) {
			console.error(`Discord error ${discordResult.status}: ${discordResult.statusText}`)
			// report Discord's status to the caller and stop forwarding
			status = discordResult.status
			break
		}
	}

	return new Response(null, { status })
}
|
||||
|
||||
/**
 * Worker entry point.
 *
 * Responds 500 when either environment variable is missing, passes requests
 * whose path equals `HEALTH_WORKER_UPDOWN_WEBHOOK_PATH` to `handleUpdown`, and
 * responds 404 to everything else.
 */
const handler: ExportedHandler<Env> = {
	async fetch(request: Request, env: Env): Promise<Response> {
		const discordUrl = env.DISCORD_HEALTH_WEBHOOK_URL
		if (!discordUrl) {
			console.error('missing DISCORD_HEALTH_WEBHOOK_URL')
			return new Response('Internal error', { status: 500 })
		}

		const updownWebhookPath = env.HEALTH_WORKER_UPDOWN_WEBHOOK_PATH
		if (!updownWebhookPath) {
			console.error('missing HEALTH_WORKER_UPDOWN_WEBHOOK_PATH')
			return new Response('Internal error', { status: 500 })
		}

		const { pathname } = new URL(request.url)

		// timing safety COULD be an issue, but it seems that in practice it isn't:
		// https://github.com/scriptin/node-timing-attack
		// my own testing confirms those observations
		if (pathname !== updownWebhookPath) {
			return new Response('Not Found', { status: 404 })
		}

		return handleUpdown(request, discordUrl)
	},
}
|
||||
|
||||
export default handler
|
152
apps/health-worker/src/updown_types.ts
Normal file
152
apps/health-worker/src/updown_types.ts
Normal file
|
@ -0,0 +1,152 @@
|
|||
// docs: https://updown.io/api#webhooks
|
||||
|
||||
/**
 * Fields of an Updown check that are typed the same whether the check is up or down.
 *
 * NOTE(review): `alias` and `created_at` are typed as literal `null`, and
 * `disabled_locations` / `recipients` as `any[]` — presumably inferred from a
 * sample payload; confirm against the Updown API docs.
 */
export interface BaseCheck {
	// identity
	token: string
	url: string
	alias: null
	favicon_url: string

	// status
	last_status: number
	uptime: number
	enabled: boolean
	published: boolean

	// configuration
	/** Used as a number of seconds in the "Next check in … seconds" message. */
	period: number
	apdex_t: number
	string_match: string
	disabled_locations: any[]
	recipients: any[]
	custom_headers: CustomHeaders
	http_verb: string
	http_body: string

	// timestamps
	last_check_at: string
	next_check_at: string
	created_at: null
	mute_until: null | string
}
|
||||
|
||||
/** A check that is currently down: it has a `down_since` time and an `error`, and no `up_since`. */
export interface FailingCheck extends BaseCheck {
	down: true
	error: string
	down_since: string
	up_since: null
}
|
||||
|
||||
/**
 * A check that is currently up: it has an `up_since` time, and no `down_since`
 * or `error`.
 *
 * `down` is `false` here and `true` on `FailingCheck`, so it can be used to
 * tell the two apart in a `SucceedingCheck | FailingCheck` union.
 */
export interface SucceedingCheck extends BaseCheck {
	// was `true`, which contradicted the null `down_since` / `error` below
	down: false
	down_since: null
	up_since: string
	error: null
}
|
||||
|
||||
/** Fields shared by ongoing and finished downtimes. */
export interface BaseDowntime {
	id: string
	started_at: string
	/** Error text; quoted in the "is down" message. */
	error: string
	// NOTE(review): shape not established by this file — confirm against the Updown API docs
	partial: unknown
}
|
||||
|
||||
/** A downtime that has not ended: there is no end time and no duration. */
export interface OngoingDowntime extends BaseDowntime {
	duration: null
	ended_at: null
}
|
||||
|
||||
/** A downtime that has ended. */
export interface FinishedDowntime extends BaseDowntime {
	/** Length of the downtime, in seconds. */
	duration: number
	ended_at: string
}
|
||||
|
||||
/** String-to-string map used for a check's `custom_headers`. */
export type CustomHeaders = Record<string, string>
|
||||
|
||||
/**
 * Description of an SSL certificate as included in the SSL events.
 *
 * NOTE(review): `from` / `to` are presumably the validity period — confirm
 * against the Updown API docs.
 */
export interface SslCert {
	issuer: string
	subject: string
	algorithm: string
	from: string
	to: string
}
|
||||
|
||||
/** `check.down` event: carries the failing check and its ongoing downtime. */
export interface EventDown {
	event: 'check.down'
	check: FailingCheck
	downtime: OngoingDowntime
	/** Used as the timestamp of the Discord embed. */
	time: string
	description: string
}
|
||||
|
||||
/** `check.still_down` event: same shape as `check.down` apart from the `event` tag. */
export interface EventStillDown {
	event: 'check.still_down'
	check: FailingCheck
	downtime: OngoingDowntime
	time: string
	description: string
}
|
||||
|
||||
/** `check.up` event: carries the succeeding check and the downtime that just finished. */
export interface EventUp {
	event: 'check.up'
	check: SucceedingCheck
	downtime: FinishedDowntime
	/** Used as the timestamp of the Discord embed. */
	time: string
	description: string
}
|
||||
|
||||
/** `check.ssl_invalid` event: carries the certificate and the validation error. */
export interface EventSslInvalid {
	event: 'check.ssl_invalid'
	check: SucceedingCheck | FailingCheck
	ssl: {
		/** Error text; quoted in the "SSL INVALID" message. */
		error: string
		cert: SslCert
	}
	time: string
	description: string
}
|
||||
|
||||
/** `check.ssl_valid` event: carries the certificate. */
export interface EventSslValid {
	event: 'check.ssl_valid'
	check: SucceedingCheck | FailingCheck
	ssl: {
		cert: SslCert
	}
	time: string
	description: string
}
|
||||
|
||||
/** `check.ssl_expiration` event: carries the certificate and the days left before it expires. */
export interface EventSslExpiration {
	event: 'check.ssl_expiration'
	check: SucceedingCheck | FailingCheck
	ssl: {
		/** Reported as "will expire in … days". */
		days_before_expiration: number
		cert: SslCert
	}
	time: string
	description: string
}
|
||||
|
||||
/** `check.ssl_renewed` event: carries both the old and the new certificate. */
export interface EventSslRenewed {
	event: 'check.ssl_renewed'
	check: SucceedingCheck | FailingCheck
	ssl: {
		old_cert: SslCert
		new_cert: SslCert
	}
	time: string
	description: string
}
|
||||
|
||||
/** `check.performance_drop` event: carries the apdex drop and the latest metrics. */
export interface EventPerformanceDrop {
	event: 'check.performance_drop'
	check: SucceedingCheck | FailingCheck
	/** Inserted verbatim after "apdex dropped" in the Discord message. */
	apdex_dropped: string
	// NOTE(review): meaning of the record keys is not established by this file — confirm against the Updown API docs
	last_metrics: Record<string, { apdex: number }>
	time: string
	description: string
}
|
||||
|
||||
/** Any Updown webhook event modelled in this file, discriminated by its `event` field. */
export type Event =
	// availability
	| EventDown
	| EventStillDown
	| EventUp
	// SSL certificate
	| EventSslInvalid
	| EventSslValid
	| EventSslExpiration
	| EventSslRenewed
	// performance
	| EventPerformanceDrop
|
11
apps/health-worker/tsconfig.json
Normal file
11
apps/health-worker/tsconfig.json
Normal file
|
@ -0,0 +1,11 @@
|
|||
{
|
||||
"extends": "../../config/tsconfig.base.json",
|
||||
"include": ["src"],
|
||||
"exclude": ["node_modules", "dist", ".tsbuild*"],
|
||||
"compilerOptions": {
|
||||
"noEmit": true,
|
||||
"emitDeclarationOnly": false,
|
||||
"types": ["@cloudflare/workers-types", "@types/node"]
|
||||
},
|
||||
"references": []
|
||||
}
|
3
apps/health-worker/wrangler.toml
Normal file
3
apps/health-worker/wrangler.toml
Normal file
|
@ -0,0 +1,3 @@
|
|||
name = "health-worker"
|
||||
main = "src/index.ts"
|
||||
compatibility_date = "2023-12-18"
|
|
@ -12,6 +12,7 @@ import { makeEnv } from './lib/makeEnv'
|
|||
import { nicelog } from './lib/nicelog'
|
||||
|
||||
const worker = path.relative(process.cwd(), path.resolve(__dirname, '../apps/dotcom-worker'))
|
||||
const healthWorker = path.relative(process.cwd(), path.resolve(__dirname, '../apps/health-worker'))
|
||||
const assetUpload = path.relative(
|
||||
process.cwd(),
|
||||
path.resolve(__dirname, '../apps/dotcom-asset-upload')
|
||||
|
@ -26,6 +27,8 @@ const env = makeEnv([
|
|||
'CLOUDFLARE_ACCOUNT_ID',
|
||||
'CLOUDFLARE_API_TOKEN',
|
||||
'DISCORD_DEPLOY_WEBHOOK_URL',
|
||||
'DISCORD_HEALTH_WEBHOOK_URL',
|
||||
'HEALTH_WORKER_UPDOWN_WEBHOOK_PATH',
|
||||
'GC_MAPS_API_KEY',
|
||||
'RELEASE_COMMIT_HASH',
|
||||
'SENTRY_AUTH_TOKEN',
|
||||
|
@ -73,7 +76,7 @@ async function main() {
|
|||
|
||||
await discordMessage(`--- **${env.TLDRAW_ENV} deploy pre-flight** ---`)
|
||||
|
||||
await discordStep('[1/6] setting up deploy', async () => {
|
||||
await discordStep('[1/7] setting up deploy', async () => {
|
||||
// make sure the tldraw .css files are built:
|
||||
await exec('yarn', ['lazy', 'prebuild'])
|
||||
|
||||
|
@ -83,15 +86,16 @@ async function main() {
|
|||
|
||||
// deploy pre-flight steps:
|
||||
// 1. get the dotcom app ready to go (env vars and pre-build)
|
||||
await discordStep('[2/6] building dotcom app', async () => {
|
||||
await discordStep('[2/7] building dotcom app', async () => {
|
||||
await createSentryRelease()
|
||||
await prepareDotcomApp()
|
||||
await uploadSourceMaps()
|
||||
await coalesceWithPreviousAssets(`${dotcom}/.vercel/output/static/assets`)
|
||||
})
|
||||
|
||||
await discordStep('[3/6] cloudflare deploy dry run', async () => {
|
||||
await discordStep('[3/7] cloudflare deploy dry run', async () => {
|
||||
await deployAssetUploadWorker({ dryRun: true })
|
||||
await deployHealthWorker({ dryRun: true })
|
||||
await deployTlsyncWorker({ dryRun: true })
|
||||
})
|
||||
|
||||
|
@ -100,16 +104,19 @@ async function main() {
|
|||
await discordMessage(`--- **pre-flight complete, starting real deploy** ---`)
|
||||
|
||||
// 2. deploy the cloudflare workers:
|
||||
await discordStep('[4/6] deploying asset uploader to cloudflare', async () => {
|
||||
await discordStep('[4/7] deploying asset uploader to cloudflare', async () => {
|
||||
await deployAssetUploadWorker({ dryRun: false })
|
||||
})
|
||||
await discordStep('[5/6] deploying multiplayer worker to cloudflare', async () => {
|
||||
await discordStep('[5/7] deploying multiplayer worker to cloudflare', async () => {
|
||||
await deployTlsyncWorker({ dryRun: false })
|
||||
})
|
||||
await discordStep('[6/7] deploying health worker to cloudflare', async () => {
|
||||
await deployHealthWorker({ dryRun: false })
|
||||
})
|
||||
|
||||
// 3. deploy the pre-build dotcom app:
|
||||
const { deploymentUrl, inspectUrl } = await discordStep(
|
||||
'[6/6] deploying dotcom app to vercel',
|
||||
'[7/7] deploying dotcom app to vercel',
|
||||
async () => {
|
||||
return await deploySpa()
|
||||
}
|
||||
|
@ -119,7 +126,7 @@ async function main() {
|
|||
|
||||
if (previewId) {
|
||||
const aliasDomain = `${previewId}-preview-deploy.tldraw.com`
|
||||
await discordStep('[7/6] aliasing preview deployment', async () => {
|
||||
await discordStep('[8/7] aliasing preview deployment', async () => {
|
||||
await vercelCli('alias', ['set', deploymentUrl, aliasDomain])
|
||||
})
|
||||
|
||||
|
@ -217,6 +224,41 @@ name = "${previewId}-tldraw-multiplayer"`
|
|||
)
|
||||
}
|
||||
|
||||
// set once the preview section has been appended to the health worker's
// wrangler.toml, so that repeated calls do not append it again
let didUpdateHealthWorker = false

/**
 * Runs `wrangler deploy` for the health worker.
 *
 * When `previewId` is set, a `[env.preview]` section naming the worker after
 * the preview is first appended to the worker's wrangler.toml (once per process).
 *
 * @param dryRun - when true, `--dry-run` is passed to wrangler
 */
async function deployHealthWorker({ dryRun }: { dryRun: boolean }) {
	const needsPreviewSection = previewId && !didUpdateHealthWorker
	if (needsPreviewSection) {
		const wranglerTomlPath = join(healthWorker, 'wrangler.toml')
		appendFileSync(
			wranglerTomlPath,
			`
[env.preview]
name = "${previewId}-tldraw-health"`
		)
		didUpdateHealthWorker = true
	}

	// the worker reads both of these from its environment at request time
	const discordUrlVar = `DISCORD_HEALTH_WEBHOOK_URL:${env.DISCORD_HEALTH_WEBHOOK_URL}`
	const webhookPathVar = `HEALTH_WORKER_UPDOWN_WEBHOOK_PATH:${env.HEALTH_WORKER_UPDOWN_WEBHOOK_PATH}`

	await exec(
		'yarn',
		[
			'wrangler',
			'deploy',
			dryRun ? '--dry-run' : null,
			'--env',
			env.TLDRAW_ENV,
			'--var',
			discordUrlVar,
			'--var',
			webhookPathVar,
		],
		{
			pwd: healthWorker,
			env: {
				NODE_ENV: 'production',
				// wrangler needs CI=1 set to prevent it from trying to do interactive prompts
				CI: '1',
			},
		}
	)
}
|
||||
|
||||
type ExecOpts = NonNullable<Parameters<typeof exec>[2]>
|
||||
async function vercelCli(command: string, args: string[], opts?: ExecOpts) {
|
||||
return exec(
|
||||
|
|
20
yarn.lock
20
yarn.lock
|
@ -11324,6 +11324,13 @@ __metadata:
|
|||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"discord-api-types@npm:^0.37.67":
|
||||
version: 0.37.67
|
||||
resolution: "discord-api-types@npm:0.37.67"
|
||||
checksum: 5b474544a82148179e3d50f3092b9aa90b4142b470df397d3e6e211bd9c0b1f99724c618d7136fccdbe9cd931d8f48b1bdcd10599c7a08a92b76408c05829692
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"doctrine@npm:^2.1.0":
|
||||
version: 2.1.0
|
||||
resolution: "doctrine@npm:2.1.0"
|
||||
|
@ -14567,6 +14574,19 @@ __metadata:
|
|||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"health-worker@workspace:apps/health-worker":
|
||||
version: 0.0.0-use.local
|
||||
resolution: "health-worker@workspace:apps/health-worker"
|
||||
dependencies:
|
||||
"@cloudflare/workers-types": "npm:^4.20230821.0"
|
||||
"@tldraw/utils": "workspace:*"
|
||||
"@types/node": "npm:^18.7.3"
|
||||
discord-api-types: "npm:^0.37.67"
|
||||
typescript: "npm:^5.2.2"
|
||||
wrangler: "npm:3.16.0"
|
||||
languageName: unknown
|
||||
linkType: soft
|
||||
|
||||
"highlight.js@npm:~11.9.0":
|
||||
version: 11.9.0
|
||||
resolution: "highlight.js@npm:11.9.0"
|
||||
|
|
Loading…
Reference in a new issue