Introduce a Cloudflare health worker (#2499)

This PR introduces a new Cloudflare worker for health checks.

At the moment the worker only translates Updown webhooks into Discord
webhooks. In the future we can teach this worker to check more things.

### Change Type

- [x] `internal` — Any other changes that don't affect the published
package

---------

Co-authored-by: Steve Ruiz <steveruizok@gmail.com>
This commit is contained in:
Dan Groshev 2024-01-29 09:47:50 +00:00 committed by GitHub
parent a1e242ae3a
commit 3a3248a636
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 428 additions and 7 deletions

2
apps/health-worker/.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
build
.wrangler

View file

@ -0,0 +1,3 @@
# Health Worker
Accepts webhooks from [Updown](https://updown.io/) and forwards them to our Discord as webhook messages.

View file

@ -0,0 +1,21 @@
{
"name": "health-worker",
"version": "1.0.0",
"private": true,
"scripts": {
"deploy": "wrangler deploy",
"dev": "wrangler dev",
"start": "wrangler dev",
"lint": "yarn run -T tsx ../../scripts/lint.ts"
},
"dependencies": {
"@tldraw/utils": "workspace:*"
},
"devDependencies": {
"@cloudflare/workers-types": "^4.20230821.0",
"@types/node": "^18.7.3",
"discord-api-types": "^0.37.67",
"typescript": "^5.2.2",
"wrangler": "3.16.0"
}
}

View file

@ -0,0 +1,96 @@
import { exhaustiveSwitchError } from '@tldraw/utils'
import { type APIEmbed } from 'discord-api-types/v10'
import { Event as UpdownEvent } from './updown_types'
// discord wants colours as plain numbers; these hex literals (0xRRGGBB) are the
// same values and serialise to decimal in JSON
const GREEN = 0x40c057 // 4243543
const RED = 0xe03131 // 14692657
const ORANGE = 0xf76707 // 16213767
// docs: https://birdie0.github.io/discord-webhooks-guide/index.html
/** JSON body sent to a Discord webhook; produced by `updownToDiscord`. */
export type DiscordPayload = {
// name shown as the author of the message
username: string
// plain-text part of the message
content: string
// rich embeds attached to the message
embeds: APIEmbed[]
}
/**
 * Chooses the colour, title and description used to announce an Updown event.
 *
 * @param event - the Updown webhook event to describe
 * @returns the display fields, or `null` for `check.still_down`, which produces no message
 *
 * An event type outside the `UpdownEvent` union is handed to `exhaustiveSwitchError`.
 */
function formatUpdownEvent(event: UpdownEvent): {
	colour: number
	title: string
	description: string
} | null {
	switch (event.event) {
		case 'check.down': {
			const { url, period } = event.check
			return {
				colour: RED,
				title: `Check DOWN at <${url}>`,
				description: `<${url}> is down: "${event.downtime.error}"\n\nNext check in ${period} seconds`,
			}
		}
		case 'check.still_down': {
			return null
		}
		case 'check.up': {
			const { url } = event.check
			return {
				colour: GREEN,
				title: `Check UP at <${url}>`,
				description: `<${url}> is up\n\nIt was down for ${event.downtime.duration} seconds`,
			}
		}
		case 'check.ssl_invalid': {
			const { url } = event.check
			return {
				colour: RED,
				title: `SSL INVALID at <${url}>`,
				description: `SSL certificate at <${url}> is invalid: "${event.ssl.error}"`,
			}
		}
		case 'check.ssl_valid': {
			const { url } = event.check
			return {
				colour: GREEN,
				title: `SSL VALID at <${url}>`,
				description: `SSL certificate at <${url}> is now valid`,
			}
		}
		case 'check.ssl_expiration': {
			const { url } = event.check
			return {
				colour: ORANGE,
				title: `SSL EXPIRATION at <${url}>`,
				description: `SSL certificate at <${url}> will expire in ${event.ssl.days_before_expiration} days`,
			}
		}
		case 'check.ssl_renewed': {
			const { url } = event.check
			return {
				colour: GREEN,
				title: `SSL RENEWED at <${url}>`,
				description: `SSL certificate at <${url}> was renewed`,
			}
		}
		case 'check.performance_drop': {
			const { url } = event.check
			return {
				colour: ORANGE,
				title: `PERFORMANCE DROP at <${url}>`,
				description: `Performance drop at <${url}>, apdex dropped ${event.apdex_dropped}`,
			}
		}
		default:
			// every member of the union is handled above, so `event` is `never` here
			exhaustiveSwitchError(event, 'event')
	}
}
/**
 * Builds the Discord webhook payload that announces an Updown event.
 *
 * @param event - the Updown webhook event
 * @returns the payload to send, or `null` when `formatUpdownEvent` yields nothing for the event
 */
export function updownToDiscord(event: UpdownEvent): DiscordPayload | null {
	const summary = formatUpdownEvent(event)
	if (!summary) {
		return null
	}

	// colour and description go in the embed; the title goes in the plain-text content
	const embed: APIEmbed = {
		color: summary.colour,
		description: summary.description,
		timestamp: event.time,
	}

	return {
		username: 'Health Worker',
		content: `Updown: ${summary.title}`,
		embeds: [embed],
	}
}

View file

@ -0,0 +1,69 @@
import { DiscordPayload, updownToDiscord } from './discord'
import { Event as UpdownEvent } from './updown_types'
/** Values the worker reads from its environment; the fetch handler answers 500 if either is unset. */
interface Env {
// Discord webhook URL that translated events are POSTed to
DISCORD_HEALTH_WEBHOOK_URL: string | undefined
// request path that Updown webhooks must hit (compared against the request's pathname).
// it needs to be passed in because it's effectively a secret, unless we want everyone to be able
// to stress us out with spurious discord alerts
HEALTH_WORKER_UPDOWN_WEBHOOK_PATH: string | undefined
}
/**
 * POSTs `discord` as a JSON body to the webhook at `url`.
 *
 * @returns the raw response; a non-OK status is not turned into an error here, so the
 * caller has to inspect it
 */
async function sendDiscordWebhook(url: string, discord: DiscordPayload): Promise<Response> {
	const body = JSON.stringify(discord)
	const headers = { 'Content-Type': 'application/json' }
	return fetch(url, { method: 'POST', headers, body })
}
/**
 * Handles one Updown webhook delivery by forwarding each event in the body to Discord.
 *
 * @param request - incoming request; its body is expected to be a JSON array of Updown events
 * @param discordUrl - Discord webhook URL to post the translated events to
 * @returns 400 when the body is not valid JSON or is not an array. Otherwise the events are
 * sent in order, one at a time; the first non-OK Discord response stops processing and its
 * status is used for our response. 200 when every send succeeded or nothing needed sending.
 */
async function handleUpdown(request: Request, discordUrl: string): Promise<Response> {
	let body: unknown
	try {
		body = await request.json()
	} catch (err) {
		// a malformed body would otherwise surface as an unhandled exception in the worker
		console.error('Updown webhook body is not valid JSON', err)
		return new Response('Bad Request', { status: 400 })
	}
	if (!Array.isArray(body)) {
		// iterating a non-array below would throw
		console.error('Updown webhook body is not an array of events')
		return new Response('Bad Request', { status: 400 })
	}
	// only the outer shape is checked; individual events are trusted to match UpdownEvent
	const updownEvents: Array<UpdownEvent> = body

	let status = 200
	for (const e of updownEvents) {
		const discordPayload = updownToDiscord(e)
		if (!discordPayload) {
			// some events (e.g. check.still_down) produce no message
			continue
		}
		const discordResult = await sendDiscordWebhook(discordUrl, discordPayload)
		if (!discordResult.ok) {
			console.error(`Discord error ${discordResult.status}: ${discordResult.statusText}`)
			// report Discord's failure status to the caller and skip the remaining events
			status = discordResult.status
			break
		}
	}
	return new Response(null, { status })
}
/**
 * Worker entry point.
 *
 * Answers 500 when either environment value is missing, passes requests whose path equals
 * HEALTH_WORKER_UPDOWN_WEBHOOK_PATH to `handleUpdown`, and answers 404 for anything else.
 */
const handler: ExportedHandler<Env> = {
	async fetch(request: Request, env: Env): Promise<Response> {
		const {
			DISCORD_HEALTH_WEBHOOK_URL: discordUrl,
			HEALTH_WORKER_UPDOWN_WEBHOOK_PATH: updownWebhookPath,
		} = env

		if (!discordUrl) {
			console.error('missing DISCORD_HEALTH_WEBHOOK_URL')
			return new Response('Internal error', { status: 500 })
		}
		if (!updownWebhookPath) {
			console.error('missing HEALTH_WORKER_UPDOWN_WEBHOOK_PATH')
			return new Response('Internal error', { status: 500 })
		}

		const { pathname } = new URL(request.url)

		// the path acts as a shared secret and is compared with a plain ===.
		// timing safety COULD be an issue, but it seems that in practice it isn't:
		// https://github.com/scriptin/node-timing-attack
		// my own testing confirms those observations
		if (pathname !== updownWebhookPath) {
			return new Response('Not Found', { status: 404 })
		}
		return handleUpdown(request, discordUrl)
	},
}

export default handler

View file

@ -0,0 +1,152 @@
// docs: https://updown.io/api#webhooks
/**
 * Fields common to every `check` object embedded in an Updown webhook event.
 * Date-like fields are typed as strings, as they arrive in the JSON payload.
 */
export interface BaseCheck {
	token: string
	/** URL of the monitored endpoint. */
	url: string
	/** Name given to the check, or `null` when it has none. */
	alias: string | null
	last_status: number
	uptime: number
	/** Interval between checks, in seconds. */
	period: number
	apdex_t: number
	string_match: string
	enabled: boolean
	published: boolean
	// element shape is not modelled here; `unknown` forces callers to narrow before use
	disabled_locations: unknown[]
	recipients: unknown[]
	last_check_at: string
	next_check_at: string
	// NOTE(review): typed as always-null; confirm against the Updown docs whether a date string can appear
	created_at: null
	mute_until: null | string
	favicon_url: string
	custom_headers: CustomHeaders
	http_verb: string
	http_body: string
}
/** A check that is currently down: `down_since` and `error` are set, `up_since` is null. */
export interface FailingCheck extends BaseCheck {
down: true
down_since: string
up_since: null
error: string
}
/** A check that is currently up: `up_since` is set, `down_since` and `error` are null. */
export interface SucceedingCheck extends BaseCheck {
	// `false` (not `true`) so that `down` discriminates this from FailingCheck
	down: false
	down_since: null
	up_since: string
	error: null
}
/** Fields common to every `downtime` object, whether or not the downtime has ended. */
export interface BaseDowntime {
id: string
// error message describing why the check is considered down
error: string
started_at: string
// shape not modelled; narrow before use
partial: unknown
}
/** A downtime with no end yet: `ended_at` and `duration` are both null. */
export interface OngoingDowntime extends BaseDowntime {
ended_at: null
duration: null
}
/** A downtime that has ended: `ended_at` and `duration` are both set. */
export interface FinishedDowntime extends BaseDowntime {
ended_at: string
// seconds
duration: number
}
/** String-to-string map used for a check's `custom_headers` field. */
export type CustomHeaders = Record<string, string>
/** Description of an SSL certificate as it appears inside the `ssl` object of SSL events. */
export interface SslCert {
subject: string
issuer: string
// NOTE(review): `from` / `to` are presumably the validity window as date strings — confirm
from: string
to: string
algorithm: string
}
/** `check.down` event: carries the failing check and its still-ongoing downtime. */
export interface EventDown {
event: 'check.down'
time: string
description: string
check: FailingCheck
downtime: OngoingDowntime
}
/** `check.still_down` event: same shape as `check.down`, differing only in the `event` tag. */
export interface EventStillDown {
event: 'check.still_down'
time: string
description: string
check: FailingCheck
downtime: OngoingDowntime
}
/** `check.up` event: carries the succeeding check and the downtime that just finished. */
export interface EventUp {
event: 'check.up'
time: string
description: string
check: SucceedingCheck
// finished, so `duration` (seconds) is available
downtime: FinishedDowntime
}
/** `check.ssl_invalid` event: carries the certificate and the validation error. */
export interface EventSslInvalid {
event: 'check.ssl_invalid'
time: string
description: string
check: SucceedingCheck | FailingCheck
ssl: {
cert: SslCert
// message describing why the certificate is invalid
error: string
}
}
/** `check.ssl_valid` event: carries the certificate, with no error field. */
export interface EventSslValid {
event: 'check.ssl_valid'
time: string
description: string
check: SucceedingCheck | FailingCheck
ssl: {
cert: SslCert
}
}
/** `check.ssl_expiration` event: carries the certificate and the number of days left before it expires. */
export interface EventSslExpiration {
event: 'check.ssl_expiration'
time: string
description: string
check: SucceedingCheck | FailingCheck
ssl: {
cert: SslCert
days_before_expiration: number
}
}
/** `check.ssl_renewed` event: carries both the new certificate and the one it replaces. */
export interface EventSslRenewed {
event: 'check.ssl_renewed'
time: string
description: string
check: SucceedingCheck | FailingCheck
ssl: {
new_cert: SslCert
old_cert: SslCert
}
}
/** `check.performance_drop` event: reports a drop in the check's apdex. */
export interface EventPerformanceDrop {
event: 'check.performance_drop'
time: string
description: string
check: SucceedingCheck | FailingCheck
// the drop, delivered as a string rather than a number
apdex_dropped: string
// NOTE(review): keys are presumably timestamps of recent measurements — confirm against the Updown docs
last_metrics: Record<string, { apdex: number }>
}
/** Every Updown webhook event modelled in this file, discriminated by the `event` field. */
export type Event =
| EventDown
| EventStillDown
| EventUp
| EventSslInvalid
| EventSslValid
| EventSslExpiration
| EventSslRenewed
| EventPerformanceDrop

View file

@ -0,0 +1,11 @@
{
"extends": "../../config/tsconfig.base.json",
"include": ["src"],
"exclude": ["node_modules", "dist", ".tsbuild*"],
"compilerOptions": {
"noEmit": true,
"emitDeclarationOnly": false,
"types": ["@cloudflare/workers-types", "@types/node"]
},
"references": []
}

View file

@ -0,0 +1,3 @@
# Wrangler configuration for the health worker
# name of the worker
name = "health-worker"
# entry module of the worker
main = "src/index.ts"
# Workers runtime compatibility date
compatibility_date = "2023-12-18"