From 6da3cc8ca1baf268d768ed63e4b9cb16e40ce33d Mon Sep 17 00:00:00 2001 From: James Salter Date: Wed, 21 Jul 2021 13:48:10 +0100 Subject: [PATCH] Redact sensitive data --- src/PosthogAnalytics.ts | 103 +++++++++++++++++++++++++++++----- test/PosthogAnalytics-test.ts | 66 ++++++++++++++++------ 2 files changed, 138 insertions(+), 31 deletions(-) diff --git a/src/PosthogAnalytics.ts b/src/PosthogAnalytics.ts index 80b5861c17..d5f9b1d83c 100644 --- a/src/PosthogAnalytics.ts +++ b/src/PosthogAnalytics.ts @@ -1,4 +1,4 @@ -import posthog from 'posthog-js'; +import posthog, { PostHog } from 'posthog-js'; import SdkConfig from './SdkConfig'; interface IEvent { @@ -10,6 +10,11 @@ interface IEvent { properties: {} } +export enum Anonymity { + Anonymous, + Pseudonymous +} + // If an event extends IPseudonymousEvent, the event contains pseudonymous data // that won't be sent unless the user has explicitly consented to pseudonymous tracking. // For example, hashed user IDs or room IDs. @@ -33,10 +38,38 @@ const hashHex = async (input: string): Promise => { return [...new Uint8Array(digestBuf)].map((b: number) => b.toString(16).padStart(2, "0")).join(""); }; +const knownScreens = new Set([ + "register", "login", "forgot_password", "soft_logout", "new", "settings", "welcome", "home", "start", "directory", + "start_sso", "start_cas", "groups", "complete_security", "post_registration", "room", "user", "group", +]); + +export async function getRedactedCurrentLocation(origin: string, hash: string, pathname: string, anonymity: Anonymity) { + // Redact PII from the current location. + // If anonymous is true, redact entirely, if false, substitute it with a hash. + // For known screens, assumes a URL structure of //might/be/pii + if (origin.startsWith('file://')) { + pathname = "//"; + } + + let [_, screen, ...parts] = hash.split("/"); + + if (!knownScreens.has(screen)) { + screen = ""; + } + + for (let i = 0; i < parts.length; i++) { + parts[i] = anonymity === Anonymity.Anonymous ? `` : await hashHex(parts[i]); + } + + const hashStr = `${_}/${screen}/${parts.join("/")}`; + return origin + pathname + hashStr; +} + export class PosthogAnalytics { private onlyTrackAnonymousEvents = false; private initialised = false; - private posthog = null; + private posthog?: PostHog = null; + private redactedCurrentLocation = null; private static _instance = null; @@ -47,23 +80,63 @@ export class PosthogAnalytics { return this._instance; } - constructor(posthog) { + constructor(posthog: PostHog) { this.posthog = posthog; } - public init(onlyTrackAnonymousEvents: boolean) { + public async init(onlyTrackAnonymousEvents: boolean) { if (Boolean(navigator.doNotTrack === "1")) { this.initialised = false; return; } this.onlyTrackAnonymousEvents = onlyTrackAnonymousEvents; + const posthogConfig = SdkConfig.get()["posthog"]; if (posthogConfig) { - this.posthog.init(posthogConfig.projectApiKey, { api_host: posthogConfig.apiHost }); + // Update the redacted current location before initialising posthog, as posthog.init triggers + // an immediate pageview event which calls the sanitize_properties callback + await this.updateRedactedCurrentLocation(); + + this.posthog.init(posthogConfig.projectApiKey, { + api_host: posthogConfig.apiHost, + autocapture: false, + mask_all_text: true, + mask_all_element_attributes: true, + sanitize_properties: this.sanitizeProperties.bind(this), + }); this.initialised = true; } } + private async updateRedactedCurrentLocation() { + // TODO only calculate this when the location changes as its expensive + const { origin, hash, pathname } = window.location; + this.redactedCurrentLocation = await getRedactedCurrentLocation( + origin, hash, pathname, this.onlyTrackAnonymousEvents ? Anonymity.Anonymous : Anonymity.Pseudonymous); + } + + private sanitizeProperties(properties: posthog.Properties, _: string): posthog.Properties { + // Sanitize posthog's built in properties which leak PII e.g. url reporting + // see utils.js _.info.properties in posthog-js + + // this.redactedCurrentLocation needs to have been updated prior to reaching this point as + // updating it involves async, which this callback is not + properties['$current_url'] = this.redactedCurrentLocation; + + if (this.onlyTrackAnonymousEvents) { + // drop referrer information for anonymous users + properties['$referrer'] = null; + properties['$referring_domain'] = null; + properties['$initial_referrer'] = null; + properties['$initial_referring_domain'] = null; + + // drop device ID, which is a UUID persisted in local storage + properties['$device_id'] = null; + } + + return properties; + } + public async identifyUser(userId: string) { if (this.onlyTrackAnonymousEvents) return; this.posthog.identify(await hashHex(userId)); @@ -77,21 +150,25 @@ export class PosthogAnalytics { this.onlyTrackAnonymousEvents = enabled; } - public trackPseudonymousEvent( - eventName: E["eventName"], - properties: E["properties"], - ) { + private async capture(eventName: string, properties: posthog.Properties, anonymity: Anonymity) { if (!this.initialised) return; - if (this.onlyTrackAnonymousEvents) return; + await this.updateRedactedCurrentLocation(anonymity); this.posthog.capture(eventName, properties); } - public trackAnonymousEvent( + public async trackPseudonymousEvent( eventName: E["eventName"], properties: E["properties"], ) { - if (!this.initialised) return; - this.posthog.capture(eventName, properties); + if (this.onlyTrackAnonymousEvents) return; + this.capture(eventName, properties, Anonymity.Pseudonyomous); + } + + public async trackAnonymousEvent( + eventName: E["eventName"], + properties: E["properties"], + ) { + this.capture(eventName, properties, Anonymity.Anonymous); } public async trackRoomEvent( diff --git a/test/PosthogAnalytics-test.ts b/test/PosthogAnalytics-test.ts index fd49255fa1..e9efeffa7d 100644 --- a/test/PosthogAnalytics-test.ts +++ b/test/PosthogAnalytics-test.ts @@ -1,4 +1,5 @@ -import { IAnonymousEvent, IRoomEvent, PosthogAnalytics } from '../src/PosthogAnalytics'; +import { Anonymity, getRedactedCurrentLocation, IAnonymousEvent, IRoomEvent, + PosthogAnalytics } from '../src/PosthogAnalytics'; import SdkConfig from '../src/SdkConfig'; const crypto = require('crypto'); @@ -68,9 +69,9 @@ describe("PosthogAnalytics", () => { expect(analytics.isInitialised()).toBe(true); }); - it("Should pass track() to posthog", () => { + it("Should pass track() to posthog", async () => { analytics.init(false); - analytics.trackAnonymousEvent("jest_test_event", { + await analytics.trackAnonymousEvent("jest_test_event", { foo: "bar", }); expect(fakePosthog.capture.mock.calls[0][0]).toBe("jest_test_event"); @@ -80,29 +81,29 @@ describe("PosthogAnalytics", () => { it("Should pass trackRoomEvent to posthog", async () => { analytics.init(false); const roomId = "42"; - return analytics.trackRoomEvent("jest_test_event", roomId, { + await analytics.trackRoomEvent("jest_test_event", roomId, { foo: "bar", - }).then(() => { - expect(fakePosthog.capture.mock.calls[0][0]).toBe("jest_test_event"); - expect(fakePosthog.capture.mock.calls[0][1]).toEqual({ - foo: "bar", - hashedRoomId: "73475cb40a568e8da8a045ced110137e159f890ac4da883b6b17dc651b3a8049", - }); + }); + expect(fakePosthog.capture.mock.calls[0][0]).toBe("jest_test_event"); + expect(fakePosthog.capture.mock.calls[0][1]).toEqual({ + foo: "bar", + hashedRoomId: "73475cb40a568e8da8a045ced110137e159f890ac4da883b6b17dc651b3a8049", }); }); - it("Should silently not track if not inititalised", () => { - analytics.trackAnonymousEvent("jest_test_event", { + it("Should silently not track if not inititalised", async () => { + await analytics.trackAnonymousEvent("jest_test_event", { foo: "bar", }); - expect(fakePosthog.capture.mock.calls.length).toBe(0); }); - it("Should not track non-anonymous messages if onlyTrackAnonymousEvents is true", () => { - analytics.trackAnonymousEvent("jest_test_event", { + it("Should not track non-anonymous messages if onlyTrackAnonymousEvents is true", async () => { + analytics.init(true); + await analytics.trackPseudonymousEvent("jest_test_event", { foo: "bar", }); + expect(fakePosthog.capture.mock.calls.length).toBe(0); }); it("Should identify the user to posthog if onlyTrackAnonymousEvents is false", async () => { @@ -114,8 +115,37 @@ describe("PosthogAnalytics", () => { it("Should not identify the user to posthog if onlyTrackAnonymousEvents is true", async () => { analytics.init(true); - return analytics.identifyUser("foo").then(() => { - expect(fakePosthog.identify.mock.calls.length).toBe(0); - }); + await analytics.identifyUser("foo"); + expect(fakePosthog.identify.mock.calls.length).toBe(0); + }); + + it("Should pseudonymise a location of a known screen", async () => { + const location = await getRedactedCurrentLocation( + "https://foo.bar", "#/register/some/pii", "/", Anonymity.Pseudonymous); + expect(location).toBe( + `https://foo.bar/#/register/\ +a6b46dd0d1ae5e86cbc8f37e75ceeb6760230c1ca4ffbcb0c97b96dd7d9c464b/\ +bd75b3e080945674c0351f75e0db33d1e90986fa07b318ea7edf776f5eef38d4`); + }); + + it("Should anonymise a location of a known screen", async () => { + const location = await getRedactedCurrentLocation( + "https://foo.bar", "#/register/some/pii", "/", Anonymity.Anonymous); + expect(location).toBe("https://foo.bar/#/register//"); + }); + + it("Should pseudonymise a location of an unknown screen", async () => { + const location = await getRedactedCurrentLocation( + "https://foo.bar", "#/not_a_screen_name/some/pii", "/", Anonymity.Pseudonymous); + expect(location).toBe( + `https://foo.bar/#//\ +a6b46dd0d1ae5e86cbc8f37e75ceeb6760230c1ca4ffbcb0c97b96dd7d9c464b/\ +bd75b3e080945674c0351f75e0db33d1e90986fa07b318ea7edf776f5eef38d4`); + }); + + it("Should anonymise a location of an unknown screen", async () => { + const location = await getRedactedCurrentLocation( + "https://foo.bar", "#/not_a_screen_name/some/pii", "/", Anonymity.Anonymous); + expect(location).toBe("https://foo.bar/#///"); }); });