Redact sensitive data

This commit is contained in:
James Salter 2021-07-21 13:48:10 +01:00
parent 7e549f84e7
commit 6da3cc8ca1
2 changed files with 138 additions and 31 deletions

View file

@ -1,4 +1,4 @@
import posthog from 'posthog-js';
import posthog, { PostHog } from 'posthog-js';
import SdkConfig from './SdkConfig';
interface IEvent {
@ -10,6 +10,11 @@ interface IEvent {
properties: {}
}
// How strongly potentially identifying data must be scrubbed before it is reported.
export enum Anonymity {
// Location segments are replaced with a fixed `<redacted>` placeholder.
Anonymous,
// Location segments are replaced with their hash rather than dropped.
Pseudonymous
}
// If an event extends IPseudonymousEvent, the event contains pseudonymous data
// that won't be sent unless the user has explicitly consented to pseudonymous tracking.
// For example, hashed user IDs or room IDs.
@ -33,10 +38,38 @@ const hashHex = async (input: string): Promise<string> => {
return [...new Uint8Array(digestBuf)].map((b: number) => b.toString(16).padStart(2, "0")).join("");
};
// Screen names that may appear verbatim in a reported location; any other screen name is
// replaced with a placeholder by getRedactedCurrentLocation.
const knownScreens = new Set<string>([
    "register",
    "login",
    "forgot_password",
    "soft_logout",
    "new",
    "settings",
    "welcome",
    "home",
    "start",
    "directory",
    "start_sso",
    "start_cas",
    "groups",
    "complete_security",
    "post_registration",
    "room",
    "user",
    "group",
]);
/**
 * Build a version of the current location with PII redacted.
 *
 * The hash is assumed to have the structure `#/<screen name>/might/be/pii`:
 *  - the screen name is kept only if it is one of `knownScreens`, otherwise it is replaced
 *    with `<redacted_screen_name>`;
 *  - every segment after the screen name is replaced with `<redacted>` when `anonymity` is
 *    `Anonymity.Anonymous`, and with its `hashHex` digest otherwise;
 *  - whatever precedes the first `/` of the hash is passed through unchanged.
 *
 * @param origin - Origin of the location; emitted unchanged at the start of the result.
 * @param hash - Fragment of the location, including the leading `#`.
 * @param pathname - Path of the location; replaced with a placeholder when `origin` uses
 *     the `file://` scheme.
 * @param anonymity - Whether trailing hash segments are dropped or hashed.
 * @returns `origin`, the (possibly replaced) pathname and the redacted hash, concatenated.
 */
export async function getRedactedCurrentLocation(
    origin: string,
    hash: string,
    pathname: string,
    anonymity: Anonymity,
): Promise<string> {
    const safePathname = origin.startsWith('file://') ? "/<redacted_file_scheme_url>/" : pathname;

    const [beforeFirstSlash, screen, ...segments] = hash.split("/");
    const safeScreen = knownScreens.has(screen) ? screen : "<redacted_screen_name>";

    // The digests are independent of each other, so compute them concurrently rather than
    // awaiting one per loop iteration.
    const safeSegments = anonymity === Anonymity.Anonymous
        ? segments.map(() => "<redacted>")
        : await Promise.all(segments.map((segment) => hashHex(segment)));

    const hashStr = `${beforeFirstSlash}/${safeScreen}/${safeSegments.join("/")}`;
    return origin + safePathname + hashStr;
}
export class PosthogAnalytics {
private onlyTrackAnonymousEvents = false;
private initialised = false;
private posthog = null;
private posthog?: PostHog = null;
private redactedCurrentLocation = null;
private static _instance = null;
@ -47,23 +80,63 @@ export class PosthogAnalytics {
return this._instance;
}
constructor(posthog) {
// Keeps a reference to the given PostHog client; every later call to posthog made by
// this class goes through this reference.
constructor(posthog: PostHog) {
this.posthog = posthog;
}
public init(onlyTrackAnonymousEvents: boolean) {
/**
 * Initialise posthog from the `posthog` section of the SDK config.
 *
 * Does nothing, and leaves the instance uninitialised, when the browser's Do Not Track
 * setting is "1" or when no posthog config is present.
 *
 * @param onlyTrackAnonymousEvents - Stored on the instance; it selects whether the
 *     redacted location is anonymous or pseudonymous and whether referrer and device
 *     properties are blanked by `sanitizeProperties`.
 */
public async init(onlyTrackAnonymousEvents: boolean): Promise<void> {
    if (navigator.doNotTrack === "1") {
        this.initialised = false;
        return;
    }
    this.onlyTrackAnonymousEvents = onlyTrackAnonymousEvents;

    const posthogConfig = SdkConfig.get()["posthog"];
    if (posthogConfig) {
        // Update the redacted current location before initialising posthog, as posthog.init triggers
        // an immediate pageview event which calls the sanitize_properties callback
        await this.updateRedactedCurrentLocation();

        // posthog must be initialised exactly once, and only with the sanitising callback in
        // place, so that the initial pageview never carries the unredacted URL.
        this.posthog.init(posthogConfig.projectApiKey, {
            api_host: posthogConfig.apiHost,
            autocapture: false,
            mask_all_text: true,
            mask_all_element_attributes: true,
            sanitize_properties: this.sanitizeProperties.bind(this),
        });
        this.initialised = true;
    }
}
/**
 * Recompute `redactedCurrentLocation` from `window.location`.
 *
 * @param anonymity - Redaction level to apply. When omitted it is derived from
 *     `onlyTrackAnonymousEvents`: anonymous if set, pseudonymous otherwise.
 */
private async updateRedactedCurrentLocation(anonymity?: Anonymity): Promise<void> {
    // TODO only calculate this when the location changes as it's expensive
    const { origin, hash, pathname } = window.location;
    // `??` rather than `||`: Anonymity.Anonymous is 0 and must not fall through to the default.
    const effectiveAnonymity = anonymity ??
        (this.onlyTrackAnonymousEvents ? Anonymity.Anonymous : Anonymity.Pseudonymous);
    this.redactedCurrentLocation = await getRedactedCurrentLocation(
        origin, hash, pathname, effectiveAnonymity);
}
/**
 * Callback handed to posthog as `sanitize_properties`: scrubs posthog's built-in
 * properties that leak PII, e.g. URL reporting (see utils.js _.info.properties in posthog-js).
 *
 * This callback is synchronous while computing the redacted location is not, so
 * `redactedCurrentLocation` must already have been updated before this runs.
 *
 * @param properties - Event properties; modified in place and returned.
 * @param _ - Unused second callback argument.
 * @returns The same `properties` object.
 */
private sanitizeProperties(properties: posthog.Properties, _: string): posthog.Properties {
    properties['$current_url'] = this.redactedCurrentLocation;

    if (!this.onlyTrackAnonymousEvents) {
        return properties;
    }

    // For anonymous users: drop referrer information, and the device ID, which is a UUID
    // persisted in local storage.
    const blankedForAnonymousUsers = [
        '$referrer',
        '$referring_domain',
        '$initial_referrer',
        '$initial_referring_domain',
        '$device_id',
    ];
    for (const key of blankedForAnonymousUsers) {
        properties[key] = null;
    }
    return properties;
}
public async identifyUser(userId: string) {
if (this.onlyTrackAnonymousEvents) return;
this.posthog.identify(await hashHex(userId));
@ -77,21 +150,25 @@ export class PosthogAnalytics {
this.onlyTrackAnonymousEvents = enabled;
}
public trackPseudonymousEvent<E extends IPseudonymousEvent>(
eventName: E["eventName"],
properties: E["properties"],
) {
/**
 * Refresh the redacted location for the given anonymity level, then hand the event to posthog.
 *
 * Does nothing if analytics has not been initialised. Deciding whether an event may be sent
 * at all is left to the callers: this method must not filter on `onlyTrackAnonymousEvents`,
 * otherwise events passed in as `Anonymity.Anonymous` would be dropped in anonymous-only mode.
 *
 * @param eventName - Name of the event to report.
 * @param properties - Properties to attach to the event.
 * @param anonymity - Redaction level used for the location reported with this event.
 */
private async capture(eventName: string, properties: posthog.Properties, anonymity: Anonymity): Promise<void> {
    if (!this.initialised) return;
    // The location must be refreshed first: posthog's sanitize_properties callback is
    // synchronous and reads the value computed here.
    await this.updateRedactedCurrentLocation(anonymity);
    this.posthog.capture(eventName, properties);
}
public trackAnonymousEvent<E extends IAnonymousEvent>(
/**
 * Track an event that carries pseudonymous data.
 *
 * The event is dropped when only anonymous events may be tracked. The returned promise
 * settles once the event has been handed to posthog or dropped.
 *
 * @param eventName - Name of the event to report.
 * @param properties - Properties to attach to the event.
 */
public async trackPseudonymousEvent<E extends IPseudonymousEvent>(
    eventName: E["eventName"],
    properties: E["properties"],
): Promise<void> {
    // This guard must come before anything is sent to posthog.
    if (this.onlyTrackAnonymousEvents) return;
    await this.capture(eventName, properties, Anonymity.Pseudonymous);
}
/**
 * Track an event that carries no pseudonymous data.
 *
 * The returned promise settles once the event has been handed to posthog, or dropped
 * because analytics is not initialised, so callers can await it before checking what was sent.
 *
 * @param eventName - Name of the event to report.
 * @param properties - Properties to attach to the event.
 */
public async trackAnonymousEvent<E extends IAnonymousEvent>(
    eventName: E["eventName"],
    properties: E["properties"],
): Promise<void> {
    await this.capture(eventName, properties, Anonymity.Anonymous);
}
public async trackRoomEvent<E extends IRoomEvent>(

View file

@ -1,4 +1,5 @@
import { IAnonymousEvent, IRoomEvent, PosthogAnalytics } from '../src/PosthogAnalytics';
import { Anonymity, getRedactedCurrentLocation, IAnonymousEvent, IRoomEvent,
PosthogAnalytics } from '../src/PosthogAnalytics';
import SdkConfig from '../src/SdkConfig';
const crypto = require('crypto');
@ -68,9 +69,9 @@ describe("PosthogAnalytics", () => {
expect(analytics.isInitialised()).toBe(true);
});
it("Should pass track() to posthog", () => {
it("Should pass track() to posthog", async () => {
analytics.init(false);
analytics.trackAnonymousEvent<ITestEvent>("jest_test_event", {
await analytics.trackAnonymousEvent<ITestEvent>("jest_test_event", {
foo: "bar",
});
expect(fakePosthog.capture.mock.calls[0][0]).toBe("jest_test_event");
@ -80,29 +81,29 @@ describe("PosthogAnalytics", () => {
it("Should pass trackRoomEvent to posthog", async () => {
analytics.init(false);
const roomId = "42";
return analytics.trackRoomEvent<IRoomEvent>("jest_test_event", roomId, {
await analytics.trackRoomEvent<IRoomEvent>("jest_test_event", roomId, {
foo: "bar",
}).then(() => {
expect(fakePosthog.capture.mock.calls[0][0]).toBe("jest_test_event");
expect(fakePosthog.capture.mock.calls[0][1]).toEqual({
foo: "bar",
hashedRoomId: "73475cb40a568e8da8a045ced110137e159f890ac4da883b6b17dc651b3a8049",
});
});
expect(fakePosthog.capture.mock.calls[0][0]).toBe("jest_test_event");
expect(fakePosthog.capture.mock.calls[0][1]).toEqual({
foo: "bar",
hashedRoomId: "73475cb40a568e8da8a045ced110137e159f890ac4da883b6b17dc651b3a8049",
});
});
it("Should silently not track if not inititalised", () => {
analytics.trackAnonymousEvent<ITestEvent>("jest_test_event", {
it("Should silently not track if not inititalised", async () => {
await analytics.trackAnonymousEvent<ITestEvent>("jest_test_event", {
foo: "bar",
});
expect(fakePosthog.capture.mock.calls.length).toBe(0);
});
it("Should not track non-anonymous messages if onlyTrackAnonymousEvents is true", () => {
analytics.trackAnonymousEvent<ITestEvent>("jest_test_event", {
it("Should not track non-anonymous messages if onlyTrackAnonymousEvents is true", async () => {
analytics.init(true);
await analytics.trackPseudonymousEvent<ITestEvent>("jest_test_event", {
foo: "bar",
});
expect(fakePosthog.capture.mock.calls.length).toBe(0);
});
it("Should identify the user to posthog if onlyTrackAnonymousEvents is false", async () => {
@ -114,8 +115,37 @@ describe("PosthogAnalytics", () => {
it("Should not identify the user to posthog if onlyTrackAnonymousEvents is true", async () => {
analytics.init(true);
return analytics.identifyUser("foo").then(() => {
expect(fakePosthog.identify.mock.calls.length).toBe(0);
});
await analytics.identifyUser("foo");
expect(fakePosthog.identify.mock.calls.length).toBe(0);
});
it("Should pseudonymise a location of a known screen", async () => {
const location = await getRedactedCurrentLocation(
"https://foo.bar", "#/register/some/pii", "/", Anonymity.Pseudonymous);
expect(location).toBe(
`https://foo.bar/#/register/\
a6b46dd0d1ae5e86cbc8f37e75ceeb6760230c1ca4ffbcb0c97b96dd7d9c464b/\
bd75b3e080945674c0351f75e0db33d1e90986fa07b318ea7edf776f5eef38d4`);
});
it("Should anonymise a location of a known screen", async () => {
const location = await getRedactedCurrentLocation(
"https://foo.bar", "#/register/some/pii", "/", Anonymity.Anonymous);
expect(location).toBe("https://foo.bar/#/register/<redacted>/<redacted>");
});
it("Should pseudonymise a location of an unknown screen", async () => {
const location = await getRedactedCurrentLocation(
"https://foo.bar", "#/not_a_screen_name/some/pii", "/", Anonymity.Pseudonymous);
expect(location).toBe(
`https://foo.bar/#/<redacted_screen_name>/\
a6b46dd0d1ae5e86cbc8f37e75ceeb6760230c1ca4ffbcb0c97b96dd7d9c464b/\
bd75b3e080945674c0351f75e0db33d1e90986fa07b318ea7edf776f5eef38d4`);
});
it("Should anonymise a location of an unknown screen", async () => {
const location = await getRedactedCurrentLocation(
"https://foo.bar", "#/not_a_screen_name/some/pii", "/", Anonymity.Anonymous);
expect(location).toBe("https://foo.bar/#/<redacted_screen_name>/<redacted>/<redacted>");
});
});