Actually use a waveform instead of the frequency data
This commit is contained in: parent 8ddd14e252, commit 449e028bbd
7 changed files with 159 additions and 80 deletions
@@ -246,7 +246,7 @@
 @import "./views/toasts/_AnalyticsToast.scss";
 @import "./views/toasts/_NonUrgentEchoFailureToast.scss";
 @import "./views/verification/_VerificationShowSas.scss";
-@import "./views/voice_messages/_FrequencyBars.scss";
+@import "./views/voice_messages/_Waveform.scss";
 @import "./views/voip/_CallContainer.scss";
 @import "./views/voip/_CallView.scss";
 @import "./views/voip/_DialPad.scss";

@@ -14,14 +14,14 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */

-.mx_FrequencyBars {
+.mx_Waveform {
     position: relative;
     height: 30px; // tallest bar can only be 30px

     display: flex;
     align-items: center; // so the bars grow from the middle

-    .mx_FrequencyBars_bar {
+    .mx_Waveform_bar {
         width: 2px;
         margin-left: 1px;
         margin-right: 1px;

@@ -21,7 +21,7 @@ import {VoiceRecorder} from "../../../voice/VoiceRecorder";
 import {Room} from "matrix-js-sdk/src/models/room";
 import {MatrixClientPeg} from "../../../MatrixClientPeg";
 import classNames from "classnames";
-import FrequencyBars from "../voice_messages/FrequencyBars";
+import LiveRecordingWaveform from "../voice_messages/LiveRecordingWaveform";

 interface IProps {
     room: Room;
@@ -68,16 +68,16 @@ export default class VoiceRecordComposerTile extends React.PureComponent<IProps,
             'mx_VoiceRecordComposerTile_stop': !!this.state.recorder,
         });

-        let bars = null;
+        let waveform = null;
         let tooltip = _t("Record a voice message");
         if (!!this.state.recorder) {
             // TODO: @@ TravisR: Change to match behaviour
             tooltip = _t("Stop & send recording");
-            bars = <FrequencyBars recorder={this.state.recorder} />;
+            waveform = <LiveRecordingWaveform recorder={this.state.recorder} />;
         }

         return (<>
-            {bars}
+            {waveform}
             <AccessibleTooltipButton
                 className={classes}
                 onClick={this.onStartStopVoiceMessage}

@@ -1,58 +0,0 @@
-/*
-Copyright 2021 The Matrix.org Foundation C.I.C.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-import React from "react";
-import {IFrequencyPackage, VoiceRecorder} from "../../../voice/VoiceRecorder";
-import {replaceableComponent} from "../../../utils/replaceableComponent";
-import {arrayFastResample, arraySeed} from "../../../utils/arrays";
-import {percentageOf} from "../../../utils/numbers";
-
-interface IProps {
-    recorder: VoiceRecorder
-}
-
-interface IState {
-    heights: number[];
-}
-
-const DOWNSAMPLE_TARGET = 35; // number of bars
-
-@replaceableComponent("views.voice_messages.FrequencyBars")
-export default class FrequencyBars extends React.PureComponent<IProps, IState> {
-    public constructor(props) {
-        super(props);
-
-        this.state = {heights: arraySeed(0, DOWNSAMPLE_TARGET)};
-        this.props.recorder.frequencyData.onUpdate(this.onFrequencyData);
-    }
-
-    private onFrequencyData = (freq: IFrequencyPackage) => {
-        // We're downsampling from about 1024 points to about 35, so this function is fine (see docs/impl)
-        const bars = arrayFastResample(Array.from(freq.dbBars), DOWNSAMPLE_TARGET);
-        this.setState({
-            // Values are somewhat arbitrary, but help decide what shape the graph should be
-            heights: bars.map(b => percentageOf(b, -150, -70) * 100),
-        });
-    };
-
-    public render() {
-        return <div className='mx_FrequencyBars'>
-            {this.state.heights.map((h, i) => {
-                return <span key={i} style={{height: h + '%'}} className='mx_FrequencyBars_bar' />;
-            })}
-        </div>;
-    }
-}

@@ -0,0 +1,64 @@
+/*
+Copyright 2021 The Matrix.org Foundation C.I.C.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+import React from "react";
+import {IRecordingUpdate, VoiceRecorder} from "../../../voice/VoiceRecorder";
+import {replaceableComponent} from "../../../utils/replaceableComponent";
+import {arrayFastResample, arraySeed} from "../../../utils/arrays";
+import {clamp, percentageOf} from "../../../utils/numbers";
+import Waveform from "./Waveform";
+
+interface IProps {
+    recorder: VoiceRecorder;
+}
+
+interface IState {
+    heights: number[];
+}
+
+const DOWNSAMPLE_TARGET = 35; // number of bars we want
+
+/**
+ * A waveform which shows the waveform of a live recording
+ */
+@replaceableComponent("views.voice_messages.LiveRecordingWaveform")
+export default class LiveRecordingWaveform extends React.PureComponent<IProps, IState> {
+    public constructor(props) {
+        super(props);
+
+        this.state = {heights: arraySeed(0, DOWNSAMPLE_TARGET)};
+        this.props.recorder.liveData.onUpdate(this.onRecordingUpdate);
+    }
+
+    private onRecordingUpdate = (update: IRecordingUpdate) => {
+        // The waveform and the downsample target are pretty close, so we should be fine to
+        // do this, despite the docs on arrayFastResample.
+        const bars = arrayFastResample(Array.from(update.waveform), DOWNSAMPLE_TARGET);
+        this.setState({
+            // The incoming data is between zero and one, but typically even screaming into a
+            // microphone won't send you over 0.6, so we "cap" the graph at about 0.4 for a
+            // point where the average user can still see feedback and be perceived as peaking
+            // when talking "loudly".
+            //
+            // We multiply by 100 because the Waveform component wants values in 0-100 (percentages)
+            heights: bars.map(b => percentageOf(b, 0, 0.40) * 100),
+        });
+    };
+
+    public render() {
+        return <Waveform heights={this.state.heights} />;
+    }
+}

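To picture what onRecordingUpdate above produces, here is a rough standalone sketch of the downsample-and-scale step. resampleLinear and toPercentage are hypothetical stand-ins for the project's arrayFastResample and percentageOf utilities, whose implementations are not shown in this diff; the assumption is that percentageOf clamps into the given range and normalises to 0..1.

// Illustrative sketch only: approximates the resample-and-scale step in LiveRecordingWaveform.
const BAR_COUNT = 35; // mirrors DOWNSAMPLE_TARGET above

function resampleLinear(input: number[], target: number): number[] {
    // Pick evenly spaced samples; fine when the input and target sizes are close.
    const out: number[] = [];
    for (let i = 0; i < target; i++) {
        out.push(input[Math.min(input.length - 1, Math.floor((i * input.length) / target))]);
    }
    return out;
}

function toPercentage(value: number, min: number, max: number): number {
    // Clamp into [min, max] and normalise to 0..1 (assumed behaviour of percentageOf).
    return (Math.min(max, Math.max(min, value)) - min) / (max - min);
}

function waveformToHeights(waveform: number[]): number[] {
    // 0.40 acts as the "cap" described in the comment above; * 100 converts to percentages.
    return resampleLinear(waveform, BAR_COUNT).map(v => toPercentage(v, 0, 0.40) * 100);
}
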
48  src/components/views/voice_messages/Waveform.tsx  Normal file

@@ -0,0 +1,48 @@
+/*
+Copyright 2021 The Matrix.org Foundation C.I.C.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+import React from "react";
+import {IRecordingUpdate, VoiceRecorder} from "../../../voice/VoiceRecorder";
+import {replaceableComponent} from "../../../utils/replaceableComponent";
+import {arrayFastResample, arraySeed} from "../../../utils/arrays";
+import {percentageOf} from "../../../utils/numbers";
+
+interface IProps {
+    heights: number[]; // percentages as integers (0-100)
+}
+
+interface IState {
+}
+
+/**
+ * A simple waveform component. This renders bars (centered vertically) for each
+ * height provided in the component properties. Updating the properties will update
+ * the rendered waveform.
+ */
+@replaceableComponent("views.voice_messages.Waveform")
+export default class Waveform extends React.PureComponent<IProps, IState> {
+    public constructor(props) {
+        super(props);
+    }
+
+    public render() {
+        return <div className='mx_Waveform'>
+            {this.props.heights.map((h, i) => {
+                return <span key={i} style={{height: h + '%'}} className='mx_Waveform_bar' />;
+            })}
+        </div>;
+    }
+}

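A minimal usage sketch for the new Waveform component; the heights below are made-up sample values, since any array of 0-100 percentages works:

import React from "react";
import Waveform from "./Waveform";

// Hypothetical sample data: 35 bar heights expressed as percentages (0-100).
const exampleHeights = Array.from({length: 35}, (_, i) => 50 + 40 * Math.sin(i / 3));

export function ExampleWaveform() {
    return <Waveform heights={exampleHeights} />;
}
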
@@ -25,10 +25,8 @@ const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose
 const BITRATE = 24000; // 24kbps is pretty high quality for our use case in opus.
 const FREQ_SAMPLE_RATE = 10; // Target rate of frequency data (samples / sec). We don't need this super often.

-export interface IFrequencyPackage {
-    dbBars: Float32Array;
-    dbMin: number;
-    dbMax: number;
+export interface IRecordingUpdate {
+    waveform: number[]; // floating points between 0 (low) and 1 (high).

     // TODO: @@ TravisR: Generalize this for a timing package?
 }

@@ -38,11 +36,11 @@ export class VoiceRecorder {
     private recorderContext: AudioContext;
     private recorderSource: MediaStreamAudioSourceNode;
     private recorderStream: MediaStream;
-    private recorderFreqNode: AnalyserNode;
+    private recorderFFT: AnalyserNode;
     private buffer = new Uint8Array(0);
     private mxc: string;
     private recording = false;
-    private observable: SimpleObservable<IFrequencyPackage>;
+    private observable: SimpleObservable<IRecordingUpdate>;
     private freqTimerId: number;

     public constructor(private client: MatrixClient) {
@@ -64,8 +62,16 @@
             sampleRate: SAMPLE_RATE, // once again, the browser will resample for us
         });
         this.recorderSource = this.recorderContext.createMediaStreamSource(this.recorderStream);
-        this.recorderFreqNode = this.recorderContext.createAnalyser();
-        this.recorderSource.connect(this.recorderFreqNode);
+        this.recorderFFT = this.recorderContext.createAnalyser();
+
+        // Bring the FFT time domain down a bit. The default is 2048, and this must be a power
+        // of two. We use 64 points because we happen to know down the line we need less than
+        // that, but 32 would be too few. Large numbers are not helpful here and do not add
+        // precision: they introduce higher precision outputs of the FFT (frequency data), but
+        // it makes the time domain less than helpful.
+        this.recorderFFT.fftSize = 64;
+
+        this.recorderSource.connect(this.recorderFFT);
         this.recorder = new Recorder({
             encoderPath, // magic from webpack
             encoderSampleRate: SAMPLE_RATE,

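Stripped of the Recorder plumbing, the analyser setup above amounts to roughly the following sketch, using only the standard Web Audio API; the function name and getUserMedia constraints are illustrative, not part of this commit:

// Standalone sketch of the analyser wiring used by VoiceRecorder above.
async function makeWaveformAnalyser(): Promise<AnalyserNode> {
    const stream = await navigator.mediaDevices.getUserMedia({audio: true});
    const context = new AudioContext();
    const source = context.createMediaStreamSource(stream);
    const fft = context.createAnalyser();
    fft.fftSize = 64; // 64 time-domain samples per read, matching the diff above
    source.connect(fft);
    return fft;
}
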
@@ -91,7 +97,7 @@
         };
     }

-    public get frequencyData(): SimpleObservable<IFrequencyPackage> {
+    public get liveData(): SimpleObservable<IRecordingUpdate> {
        if (!this.recording) throw new Error("No observable when not recording");
         return this.observable;
     }

@@ -121,16 +127,35 @@
         if (this.observable) {
             this.observable.close();
         }
-        this.observable = new SimpleObservable<IFrequencyPackage>();
+        this.observable = new SimpleObservable<IRecordingUpdate>();
         await this.makeRecorder();
         this.freqTimerId = setInterval(() => {
             if (!this.recording) return;
-            const data = new Float32Array(this.recorderFreqNode.frequencyBinCount);
-            this.recorderFreqNode.getFloatFrequencyData(data);
+
+            // The time domain is the input to the FFT, which means we use an array of the same
+            // size. The time domain is also known as the audio waveform. We're ignoring the
+            // output of the FFT here (frequency data) because we're not interested in it.
+            //
+            // We use bytes out of the analyser because floats have weird precision problems
+            // and are slightly more difficult to work with. The bytes are easy to work with,
+            // which is why we pick them (they're also more precise, but we care less about that).
+            const data = new Uint8Array(this.recorderFFT.fftSize);
+            this.recorderFFT.getByteTimeDomainData(data);
+
+            // Because we're dealing with a uint array we need to do math a bit differently.
+            // If we just `Array.from()` the uint array, we end up with 1s and 0s, which aren't
+            // what we're after. Instead, we have to use a bit of manual looping to correctly end
+            // up with the right values
+            const translatedData: number[] = [];
+            for (let i = 0; i < data.length; i++) {
+                // All we're doing here is inverting the amplitude and putting the metric somewhere
+                // between zero and one. Without the inversion, lower values are "louder", which is
+                // not super helpful.
+                translatedData.push(1 - (data[i] / 128.0));
+            }
+
             this.observable.update({
-                dbBars: data,
-                dbMin: this.recorderFreqNode.minDecibels,
-                dbMax: this.recorderFreqNode.maxDecibels,
+                waveform: translatedData,
             });
         }, 1000 / FREQ_SAMPLE_RATE) as any as number; // XXX: Linter doesn't understand timer environment
         await this.recorder.start();

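As a rough consumer-side sketch mirroring how LiveRecordingWaveform subscribes above; the import path and the logPeaks helper are illustrative, not part of this commit:

import {IRecordingUpdate, VoiceRecorder} from "./VoiceRecorder";

// Log the loudest sample of each update from a recorder that has already been started.
function logPeaks(recorder: VoiceRecorder) {
    recorder.liveData.onUpdate((update: IRecordingUpdate) => {
        // update.waveform holds values between 0 (low) and 1 (high), per IRecordingUpdate.
        const peak = Math.max(...update.waveform);
        console.log(`Peak amplitude: ${peak.toFixed(2)}`);
    });
}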