Populate waveform data on voice message event

2021-04-19 23:05:06 -06:00 · 2021-04-19 23:05:06 -06:00 · 61730f2f88
commit 61730f2f88
parent 7d9562137e
5 changed files with 63 additions and 3 deletions
--- a/src/components/views/rooms/VoiceRecordComposerTile.tsx
+++ b/src/components/views/rooms/VoiceRecordComposerTile.tsx
@@ -77,7 +77,13 @@ export default class VoiceRecordComposerTile extends React.PureComponent<IProps,
                },
                "org.matrix.experimental.msc2516.voice": { // MSC2516+MSC1767 experiment
                    duration: Math.round(this.state.recorder.durationSeconds * 1000),
-                    // TODO: @@ TravisR: Waveform.
+
+                    // Events can't have floats, so we try to maintain resolution by using 1024
+                    // as a maximum value. The waveform contains values between zero and 1, so this
+                    // should come out largely sane.
+                    //
+                    // We're expecting about one data point per second of audio.
+                    waveform: this.state.recorder.finalWaveform.map(v => Math.round(v * 1024)),
                },
            });
            await VoiceRecordingStore.instance.disposeRecording();
--- a/src/utils/arrays.ts
+++ b/src/utils/arrays.ts
@@ -54,7 +54,7 @@ export function arraySeed<T>(val: T, length: number): T[] {
 * @param a The array to clone. Must be defined.
 * @returns A copy of the array.
 */
-export function arrayFastClone(a: any[]): any[] {
+export function arrayFastClone<T>(a: T[]): T[] {
    return a.slice(0, a.length);
 }

--- a/src/voice/RecorderWorklet.ts
+++ b/src/voice/RecorderWorklet.ts
@@ -14,7 +14,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */

-import {ITimingPayload, PayloadEvent, WORKLET_NAME} from "./consts";
+import {IAmplitudePayload, ITimingPayload, PayloadEvent, WORKLET_NAME} from "./consts";
+import {percentageOf} from "../utils/numbers";

 // from AudioWorkletGlobalScope: https://developer.mozilla.org/en-US/docs/Web/API/AudioWorkletGlobalScope
 declare const currentTime: number;
@@ -22,12 +23,45 @@ declare const currentFrame: number;
 declare const sampleRate: number;

 class MxVoiceWorklet extends AudioWorkletProcessor {
+    private nextAmplitudeSecond = 0;
+
    constructor() {
        super();
    }

    process(inputs, outputs, parameters) {
+        // We only fire amplitude updates once a second to avoid flooding the recording instance
+        // with useless data. Much of the data would end up discarded, so we ratelimit ourselves
+        // here.
+        const currentSecond = Math.round(currentTime);
+        if (currentSecond === this.nextAmplitudeSecond) {
+            // We're expecting exactly one mono input source, so just grab the very first frame of
+            // samples for the analysis.
+            const monoChan = inputs[0][0];
+
+            // The amplitude of the frame's samples is effectively the loudness of the frame. This
+            // translates into a bar which can be rendered as part of the whole recording clip's
+            // waveform.
+            //
+            // We translate the amplitude down to 0-1 for sanity's sake.
+            const minVal = monoChan.reduce((m, v) => Math.min(m, v), Number.MAX_SAFE_INTEGER);
+            const maxVal = monoChan.reduce((m, v) => Math.max(m, v), Number.MIN_SAFE_INTEGER);
+            const amplitude = percentageOf(maxVal, -1, 1) - percentageOf(minVal, -1, 1);
+
+            this.port.postMessage(<IAmplitudePayload>{
+                ev: PayloadEvent.AmplitudeMark,
+                amplitude: amplitude,
+                forSecond: currentSecond,
+            });
+            this.nextAmplitudeSecond++;
+        }
+
+        // We mostly use this worklet to fire regular clock updates through to components
        this.port.postMessage(<ITimingPayload>{ev: PayloadEvent.Timekeep, timeSeconds: currentTime});
+
+        // We're supposed to return false when we're "done" with the audio clip, but seeing as
+        // we are acting as a passive processor we are never truly "done". The browser will clean
+        // us up when it is done with us.
        return true;
    }
 }
--- a/src/voice/VoiceRecording.ts
+++ b/src/voice/VoiceRecording.ts
@@ -24,6 +24,7 @@ import EventEmitter from "events";
 import {IDestroyable} from "../utils/IDestroyable";
 import {Singleflight} from "../utils/Singleflight";
 import {PayloadEvent, WORKLET_NAME} from "./consts";
+import {arrayFastClone} from "../utils/arrays";

 const CHANNELS = 1; // stereo isn't important
 const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose quality.
@@ -55,11 +56,16 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
    private mxc: string;
    private recording = false;
    private observable: SimpleObservable<IRecordingUpdate>;
+    private amplitudes: number[] = []; // at each second mark, generated

    public constructor(private client: MatrixClient) {
        super();
    }

+    public get finalWaveform(): number[] {
+        return arrayFastClone(this.amplitudes);
+    }
+
    public get contentType(): string {
        return "audio/ogg";
    }
@@ -114,6 +120,12 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
                case PayloadEvent.Timekeep:
                    this.processAudioUpdate(ev.data['timeSeconds']);
                    break;
+                case PayloadEvent.AmplitudeMark:
+                    // Sanity check to make sure we're adding about one sample per second
+                    if (ev.data['forSecond'] === this.amplitudes.length) {
+                        this.amplitudes.push(ev.data['amplitude']);
+                    }
+                    break;
            }
        };

--- a/src/voice/consts.ts
+++ b/src/voice/consts.ts
@@ -18,6 +18,7 @@ export const WORKLET_NAME = "mx-voice-worklet";

 export enum PayloadEvent {
    Timekeep = "timekeep",
+    AmplitudeMark = "amplitude_mark",
 }

 export interface IPayload {
@@ -25,5 +26,12 @@ export interface IPayload {
 }

 export interface ITimingPayload extends IPayload {
+    ev: PayloadEvent.Timekeep;
    timeSeconds: number;
 }
+
+export interface IAmplitudePayload extends IPayload {
+    ev: PayloadEvent.AmplitudeMark;
+    forSecond: number;
+    amplitude: number;
+}