Use a MediaElementAudioSourceNode to process large audio files

Fixes https://github.com/vector-im/element-web/issues/18149

See comment block contained within diff.
Travis Ralston 2021-07-21 17:53:33 -06:00
parent b590b1d263
commit c53ed5be2e
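The change below gives Playback.prepare() two paths: small files are decoded into an AudioBuffer as before, while large files are loaded into an <audio> element and wired into the graph with AudioContext.createMediaElementSource(), so the fully decoded PCM never has to be held in memory. Below is a minimal standalone sketch of that pattern, not the Playback class itself: prepareSource is a hypothetical helper, and it uses the promise form of decodeAudioData, which the diff deliberately avoids for Safari compatibility.

const LARGE_FILE_BYTES = 5 * 1024 * 1024; // same 5mb threshold as the diff

async function prepareSource(context: AudioContext, buf: ArrayBuffer): Promise<AudioNode> {
    if (buf.byteLength > LARGE_FILE_BYTES) {
        // Large file: the media element decodes on demand; the source node
        // merely routes the element's output into the audio graph.
        const element = document.createElement("audio");
        const ready = new Promise((resolve, reject) => {
            element.onloadeddata = () => resolve(null);
            element.onerror = (e) => reject(e);
        });
        element.src = URL.createObjectURL(new Blob([buf]));
        await ready; // enough data is buffered to begin playback
        return context.createMediaElementSource(element);
    } else {
        // Small file: decodeAudioData() expands the whole file to raw PCM,
        // which can be far larger than the encoded input.
        const audioBuf = await context.decodeAudioData(buf);
        const source = context.createBufferSource();
        source.buffer = audioBuf;
        return source;
    }
}

Either node is then connected via source.connect(context.destination); the element path starts playback with element.play(), the buffer path with source.start().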


@@ -59,9 +59,10 @@ export class Playback extends EventEmitter implements IDestroyable {
     public readonly thumbnailWaveform: number[];
     private readonly context: AudioContext;
-    private source: AudioBufferSourceNode;
+    private source: AudioBufferSourceNode | MediaElementAudioSourceNode;
     private state = PlaybackState.Decoding;
     private audioBuf: AudioBuffer;
+    private element: HTMLAudioElement;
     private resampledWaveform: number[];
     private waveformObservable = new SimpleObservable<number[]>();
     private readonly clock: PlaybackClock;
@@ -129,9 +130,30 @@ export class Playback extends EventEmitter implements IDestroyable {
         this.removeAllListeners();
         this.clock.destroy();
         this.waveformObservable.close();
+        if (this.element) {
+            URL.revokeObjectURL(this.element.src);
+            this.element.remove();
+        }
     }

     public async prepare() {
+        // The point where we use an audio element is fairly arbitrary, though we don't want
+        // it to be too low. As of writing, voice messages want to show a waveform but audio
+        // messages do not. Using an audio element means we can't show a waveform preview, so
+        // we try to target the difference between a voice message file and large audio file.
+        // Overall, the point of this is to avoid memory-related issues due to storing a massive
+        // audio buffer in memory, as that can balloon to far greater than the input buffer's
+        // byte length.
+        if (this.buf.byteLength > 5 * 1024 * 1024) { // 5mb
+            console.log("Audio file too large: processing through <audio /> element");
+            this.element = document.createElement("AUDIO") as HTMLAudioElement;
+            const prom = new Promise((resolve, reject) => {
+                this.element.onloadeddata = () => resolve(null);
+                this.element.onerror = (e) => reject(e);
+            });
+            this.element.src = URL.createObjectURL(new Blob([this.buf]));
+            await prom; // make sure the audio element is ready for us
+        } else {
             // Safari compat: promise API not supported on this function
             this.audioBuf = await new Promise((resolve, reject) => {
                 this.context.decodeAudioData(this.buf, b => resolve(b), async e => {
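The "balloon" described in the new comment is easy to quantify: decodeAudioData() yields Float32 PCM at the context's sample rate, so a 5mb compressed file holding, say, five minutes of stereo audio at 48kHz decodes to roughly 48000 samples/s × 2 channels × 4 bytes × 300s ≈ 115mb in memory, over twenty times the encoded size (the exact factor depends on codec and bitrate).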
@@ -154,11 +176,13 @@ export class Playback extends EventEmitter implements IDestroyable {
             // exactly trust the user-provided waveform to be accurate...
             const waveform = Array.from(this.audioBuf.getChannelData(0));
             this.resampledWaveform = makePlaybackWaveform(waveform);
+        }

         this.waveformObservable.update(this.resampledWaveform);

         this.emit(PlaybackState.Stopped); // signal that we're not decoding anymore
         this.clock.flagLoadTime(); // must happen first because setting the duration fires a clock update
-        this.clock.durationSeconds = this.audioBuf.duration;
+        this.clock.durationSeconds = this.element ? this.element.duration : this.audioBuf.duration;
     }

     private onPlaybackEnd = async () => {
@@ -171,7 +195,11 @@ export class Playback extends EventEmitter implements IDestroyable {
         if (this.state === PlaybackState.Stopped) {
             this.disconnectSource();
             this.makeNewSourceBuffer();
-            this.source.start();
+            if (this.element) {
+                await this.element.play();
+            } else {
+                (this.source as AudioBufferSourceNode).start();
+            }
         }

         // We use the context suspend/resume functions because it allows us to pause a source
@@ -182,13 +210,21 @@ export class Playback extends EventEmitter implements IDestroyable {
     }

     private disconnectSource() {
+        if (this.element) return; // leave connected, we can (and must) re-use it
+
         this.source?.disconnect();
         this.source?.removeEventListener("ended", this.onPlaybackEnd);
     }

     private makeNewSourceBuffer() {
+        if (this.element && this.source) return; // leave connected, we can (and must) re-use it
+
+        if (this.element) {
+            this.source = this.context.createMediaElementSource(this.element);
+        } else {
             this.source = this.context.createBufferSource();
             this.source.buffer = this.audioBuf;
+        }

         this.source.addEventListener("ended", this.onPlaybackEnd);
         this.source.connect(this.context.destination);
     }
@@ -241,7 +277,11 @@ export class Playback extends EventEmitter implements IDestroyable {
             // when it comes time to the user hitting play. After a couple jumps, the user
             // will have desynced the clock enough to be about 10-15 seconds off, while this
             // keeps it as close to perfect as humans can perceive.
-            this.source.start(now, timeSeconds);
+            if (this.element) {
+                this.element.currentTime = timeSeconds;
+            } else {
+                (this.source as AudioBufferSourceNode).start(now, timeSeconds);
+            }

             // Dev note: it's critical that the code gap between `this.source.start()` and
             // `this.pause()` is as small as possible: we do not want to delay *anything*
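A note on the "we can (and must) re-use it" comments in disconnectSource() and makeNewSourceBuffer(): the Web Audio API lets an HTMLMediaElement be captured by createMediaElementSource() only once, and a second call on the same element throws an InvalidStateError, whereas an AudioBufferSourceNode is one-shot and must be rebuilt before every start(). A small illustration of the caching this forces (getSource is a hypothetical helper, not part of the commit):

const context = new AudioContext();
const element = document.createElement("audio");

let source: MediaElementAudioSourceNode | undefined;

function getSource(): MediaElementAudioSourceNode {
    // A second createMediaElementSource(element) call for the same element
    // throws an InvalidStateError, so cache the node and re-use it across
    // play/pause cycles, as makeNewSourceBuffer() does.
    if (!source) {
        source = context.createMediaElementSource(element);
        source.connect(context.destination);
    }
    return source;
}

The same constraint shapes seeking above: the element path simply moves currentTime on the long-lived element, while the buffer path must build a fresh AudioBufferSourceNode and call start(now, timeSeconds) on it.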