/*
 * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import { useRef } from "react";
import logger from "../utils/logger";

// Capacity, in seconds of audio, of the buffer the player allocates for one
// audio sequence (the frame count is sampleRate * this value).
const AUDIO_BUFFER_LENGTH_SEC = 120;
// Sample rate, in Hz, requested for the AudioContext the player starts with.
const INITIAL_SAMPLE_RATE = 16_000;

// Don't start the audio before the buffer has at least this much data.
// If we start immediately, the audio chunks arrive too late to play
// in real time and cause choppy audio, especially in high-latency environments.
const MIN_BUFFER_DURATION_BEFORE_START_SEC = 0.8;

/**
 * Plays a stream of mono audio chunks through the Web Audio API.
 *
 * Chunks are appended to one pre-allocated AudioBuffer that is played by a
 * single AudioBufferSourceNode. Playback starts once at least
 * MIN_BUFFER_DURATION_BEFORE_START_SEC seconds of audio are buffered, and the
 * player resets itself when the buffered audio is expected to have finished.
 */
class AudioPlayer {
  /** Pending timer that resets the player at the expected end of the sequence. */
  private timerID: ReturnType<typeof setTimeout> | null = null;
  /** Sample index in `audioBuffer` at which the next chunk will be written. */
  private offset = 0;
  /** Seconds of audio written to the buffer since the last reset. */
  private currentAudioSequenceDuration = 0;
  /** `performance.now()` timestamp of playback start; 0 while not started. */
  private currentAudioSequenceStartedAt = 0;
  private audioBuffer: AudioBuffer;
  private source: AudioBufferSourceNode;
  private audioCtx = new AudioContext({ sampleRate: INITIAL_SAMPLE_RATE });
  /** Seconds of buffered audio left to play, as of the most recent chunk. */
  private timeUntilAudioCompleted = 0;

  constructor() {
    this.audioBuffer = this.createNewAudioBuffer();
    this.source = this.createConnectedSource(this.audioBuffer);
  }

  /**
   * Appends a chunk to the current audio sequence and starts playback once
   * enough audio has been buffered.
   *
   * If the chunk's sample rate differs from the current AudioContext's, the
   * context is replaced and everything buffered so far is discarded.
   *
   * @param chunk Audio to append. Only channel 0 is read.
   */
  play(chunk: AudioBuffer): void {
    const samples = chunk.getChannelData(0); // mono: only channel 0 is used

    if (chunk.sampleRate !== this.audioCtx.sampleRate) {
      const previousCtx = this.audioCtx;
      this.audioCtx = new AudioContext({ sampleRate: chunk.sampleRate });
      logger.log(
        `New sample rate ${this.audioCtx.sampleRate}. Resetting buffer`
      );
      this.reset();
      // The replaced context is no longer reachable from this player; close it
      // so the browser can release the audio resources it holds.
      void previousCtx.close().catch((err: unknown) => {
        logger.warn(`Failed to close previous AudioContext: ${String(err)}`);
      });
    }

    // Fetch the destination only AFTER the possible reset above: reset()
    // swaps in a new AudioBuffer, and writing into the old one would lose
    // the chunk.
    const channel = this.audioBuffer.getChannelData(0);

    // The chunk already holds 32-bit float samples, so they are copied
    // verbatim. Clamp the copy to the space left in the buffer, because
    // TypedArray.set() throws a RangeError when the source does not fit.
    const capacity = Math.max(0, channel.length - this.offset);
    const writable = Math.min(samples.length, capacity);
    if (writable < samples.length) {
      logger.warn(
        `Audio buffer is full (${AUDIO_BUFFER_LENGTH_SEC} seconds). Dropping ${
          samples.length - writable
        } samples`
      );
    }
    channel.set(samples.subarray(0, writable), this.offset);
    this.offset += writable;

    // A timer resets the player once the audio sequence has been played. The
    // total duration is not known up front, because more chunks may arrive
    // after playback has started. So every time a chunk is added, the pending
    // timer is cancelled and a new one is armed further down with the
    // recomputed remaining duration.
    if (this.timerID !== null) {
      clearTimeout(this.timerID);
      this.timerID = null;
    }

    this.currentAudioSequenceDuration += writable / this.audioCtx.sampleRate;

    // Start the source the first time enough audio is buffered since the last
    // reset. Later chunks are appended to the buffer while it is playing.
    if (!this.currentAudioSequenceStartedAt) {
      if (
        this.currentAudioSequenceDuration < MIN_BUFFER_DURATION_BEFORE_START_SEC
      ) {
        // NOTE(review): if no further chunk arrives, a sequence shorter than
        // the threshold is never started and stays buffered until the next
        // chunk or interrupt(). Confirm this is intended.
        logger.warn(
          `The current buffer is too short (${this.currentAudioSequenceDuration} seconds) to start the audio. Waiting for more chunks...`
        );
        return;
      }
      this.currentAudioSequenceStartedAt = performance.now();
      this.source.start();
    }

    const elapsedSec =
      (performance.now() - this.currentAudioSequenceStartedAt) / 1000;
    this.timeUntilAudioCompleted =
      this.currentAudioSequenceDuration - elapsedSec;
    this.timerID = setTimeout(() => {
      this.reset();
    }, this.timeUntilAudioCompleted * 1000);
  }

  /** Allocates an empty mono buffer at the current context's sample rate. */
  private createNewAudioBuffer(): AudioBuffer {
    return this.audioCtx.createBuffer(
      1,
      this.audioCtx.sampleRate * AUDIO_BUFFER_LENGTH_SEC,
      this.audioCtx.sampleRate
    );
  }

  /** Creates a source node that plays `buffer` into the context destination. */
  private createConnectedSource(buffer: AudioBuffer): AudioBufferSourceNode {
    const node = this.audioCtx.createBufferSource();
    node.buffer = buffer;
    node.connect(this.audioCtx.destination);
    return node;
  }

  /**
   * Stops playback, discards buffered audio, cancels the pending timer and
   * installs a new empty buffer and source node.
   */
  private reset(): void {
    logger.log("reset");
    try {
      this.source.stop();
    } catch {
      // stop() throws when the source was never started; nothing to stop.
    }
    // Disconnect outside the try above, so a source that was never started
    // is still detached from the destination.
    this.source.disconnect();

    this.offset = 0;
    this.currentAudioSequenceDuration = 0;
    this.currentAudioSequenceStartedAt = 0;
    this.timeUntilAudioCompleted = 0;
    if (this.timerID !== null) {
      clearTimeout(this.timerID);
      this.timerID = null;
    }

    this.audioBuffer = this.createNewAudioBuffer();
    this.source = this.createConnectedSource(this.audioBuffer);
  }

  /** Returns the source node for the current audio sequence. */
  public getSource(): AudioBufferSourceNode {
    return this.source;
  }

  /** Immediately stops playing audio. Audio left in the buffer is erased. */
  public interrupt(): void {
    // reset() also cancels the pending end-of-sequence timer.
    this.reset();
  }
}

/**
 * React hook that creates an AudioPlayer on first use and returns that same
 * instance on every later render of the calling component.
 */
export default function useAudioPlayer(): AudioPlayer {
  const audioPlayerRef = useRef<AudioPlayer | null>(null);
  if (!audioPlayerRef.current) {
    audioPlayerRef.current = new AudioPlayer();
  }
  return audioPlayerRef.current;
}