// Demo-Frontend-Voice-Agent / src/hooks/useVoiceSession.ts
// Last commit: "update server.js" by ishaq101 (017da10)
import { useState, useRef, useEffect, useCallback } from "react";
import { AudioRecorder } from "../audio/AudioRecorder";
import { AudioPlayer } from "../audio/AudioPlayer";
import { createWavBlob, speechToText } from "../services/voiceApi";
/**
 * Finite states of a voice session's lifecycle.
 *
 * Typical flow: IDLE → CONNECTING → LISTENING → PROCESSING → SPEAKING → IDLE,
 * with ERROR reachable from any step. This hook itself transitions up to
 * PROCESSING; SPEAKING is set externally via `setStateExternal` by the
 * consumer once TTS playback begins.
 */
export type VoiceState =
| "IDLE"
| "CONNECTING"
| "LISTENING"
| "PROCESSING"
| "SPEAKING"
| "ERROR";
/** Optional provider overrides forwarded to the speech services. */
export interface VoiceSessionParams {
/** Speech-to-text provider id; the hook defaults to "chirp3" when omitted. */
sttProvider?: string;
/** Text-to-speech provider id. NOTE(review): not read by this hook — presumably consumed by the caller's TTS path; verify. */
ttsProvider?: string;
}
/** Callbacks and configuration supplied by the hook's consumer. */
interface UseVoiceSessionOptions {
/** Invoked with the non-empty STT transcript after a recording is processed. */
onTranscript: (text: string) => void;
/** Invoked with a machine-readable code (e.g. "STT_ERROR", "MIC_ERROR") and a human-readable message on failure. */
onError?: (code: string, message: string) => void;
/** Provider overrides; see {@link VoiceSessionParams}. */
sessionParams?: VoiceSessionParams;
}
/** Public surface returned by {@link useVoiceSession}. */
export interface UseVoiceSessionReturn {
/** Current lifecycle state (re-renders the consumer on change). */
voiceState: VoiceState;
/** Health-checks the voice service, starts the microphone, and enters LISTENING. */
start: () => Promise<void>;
/** Hard-stops the whole session (recorder, playback, buffered audio) and returns to IDLE. */
stop: () => void;
/** Ends the capture phase and kicks off WAV encoding + STT; no-op unless LISTENING. */
stopRecording: () => void;
/** Lets the consumer drive states the hook cannot observe itself (e.g. SPEAKING during TTS). */
setStateExternal: (s: VoiceState) => void;
/** True while a session is underway (any state other than IDLE/ERROR). */
isActive: boolean;
}
// Short pre-recorded "please wait" clips (Indonesian) played while the STT
// request and the downstream chatbot call are in flight. One is picked at
// random per turn, avoiding an immediate repeat of the previous clip.
const BUFFER_SOUNDS = [
"/sounds/01_Baik_Saya_sedang_memproses_pertanyaanmu.wav",
"/sounds/02_Oke_mohon_ditunggu_saya_sedang_siapkan_P.wav",
"/sounds/03_Sip_saya_terima_Sedang_saya_proses_Pesan.wav",
];
// Sample rate (Hz) written into the WAV header by createWavBlob.
// NOTE(review): assumed to match AudioRecorder's actual capture rate —
// confirm in AudioRecorder; a mismatch would pitch-shift the uploaded audio.
const RECORDER_SAMPLE_RATE = 16000;
/**
 * Resolves the base URL of the voice HTTP API.
 *
 * Resolution order:
 *   1. runtime config injected on `window.__APP_CONFIG__` (per-environment),
 *   2. Vite build-time env var `VITE_API_BASE_VOICE_URL`,
 *   3. local-dev fallback `http://localhost:7861`.
 *
 * `||` (not `??`) is deliberate: an empty-string value should also fall
 * through to the next source.
 */
function getVoiceHttpBaseUrl(): string {
  const w = window as unknown as { __APP_CONFIG__?: { VOICE_API_URL?: string } };
  // `import.meta.env` only exists under Vite; optional-chain so non-Vite
  // runtimes (unit tests, plain node) fall back instead of throwing a
  // TypeError on the property access.
  const viteEnv = (import.meta as unknown as { env?: Record<string, string> }).env;
  return (
    w.__APP_CONFIG__?.VOICE_API_URL ||
    viteEnv?.VITE_API_BASE_VOICE_URL ||
    "http://localhost:7861"
  );
}
/**
 * React hook orchestrating one voice interaction turn:
 * mic capture (AudioRecorder) → WAV encoding → STT request → transcript
 * hand-off to the consumer via `onTranscript`, with a "please wait" clip
 * bridging the processing gap. The hook drives states up to PROCESSING;
 * the consumer drives SPEAKING/IDLE afterwards via `setStateExternal`.
 */
export function useVoiceSession(opts: UseVoiceSessionOptions): UseVoiceSessionReturn {
const [voiceState, setVoiceState] = useState<VoiceState>("IDLE");
// Mirror of voiceState for synchronous reads inside async continuations and
// the recorder's chunk callback, where the state variable would be stale.
const stateRef = useRef<VoiceState>("IDLE");
// Recorder/player instances are created lazily on first start() and reused.
const recorderRef = useRef<AudioRecorder | null>(null);
const playerRef = useRef<AudioPlayer | null>(null);
// Raw PCM chunks accumulated while LISTENING; drained by stopRecording().
const chunksRef = useRef<ArrayBuffer[]>([]);
// The currently playing "please wait" clip, if any.
const bufferAudioRef = useRef<HTMLAudioElement | null>(null);
// Index of the last buffer clip played, to avoid an immediate repeat.
const lastBufferIndexRef = useRef<number>(-1);
// Always-fresh options so long-lived callbacks never capture stale handlers.
const optsRef = useRef(opts);
useEffect(() => { optsRef.current = opts; });
// Defined before setState so setState can call it without circular deps.
const stopBufferSound = useCallback(() => {
if (bufferAudioRef.current) {
bufferAudioRef.current.pause();
bufferAudioRef.current.currentTime = 0;
bufferAudioRef.current = null;
}
}, []);
// Central state setter: updates both the ref (sync reads) and React state
// (renders). Auto-stops the buffer audio when the waiting phase ends
// (TTS about to start, or the session ends/errors).
const setState = useCallback((s: VoiceState) => {
if (s === "SPEAKING" || s === "IDLE" || s === "ERROR") {
stopBufferSound();
}
stateRef.current = s;
setVoiceState(s);
}, [stopBufferSound]);
// Starts a random "please wait" clip (never the same one twice in a row).
// play() rejection (e.g. autoplay policy) is intentionally swallowed — the
// clip is a nicety, not required for the session to proceed.
const playBufferSound = useCallback(() => {
stopBufferSound();
let idx: number;
do {
idx = Math.floor(Math.random() * BUFFER_SOUNDS.length);
} while (BUFFER_SOUNDS.length > 1 && idx === lastBufferIndexRef.current);
lastBufferIndexRef.current = idx;
const audio = new Audio(BUFFER_SOUNDS[idx]);
bufferAudioRef.current = audio;
audio.play().catch(() => {});
}, [stopBufferSound]);
// Hard-stops everything: recorder, playback, pending chunks, buffer audio.
const stopSession = useCallback(() => {
recorderRef.current?.stop();
playerRef.current?.stopImmediately();
chunksRef.current = [];
setState("IDLE"); // setState("IDLE") calls stopBufferSound internally
}, [setState]);
// Ends the capture phase and runs STT on the accumulated audio.
// Fire-and-forget async: errors are reported via onError, not thrown.
const stopRecording = useCallback(() => {
if (stateRef.current !== "LISTENING") return;
setState("PROCESSING");
recorderRef.current?.stop();
// Play buffer audio — it keeps playing through STT and chatbot processing.
// It stops automatically when setState("SPEAKING"), setState("IDLE"), or setState("ERROR") is called.
playBufferSound();
// Drain the chunk buffer before the await so a concurrently restarted
// session cannot mix its audio into this request.
const chunks = chunksRef.current;
chunksRef.current = [];
void (async () => {
try {
if (chunks.length === 0) {
setState("IDLE");
return;
}
const wav = createWavBlob(chunks, RECORDER_SAMPLE_RATE);
const { text } = await speechToText(wav, optsRef.current.sessionParams?.sttProvider ?? "chirp3");
// Guard: session may have been cancelled while STT was in flight.
if (stateRef.current !== "PROCESSING") return;
if (text.trim()) {
console.log("[Voice] STT transcript →", text);
// Buffer audio continues to play while Main.tsx calls the chatbot API.
// It will stop when setStateExternal("SPEAKING") or setStateExternal("IDLE") is called.
optsRef.current.onTranscript(text);
} else {
// Empty transcript: nothing to hand off, return to idle silently.
setState("IDLE");
}
} catch (err) {
console.error("[STT] Request failed:", err);
optsRef.current.onError?.("STT_ERROR", (err as Error).message);
setState("ERROR"); // setState("ERROR") calls stopBufferSound internally
}
})();
}, [playBufferSound, setState]);
// Health-checks the voice service, then starts the microphone.
// Callable from IDLE or ERROR (ERROR allows retry); no-op otherwise.
const start = useCallback(async () => {
if (stateRef.current !== "IDLE" && stateRef.current !== "ERROR") return;
setState("CONNECTING");
try {
const res = await fetch(`${getVoiceHttpBaseUrl()}/health`);
if (res.ok) {
const data: { status: string; message?: string } = await res.json();
if (data.status !== "ok") {
// Service reachable but reports itself unhealthy — fail fast.
setState("ERROR");
optsRef.current.onError?.("HEALTH_CHECK_FAILED", data.message ?? "Service not ready");
return;
}
}
} catch {
// Network error or CORS — proceed with connect attempt
// NOTE(review): a non-ok HTTP status (e.g. 500) also proceeds, since only
// res.ok responses are inspected — confirm this best-effort check is intended.
}
try {
chunksRef.current = [];
if (!recorderRef.current) recorderRef.current = new AudioRecorder();
if (!playerRef.current) playerRef.current = new AudioPlayer();
// Only collect chunks while LISTENING; the callback may still fire
// briefly after stopRecording() flips the state.
await recorderRef.current.start((chunk: ArrayBuffer) => {
if (stateRef.current === "LISTENING") {
chunksRef.current.push(chunk);
}
});
setState("LISTENING");
} catch (err) {
console.error("[Voice] Failed to start recorder:", err);
recorderRef.current?.stop();
optsRef.current.onError?.("MIC_ERROR", (err as Error).message ?? "Failed to access microphone");
setState("ERROR");
}
}, [setState]);
// Tear the session down when the consuming component unmounts.
useEffect(() => {
return () => {
stopSession();
};
// eslint-disable-next-line react-hooks/exhaustive-deps
}, []);
return {
voiceState,
start,
stop: stopSession,
stopRecording,
setStateExternal: setState,
isActive: voiceState !== "IDLE" && voiceState !== "ERROR",
};
}