import { useState, useRef, useEffect, useCallback } from "react";
import { AudioRecorder } from "../audio/AudioRecorder";
import { AudioPlayer } from "../audio/AudioPlayer";
import { createWavBlob, speechToText } from "../services/voiceApi";

/**
 * Phases of a voice session as tracked by {@link useVoiceSession}.
 *
 * - `IDLE`: initial state, and the state after `stop()`.
 * - `CONNECTING`: `start()` is running its health check / acquiring the recorder.
 * - `LISTENING`: the recorder is running and chunks are being collected.
 * - `PROCESSING`: recording was stopped and the transcript is pending.
 * - `SPEAKING`: never set by this hook itself; only reachable through
 *   `setStateExternal`.
 * - `ERROR`: health check, recorder start-up, or speech-to-text failed.
 */
export type VoiceState =
  | "IDLE"
  | "CONNECTING"
  | "LISTENING"
  | "PROCESSING"
  | "SPEAKING"
  | "ERROR";

/** Optional provider selection for a voice session. */
export interface VoiceSessionParams {
  /** Provider name forwarded to `speechToText`; defaults to `"chirp3"` when omitted. */
  sttProvider?: string;
  /** Not read by this hook. */
  ttsProvider?: string;
}

interface UseVoiceSessionOptions {
  /** Called with the recognised text when speech-to-text returns a non-blank transcript. */
  onTranscript: (text: string) => void;
  /** Called with an error code (`HEALTH_CHECK_FAILED`, `MIC_ERROR`, `STT_ERROR`) and a message. */
  onError?: (code: string, message: string) => void;
  sessionParams?: VoiceSessionParams;
}

export interface UseVoiceSessionReturn {
  /** Current phase of the session (React state, triggers re-render). */
  voiceState: VoiceState;
  /** Runs the health check, starts the recorder and moves to `LISTENING`. No-op unless `IDLE` or `ERROR`. */
  start: () => Promise<void>;
  /** Stops recorder and player, discards captured audio and returns to `IDLE`. */
  stop: () => void;
  /** Ends the capture and sends the recorded audio to speech-to-text. No-op unless `LISTENING`. */
  stopRecording: () => void;
  /** Lets the owner of the session drive the state (e.g. to `SPEAKING` or `IDLE`). */
  setStateExternal: (s: VoiceState) => void;
  /** `true` whenever `voiceState` is neither `IDLE` nor `ERROR`. */
  isActive: boolean;
}

/** Filler clips played while the transcript (and whatever the caller does with it) is pending. */
const BUFFER_SOUNDS = [
  "/sounds/01_Baik_Saya_sedang_memproses_pertanyaanmu.wav",
  "/sounds/02_Oke_mohon_ditunggu_saya_sedang_siapkan_P.wav",
  "/sounds/03_Sip_saya_terima_Sedang_saya_proses_Pesan.wav",
];

/**
 * Sample rate (Hz) passed to `createWavBlob`.
 * NOTE(review): presumably has to match the rate AudioRecorder captures at — confirm.
 */
const RECORDER_SAMPLE_RATE = 16000;

/**
 * Resolves the base URL of the voice HTTP service.
 *
 * Precedence: `window.__APP_CONFIG__.VOICE_API_URL`, then the
 * `VITE_API_BASE_VOICE_URL` env entry, then `http://localhost:7861`.
 * `||` is used on purpose so that empty strings fall through to the next source.
 */
function getVoiceHttpBaseUrl(): string {
  const w = window as unknown as { __APP_CONFIG__?: { VOICE_API_URL?: string } };
  return (
    w.__APP_CONFIG__?.VOICE_API_URL ||
    (import.meta as unknown as { env: Record<string, string | undefined> }).env
      .VITE_API_BASE_VOICE_URL ||
    "http://localhost:7861"
  );
}

/** Extracts a usable message from a caught value, falling back when none is available. */
function errorMessage(err: unknown, fallback: string): string {
  if (err instanceof Error && err.message) return err.message;
  if (typeof err === "string" && err) return err;
  return fallback;
}

/**
 * React hook that manages a push-to-talk style voice session: it records
 * audio, sends it to speech-to-text, and reports the transcript through
 * `opts.onTranscript`.
 *
 * All asynchronous steps are tagged with a session epoch so that work which
 * finishes after `stop()` (or after unmount) is discarded instead of
 * re-activating the session.
 *
 * @param opts Callbacks and optional provider parameters. The latest value is
 *   always used, so callers do not need to memoise it.
 * @returns The current state plus the controls described on
 *   {@link UseVoiceSessionReturn}.
 */
export function useVoiceSession(opts: UseVoiceSessionOptions): UseVoiceSessionReturn {
  const [voiceState, setVoiceState] = useState<VoiceState>("IDLE");
  // Mirror of voiceState that async callbacks can read without a stale closure.
  const stateRef = useRef<VoiceState>("IDLE");
  const recorderRef = useRef<AudioRecorder | null>(null);
  const playerRef = useRef<AudioPlayer | null>(null);
  const chunksRef = useRef<ArrayBuffer[]>([]);
  const bufferAudioRef = useRef<HTMLAudioElement | null>(null);
  const lastBufferIndexRef = useRef(-1);
  // Bumped by start() and stop(); async work compares against it to detect cancellation.
  const sessionEpochRef = useRef(0);

  // Keep the newest callbacks reachable from the stable useCallback closures below.
  const optsRef = useRef(opts);
  useEffect(() => {
    optsRef.current = opts;
  });

  // Defined before setState so setState can call it without circular deps.
  const stopBufferSound = useCallback(() => {
    const audio = bufferAudioRef.current;
    if (!audio) return;
    audio.pause();
    audio.currentTime = 0;
    bufferAudioRef.current = null;
  }, []);

  // Auto-stops the buffer audio when the waiting phase ends
  // (TTS about to start, or the session ends).
  const setState = useCallback(
    (s: VoiceState) => {
      if (s === "SPEAKING" || s === "IDLE" || s === "ERROR") {
        stopBufferSound();
      }
      stateRef.current = s;
      setVoiceState(s);
    },
    [stopBufferSound],
  );

  // Plays a random filler clip, never the same one twice in a row.
  const playBufferSound = useCallback(() => {
    stopBufferSound();
    let idx: number;
    do {
      idx = Math.floor(Math.random() * BUFFER_SOUNDS.length);
    } while (BUFFER_SOUNDS.length > 1 && idx === lastBufferIndexRef.current);
    lastBufferIndexRef.current = idx;

    const audio = new Audio(BUFFER_SOUNDS[idx]);
    bufferAudioRef.current = audio;
    // Best effort: playback may be rejected (autoplay policy, or pause() racing play()).
    audio.play().catch(() => {});
  }, [stopBufferSound]);

  const stopSession = useCallback(() => {
    // Invalidate any start()/speech-to-text work that is still in flight.
    sessionEpochRef.current += 1;
    recorderRef.current?.stop();
    playerRef.current?.stopImmediately();
    chunksRef.current = [];
    setState("IDLE"); // also stops the buffer sound
  }, [setState]);

  const stopRecording = useCallback(() => {
    if (stateRef.current !== "LISTENING") return;
    setState("PROCESSING");
    recorderRef.current?.stop();

    const chunks = chunksRef.current;
    chunksRef.current = [];
    if (chunks.length === 0) {
      // Nothing was captured: skip the filler clip and the network round trip.
      setState("IDLE");
      return;
    }

    // The filler clip keeps playing through speech-to-text and whatever the
    // onTranscript consumer does next; it stops when the state becomes
    // SPEAKING, IDLE or ERROR.
    playBufferSound();

    const epoch = sessionEpochRef.current;
    const isStale = (): boolean =>
      epoch !== sessionEpochRef.current || stateRef.current !== "PROCESSING";

    void (async () => {
      try {
        const wav = createWavBlob(chunks, RECORDER_SAMPLE_RATE);
        const { text } = await speechToText(
          wav,
          optsRef.current.sessionParams?.sttProvider ?? "chirp3",
        );
        // The session may have been cancelled or restarted while the request was in flight.
        if (isStale()) return;

        if (text.trim()) {
          console.log("[Voice] STT transcript →", text);
          // State stays PROCESSING; the consumer is expected to move it on via setStateExternal.
          optsRef.current.onTranscript(text);
        } else {
          setState("IDLE");
        }
      } catch (err) {
        console.error("[STT] Request failed:", err);
        // Do not drag a cancelled or restarted session into ERROR.
        if (isStale()) return;
        optsRef.current.onError?.(
          "STT_ERROR",
          errorMessage(err, "Speech-to-text request failed"),
        );
        setState("ERROR"); // also stops the buffer sound
      }
    })();
  }, [playBufferSound, setState]);

  const start = useCallback(async () => {
    if (stateRef.current !== "IDLE" && stateRef.current !== "ERROR") return;

    sessionEpochRef.current += 1;
    const epoch = sessionEpochRef.current;
    const isCancelled = (): boolean =>
      epoch !== sessionEpochRef.current || stateRef.current !== "CONNECTING";
    // Stops the recorder after a cancelled start, unless a newer start() call
    // has taken over and is connecting or listening with the same recorder.
    const releaseRecorder = (): void => {
      const supersededByLiveSession =
        epoch !== sessionEpochRef.current &&
        (stateRef.current === "CONNECTING" || stateRef.current === "LISTENING");
      if (!supersededByLiveSession) recorderRef.current?.stop();
    };

    setState("CONNECTING");

    let healthFailure: string | null = null;
    try {
      const res = await fetch(`${getVoiceHttpBaseUrl()}/health`);
      if (res.ok) {
        const data: { status: string; message?: string } = await res.json();
        if (data.status !== "ok") {
          healthFailure = data.message ?? "Service not ready";
        }
      }
    } catch {
      // Network error or CORS — proceed with connect attempt
    }

    // stop() or unmount happened during the health check: do not open the microphone.
    if (isCancelled()) return;

    if (healthFailure !== null) {
      setState("ERROR");
      optsRef.current.onError?.("HEALTH_CHECK_FAILED", healthFailure);
      return;
    }

    try {
      chunksRef.current = [];
      if (!recorderRef.current) recorderRef.current = new AudioRecorder();
      if (!playerRef.current) playerRef.current = new AudioPlayer();

      await recorderRef.current.start((chunk: ArrayBuffer) => {
        // Only keep audio captured while the session is actually listening.
        if (stateRef.current === "LISTENING") {
          chunksRef.current.push(chunk);
        }
      });

      if (isCancelled()) {
        // Cancelled while the recorder was starting: release it again.
        releaseRecorder();
        return;
      }
      setState("LISTENING");
    } catch (err) {
      console.error("[Voice] Failed to start recorder:", err);
      if (isCancelled()) {
        releaseRecorder();
        return;
      }
      recorderRef.current?.stop();
      optsRef.current.onError?.(
        "MIC_ERROR",
        errorMessage(err, "Failed to access microphone"),
      );
      setState("ERROR");
    }
  }, [setState]);

  // Tear the session down on unmount. stopSession is referentially stable,
  // so this effect only runs its cleanup once.
  useEffect(() => stopSession, [stopSession]);

  return {
    voiceState,
    start,
    stop: stopSession,
    stopRecording,
    setStateExternal: setState,
    isActive: voiceState !== "IDLE" && voiceState !== "ERROR",
  };
}