// Demo-Frontend-Voice-Agent / src/hooks/useVoiceSession.ts
// Last commit: "update server.js" by ishaq101 (017da10)
import { useState, useRef, useEffect, useCallback } from "react";
import { AudioRecorder } from "../audio/AudioRecorder";
import { AudioPlayer } from "../audio/AudioPlayer";
import { createWavBlob, speechToText } from "../services/voiceApi";
/**
 * Finite states of a voice session's lifecycle.
 *
 * Typical flow: IDLE → CONNECTING → LISTENING → PROCESSING → SPEAKING → IDLE,
 * with ERROR reachable from any step. This hook itself transitions up to
 * PROCESSING; SPEAKING is set externally via `setStateExternal` by the
 * consumer once TTS playback begins.
 */
export type VoiceState =
| "IDLE"
| "CONNECTING"
| "LISTENING"
| "PROCESSING"
| "SPEAKING"
| "ERROR";
/** Optional provider overrides forwarded to the speech services. */
export interface VoiceSessionParams {
/** Speech-to-text provider id; the hook defaults to "chirp3" when omitted. */
sttProvider?: string;
/** Text-to-speech provider id. NOTE(review): not read by this hook — presumably consumed by the caller's TTS path; verify. */
ttsProvider?: string;
}
/** Callbacks and configuration supplied by the hook's consumer. */
interface UseVoiceSessionOptions {
/** Invoked with the non-empty STT transcript after a recording is processed. */
onTranscript: (text: string) => void;
/** Invoked with a machine-readable code (e.g. "STT_ERROR", "MIC_ERROR") and a human-readable message on failure. */
onError?: (code: string, message: string) => void;
/** Provider overrides; see {@link VoiceSessionParams}. */
sessionParams?: VoiceSessionParams;
}
/** Public surface returned by {@link useVoiceSession}. */
export interface UseVoiceSessionReturn {
/** Current lifecycle state (re-renders the consumer on change). */
voiceState: VoiceState;
/** Health-checks the voice service, starts the microphone, and enters LISTENING. */
start: () => Promise<void>;
/** Hard-stops the whole session (recorder, playback, buffered audio) and returns to IDLE. */
stop: () => void;
/** Ends the capture phase and kicks off WAV encoding + STT; no-op unless LISTENING. */
stopRecording: () => void;
/** Lets the consumer drive states the hook cannot observe itself (e.g. SPEAKING during TTS). */
setStateExternal: (s: VoiceState) => void;
/** True while a session is underway (any state other than IDLE/ERROR). */
isActive: boolean;
}
// Short pre-recorded "please wait" clips (Indonesian) played while the STT
// request and the downstream chatbot call are in flight. One is picked at
// random per turn, avoiding an immediate repeat of the previous clip.
const BUFFER_SOUNDS = [
"/sounds/01_Baik_Saya_sedang_memproses_pertanyaanmu.wav",
"/sounds/02_Oke_mohon_ditunggu_saya_sedang_siapkan_P.wav",
"/sounds/03_Sip_saya_terima_Sedang_saya_proses_Pesan.wav",
];
// Sample rate (Hz) written into the WAV header by createWavBlob.
// NOTE(review): assumed to match AudioRecorder's actual capture rate —
// confirm in AudioRecorder; a mismatch would pitch-shift the uploaded audio.
const RECORDER_SAMPLE_RATE = 16000;
/**
 * Resolves the base URL of the voice HTTP API.
 *
 * Resolution order:
 *   1. runtime config injected on `window.__APP_CONFIG__` (per-environment),
 *   2. Vite build-time env var `VITE_API_BASE_VOICE_URL`,
 *   3. local-dev fallback `http://localhost:7861`.
 *
 * `||` (not `??`) is deliberate: an empty-string value should also fall
 * through to the next source.
 */
function getVoiceHttpBaseUrl(): string {
  const w = window as unknown as { __APP_CONFIG__?: { VOICE_API_URL?: string } };
  // `import.meta.env` only exists under Vite; optional-chain so non-Vite
  // runtimes (unit tests, plain node) fall back instead of throwing a
  // TypeError on the property access.
  const viteEnv = (import.meta as unknown as { env?: Record<string, string> }).env;
  return (
    w.__APP_CONFIG__?.VOICE_API_URL ||
    viteEnv?.VITE_API_BASE_VOICE_URL ||
    "http://localhost:7861"
  );
}
/**
 * React hook orchestrating one voice interaction turn:
 * mic capture (AudioRecorder) → WAV encoding → STT request → transcript
 * hand-off to the consumer via `onTranscript`, with a "please wait" clip
 * bridging the processing gap. The hook drives states up to PROCESSING;
 * the consumer drives SPEAKING/IDLE afterwards via `setStateExternal`.
 */
export function useVoiceSession(opts: UseVoiceSessionOptions): UseVoiceSessionReturn {
const [voiceState, setVoiceState] = useState<VoiceState>("IDLE");
// Mirror of voiceState for synchronous reads inside async continuations and
// the recorder's chunk callback, where the state variable would be stale.
const stateRef = useRef<VoiceState>("IDLE");
// Recorder/player instances are created lazily on first start() and reused.
const recorderRef = useRef<AudioRecorder | null>(null);
const playerRef = useRef<AudioPlayer | null>(null);
// Raw PCM chunks accumulated while LISTENING; drained by stopRecording().
const chunksRef = useRef<ArrayBuffer[]>([]);
// The currently playing "please wait" clip, if any.
const bufferAudioRef = useRef<HTMLAudioElement | null>(null);
// Index of the last buffer clip played, to avoid an immediate repeat.
const lastBufferIndexRef = useRef<number>(-1);
// Always-fresh options so long-lived callbacks never capture stale handlers.
const optsRef = useRef(opts);
useEffect(() => { optsRef.current = opts; });
// Defined before setState so setState can call it without circular deps.
const stopBufferSound = useCallback(() => {
if (bufferAudioRef.current) {
bufferAudioRef.current.pause();
bufferAudioRef.current.currentTime = 0;
bufferAudioRef.current = null;
}
}, []);
// Central state setter: updates both the ref (sync reads) and React state
// (renders). Auto-stops the buffer audio when the waiting phase ends
// (TTS about to start, or the session ends/errors).
const setState = useCallback((s: VoiceState) => {
if (s === "SPEAKING" || s === "IDLE" || s === "ERROR") {
stopBufferSound();
}
stateRef.current = s;
setVoiceState(s);
}, [stopBufferSound]);
// Starts a random "please wait" clip (never the same one twice in a row).
// play() rejection (e.g. autoplay policy) is intentionally swallowed — the
// clip is a nicety, not required for the session to proceed.
const playBufferSound = useCallback(() => {
stopBufferSound();
let idx: number;
do {
idx = Math.floor(Math.random() * BUFFER_SOUNDS.length);
} while (BUFFER_SOUNDS.length > 1 && idx === lastBufferIndexRef.current);
lastBufferIndexRef.current = idx;
const audio = new Audio(BUFFER_SOUNDS[idx]);
bufferAudioRef.current = audio;
audio.play().catch(() => {});
}, [stopBufferSound]);
// Hard-stops everything: recorder, playback, pending chunks, buffer audio.
const stopSession = useCallback(() => {
recorderRef.current?.stop();
playerRef.current?.stopImmediately();
chunksRef.current = [];
setState("IDLE"); // setState("IDLE") calls stopBufferSound internally
}, [setState]);
// Ends the capture phase and runs STT on the accumulated audio.
// Fire-and-forget async: errors are reported via onError, not thrown.
const stopRecording = useCallback(() => {
if (stateRef.current !== "LISTENING") return;
setState("PROCESSING");
recorderRef.current?.stop();
// Play buffer audio — it keeps playing through STT and chatbot processing.
// It stops automatically when setState("SPEAKING"), setState("IDLE"), or setState("ERROR") is called.
playBufferSound();
// Drain the chunk buffer before the await so a concurrently restarted
// session cannot mix its audio into this request.
const chunks = chunksRef.current;
chunksRef.current = [];
void (async () => {
try {
if (chunks.length === 0) {
setState("IDLE");
return;
}
const wav = createWavBlob(chunks, RECORDER_SAMPLE_RATE);
const { text } = await speechToText(wav, optsRef.current.sessionParams?.sttProvider ?? "chirp3");
// Guard: session may have been cancelled while STT was in flight.
if (stateRef.current !== "PROCESSING") return;
if (text.trim()) {
console.log("[Voice] STT transcript →", text);
// Buffer audio continues to play while Main.tsx calls the chatbot API.
// It will stop when setStateExternal("SPEAKING") or setStateExternal("IDLE") is called.
optsRef.current.onTranscript(text);
} else {
// Empty transcript: nothing to hand off, return to idle silently.
setState("IDLE");
}
} catch (err) {
console.error("[STT] Request failed:", err);
optsRef.current.onError?.("STT_ERROR", (err as Error).message);
setState("ERROR"); // setState("ERROR") calls stopBufferSound internally
}
})();
}, [playBufferSound, setState]);
// Health-checks the voice service, then starts the microphone.
// Callable from IDLE or ERROR (ERROR allows retry); no-op otherwise.
const start = useCallback(async () => {
if (stateRef.current !== "IDLE" && stateRef.current !== "ERROR") return;
setState("CONNECTING");
try {
const res = await fetch(`${getVoiceHttpBaseUrl()}/health`);
if (res.ok) {
const data: { status: string; message?: string } = await res.json();
if (data.status !== "ok") {
// Service reachable but reports itself unhealthy — fail fast.
setState("ERROR");
optsRef.current.onError?.("HEALTH_CHECK_FAILED", data.message ?? "Service not ready");
return;
}
}
} catch {
// Network error or CORS — proceed with connect attempt
// NOTE(review): a non-ok HTTP status (e.g. 500) also proceeds, since only
// res.ok responses are inspected — confirm this best-effort check is intended.
}
try {
chunksRef.current = [];
if (!recorderRef.current) recorderRef.current = new AudioRecorder();
if (!playerRef.current) playerRef.current = new AudioPlayer();
// Only collect chunks while LISTENING; the callback may still fire
// briefly after stopRecording() flips the state.
await recorderRef.current.start((chunk: ArrayBuffer) => {
if (stateRef.current === "LISTENING") {
chunksRef.current.push(chunk);
}
});
setState("LISTENING");
} catch (err) {
console.error("[Voice] Failed to start recorder:", err);
recorderRef.current?.stop();
optsRef.current.onError?.("MIC_ERROR", (err as Error).message ?? "Failed to access microphone");
setState("ERROR");
}
}, [setState]);
// Tear the session down when the consuming component unmounts.
useEffect(() => {
return () => {
stopSession();
};
// eslint-disable-next-line react-hooks/exhaustive-deps
}, []);
return {
voiceState,
start,
stop: stopSession,
stopRecording,
setStateExternal: setState,
isActive: voiceState !== "IDLE" && voiceState !== "ERROR",
};
}