/**
 * Resolves the base URL of the voice backend.
 *
 * Lookup order (first non-empty value wins):
 *  1. `window.__APP_CONFIG__.VOICE_API_URL`
 *  2. `import.meta.env.VITE_API_BASE_VOICE_URL`
 *  3. `"http://localhost:7861"`
 *
 * `||` (not `??`) is used on purpose so that an empty string is treated the
 * same as "not configured" and falls through to the next source.
 */
function getVoiceBaseUrl(): string {
  // `window` is absent during SSR and in Node-based tests; a bare reference
  // would throw a ReferenceError while this module is being loaded.
  const runtimeConfig =
    typeof window === "undefined"
      ? undefined
      : (window as unknown as { __APP_CONFIG__?: { VOICE_API_URL?: string } })
          .__APP_CONFIG__;

  return (
    runtimeConfig?.VOICE_API_URL ||
    // `env` is undefined when the bundler does not populate it, hence `?.`.
    (import.meta as unknown as { env?: Record<string, string | undefined> }).env
      ?.VITE_API_BASE_VOICE_URL ||
    "http://localhost:7861"
  );
}

/** Base URL of the voice backend, resolved once when the module is loaded. */
const VOICE_BASE_URL = getVoiceBaseUrl();

/** Size in bytes of the RIFF/WAVE header written by `createWavBlob`. */
const WAV_HEADER_BYTES = 44;

/** Sample rate assumed when the `X-Sample-Rate` header is missing or unusable. */
const DEFAULT_SAMPLE_RATE = 24000;

/**
 * Writes `str` into `view` starting at `offset`, one byte per UTF-16 code unit.
 * Intended for the ASCII chunk tags of a WAV header.
 */
function writeString(view: DataView, offset: number, str: string): void {
  for (let i = 0; i < str.length; i++) {
    view.setUint8(offset + i, str.charCodeAt(i));
  }
}

/**
 * Wraps raw PCM chunks in a 44-byte WAV header and returns the result as a
 * Blob of type `audio/wav`.
 *
 * The header declares uncompressed PCM, one channel, 16 bits per sample; the
 * chunk bytes are copied verbatim, in order, after the header.
 *
 * @param chunks     PCM data, concatenated in array order.
 * @param sampleRate Sample rate in Hz written into the header.
 * @returns A Blob containing header + PCM data.
 */
export function createWavBlob(chunks: ArrayBuffer[], sampleRate: number): Blob {
  const bytesPerSample = 2; // 16-bit samples, single channel
  const pcmByteLength = chunks.reduce((sum, c) => sum + c.byteLength, 0);
  const buffer = new ArrayBuffer(WAV_HEADER_BYTES + pcmByteLength);
  const view = new DataView(buffer);

  // RIFF container header; the size field excludes the 8 bytes of "RIFF" + size.
  writeString(view, 0, "RIFF");
  view.setUint32(4, WAV_HEADER_BYTES - 8 + pcmByteLength, true);
  writeString(view, 8, "WAVE");

  // "fmt " sub-chunk.
  writeString(view, 12, "fmt ");
  view.setUint32(16, 16, true); // sub-chunk size
  view.setUint16(20, 1, true); // audio format: PCM
  view.setUint16(22, 1, true); // channels: mono
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, sampleRate * bytesPerSample, true); // byte rate
  view.setUint16(32, bytesPerSample, true); // block align
  view.setUint16(34, bytesPerSample * 8, true); // bits per sample

  // "data" sub-chunk.
  writeString(view, 36, "data");
  view.setUint32(40, pcmByteLength, true);

  const bytes = new Uint8Array(buffer);
  let offset = WAV_HEADER_BYTES;
  for (const chunk of chunks) {
    bytes.set(new Uint8Array(chunk), offset);
    offset += chunk.byteLength;
  }

  return new Blob([buffer], { type: "audio/wav" });
}

/**
 * Uploads a WAV recording to the `/stt` endpoint and returns the parsed JSON
 * response.
 *
 * @param wavBlob  Audio to transcribe; sent as multipart field `audio`.
 * @param provider Value of the `provider` form field.
 * @returns The response JSON. NOTE(review): the payload is returned as-is and
 *          is not validated against the declared shape.
 * @throws Error when the HTTP status is not OK, or when the response content
 *         type is not JSON (the message includes the first 200 characters of
 *         the body).
 */
export async function speechToText(
  wavBlob: Blob,
  provider = "chirp3"
): Promise<{ text: string; language: string; duration: number | null }> {
  const form = new FormData();
  form.append("audio", wavBlob, "recording.wav");
  form.append("provider", provider);

  const res = await fetch(`${VOICE_BASE_URL}/stt`, { method: "POST", body: form });
  if (!res.ok) throw new Error(`STT error: ${res.status}`);

  const contentType = res.headers.get("content-type") ?? "";
  if (!contentType.includes("application/json")) {
    const body = await res.text();
    throw new Error(`STT returned non-JSON (${res.status}): ${body.slice(0, 200)}`);
  }
  return res.json();
}

/**
 * Reads the `X-Sample-Rate` response header as a base-10 integer.
 * Falls back to `DEFAULT_SAMPLE_RATE` when the header is missing, empty,
 * non-numeric or not positive, so callers never receive `NaN`.
 */
function readSampleRate(headers: Headers): number {
  const parsed = parseInt(headers.get("X-Sample-Rate") ?? "", 10);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_SAMPLE_RATE;
}

/**
 * POSTs a synthesis request to the `/tts` endpoint.
 *
 * The request is aborted if no response arrives within `timeoutMs`. The timer
 * is cleared as soon as `fetch` settles, so the timeout does not apply to
 * reading the response body afterwards.
 *
 * @throws Error `TTS error: <status>` when the HTTP status is not OK.
 */
async function requestTts(
  text: string,
  provider: string,
  timeoutMs: number
): Promise<Response> {
  const abort = new AbortController();
  const timer = setTimeout(() => abort.abort(), timeoutMs);
  try {
    const res = await fetch(`${VOICE_BASE_URL}/tts`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ text, provider }),
      signal: abort.signal,
    });
    if (!res.ok) throw new Error(`TTS error: ${res.status}`);
    return res;
  } finally {
    clearTimeout(timer);
  }
}

/**
 * Requests speech synthesis and hands back the response body as a stream.
 *
 * @param text     Text to synthesize.
 * @param provider Value of the `provider` field in the JSON request body.
 * @returns The sample rate taken from `X-Sample-Rate` (default 24000) and the
 *          unread response body stream.
 * @throws Error when the HTTP status is not OK or the response has no body.
 *         The request is aborted if no response arrives within 120 seconds.
 */
export async function textToSpeechStreaming(
  text: string,
  provider = "gemini"
): Promise<{ sampleRate: number; stream: ReadableStream<Uint8Array> }> {
  const res = await requestTts(text, provider, 120_000);
  if (!res.body) throw new Error("TTS response has no body");
  return { sampleRate: readSampleRate(res.headers), stream: res.body };
}

/**
 * Requests speech synthesis and returns the complete response body.
 *
 * @param text     Text to synthesize.
 * @param provider Value of the `provider` field in the JSON request body.
 * @returns The full response body and the sample rate taken from
 *          `X-Sample-Rate` (default 24000).
 * @throws Error when the HTTP status is not OK. The request is aborted if no
 *         response arrives within 90 seconds.
 */
export async function textToSpeech(
  text: string,
  provider = "gemini"
): Promise<{ pcm: ArrayBuffer; sampleRate: number }> {
  const response = await requestTts(text, provider, 90_000);
  const sampleRate = readSampleRate(response.headers);
  const pcm = await response.arrayBuffer();
  return { pcm, sampleRate };
}