Spaces:
Running
Running
/**
 * Resolves the base URL of the voice (STT/TTS) backend.
 *
 * Lookup order, first non-empty value wins:
 *   1. `window.__APP_CONFIG__.VOICE_API_URL` (only when a `window` global exists),
 *   2. `import.meta.env.VITE_API_BASE_VOICE_URL` (only when `import.meta.env` exists),
 *   3. the hard-coded default `"http://localhost:7861"`.
 *
 * `||` (not `??`) is deliberate: an empty string counts as "not configured".
 * Trailing slashes are stripped so callers can append `/stt` or `/tts`
 * without producing a double slash.
 *
 * @returns The base URL without a trailing slash.
 */
function getVoiceBaseUrl(): string {
  // `typeof` guard: this runs at module load, and a bare `window` reference
  // throws a ReferenceError where no browser global exists (SSR, node tests).
  const runtimeConfig =
    typeof window === "undefined"
      ? undefined
      : (window as unknown as { __APP_CONFIG__?: { VOICE_API_URL?: string } }).__APP_CONFIG__;

  // Keep `import.meta.env` as one contiguous expression so a bundler that
  // substitutes it statically can still match it; `?.` covers runtimes where
  // `env` is not injected at all.
  const baseUrl =
    runtimeConfig?.VOICE_API_URL ||
    (import.meta as unknown as { env?: Record<string, string | undefined> }).env?.VITE_API_BASE_VOICE_URL ||
    "http://localhost:7861";

  return baseUrl.replace(/\/+$/, "");
}

/** Voice backend base URL, resolved once when this module is loaded. */
const VOICE_BASE_URL = getVoiceBaseUrl();
/**
 * Writes `str` into `view` starting at byte `offset`, storing one byte per
 * UTF-16 code unit (the value of `charCodeAt`) via `setUint8`.
 *
 * @param view   Destination view; must have room for `str.length` bytes at `offset`.
 * @param offset Byte position of the first character.
 * @param str    Text to write.
 */
function writeString(view: DataView, offset: number, str: string): void {
  let index = 0;
  while (index < str.length) {
    const codeUnit = str.charCodeAt(index);
    view.setUint8(offset + index, codeUnit);
    index += 1;
  }
}
/**
 * Wraps raw PCM chunks in a 44-byte RIFF/WAVE header and returns the result
 * as an `audio/wav` Blob.
 *
 * The header declares integer PCM (format tag 1) and all multi-byte fields are
 * written little-endian. Chunks are concatenated in order after the header.
 *
 * @param chunks        PCM data. Each entry may be an `ArrayBuffer` or any view
 *                      (typed array / `DataView`); a view contributes exactly its
 *                      own byte window, not the whole underlying buffer.
 * @param sampleRate    Samples per second, per channel.
 * @param numChannels   Channel count written to the header (default 1, mono).
 * @param bitsPerSample Bits per sample written to the header (default 16).
 *                      Not validated here; expected to be a multiple of 8.
 * @returns A Blob of `44 + total PCM bytes` bytes with type `"audio/wav"`.
 */
export function createWavBlob(
  chunks: ReadonlyArray<ArrayBuffer | ArrayBufferView>,
  sampleRate: number,
  numChannels = 1,
  bitsPerSample = 16
): Blob {
  const HEADER_BYTES = 44;

  // Normalize every chunk to a byte view. `new Uint8Array(typedArray)` would
  // convert element-by-element (e.g. Int16 values truncated to 8 bits), so
  // views are re-wrapped over their exact byte range instead.
  const parts = chunks.map((chunk) =>
    ArrayBuffer.isView(chunk)
      ? new Uint8Array(chunk.buffer, chunk.byteOffset, chunk.byteLength)
      : new Uint8Array(chunk)
  );
  const pcmByteLength = parts.reduce((sum, part) => sum + part.byteLength, 0);

  // Bytes per sample frame (one sample for every channel).
  const blockAlign = numChannels * (bitsPerSample / 8);

  const buffer = new ArrayBuffer(HEADER_BYTES + pcmByteLength);
  const view = new DataView(buffer);

  writeString(view, 0, "RIFF");
  view.setUint32(4, 36 + pcmByteLength, true); // RIFF chunk size: file size minus the first 8 bytes
  writeString(view, 8, "WAVE");

  writeString(view, 12, "fmt ");
  view.setUint32(16, 16, true); // size of the fmt chunk body
  view.setUint16(20, 1, true); // format tag 1 = PCM
  view.setUint16(22, numChannels, true);
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, sampleRate * blockAlign, true); // byte rate
  view.setUint16(32, blockAlign, true);
  view.setUint16(34, bitsPerSample, true);

  writeString(view, 36, "data");
  view.setUint32(40, pcmByteLength, true);

  const payload = new Uint8Array(buffer, HEADER_BYTES);
  let offset = 0;
  for (const part of parts) {
    payload.set(part, offset);
    offset += part.byteLength;
  }

  return new Blob([buffer], { type: "audio/wav" });
}
/**
 * Uploads a WAV recording to `${VOICE_BASE_URL}/stt` and returns the transcript.
 *
 * The audio is sent as multipart form data (`audio` file field named
 * `recording.wav`, plus a `provider` field). The whole exchange — request and
 * body read — is aborted after 120 seconds, matching the timeout style of the
 * TTS calls in this module.
 *
 * @param wavBlob  Recording to transcribe.
 * @param provider Backend STT provider identifier (default `"chirp3"`).
 * @returns The parsed response. `text` must be a string in the payload;
 *          `language` falls back to `""` and `duration` to `null` when the
 *          payload does not carry a value of the declared type. Any additional
 *          fields the server sends are passed through unchanged.
 * @throws Error when the HTTP status is not OK, when the response is not
 *         declared as JSON, or when the JSON has no string `text` field.
 *         A timeout surfaces as the abort error raised by `fetch`.
 */
export async function speechToText(
  wavBlob: Blob,
  provider = "chirp3"
): Promise<{ text: string; language: string; duration: number | null }> {
  const form = new FormData();
  form.append("audio", wavBlob, "recording.wav");
  form.append("provider", provider);

  const abort = new AbortController();
  const timer = setTimeout(() => abort.abort(), 120_000);
  try {
    const res = await fetch(`${VOICE_BASE_URL}/stt`, {
      method: "POST",
      body: form,
      signal: abort.signal,
    });
    if (!res.ok) throw new Error(`STT error: ${res.status}`);

    // Media types are case-insensitive, so compare in lower case.
    const contentType = (res.headers.get("content-type") ?? "").toLowerCase();
    if (!contentType.includes("application/json")) {
      const body = await res.text();
      throw new Error(`STT returned non-JSON (${res.status}): ${body.slice(0, 200)}`);
    }

    // Validate instead of trusting the untyped result of `json()`.
    const payload: unknown = await res.json();
    if (typeof payload !== "object" || payload === null) {
      throw new Error("STT returned malformed JSON: expected an object");
    }
    const fields = payload as Record<string, unknown>;
    if (typeof fields.text !== "string") {
      throw new Error("STT returned malformed JSON: missing string `text`");
    }

    return {
      ...fields,
      text: fields.text,
      language: typeof fields.language === "string" ? fields.language : "",
      duration: typeof fields.duration === "number" ? fields.duration : null,
    };
  } finally {
    // Runs on success and on every failure path, so the timer never outlives the call.
    clearTimeout(timer);
  }
}
/**
 * Requests speech synthesis from `${VOICE_BASE_URL}/tts` and returns the
 * response body as a stream, without buffering it.
 *
 * The 120-second abort timer only guards the wait for the response (status
 * and headers); it is cleared as soon as `fetch` settles, so consuming the
 * returned stream is not time-limited by this function.
 *
 * @param text     Text to synthesize.
 * @param provider Backend TTS provider identifier (default `"gemini"`).
 * @returns `stream` — the raw response body; `sampleRate` — the integer parsed
 *          from the `X-Sample-Rate` response header, or 24000 when the header
 *          is missing or does not parse to a positive number.
 * @throws Error when the HTTP status is not OK or the response has no body.
 *         A timeout surfaces as the abort error raised by `fetch`.
 */
export async function textToSpeechStreaming(
  text: string,
  provider = "gemini"
): Promise<{ sampleRate: number; stream: ReadableStream<Uint8Array> }> {
  const DEFAULT_SAMPLE_RATE = 24000;

  const abort = new AbortController();
  const timer = setTimeout(() => abort.abort(), 120_000);
  let res: Response;
  try {
    res = await fetch(`${VOICE_BASE_URL}/tts`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ text, provider }),
      signal: abort.signal,
    });
  } finally {
    clearTimeout(timer);
  }

  if (!res.ok) throw new Error(`TTS error: ${res.status}`);
  if (!res.body) throw new Error("TTS response has no body");

  // A malformed header would otherwise yield NaN and be handed to the caller
  // as the sample rate; fall back to the default instead.
  const headerRate = Number.parseInt(res.headers.get("X-Sample-Rate") ?? "", 10);
  const sampleRate =
    Number.isFinite(headerRate) && headerRate > 0 ? headerRate : DEFAULT_SAMPLE_RATE;

  return { sampleRate, stream: res.body };
}
/**
 * Requests speech synthesis from `${VOICE_BASE_URL}/tts` and returns the
 * complete audio payload once it has been fully downloaded.
 *
 * A single 90-second abort timer covers the whole exchange, including the
 * body download: the timer is cleared only after `arrayBuffer()` settles, so
 * a response whose headers arrive but whose body stalls is still aborted.
 *
 * @param text     Text to synthesize.
 * @param provider Backend TTS provider identifier (default `"gemini"`).
 * @returns `pcm` — the raw response bytes; `sampleRate` — the integer parsed
 *          from the `X-Sample-Rate` response header, or 24000 when the header
 *          is missing or does not parse to a positive number.
 * @throws Error when the HTTP status is not OK. A timeout surfaces as the
 *         abort error raised by `fetch` or by the body read.
 */
export async function textToSpeech(
  text: string,
  provider = "gemini"
): Promise<{ pcm: ArrayBuffer; sampleRate: number }> {
  const DEFAULT_SAMPLE_RATE = 24000;

  const abort = new AbortController();
  const timer = setTimeout(() => abort.abort(), 90_000);
  try {
    const response = await fetch(`${VOICE_BASE_URL}/tts`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ text, provider }),
      signal: abort.signal,
    });
    if (!response.ok) throw new Error(`TTS error: ${response.status}`);

    // A malformed header would otherwise yield NaN as the sample rate.
    const headerRate = Number.parseInt(response.headers.get("X-Sample-Rate") ?? "", 10);
    const sampleRate =
      Number.isFinite(headerRate) && headerRate > 0 ? headerRate : DEFAULT_SAMPLE_RATE;

    const pcm = await response.arrayBuffer();
    return { pcm, sampleRate };
  } finally {
    // Cleared here, after the body read, so the timeout also bounds the download.
    clearTimeout(timer);
  }
}