ishaq101's picture
[NOTICKET] Major update, re-styling and upgrade using maintiva demo setup
c0ddd13
/**
 * Resolves the base URL of the voice backend.
 *
 * Lookup order (first non-empty value wins):
 *   1. `window.__APP_CONFIG__.VOICE_API_URL` (runtime-injected config),
 *   2. `import.meta.env.VITE_API_BASE_VOICE_URL` (build-time env),
 *   3. the local default `http://localhost:7861`.
 *
 * Both lookups are guarded so that evaluating this module where `window` or
 * `import.meta.env` is absent (e.g. a non-browser test runner) falls through
 * to the next source instead of throwing.
 *
 * @returns The base URL, without any endpoint path appended.
 */
function getVoiceBaseUrl(): string {
  const runtimeConfig =
    typeof window === "undefined"
      ? undefined
      : (window as unknown as { __APP_CONFIG__?: { VOICE_API_URL?: string } }).__APP_CONFIG__;
  const buildEnv = (import.meta as unknown as { env?: Record<string, string | undefined> }).env;

  // `||` (not `??`) is intentional: an empty-string setting is treated as unset.
  return (
    runtimeConfig?.VOICE_API_URL ||
    buildEnv?.VITE_API_BASE_VOICE_URL ||
    "http://localhost:7861"
  );
}
// Base URL of the voice backend, resolved once when this module is first evaluated.
const VOICE_BASE_URL = getVoiceBaseUrl();
/**
 * Writes `str` into `view` one byte per UTF-16 code unit, starting at `offset`.
 *
 * Each code unit is stored with `setUint8`, so only its low 8 bits are kept.
 *
 * @param view   Destination view.
 * @param offset Byte position of the first character.
 * @param str    Text to write.
 */
function writeString(view: DataView, offset: number, str: string): void {
  let index = 0;
  while (index < str.length) {
    view.setUint8(offset + index, str.charCodeAt(index));
    index += 1;
  }
}
/**
 * Wraps raw PCM chunks in a 44-byte RIFF/WAVE header and returns the result
 * as an `audio/wav` Blob.
 *
 * The header always declares uncompressed PCM, one channel, 16 bits per
 * sample. Chunk bytes are copied verbatim in order; they are not inspected
 * or converted, so the caller must supply data that matches that format.
 *
 * @param chunks     PCM byte buffers, concatenated in array order.
 * @param sampleRate Sample rate written into the header (Hz).
 * @returns A Blob containing the header followed by all chunk bytes.
 */
export function createWavBlob(chunks: ArrayBuffer[], sampleRate: number): Blob {
  const HEADER_BYTES = 44;
  const CHANNELS = 1;
  const BITS_PER_SAMPLE = 16;
  const BLOCK_ALIGN = 2; // bytes per sample frame: 1 channel x 16 bits

  let dataBytes = 0;
  for (const part of chunks) {
    dataBytes += part.byteLength;
  }

  const wav = new ArrayBuffer(HEADER_BYTES + dataBytes);
  const header = new DataView(wav);

  // RIFF container: chunk size is everything after the first 8 bytes.
  writeString(header, 0, "RIFF");
  header.setUint32(4, 36 + dataBytes, true);
  writeString(header, 8, "WAVE");

  // "fmt " sub-chunk (16 bytes of format description).
  writeString(header, 12, "fmt ");
  header.setUint32(16, 16, true);
  header.setUint16(20, 1, true); // audio format 1 = PCM
  header.setUint16(22, CHANNELS, true);
  header.setUint32(24, sampleRate, true);
  header.setUint32(28, sampleRate * BLOCK_ALIGN, true); // byte rate
  header.setUint16(32, BLOCK_ALIGN, true);
  header.setUint16(34, BITS_PER_SAMPLE, true);

  // "data" sub-chunk: size followed by the samples themselves.
  writeString(header, 36, "data");
  header.setUint32(40, dataBytes, true);

  const samples = new Uint8Array(wav, HEADER_BYTES);
  let cursor = 0;
  for (const part of chunks) {
    samples.set(new Uint8Array(part), cursor);
    cursor += part.byteLength;
  }

  return new Blob([wav], { type: "audio/wav" });
}
/**
 * Uploads a WAV recording to the `/stt` endpoint and returns the transcript.
 *
 * The audio is sent as multipart form data under the field `audio` (file name
 * `recording.wav`) together with a `provider` field. The JSON body of the
 * response is returned as-is; its shape is not validated here.
 *
 * @param wavBlob  Recording to transcribe.
 * @param provider Value sent in the `provider` form field.
 * @returns The parsed JSON response body.
 * @throws Error if the response status is not OK, or if the response
 *         content type is not `application/json` (the message then includes
 *         the first 200 characters of the body).
 */
export async function speechToText(
  wavBlob: Blob,
  provider = "chirp3"
): Promise<{ text: string; language: string; duration: number | null }> {
  const payload = new FormData();
  payload.append("audio", wavBlob, "recording.wav");
  payload.append("provider", provider);

  const response = await fetch(`${VOICE_BASE_URL}/stt`, { method: "POST", body: payload });
  if (!response.ok) {
    throw new Error(`STT error: ${response.status}`);
  }

  const mimeType = response.headers.get("content-type") ?? "";
  if (mimeType.includes("application/json")) {
    return response.json();
  }

  // Surface a short excerpt of the unexpected body to make the failure diagnosable.
  const raw = await response.text();
  throw new Error(`STT returned non-JSON (${response.status}): ${raw.slice(0, 200)}`);
}
/**
 * Requests synthesized speech from the `/tts` endpoint and hands back the
 * response body as a stream, without buffering it.
 *
 * The 120-second abort timer only bounds the wait for the response to start:
 * it is cleared as soon as `fetch` settles, so reading the returned stream is
 * not time-limited by this function.
 *
 * @param text     Text to synthesize.
 * @param provider Value sent as `provider` in the JSON request body.
 * @returns The sample rate taken from the `X-Sample-Rate` response header
 *          (24000 when the header is missing or not a positive integer) and
 *          the response body stream.
 * @throws Error if the response status is not OK or the response has no body;
 *         the `fetch` rejection is propagated if the request fails or is
 *         aborted by the timeout.
 */
export async function textToSpeechStreaming(
  text: string,
  provider = "gemini"
): Promise<{ sampleRate: number; stream: ReadableStream<Uint8Array> }> {
  const abort = new AbortController();
  const timer = setTimeout(() => abort.abort(), 120_000);

  let res: Response;
  try {
    res = await fetch(`${VOICE_BASE_URL}/tts`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ text, provider }),
      signal: abort.signal,
    });
  } finally {
    // Stop the timer whether the request succeeded or failed.
    clearTimeout(timer);
  }

  if (!res.ok) throw new Error(`TTS error: ${res.status}`);
  if (!res.body) throw new Error("TTS response has no body");

  // A present-but-malformed header (e.g. "" or "abc") parses to NaN; fall back
  // to the same default used when the header is absent.
  const parsedRate = Number.parseInt(res.headers.get("X-Sample-Rate") ?? "", 10);
  const sampleRate = Number.isFinite(parsedRate) && parsedRate > 0 ? parsedRate : 24000;

  return { sampleRate, stream: res.body };
}
/**
 * Requests synthesized speech from the `/tts` endpoint and returns the whole
 * response body as a single buffer.
 *
 * One 90-second abort timer covers the entire operation, including the body
 * download: it is cleared only after the body has been read (or an error has
 * been thrown), so a response that stalls mid-body is aborted too.
 *
 * @param text     Text to synthesize.
 * @param provider Value sent as `provider` in the JSON request body.
 * @returns The response bytes and the sample rate taken from the
 *          `X-Sample-Rate` response header (24000 when the header is missing
 *          or not a positive integer).
 * @throws Error if the response status is not OK; the `fetch` / body-read
 *         rejection is propagated if the request fails or is aborted by the
 *         timeout.
 */
export async function textToSpeech(
  text: string,
  provider = "gemini"
): Promise<{ pcm: ArrayBuffer; sampleRate: number }> {
  const abort = new AbortController();
  const timer = setTimeout(() => abort.abort(), 90_000);

  try {
    const response = await fetch(`${VOICE_BASE_URL}/tts`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ text, provider }),
      signal: abort.signal,
    });
    if (!response.ok) throw new Error(`TTS error: ${response.status}`);

    // A present-but-malformed header (e.g. "" or "abc") parses to NaN; fall
    // back to the same default used when the header is absent.
    const parsedRate = Number.parseInt(response.headers.get("X-Sample-Rate") ?? "", 10);
    const sampleRate = Number.isFinite(parsedRate) && parsedRate > 0 ? parsedRate : 24000;

    // Still under the abort timer: fetch resolves once headers arrive, the
    // body is downloaded here.
    const pcm = await response.arrayBuffer();
    return { pcm, sampleRate };
  } finally {
    clearTimeout(timer);
  }
}