// Shared audio output for the PCM TTS engines (Kokoro, Kitten). One AudioContext,
// one source at a time — the narrator awaits playSamples() per sentence so chunks
// play in order without overlap. Created lazily on first use (which happens after a
// user gesture, so resume() is allowed by autoplay policy).

// Lazily created AudioContext shared by playback and decoding.
let _ctx = null
// The buffer source that is currently playing, or null when nothing is playing.
let _cur = null

// Return the shared AudioContext, creating it on the first call. Falls back to the
// prefixed webkitAudioContext constructor when the standard one is not defined.
function ctx() {
  if (!_ctx) _ctx = new (window.AudioContext || window.webkitAudioContext)()
  return _ctx
}

/**
 * Play mono PCM samples through the shared AudioContext.
 *
 * @param {Float32Array} float32 - Mono samples to play.
 * @param {number} sampleRate - Sample rate of `float32`, in Hz.
 * @returns {Promise<void>} Resolves when the source's `onended` handler runs.
 *   Resolves immediately, without touching the AudioContext, for empty input.
 */
export async function playSamples(float32, sampleRate) {
  // createBuffer() throws for a zero-length buffer, so an empty chunk would
  // reject the promise the narrator is awaiting. There is nothing to play.
  if (float32.length === 0) return

  const ac = ctx()
  if (ac.state === 'suspended') {
    // Best effort: if resume() is refused we still try to schedule the source.
    try { await ac.resume() } catch { /* ignore */ }
  }

  const buf = ac.createBuffer(1, float32.length, sampleRate)
  buf.getChannelData(0).set(float32)

  return new Promise((resolve) => {
    const src = ac.createBufferSource()
    src.buffer = buf
    src.connect(ac.destination)
    _cur = src
    src.onended = () => {
      // Only clear the slot if a newer source has not replaced this one.
      if (_cur === src) _cur = null
      resolve()
    }
    src.start()
  })
}

/**
 * Stop the source that is currently playing, if any, and clear the slot.
 * Errors thrown by stop() are deliberately ignored.
 */
export function stopAudio() {
  try { if (_cur) _cur.stop() } catch { /* ignore */ }
  _cur = null
}

/**
 * Encode mono Float32 samples to a 16-bit PCM WAV ArrayBuffer — so PCM engines
 * (Kokoro/Kitten) can produce a cacheable voice file like Qwen3-TTS does.
 *
 * @param {Float32Array} float32 - Mono samples; values are clamped to [-1, 1].
 * @param {number} sampleRate - Sample rate written into the header, in Hz.
 * @returns {ArrayBuffer} A 44-byte WAV header followed by little-endian int16 samples.
 */
export function encodeWav(float32, sampleRate) {
  const n = float32.length
  const dataBytes = n * 2
  const buf = new ArrayBuffer(44 + dataBytes)
  const dv = new DataView(buf)
  const writeAscii = (off, s) => {
    for (let i = 0; i < s.length; i++) dv.setUint8(off + i, s.charCodeAt(i))
  }

  // RIFF container header.
  writeAscii(0, 'RIFF')
  dv.setUint32(4, 36 + dataBytes, true) // bytes that follow this field
  writeAscii(8, 'WAVE')

  // Format chunk.
  writeAscii(12, 'fmt ')
  dv.setUint32(16, 16, true) // size of the format chunk body
  dv.setUint16(20, 1, true) // audio format 1 = PCM
  dv.setUint16(22, 1, true) // channel count
  dv.setUint32(24, sampleRate, true)
  dv.setUint32(28, sampleRate * 2, true) // byte rate = sampleRate * 2 bytes per frame
  dv.setUint16(32, 2, true) // block align (bytes per frame)
  dv.setUint16(34, 16, true) // bits per sample

  // Data chunk.
  writeAscii(36, 'data')
  dv.setUint32(40, dataBytes, true)

  let off = 44
  for (let i = 0; i < n; i++) {
    const s = Math.max(-1, Math.min(1, float32[i]))
    // Asymmetric scale so that -1 maps to -32768 and +1 maps to 32767.
    dv.setInt16(off, s < 0 ? s * 0x8000 : s * 0x7fff, true)
    off += 2
  }
  return buf
}

/**
 * Decode a WAV/audio ArrayBuffer to { audio: Float32Array, sampleRate } via the shared
 * AudioContext (decoding needs no user gesture; only playback does).
 *
 * @param {ArrayBuffer} arrayBuffer - Encoded audio bytes. Left intact by this call.
 * @returns {Promise<{audio: Float32Array, sampleRate: number}>} Channel 0 of the
 *   decoded audio and the decoded buffer's sample rate. Any further channels are
 *   not returned.
 */
export async function decodeAudio(arrayBuffer) {
  const ac = ctx()
  // decodeAudioData() detaches the ArrayBuffer it is handed. Decode a copy so
  // the caller's buffer stays usable (e.g. to cache or decode it again).
  const buf = await ac.decodeAudioData(arrayBuffer.slice(0))
  return { audio: buf.getChannelData(0), sampleRate: buf.sampleRate }
}