Spaces:

build-small-hackathon
/

tiny-army

Running

App Files Files Community

tiny-army / web /ttsAudio.js

polats's picture

Voice: provider-driven per-hero voices; settings is provider-only

e648dca 5 days ago

history blame contribute delete

2.35 kB

	// Shared audio output for the PCM TTS engines (Kokoro, Kitten). One AudioContext,
	// one source at a time — the narrator awaits playSamples() per sentence so chunks
	// play in order without overlap. Created lazily on first use (which happens after a
	// user gesture, so resume() is allowed by autoplay policy).
	// Lazily-created shared AudioContext; stays null until ctx() is first called.
	let _ctx = null
	// The buffer source node currently tracked as playing, or null when there is none.
	let _cur = null

	// Return the shared AudioContext, constructing it on first use. Falls back to the
	// prefixed webkitAudioContext constructor when window.AudioContext is absent.
	function ctx() {
	  if (!_ctx) _ctx = new (window.AudioContext || window.webkitAudioContext)()
	  return _ctx
	}

	// Play mono PCM samples through the shared AudioContext. The returned promise resolves
	// when the source node's onended handler fires, so callers can await one chunk before
	// starting the next.
	//   float32    - samples to play (typed array or array), copied into a 1-channel buffer
	//   sampleRate - sample rate in Hz handed to createBuffer()
	// Empty input resolves immediately: createBuffer() rejects a zero-length buffer, and
	// there is nothing to schedule anyway.
	export async function playSamples(float32, sampleRate) {
	  if (float32.length === 0) return
	  const ac = ctx()
	  // Best effort: a failed resume() is ignored and playback is still attempted.
	  if (ac.state === 'suspended') { try { await ac.resume() } catch { /* ignore */ } }
	  const buf = ac.createBuffer(1, float32.length, sampleRate)
	  buf.getChannelData(0).set(float32)
	  return new Promise((resolve) => {
	    const src = ac.createBufferSource()
	    src.buffer = buf
	    src.connect(ac.destination)
	    _cur = src
	    // Only clear _cur if it still points at this source; always settle the promise.
	    src.onended = () => { if (_cur === src) _cur = null; resolve() }
	    src.start()
	  })
	}

	// Stop the source tracked in _cur (if any) and clear the reference. Errors thrown by
	// stop() are deliberately ignored so this is always safe to call.
	export function stopAudio() {
	  const playing = _cur
	  if (playing) {
	    try {
	      playing.stop()
	    } catch {
	      /* ignore */
	    }
	  }
	  _cur = null
	}

	// Serialize mono Float32 samples as a 16-bit PCM WAV file held in an ArrayBuffer, so the
	// PCM engines (Kokoro/Kitten) can produce a cacheable voice file like Qwen3-TTS does.
	// Samples are clamped to [-1, 1] before scaling; multi-byte fields are little-endian.
	export function encodeWav(float32, sampleRate) {
	  const headerBytes = 44
	  const bytesPerSample = 2
	  const sampleCount = float32.length
	  const dataBytes = sampleCount * bytesPerSample
	  const wav = new ArrayBuffer(headerBytes + dataBytes)
	  const view = new DataView(wav)

	  const putTag = (at, tag) => {
	    for (let k = 0; k < tag.length; k++) view.setUint8(at + k, tag.charCodeAt(k))
	  }
	  const putU32 = (at, value) => view.setUint32(at, value, true)
	  const putU16 = (at, value) => view.setUint16(at, value, true)

	  // RIFF container header
	  putTag(0, 'RIFF')
	  putU32(4, 36 + dataBytes) // file size minus the 8-byte RIFF preamble
	  putTag(8, 'WAVE')

	  // "fmt " chunk
	  putTag(12, 'fmt ')
	  putU32(16, 16) // fmt chunk body size
	  putU16(20, 1) // audio format 1 = PCM
	  putU16(22, 1) // channel count
	  putU32(24, sampleRate)
	  putU32(28, sampleRate * bytesPerSample) // byte rate
	  putU16(32, bytesPerSample) // block align
	  putU16(34, 16) // bits per sample

	  // "data" chunk
	  putTag(36, 'data')
	  putU32(40, dataBytes)
	  for (let i = 0; i < sampleCount; i++) {
	    const clamped = Math.max(-1, Math.min(1, float32[i]))
	    // Negative and positive halves use different scales so -1 maps to -32768 and +1 to 32767.
	    const scale = clamped < 0 ? 0x8000 : 0x7fff
	    view.setInt16(headerBytes + i * bytesPerSample, clamped * scale, true)
	  }
	  return wav
	}

	// Decode a WAV/audio ArrayBuffer to { audio: Float32Array, sampleRate } via the shared
	// AudioContext (decoding needs no user gesture; only playback does). Only channel 0 of
	// the decoded audio is returned.
	// decodeAudioData() detaches the ArrayBuffer it is handed, so a copy is decoded and the
	// caller's buffer stays usable afterwards (e.g. to store it in a cache).
	export async function decodeAudio(arrayBuffer) {
	  const ac = ctx()
	  const decoded = await ac.decodeAudioData(arrayBuffer.slice(0))
	  return { audio: decoded.getChannelData(0), sampleRate: decoded.sampleRate }
	}