Spaces:
Running
Running
// TTS facade — mirrors runtime.js (the LLM facade). Picks the active TTS engine
// (Kokoro / Qwen3-TTS / VoxCPM / Kitten / Web Speech) and voice, and exposes
// makeNarrator(): a streaming reader that speaks sentence-by-sentence so a war diary
// can narrate itself while the LLM is still writing. Panels + the TTS bar import only
// from here.
| import { engine as kokoro } from '/web/ttsKokoro.js' | |
| import { engine as qwen3, engineLocal as qwen3local } from '/web/ttsQwen3.js' | |
| import { engine as voxcpm } from '/web/ttsVoxcpm.js' | |
| import { engine as kitten } from '/web/ttsKitten.js' | |
| import { engine as webspeech } from '/web/ttsWebSpeech.js' | |
| import { playSamples, stopAudio, decodeAudio, encodeWav } from '/web/ttsAudio.js' | |
| import { ensurePersistentStorage } from '/web/storage.js' | |
// Every TTS engine this facade can drive. listTtsEngines() reports them in this
// order, and eng() falls back to the first entry when the active id matches none.
const ENGINES = [kokoro, qwen3local, qwen3, voxcpm, kitten, webspeech]
// localStorage key under which the chosen engine id is persisted across refreshes.
const TTS_ENGINE_KEY = 'tinyarmy.ttsEngine'
// Initial engine id: the persisted id when it names a registered engine that reports
// itself available, otherwise 'voxcpm'. A storage read failure counts as "nothing saved".
// NOTE(review): the 'voxcpm' fallback is not itself checked with available() — confirm
// that is intended.
let activeId = (() => {
  let stored
  try {
    stored = localStorage.getItem(TTS_ENGINE_KEY)
  } catch { /* ignore */ }
  stored = stored || ''
  const match = ENGINES.find((candidate) => candidate.id === stored)
  if (!match || !match.available()) return 'voxcpm'
  return stored
})()
// Hand a free-form voice description (the persona's `voice`) to the engines that
// design a voice from text: the qwen3 engine first, then voxcpm.
export function setVoiceDescription(desc) {
  for (const designer of [qwen3, voxcpm]) designer.setDesc(desc)
}
// Play a one-off sample: give `desc` to the active engine (when it accepts one),
// synthesize `text` in the 'persona' voice and play the result. Resolves with
// whatever playSamples() returns.
export async function previewVoice(desc, text) {
  const active = eng()
  if (active.setDesc) active.setDesc(desc)
  const clip = await active.synth(text, 'persona')
  return playSamples(clip.audio, clip.sampleRate)
}
// Silence a preview by stopping audio playback.
export function stopPreview() {
  return stopAudio()
}
// Build a persona's voice FILE: pass `desc` to the active engine (when it accepts
// one) and return the result of its synthWav(text, 'persona') — per the original
// notes, a raw WAV ArrayBuffer the caller can cache and replay. Nothing is played here.
export async function createVoiceWav(desc, text) {
  const active = eng()
  if (active.setDesc) {
    active.setDesc(desc)
  }
  return active.synthWav(text, 'persona')
}
// Speak `text` in the timbre of a reference voice file (`refArrayBuffer`, whose
// transcript is `refText`). `desc` is the voice design; it is set on the engine when
// supported and also forwarded to cloneWav() — per the original notes, so an engine
// without a clone model can re-design the voice instead. Returns cloneWav()'s result.
export async function cloneVoiceWav(refArrayBuffer, refText, text, desc) {
  const active = eng()
  if (active.setDesc) {
    active.setDesc(desc)
  }
  return active.cloneWav(text, refArrayBuffer, refText, desc)
}
// Decode an encoded audio buffer and play it. Resolves with whatever playSamples()
// returns.
export async function playWav(arrayBuffer) {
  const decoded = await decodeAudio(arrayBuffer)
  return playSamples(decoded.audio, decoded.sampleRate)
}
// ── Active-engine introspection ──────────────────────────────────────────────
// Fixed-voice engines don't "design" a voice from text; a hero picks one of the
// engine's named voices. These helpers let a panel tell which kind is active.

// True when the active engine has a truthy `design` flag (it designs from a description).
export function activeEngineIsDesign() {
  return Boolean(eng().design)
}
// True when the active engine's mode is 'native' (speaks live, no WAV).
export function activeEngineIsNative() {
  return eng().mode === 'native'
}
// Id of the currently selected engine.
export function activeEngineId() {
  return activeId
}
// Named voices offered by the active engine.
export function activeVoices() {
  return eng().listVoices()
}
// The active engine's default voice id.
export function activeDefaultVoice() {
  return eng().defaultVoice
}
// Render `text` in the NAMED voice `voiceId` with the active engine and return a
// cacheable WAV. Engines that need a download are prepared first (persistent storage,
// then ensure()). An engine exposing synthWav() is used directly; otherwise its PCM
// output from synth() is wrapped with encodeWav().
export async function synthVoiceWav(voiceId, text) {
  const active = eng()
  if (active.needsDownload) {
    await ensurePersistentStorage()
    await active.ensure()
  }
  if (active.synthWav) {
    return active.synthWav(text, voiceId)
  }
  const pcm = await active.synth(text, voiceId)
  return encodeWav(pcm.audio, pcm.sampleRate)
}
// Speak `text` right now in the named voice `voiceId`. Engines with a speak() method
// talk directly; any other engine is synthesized to PCM and played.
export async function speakVoiceLive(voiceId, text) {
  const active = eng()
  if (active.speak) {
    return active.speak(text, voiceId)
  }
  const pcm = await active.synth(text, voiceId)
  return playSamples(pcm.audio, pcm.sampleRate)
}
// Stop live speech: ask the active engine to stop (when it can), then stop audio playback.
export function stopVoiceLive() {
  const active = eng()
  if (active.stop) active.stop()
  stopAudio()
}
// Per-engine voice choice: engineId -> chosen voice id.
const voiceSel = {}
// Resolve the active engine object; an unknown active id falls back to the first
// registered engine.
const eng = () => {
  const match = ENGINES.find((candidate) => candidate.id === activeId)
  return match || ENGINES[0]
}
// Describe every registered engine for a picker UI: id, label, whether it is
// currently available, whether it is flagged experimental, and an optional note
// ('' when the engine has none).
export function listTtsEngines() {
  const rows = []
  for (const e of ENGINES) {
    rows.push({
      id: e.id,
      label: e.label,
      available: e.available(),
      experimental: Boolean(e.experimental),
      note: e.note || '',
    })
  }
  return rows
}
// Id of the currently selected engine.
export function getTtsEngineId() {
  return activeId
}
// Subscribers that want to hear about provider changes (e.g. the persona panel), so
// they can re-render voice controls without polling.
const _engineListeners = new Set()
// Register `fn` to be called with the new engine id on every change. Returns an
// unsubscribe function, which reports whether `fn` was still registered.
export function onTtsEngineChange(fn) {
  _engineListeners.add(fn)
  const unsubscribe = () => _engineListeners.delete(fn)
  return unsubscribe
}
// Switch the active engine to `id`. Unknown ids and re-selecting the current engine
// are no-ops. The choice is persisted (best effort) and every change listener is
// called with the new id; a listener that throws does not stop the others.
export function setTtsEngine(id) {
  if (id === activeId) return
  const known = ENGINES.some((candidate) => candidate.id === id)
  if (!known) return
  activeId = id
  try {
    localStorage.setItem(TTS_ENGINE_KEY, id)
  } catch { /* ignore */ }
  _engineListeners.forEach((listener) => {
    try {
      listener(id)
    } catch { /* ignore */ }
  })
}
// Named voices offered by the active engine.
export function listVoices() {
  return eng().listVoices()
}
// Voice chosen for the active engine, or the engine's default when none has been set.
export function currentVoiceId() {
  const chosen = voiceSel[activeId]
  if (chosen === undefined) return eng().defaultVoice
  return chosen
}
// Remember `id` as the chosen voice for the active engine.
export function setVoice(id) {
  voiceSel[activeId] = id
}
// Whether the active engine is flagged as needing a download.
export function ttsNeedsDownload() {
  return Boolean(eng().needsDownload)
}
// Backend label reported by the active engine.
export function ttsBackendLabel() {
  return eng().backendLabel()
}
// Whether the active engine is flagged as networked.
export function ttsNetworked() {
  return Boolean(eng().networked)
}
// "Narrate as it writes" — a module-wide flag (per the original notes, the diary
// reads it and the settings voice bar sets it). Off until set.
let _autoNarrate = false
// Current value of the flag.
export function getAutoNarrate() {
  return _autoNarrate
}
// Set the flag; any value is coerced to a boolean.
export function setAutoNarrate(v) {
  _autoNarrate = Boolean(v)
}
// Prepare the active engine for use. Engines flagged as needing a download get
// persistent storage requested first; then the engine's ensure() runs with the
// optional `onProgress` callback and its result is returned.
export async function ensureTts(onProgress) {
  const wantsDownload = eng().needsDownload
  if (wantsDownload) {
    await ensurePersistentStorage()
  }
  // Looked up again on purpose: this is the engine that is active once storage is ready.
  return eng().ensure(onProgress)
}
// Speak text sentence-by-sentence, using the engine and voice that are active at the
// moment the narrator is created.
//   push(text) — append streamed text; every completed sentence is queued to be spoken.
//   end()      — flush whatever is left (even without closing punctuation) and let the
//                narrator finish once the queue is empty.
//   stop()     — abort: drop queued text, then silence the engine and audio output.
// `onState` (optional) is called with 'speaking' when a run starts and with 'done' or
// 'stopped' when it finishes; an exception thrown by it is ignored.
// PCM engines pre-generate the next sentence while the current one plays; native
// engines (mode === 'native') just speak each sentence in order. A sentence that
// fails to synthesize, speak or play is skipped and narration carries on.
export function makeNarrator({ onState } = {}) {
  const engine = eng()
  const voice = currentVoiceId()
  let pending = ''
  let sentences = []
  let closed = false
  let stopped = false
  let running = false
  // Sentence = shortest run ending in . ! ? … plus any closing quotes/brackets,
  // followed by whitespace or the end of the buffer.
  const SENT = /[\s\S]*?[.!?…]["')\]]*(?:\s+|$)/g
  const wait = (ms) => new Promise((r) => setTimeout(r, ms))

  // Report a state change without letting a faulty callback break the run loop.
  const notify = (state) => {
    if (!onState) return
    try { onState(state) } catch { /* ignore */ }
  }

  // Move completed sentences from `pending` into the queue. With `force`, the
  // remaining tail is queued as well.
  function drain(force) {
    SENT.lastIndex = 0
    let m
    let last = 0
    while ((m = SENT.exec(pending)) !== null) {
      // While streaming, a terminator sitting at the very end of the buffer is not
      // yet a boundary: the next chunk may continue it ("3." + "50", 'go.' + '"',
      // "." + ".."). Only trailing whitespace confirms the sentence is over.
      if (!force && !/\s$/.test(m[0])) break
      const s = m[0].trim()
      if (s) sentences.push(s)
      last = SENT.lastIndex
    }
    pending = pending.slice(last)
    if (force && pending.trim()) { sentences.push(pending.trim()); pending = '' }
  }

  // Start synthesizing `text` immediately. The returned promise never rejects (it
  // resolves to null on failure), so abandoning it after stop() cannot surface as
  // an unhandled rejection.
  const safeSynth = async (text) => {
    try { return await engine.synth(text, voice, {}) } catch { return null }
  }

  async function loop() {
    running = true
    notify('speaking')
    try {
      if (engine.mode === 'native') {
        while (!stopped) {
          if (sentences.length) {
            // One failed utterance must not drop the rest of the queue.
            try { await engine.speak(sentences.shift(), voice) } catch { /* skip this sentence */ }
          } else if (closed) break
          else await wait(60) // nothing queued yet — poll for more streamed text
        }
      } else {
        const startNext = () => (sentences.length ? safeSynth(sentences.shift()) : null)
        let synthP = null
        while (!stopped) {
          if (!synthP) synthP = startNext()
          if (!synthP) { if (closed) break; await wait(60); continue }
          const cur = await synthP
          synthP = startNext() // pre-generate next while current plays
          if (cur && !stopped) { try { await playSamples(cur.audio, cur.sampleRate) } catch { /* ignore */ } }
        }
      }
    } finally {
      running = false
      notify(stopped ? 'stopped' : 'done')
    }
  }

  // loop() handles its own failures, so it is deliberately not awaited.
  const kick = () => { if (!running && !stopped) void loop() }

  return {
    push(text) { pending += text; drain(false); kick() },
    end() { drain(true); closed = true; kick() },
    stop() { stopped = true; sentences = []; pending = ''; if (engine.stop) engine.stop(); stopAudio() },
  }
}