Spaces:
Running
Running
File size: 8,076 Bytes
9c371b5 de78f87 3aafe4e 9c371b5 e648dca 9c371b5 3aafe4e 585578b 750ca83 de78f87 750ca83 717332c 3aafe4e 717332c 3aafe4e 717332c 308478f 3aafe4e 308478f 72160ec 3aafe4e 72160ec 308478f e648dca 9c371b5 e352ff3 9c371b5 e648dca 750ca83 e648dca 9c371b5 dffe06d 9c371b5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 | // TTS facade — mirrors runtime.js (the LLM facade). Picks the active TTS engine
// (Kokoro / Kitten / Web Speech) and voice, and exposes makeNarrator(): a streaming
// reader that speaks sentence-by-sentence so a war diary can narrate itself while the
// LLM is still writing. Panels + the TTS bar import only from here.
import { engine as kokoro } from '/web/ttsKokoro.js'
import { engine as qwen3, engineLocal as qwen3local } from '/web/ttsQwen3.js'
import { engine as voxcpm } from '/web/ttsVoxcpm.js'
import { engine as kitten } from '/web/ttsKitten.js'
import { engine as webspeech } from '/web/ttsWebSpeech.js'
import { playSamples, stopAudio, decodeAudio, encodeWav } from '/web/ttsAudio.js'
import { ensurePersistentStorage } from '/web/storage.js'
// Every TTS engine this facade can drive, in the order listTtsEngines() reports them.
// The first entry is also what eng() falls back to when no engine matches the active id.
const ENGINES = [
  kokoro,
  qwen3local,
  qwen3,
  voxcpm,
  kitten,
  webspeech,
]
// Active voice provider. The choice is persisted in localStorage under TTS_ENGINE_KEY
// and restored on load when that engine still exists and reports itself available;
// otherwise the fallback below is used. Other engines remain selectable in Settings.
// NOTE(review): this comment used to describe local Qwen3-TTS (localhost) / in-browser
// Kokoro (prod) as the defaults, but the code falls back to 'voxcpm' — confirm which
// one is intended.
const TTS_ENGINE_KEY = 'tinyarmy.ttsEngine'
let activeId = (function restoreEngineId() {
  const FALLBACK_ID = 'voxcpm'
  let storedId
  try {
    storedId = localStorage.getItem(TTS_ENGINE_KEY) || ''
  } catch {
    // Storage can be unavailable or blocked; treat that as "nothing saved".
    storedId = ''
  }
  const match = ENGINES.find((candidate) => candidate.id === storedId)
  if (!match || !match.available()) return FALLBACK_ID
  return storedId
})()
// Qwen3-TTS designs a voice from a free-form description (the persona's `voice`).
// Panels set it before narrating; previewVoice() plays a one-off sample.
// The description is handed to the cloud Qwen3 engine first, then to VoxCPM.
export function setVoiceDescription(desc) {
  for (const designer of [qwen3, voxcpm]) {
    designer.setDesc(desc)
  }
}
// Play a one-off sample of `text` with the active engine. When the engine accepts a
// voice description, `desc` is applied first. Resolves with whatever playSamples() returns.
export async function previewVoice(desc, text) {
  const active = eng()
  if (active.setDesc) {
    active.setDesc(desc)
  }
  const clip = await active.synth(text, 'persona')
  return playSamples(clip.audio, clip.sampleRate)
}
// Stop a running preview by delegating to stopAudio(); its result is passed through.
export const stopPreview = () => {
  return stopAudio()
}
// Create a persona's voice FILE: synthesize `text` in the designed voice and hand back
// the raw WAV (ArrayBuffer) so it can be cached + replayed verbatim. Playback is left
// to the caller. `desc` is applied first when the active engine exposes setDesc().
export async function createVoiceWav(desc, text) {
  const active = eng()
  if (active.setDesc) {
    active.setDesc(desc)
  }
  return active.synthWav(text, 'persona')
}
// Clone `text` from a reference voice file (keep timbre, change words). `desc` is the
// voice design — a fallback so prod (no clone model) can re-design instead. Returns WAV.
// `desc` is both applied via setDesc() (when the engine has it) and forwarded to cloneWav().
export async function cloneVoiceWav(refArrayBuffer, refText, text, desc) {
  const active = eng()
  if (active.setDesc) {
    active.setDesc(desc)
  }
  return active.cloneWav(text, refArrayBuffer, refText, desc)
}
// Decode an encoded audio buffer with decodeAudio() and play the resulting samples.
// Resolves with whatever playSamples() returns.
export async function playWav(arrayBuffer) {
  const decoded = await decodeAudio(arrayBuffer)
  return playSamples(decoded.audio, decoded.sampleRate)
}
// ── Fixed-voice engines (Kokoro / Kitten / Web Speech) ───────────────────────
// These don't "design" a voice from text; a hero picks one of the engine's named
// voices. The persona panel uses these when the active engine is NOT Qwen3.

// True when the active engine designs a voice from a description (Qwen3).
export const activeEngineIsDesign = () => Boolean(eng().design)

// True when the active engine speaks live and produces no WAV (Web Speech).
export const activeEngineIsNative = () => {
  const { mode } = eng()
  return mode === 'native'
}

// Id of the currently selected engine.
export const activeEngineId = () => {
  return activeId
}

// Voice list as reported by the active engine.
export const activeVoices = () => {
  const active = eng()
  return active.listVoices()
}

// The active engine's default voice.
export const activeDefaultVoice = () => {
  const { defaultVoice } = eng()
  return defaultVoice
}
// Synthesize `text` in a NAMED voice with the active PCM engine → a cacheable WAV
// (encode Kokoro/Kitten PCM, or pass through an engine that already returns WAV).
// Engines flagged needsDownload get persistent storage requested and ensure() awaited first.
export async function synthVoiceWav(voiceId, text) {
  const active = eng()
  if (active.needsDownload) {
    await ensurePersistentStorage()
    await active.ensure()
  }
  if (!active.synthWav) {
    // PCM-only engine: wrap the raw samples in a WAV container ourselves.
    const pcm = await active.synth(text, voiceId)
    return encodeWav(pcm.audio, pcm.sampleRate)
  }
  return active.synthWav(text, voiceId)
}
// Speak `text` live in a named voice (native engines that can't render to a file).
// Engines without speak() are synthesized to samples and played through playSamples().
export async function speakVoiceLive(voiceId, text) {
  const active = eng()
  if (!active.speak) {
    const pcm = await active.synth(text, voiceId)
    return playSamples(pcm.audio, pcm.sampleRate)
  }
  return active.speak(text, voiceId)
}
// Stop live speech: the active engine's own stop() first (when it has one), then stopAudio().
export function stopVoiceLive() {
  const active = eng()
  if (active.stop) {
    active.stop()
  }
  stopAudio()
}
// Per-engine voice choice: engineId -> chosen voice id.
const voiceSel = {}

// Resolve the active engine object; falls back to the first registered engine when
// no engine carries the active id.
const eng = () => {
  const match = ENGINES.find((candidate) => candidate.id === activeId)
  return match || ENGINES[0]
}
// Snapshot of every registered engine, reduced to plain fields: availability is
// evaluated at call time, `experimental` is coerced to a boolean, `note` defaults to ''.
export const listTtsEngines = () => {
  const describe = (candidate) => ({
    id: candidate.id,
    label: candidate.label,
    available: candidate.available(),
    experimental: Boolean(candidate.experimental),
    note: candidate.note || '',
  })
  return ENGINES.map((candidate) => describe(candidate))
}

// Id of the currently selected engine.
export const getTtsEngineId = () => {
  return activeId
}
// Notify listeners (e.g. the persona panel, on another tab) when the provider changes,
// so they can re-render voice controls without polling or relying on tab visibility.
const _engineListeners = new Set()

// Register `fn` to be called on engine changes. Returns an unsubscribe function, which
// itself returns true when `fn` was still registered.
export function onTtsEngineChange(fn) {
  _engineListeners.add(fn)
  const unsubscribe = () => _engineListeners.delete(fn)
  return unsubscribe
}
// Switch the active engine. Unknown ids and re-selecting the current engine are no-ops.
// The new id is persisted best-effort, then every registered listener is called with it;
// a listener that throws does not stop the others.
export function setTtsEngine(id) {
  const isKnown = ENGINES.some((candidate) => candidate.id === id)
  if (!isKnown) return
  if (id === activeId) return

  activeId = id
  try {
    localStorage.setItem(TTS_ENGINE_KEY, id)
  } catch {
    /* ignore */
  }
  for (const listener of _engineListeners) {
    try {
      listener(id)
    } catch {
      /* ignore */
    }
  }
}
// Voice list as reported by the active engine.
export const listVoices = () => {
  const active = eng()
  return active.listVoices()
}

// Voice chosen for the active engine, or the engine's default when none was chosen.
// Only `undefined` counts as "not chosen"; any other stored value is returned as-is.
export const currentVoiceId = () => {
  const chosen = voiceSel[activeId]
  return chosen === undefined ? eng().defaultVoice : chosen
}

// Remember `id` as the voice for the active engine.
export function setVoice(id) {
  voiceSel[activeId] = id
}

// Boolean views of the active engine's `needsDownload` / `networked` flags, plus its
// backend label.
export const ttsNeedsDownload = () => Boolean(eng().needsDownload)
export const ttsBackendLabel = () => {
  const active = eng()
  return active.backendLabel()
}
export const ttsNetworked = () => Boolean(eng().networked)
// "Narrate as it writes" — global now that the picker lives in Settings (the diary
// reads it; the settings voice bar sets it).
// Module-level flag, off by default; the setter coerces any value to a boolean.
let _autoNarrate = false
export const getAutoNarrate = () => {
  return _autoNarrate
}
export const setAutoNarrate = (v) => {
  _autoNarrate = Boolean(v)
}
// Prepare the active engine for use. Engines flagged needsDownload get persistent
// storage requested first; then the engine's ensure() is called with `onProgress` and
// its result returned. The engine is looked up again after the storage request.
export async function ensureTts(onProgress) {
  const wantsPersistence = eng().needsDownload
  if (wantsPersistence) {
    await ensurePersistentStorage()
  }
  const active = eng()
  return active.ensure(onProgress)
}
// Speak text sentence-by-sentence. push() text as it streams; end() to flush the
// tail; stop() to abort. PCM engines pre-generate the next sentence while the current
// one plays; native engines (Web Speech) just speak each sentence in order.
//
// Options:
//   onState(state) — optional callback; receives 'speaking' when the speak loop starts
//                    and 'done' or 'stopped' when it exits.
// Returns { push(text), end(), stop() }.
//
// A sentence that fails to synthesize, speak or play is skipped; the narration carries
// on with the next one instead of aborting.
export function makeNarrator({ onState } = {}) {
  // Engine and voice are captured once, at creation time.
  const engine = eng()
  const voice = currentVoiceId()
  let pending = ''      // streamed text not yet split into sentences
  let sentences = []    // complete sentences waiting to be spoken
  let closed = false    // end() was called: no more text will arrive
  let stopped = false   // stop() was called: abandon everything
  let running = false   // a speak loop is currently active
  // One sentence: shortest run ending in a terminator, optional closing quotes/brackets,
  // then whitespace or end of buffer.
  const SENT = /[\s\S]*?[.!?…]["')\]]*(?:\s+|$)/g
  const wait = (ms) => new Promise((r) => setTimeout(r, ms))

  // Move complete sentences from `pending` into `sentences`. With `force`, whatever
  // remains (even without a terminator) is flushed as a final sentence.
  function drain(force) {
    SENT.lastIndex = 0   // the /g regex is stateful; always scan from the start
    let m
    let last = 0
    while ((m = SENT.exec(pending)) !== null) {
      // Mid-stream, a terminator flush against the end of the buffer is not yet a
      // sentence boundary: the next chunk may continue it ("3." + "14", "..." arriving
      // dot by dot, a closing quote). Hold it back until whitespace or end() confirms it.
      const endsAtBufferEdge = SENT.lastIndex === pending.length && !/\s$/.test(m[0])
      if (!force && endsAtBufferEdge) break
      const s = m[0].trim()
      if (s) sentences.push(s)
      last = SENT.lastIndex
    }
    pending = pending.slice(last)
    if (force && pending.trim()) {
      sentences.push(pending.trim())
      pending = ''
    }
  }

  // Start synthesis immediately and resolve to null on failure, so a promise that is
  // pre-generated while another sentence plays never rejects without a handler.
  async function synthSafe(sentence) {
    try {
      return await engine.synth(sentence, voice, {})
    } catch {
      return null
    }
  }

  async function loop() {
    running = true
    if (onState) onState('speaking')
    try {
      if (engine.mode === 'native') {
        while (!stopped) {
          if (sentences.length) {
            // Skip a sentence the engine fails on, matching the PCM branch.
            try { await engine.speak(sentences.shift(), voice) } catch { /* skip sentence */ }
          } else if (closed) {
            break
          } else {
            await wait(60)   // nothing queued yet; poll for streamed text
          }
        }
      } else {
        const startNext = () => (sentences.length ? synthSafe(sentences.shift()) : null)
        let synthP = null
        while (!stopped) {
          if (!synthP) synthP = startNext()
          if (!synthP) {
            if (closed) break
            await wait(60)   // nothing queued yet; poll for streamed text
            continue
          }
          const cur = await synthP
          synthP = startNext() // pre-generate next while current plays
          if (cur && !stopped) {
            try { await playSamples(cur.audio, cur.sampleRate) } catch { /* ignore */ }
          }
        }
      }
    } finally {
      running = false
      if (onState) onState(stopped ? 'stopped' : 'done')
    }
  }

  return {
    push(text) { pending += text; drain(false); if (!running && !stopped) loop() },
    end() { drain(true); closed = true; if (!running && !stopped) loop() },
    stop() { stopped = true; sentences = []; pending = ''; if (engine.stop) engine.stop(); stopAudio() },
  }
}
|