Spaces:
Running
Persona: red section headings, icon buttons, edit-aware voice + clone-on-play
Browse files
- Result laid out in sections (line + red heading): About, Quote, Voice design.
- ▶ play icon sits on the Quote heading; 🎙 create/recreate sits on the Voice design
heading; both simplified to icons (play is neutral, not red).
- Editing the quote or voice marks the cached audio stale → a red badge on ▶.
- Tapping ▶ when badged does NOT redesign the voice: it CLONES the last voice file
(Qwen3-TTS Base model, keeping the exact timbre) for the new line, plays it, saves
over the old file, and clears the badge. Otherwise ▶ replays the cached file.
🎙 is the explicit "new voice from the description" path.
- Backend: TINY_TTS_MODE=local loads the Base model lazily and clones via
generate_voice_clone((numpy,sr) ref). Prod (DashScope) gets the voice description so
it re-designs gracefully (no clone model). Verified clone end-to-end on the GPU (5s).
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
- app.py +38 -12
- web/personaPanel.js +58 -29
- web/shell/persona.css +22 -12
- web/tts.js +5 -0
- web/ttsQwen3.js +19 -0
|
@@ -246,26 +246,47 @@ _DASHSCOPE_URL = _DASHSCOPE_BASE + "/api/v1/services/audio/tts/customization"
|
|
| 246 |
# origin so no CORS/cert dance — the LeLab pattern). Needs `pip install qwen-tts torch
|
| 247 |
# soundfile`. Lazy-loaded; the Space (cpu-basic) leaves this unset and uses DashScope.
|
| 248 |
TTS_MODE = os.environ.get("TINY_TTS_MODE", "").strip().lower()
|
| 249 |
-
_local_tts = None
|
|
|
|
| 250 |
_local_tts_lock = threading.Lock()
|
| 251 |
|
| 252 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 253 |
def _local_voice_design(text, instruct, language="English"):
|
| 254 |
global _local_tts
|
| 255 |
-
import io
|
| 256 |
with _local_tts_lock: # one GPU model can't decode in parallel
|
| 257 |
if _local_tts is None:
|
| 258 |
-
|
| 259 |
-
from qwen_tts import Qwen3TTSModel
|
| 260 |
-
mid = os.environ.get("QWEN_TTS_MODEL", "Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign")
|
| 261 |
-
dev = "cuda:0" if torch.cuda.is_available() else "cpu"
|
| 262 |
-
dt = torch.bfloat16 if dev != "cpu" else torch.float32
|
| 263 |
-
_local_tts = Qwen3TTSModel.from_pretrained(mid, device_map=dev, dtype=dt)
|
| 264 |
-
import soundfile as sf
|
| 265 |
wavs, sr = _local_tts.generate_voice_design(
|
| 266 |
text=text, language=language, instruct=instruct or "A clear, natural voice at a moderate pace.")
|
| 267 |
-
out = io.BytesIO()
|
| 268 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
return out.getvalue()
|
| 270 |
|
| 271 |
|
|
@@ -303,11 +324,16 @@ async def qwen_tts(request: Request):
|
|
| 303 |
text = (body.get("text") or "").strip()
|
| 304 |
instruct = (body.get("instruct") or "").strip()
|
| 305 |
language = body.get("language") or "English"
|
|
|
|
|
|
|
| 306 |
if not text:
|
| 307 |
return Response("text required", status_code=400)
|
| 308 |
if TTS_MODE == "local": # in-process open weights (dev)
|
| 309 |
try:
|
| 310 |
-
|
|
|
|
|
|
|
|
|
|
| 311 |
except Exception as e: # noqa: BLE001 — surface a clear setup hint
|
| 312 |
return Response(f"local TTS error (pip install qwen-tts torch soundfile?): {e}", status_code=500)
|
| 313 |
return Response(wav, media_type="audio/wav", headers={"Cache-Control": "no-store"})
|
|
|
|
| 246 |
# origin so no CORS/cert dance — the LeLab pattern). Needs `pip install qwen-tts torch
|
| 247 |
# soundfile`. Lazy-loaded; the Space (cpu-basic) leaves this unset and uses DashScope.
|
| 248 |
TTS_MODE = os.environ.get("TINY_TTS_MODE", "").strip().lower()
|
| 249 |
+
_local_tts = None # VoiceDesign model
|
| 250 |
+
_local_clone = None # Base model (voice clone) — lazy, only if a clone is requested
|
| 251 |
_local_tts_lock = threading.Lock()
|
| 252 |
|
| 253 |
|
| 254 |
+
def _load(which):
|
| 255 |
+
import torch
|
| 256 |
+
from qwen_tts import Qwen3TTSModel
|
| 257 |
+
mid = os.environ.get(
|
| 258 |
+
"QWEN_TTS_MODEL" if which == "design" else "QWEN_TTS_CLONE_MODEL",
|
| 259 |
+
"Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign" if which == "design" else "Qwen/Qwen3-TTS-12Hz-1.7B-Base")
|
| 260 |
+
dev = "cuda:0" if torch.cuda.is_available() else "cpu"
|
| 261 |
+
dt = torch.bfloat16 if dev != "cpu" else torch.float32
|
| 262 |
+
return Qwen3TTSModel.from_pretrained(mid, device_map=dev, dtype=dt)
|
| 263 |
+
|
| 264 |
+
|
| 265 |
def _local_voice_design(text, instruct, language="English"):
    """Synthesize ``text`` with a voice designed from ``instruct``; return WAV bytes.

    The VoiceDesign model is loaded on first use and kept in the module-level
    ``_local_tts``. An empty ``instruct`` falls back to a neutral description.
    """
    global _local_tts
    import io
    import soundfile as sf

    description = instruct or "A clear, natural voice at a moderate pace."
    with _local_tts_lock:  # one GPU model can't decode in parallel
        if _local_tts is None:
            _local_tts = _load("design")
        wavs, sr = _local_tts.generate_voice_design(
            text=text, language=language, instruct=description)
        # Only the first returned waveform is encoded.
        buf = io.BytesIO()
        sf.write(buf, wavs[0], sr, format="WAV")
        return buf.getvalue()
|
| 275 |
+
|
| 276 |
+
|
| 277 |
+
def _local_voice_clone(text, ref_audio_b64, ref_text, language="English"):
    """Synthesize ``text`` in the voice of a reference recording; return WAV bytes.

    This is the "Voice Design → Clone" workflow: a previously created voice file
    is used as the reference so the timbre stays the same and only the words change.

    Args:
        text: the new line to speak.
        ref_audio_b64: base64-encoded WAV of the reference voice.
        ref_text: transcript of the reference audio; ``None``/empty becomes ``""``.
        language: passed through to the model.
    """
    global _local_clone
    import io
    import soundfile as sf

    # qwen-tts wants ref_audio as a (numpy, sr) tuple — a raw base64 string gets
    # mistaken for a file path — so the WAV is decoded here.
    ref_bytes = base64.b64decode(ref_audio_b64)
    ref_np, ref_sr = sf.read(io.BytesIO(ref_bytes))

    with _local_tts_lock:  # same lock as the design path
        if _local_clone is None:
            _local_clone = _load("clone")
        wavs, sr = _local_clone.generate_voice_clone(
            text=text,
            language=language,
            ref_audio=(ref_np, ref_sr),
            ref_text=ref_text or "",
        )
        # Only the first returned waveform is encoded.
        buf = io.BytesIO()
        sf.write(buf, wavs[0], sr, format="WAV")
        return buf.getvalue()
|
| 291 |
|
| 292 |
|
|
|
|
| 324 |
text = (body.get("text") or "").strip()
|
| 325 |
instruct = (body.get("instruct") or "").strip()
|
| 326 |
language = body.get("language") or "English"
|
| 327 |
+
ref_audio = body.get("ref_audio") # base64 WAV → clone (keep timbre, new words)
|
| 328 |
+
ref_text = body.get("ref_text") or ""
|
| 329 |
if not text:
|
| 330 |
return Response("text required", status_code=400)
|
| 331 |
if TTS_MODE == "local": # in-process open weights (dev)
|
| 332 |
try:
|
| 333 |
+
if ref_audio:
|
| 334 |
+
wav = await asyncio.to_thread(_local_voice_clone, text, ref_audio, ref_text, language)
|
| 335 |
+
else:
|
| 336 |
+
wav = await asyncio.to_thread(_local_voice_design, text, instruct, language)
|
| 337 |
except Exception as e: # noqa: BLE001 — surface a clear setup hint
|
| 338 |
return Response(f"local TTS error (pip install qwen-tts torch soundfile?): {e}", status_code=500)
|
| 339 |
return Response(wav, media_type="audio/wav", headers={"Cache-Control": "no-store"})
|
|
@@ -7,7 +7,7 @@ import { streamChat, ensureModel, currentModel, currentModelId, getEngineId, bac
|
|
| 7 |
import { extractLivePersona } from '/web/personaStream.js'
|
| 8 |
import { parsePersonaJson } from '/web/personaParse.js'
|
| 9 |
import { PERSONA_SYSTEM, personaUserPrompt, stripThink, stripThinkFinal, noThink } from '/web/personaPrompts.js'
|
| 10 |
-
import { createVoiceWav, playWav, stopPreview } from '/web/tts.js'
|
| 11 |
import { listPersonas, savePersona, removePersona, onRosterChange, putAudio, getAudio } from '/web/personaStore.js'
|
| 12 |
|
| 13 |
const CLASSES = ['Warrior', 'Ranger', 'Monk', 'Assassin', 'Mage', 'Paladin', 'Cleric', 'Knight']
|
|
@@ -36,30 +36,50 @@ export function mountPersonaPanel(host) {
|
|
| 36 |
const tagsEl = el('div', { class: 'persona-tags' })
|
| 37 |
const aboutEl = el('div', { class: 'persona-about persona-edit', 'data-ph': 'Their story…' })
|
| 38 |
const quoteEl = el('blockquote', { class: 'persona-quote persona-edit', 'data-ph': 'A line they say…' })
|
| 39 |
-
const replayBtn = el('button', { class: 'persona-replay', type: 'button', title: 'Replay voice', style: 'display:none' }, '▶')
|
| 40 |
-
const quoteRow = el('div', { class: 'persona-quote-row' }, [quoteEl, replayBtn])
|
| 41 |
-
const voiceLabel = el('div', { class: 'persona-voice-lbl' }, '🎙 Voice design')
|
| 42 |
const voiceEl = el('div', { class: 'persona-voice-desc persona-edit', 'data-ph': 'How they sound…' })
|
| 43 |
-
|
|
|
|
|
|
|
| 44 |
const thinkEl = el('pre', { class: 'persona-think' })
|
| 45 |
const copyBtn = el('button', { class: 'persona-copy', type: 'button' }, '📋 Copy debug')
|
| 46 |
const thinkWrap = el('details', { class: 'persona-think-wrap' },
|
| 47 |
[el('summary', {}, 'model output / debug (raw)'), copyBtn, thinkEl])
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
const controls = el('aside', { class: 'persona-controls' }, [
|
| 50 |
el('label', { class: 'persona-label' }, 'Class'), sel,
|
| 51 |
el('label', { class: 'persona-label' }, 'Seed'), seed,
|
| 52 |
btn, stats, status,
|
| 53 |
el('label', { class: 'persona-label persona-roster-label' }, 'Barracks (saved)'), rosterEl,
|
| 54 |
])
|
| 55 |
-
const result = el('div', { class: 'persona-result' },
|
| 56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
host.appendChild(el('div', { class: 'persona-view' }, [controls, result]))
|
| 58 |
|
| 59 |
let lastPersona = null // the persona currently shown
|
| 60 |
let savedId = null // its roster id (set the moment it's shown — always saved)
|
|
|
|
| 61 |
let working = false
|
| 62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
function autosave() {
|
| 64 |
if (!lastPersona) return
|
| 65 |
const rec = savePersona({ ...lastPersona, id: savedId, unitClass: lastPersona.unitClass || sel.value, seed: lastPersona.seed || seed.value })
|
|
@@ -77,6 +97,7 @@ export function mountPersonaPanel(host) {
|
|
| 77 |
if ((lastPersona[field] || '') === v) return
|
| 78 |
lastPersona[field] = v
|
| 79 |
autosave()
|
|
|
|
| 80 |
})
|
| 81 |
}
|
| 82 |
editable(nameEl, 'name', { single: true })
|
|
@@ -97,42 +118,50 @@ export function mountPersonaPanel(host) {
|
|
| 97 |
aboutEl.textContent = p.about || ''
|
| 98 |
quoteEl.textContent = p.quote || ''
|
| 99 |
voiceEl.textContent = p.voice || ''
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
const has = savedId ? !!(await getAudio(savedId)) : false
|
| 103 |
-
replayBtn.style.display = has ? '' : 'none'
|
| 104 |
-
createBtn.textContent = has ? '🎙 Recreate voice' : '🎙 Create voice'
|
| 105 |
}
|
| 106 |
|
| 107 |
-
// 🎙 Create voice —
|
| 108 |
async function createVoice() {
|
| 109 |
if (working || !lastPersona) return
|
| 110 |
-
const line = (lastPersona.quote || '').trim() || (lastPersona.about || '').trim() || `${lastPersona.name} reporting for duty.`
|
| 111 |
if (!lastPersona.voice) { status.textContent = 'add a voice design first'; return }
|
| 112 |
autosave() // ensure an id to key the audio
|
| 113 |
-
|
| 114 |
-
|
|
|
|
| 115 |
try {
|
| 116 |
const wav = await createVoiceWav(lastPersona.voice, line)
|
| 117 |
await putAudio(savedId, new Blob([wav], { type: 'audio/wav' }))
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
status.textContent = prev
|
| 122 |
-
} catch (e) {
|
| 123 |
-
|
| 124 |
-
createBtn.textContent = prevTxt
|
| 125 |
-
} finally { working = false; createBtn.disabled = false }
|
| 126 |
}
|
| 127 |
createBtn.addEventListener('click', createVoice)
|
| 128 |
|
| 129 |
-
|
| 130 |
-
|
|
|
|
|
|
|
| 131 |
const blob = await getAudio(savedId)
|
| 132 |
-
if (!blob)
|
| 133 |
-
try { await playWav(await blob.arrayBuffer()) } catch { /* ignore */ }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
}
|
| 135 |
-
|
| 136 |
|
| 137 |
// ── Barracks roster (saved soldiers) ──────────────────────────────────────
|
| 138 |
function renderRoster(personas) {
|
|
@@ -180,7 +209,7 @@ export function mountPersonaPanel(host) {
|
|
| 180 |
if (window.innerWidth <= 768) result.scrollIntoView({ behavior: 'smooth', block: 'start' })
|
| 181 |
nameEl.textContent = '…'; aboutEl.textContent = ''; tagsEl.replaceChildren()
|
| 182 |
quoteEl.textContent = ''; voiceEl.textContent = ''
|
| 183 |
-
createBtn.style.display = 'none';
|
| 184 |
stopPreview()
|
| 185 |
thinkEl.textContent = ''; thinkWrap.open = true; stats.textContent = ''
|
| 186 |
let acc = ''
|
|
|
|
| 7 |
import { extractLivePersona } from '/web/personaStream.js'
|
| 8 |
import { parsePersonaJson } from '/web/personaParse.js'
|
| 9 |
import { PERSONA_SYSTEM, personaUserPrompt, stripThink, stripThinkFinal, noThink } from '/web/personaPrompts.js'
|
| 10 |
+
import { createVoiceWav, cloneVoiceWav, playWav, stopPreview } from '/web/tts.js'
|
| 11 |
import { listPersonas, savePersona, removePersona, onRosterChange, putAudio, getAudio } from '/web/personaStore.js'
|
| 12 |
|
| 13 |
const CLASSES = ['Warrior', 'Ranger', 'Monk', 'Assassin', 'Mage', 'Paladin', 'Cleric', 'Knight']
|
|
|
|
| 36 |
const tagsEl = el('div', { class: 'persona-tags' })
|
| 37 |
const aboutEl = el('div', { class: 'persona-about persona-edit', 'data-ph': 'Their story…' })
|
| 38 |
const quoteEl = el('blockquote', { class: 'persona-quote persona-edit', 'data-ph': 'A line they say…' })
|
|
|
|
|
|
|
|
|
|
| 39 |
const voiceEl = el('div', { class: 'persona-voice-desc persona-edit', 'data-ph': 'How they sound…' })
|
| 40 |
+
// ▶ play sits on the Quote heading; 🎙 create sits on the Voice design heading.
|
| 41 |
+
const playBtn = el('button', { class: 'persona-ico persona-play', type: 'button', title: 'Play voice', style: 'display:none' }, '▶')
|
| 42 |
+
const createBtn = el('button', { class: 'persona-ico persona-create', type: 'button', title: 'Create voice', style: 'display:none' }, '🎙')
|
| 43 |
const thinkEl = el('pre', { class: 'persona-think' })
|
| 44 |
const copyBtn = el('button', { class: 'persona-copy', type: 'button' }, '📋 Copy debug')
|
| 45 |
const thinkWrap = el('details', { class: 'persona-think-wrap' },
|
| 46 |
[el('summary', {}, 'model output / debug (raw)'), copyBtn, thinkEl])
|
| 47 |
|
| 48 |
+
// A section header: a top line + a small red heading, with an optional action on the right.
|
| 49 |
+
const secHead = (title, action) =>
|
| 50 |
+
el('div', { class: 'persona-sec' }, [el('div', { class: 'persona-sec-title' }, title), action || el('span')])
|
| 51 |
+
|
| 52 |
const controls = el('aside', { class: 'persona-controls' }, [
|
| 53 |
el('label', { class: 'persona-label' }, 'Class'), sel,
|
| 54 |
el('label', { class: 'persona-label' }, 'Seed'), seed,
|
| 55 |
btn, stats, status,
|
| 56 |
el('label', { class: 'persona-label persona-roster-label' }, 'Barracks (saved)'), rosterEl,
|
| 57 |
])
|
| 58 |
+
const result = el('div', { class: 'persona-result' }, [
|
| 59 |
+
nameEl, tagsEl,
|
| 60 |
+
secHead('About'), aboutEl,
|
| 61 |
+
secHead('Quote', playBtn), quoteEl,
|
| 62 |
+
secHead('Voice design', createBtn), voiceEl,
|
| 63 |
+
thinkWrap,
|
| 64 |
+
])
|
| 65 |
host.appendChild(el('div', { class: 'persona-view' }, [controls, result]))
|
| 66 |
|
| 67 |
let lastPersona = null // the persona currently shown
|
| 68 |
let savedId = null // its roster id (set the moment it's shown — always saved)
|
| 69 |
+
let hasVoice = false // a cached voice file exists for this persona
|
| 70 |
let working = false
|
| 71 |
|
| 72 |
+
// The line the voice actually says (quote, else about, else a fallback).
|
| 73 |
+
const lineFor = (p) => (p.quote || '').trim() || (p.about || '').trim() || `${p.name || 'A soldier'} reporting for duty.`
|
| 74 |
+
// Cached audio is stale if the line or the voice design changed since it was made.
|
| 75 |
+
const isDirty = () => hasVoice && lastPersona && (lineFor(lastPersona) !== lastPersona.voiceQuote || (lastPersona.voice || '') !== (lastPersona.voiceDesignUsed || ''))
|
| 76 |
+
// Sync the two header icon buttons with the current voice state:
// ▶ is shown only when a cached voice exists and carries the badge when it is stale;
// 🎙 is shown whenever a persona is loaded, with a tooltip matching create vs recreate.
function updateVoiceUI() {
  const setShown = (node, on) => { node.style.display = on ? '' : 'none' }

  setShown(playBtn, hasVoice)
  playBtn.classList.toggle('badged', isDirty())

  setShown(createBtn, lastPersona)
  if (hasVoice) createBtn.title = 'Recreate voice'
  else createBtn.title = 'Create voice'
}
|
| 82 |
+
|
| 83 |
function autosave() {
|
| 84 |
if (!lastPersona) return
|
| 85 |
const rec = savePersona({ ...lastPersona, id: savedId, unitClass: lastPersona.unitClass || sel.value, seed: lastPersona.seed || seed.value })
|
|
|
|
| 97 |
if ((lastPersona[field] || '') === v) return
|
| 98 |
lastPersona[field] = v
|
| 99 |
autosave()
|
| 100 |
+
if (field === 'quote' || field === 'voice') updateVoiceUI() // may go stale → badge
|
| 101 |
})
|
| 102 |
}
|
| 103 |
editable(nameEl, 'name', { single: true })
|
|
|
|
| 118 |
aboutEl.textContent = p.about || ''
|
| 119 |
quoteEl.textContent = p.quote || ''
|
| 120 |
voiceEl.textContent = p.voice || ''
|
| 121 |
+
hasVoice = savedId ? !!(await getAudio(savedId)) : false
|
| 122 |
+
updateVoiceUI()
|
|
|
|
|
|
|
|
|
|
| 123 |
}
|
| 124 |
|
| 125 |
+
// 🎙 Create / Recreate voice — DESIGN a fresh voice from the description and cache it.
// The result is stored only while the persona that was showing at click time is still
// showing, so a slow synth can never write audio or metadata onto another persona.
async function createVoice() {
  if (working || !lastPersona) return
  if (!lastPersona.voice) { status.textContent = 'add a voice design first'; return }
  autosave() // ensure an id to key the audio

  // Pin what this click is about. savedId / lastPersona are shared state that a new
  // generation resets to null while we are awaiting the TTS backend.
  const id = savedId
  const stillShown = () => savedId === id && !!lastPersona
  const line = lineFor(lastPersona)
  const design = lastPersona.voice
  const busyMsg = 'designing the voice…'

  working = true; createBtn.classList.add('busy'); createBtn.disabled = true
  const prev = status.textContent; status.textContent = busyMsg
  try {
    const wav = await createVoiceWav(design, line)
    if (!stillShown()) return
    await putAudio(id, new Blob([wav], { type: 'audio/wav' }))
    if (!stillShown()) return
    // Record the text/design that were actually synthesized, so an edit made while
    // the request was in flight still shows up as stale.
    lastPersona.voiceQuote = line; lastPersona.voiceDesignUsed = design
    hasVoice = true; autosave()
    try { await playWav(wav.slice(0)) } catch { /* autoplay blocked — ▶ still works */ }
  } catch (e) { status.textContent = `voice failed: ${e.message || e}` }
  finally {
    // Restore the previous status only if nobody else has written one meanwhile
    // (the catch above replaces it with the failure message).
    if (status.textContent === busyMsg) status.textContent = prev
    working = false; createBtn.classList.remove('busy'); createBtn.disabled = false; updateVoiceUI()
  }
}
|
| 143 |
createBtn.addEventListener('click', createVoice)
|
| 144 |
|
| 145 |
+
// ▶ Play — plays the cached file. If the quote/voice changed since (badge), re-render
// the new line by CLONING the last voice (keeps the same timbre), then save over it.
// The clone result is dropped if a different persona (or none) is showing by the time
// it arrives, so it is never saved under the wrong id.
async function play() {
  if (working || !hasVoice || !savedId) return

  // Pin the id this tap is for; savedId / lastPersona can be replaced during any await.
  const id = savedId
  const stillShown = () => savedId === id && !!lastPersona

  const blob = await getAudio(id)
  // Re-check: the guard above ran before we yielded, so a second tap or another
  // action may have started in the meantime.
  if (working || !stillShown()) return
  if (!blob) { hasVoice = false; updateVoiceUI(); return }
  if (!isDirty()) { try { await playWav(await blob.arrayBuffer()) } catch { /* ignore */ } return }

  const busyMsg = 'updating the voice…'
  working = true; playBtn.classList.add('busy'); playBtn.disabled = true
  const prev = status.textContent; status.textContent = busyMsg
  try {
    const line = lineFor(lastPersona)
    const design = lastPersona.voice
    const refText = lastPersona.voiceQuote || ''
    const newWav = await cloneVoiceWav(await blob.arrayBuffer(), refText, line, design || '')
    if (!stillShown()) return
    try { await playWav(newWav.slice(0)) } catch { /* ignore */ }
    if (!stillShown()) return
    await putAudio(id, new Blob([newWav], { type: 'audio/wav' })) // save over
    if (!stillShown()) return
    // Record what was actually rendered, so edits made mid-request stay badged.
    lastPersona.voiceQuote = line; lastPersona.voiceDesignUsed = design
    autosave()
  } catch (e) { status.textContent = `voice update failed: ${e.message || e}` }
  finally {
    // Restore the previous status only if nobody else has written one meanwhile
    // (the catch above replaces it with the failure message).
    if (status.textContent === busyMsg) status.textContent = prev
    working = false; playBtn.classList.remove('busy'); playBtn.disabled = false; updateVoiceUI()
  }
}
|
| 164 |
+
playBtn.addEventListener('click', play)
|
| 165 |
|
| 166 |
// ── Barracks roster (saved soldiers) ──────────────────────────────────────
|
| 167 |
function renderRoster(personas) {
|
|
|
|
| 209 |
if (window.innerWidth <= 768) result.scrollIntoView({ behavior: 'smooth', block: 'start' })
|
| 210 |
nameEl.textContent = '…'; aboutEl.textContent = ''; tagsEl.replaceChildren()
|
| 211 |
quoteEl.textContent = ''; voiceEl.textContent = ''
|
| 212 |
+
createBtn.style.display = 'none'; playBtn.style.display = 'none'; lastPersona = null; savedId = null; hasVoice = false
|
| 213 |
stopPreview()
|
| 214 |
thinkEl.textContent = ''; thinkWrap.open = true; stats.textContent = ''
|
| 215 |
let acc = ''
|
|
@@ -62,29 +62,39 @@
|
|
| 62 |
font-size: 17px; line-height: 1.6; max-width: 60ch; color: var(--p-ink);
|
| 63 |
white-space: pre-wrap;
|
| 64 |
}
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
}
|
| 69 |
.persona-voice-desc {
|
| 70 |
font-family: var(--p-mono); font-size: 12px; line-height: 1.5; color: var(--p-muted);
|
| 71 |
-
max-width: 60ch; margin-top:
|
| 72 |
}
|
| 73 |
-
.persona-quote-row { display: flex; align-items: flex-start; gap: 10px; margin-top: 16px; }
|
| 74 |
.persona-quote {
|
| 75 |
-
|
| 76 |
font-family: 'Fraunces', Georgia, serif; font-size: 21px; font-style: italic;
|
| 77 |
line-height: 1.35; color: var(--p-ink); max-width: 54ch;
|
| 78 |
}
|
| 79 |
.persona-quote:not(:empty)::before { content: '“'; }
|
| 80 |
.persona-quote:not(:empty)::after { content: '”'; }
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
}
|
| 86 |
-
.persona-replay:hover { background: var(--p-ink) !important; border-color: var(--p-ink) !important; }
|
| 87 |
-
.persona-actions { display: flex; flex-wrap: wrap; gap: 10px; margin-top: 14px; }
|
| 88 |
|
| 89 |
/* Click-to-edit fields (name / about / quote / voice) — auto-saved on blur. */
|
| 90 |
.persona-edit { cursor: text; border-radius: 0; outline: none; transition: background .12s; }
|
|
|
|
| 62 |
font-size: 17px; line-height: 1.6; max-width: 60ch; color: var(--p-ink);
|
| 63 |
white-space: pre-wrap;
|
| 64 |
}
|
| 65 |
+
/* ── Section headers (line + red heading, action on the right) ─────────────── */
|
| 66 |
+
.persona-sec {
|
| 67 |
+
display: flex; align-items: center; justify-content: space-between; gap: 10px;
|
| 68 |
+
margin-top: 20px; padding-top: 9px; border-top: 1px solid var(--p-ink);
|
| 69 |
+
}
|
| 70 |
+
.persona-sec-title {
|
| 71 |
+
font-family: var(--p-mono); font-size: 11px; font-weight: 500; letter-spacing: .2em;
|
| 72 |
+
text-transform: uppercase; color: var(--p-transmit);
|
| 73 |
}
|
| 74 |
.persona-voice-desc {
|
| 75 |
font-family: var(--p-mono); font-size: 12px; line-height: 1.5; color: var(--p-muted);
|
| 76 |
+
max-width: 60ch; margin-top: 8px; font-style: italic;
|
| 77 |
}
|
|
|
|
| 78 |
.persona-quote {
|
| 79 |
+
margin: 8px 0 0; padding: 4px 0 4px 16px; border-left: 3px solid var(--p-transmit);
|
| 80 |
font-family: 'Fraunces', Georgia, serif; font-size: 21px; font-style: italic;
|
| 81 |
line-height: 1.35; color: var(--p-ink); max-width: 54ch;
|
| 82 |
}
|
| 83 |
.persona-quote:not(:empty)::before { content: '“'; }
|
| 84 |
.persona-quote:not(:empty)::after { content: '”'; }
|
| 85 |
+
|
| 86 |
+
/* Simple icon buttons on the section headers. */
|
| 87 |
+
.persona-ico {
|
| 88 |
+
position: relative; cursor: pointer; flex-shrink: 0; line-height: 1;
|
| 89 |
+
font-size: 13px !important; color: var(--p-ink) !important; background: var(--p-card) !important;
|
| 90 |
+
border: 1.5px solid var(--p-ink) !important; border-radius: 0 !important; padding: 5px 10px !important;
|
| 91 |
+
}
|
| 92 |
+
.persona-ico:hover { background: var(--p-paper-2) !important; }
|
| 93 |
+
.persona-ico.busy { opacity: .55; cursor: default; }
|
| 94 |
+
.persona-play.badged::after { /* "voice changed — tap to refresh" badge */
|
| 95 |
+
content: ''; position: absolute; top: -4px; right: -4px; width: 9px; height: 9px;
|
| 96 |
+
background: var(--p-transmit); border: 1.5px solid var(--p-card); border-radius: 50%;
|
| 97 |
}
|
|
|
|
|
|
|
| 98 |
|
| 99 |
/* Click-to-edit fields (name / about / quote / voice) — auto-saved on blur. */
|
| 100 |
.persona-edit { cursor: text; border-radius: 0; outline: none; transition: background .12s; }
|
|
@@ -30,6 +30,11 @@ export async function createVoiceWav(desc, text) {
|
|
| 30 |
qwen3.setDesc(desc)
|
| 31 |
return qwen3.synthWav(text, 'persona')
|
| 32 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
export async function playWav(arrayBuffer) {
|
| 34 |
const { audio, sampleRate } = await decodeAudio(arrayBuffer)
|
| 35 |
return playSamples(audio, sampleRate)
|
|
|
|
| 30 |
qwen3.setDesc(desc)
|
| 31 |
return qwen3.synthWav(text, 'persona')
|
| 32 |
}
|
| 33 |
+
/**
 * Render `text` in the voice of a reference recording (same timbre, new words).
 *
 * @param refArrayBuffer reference voice file
 * @param refText        what the reference recording says
 * @param text           the new line to speak
 * @param desc           the voice design — a fallback so prod (no clone model) can
 *                       re-design instead
 * @returns WAV data, as produced by `qwen3.cloneWav`
 */
export async function cloneVoiceWav(refArrayBuffer, refText, text, desc) {
  const wav = await qwen3.cloneWav(text, refArrayBuffer, refText, desc)
  return wav
}
|
| 38 |
export async function playWav(arrayBuffer) {
|
| 39 |
const { audio, sampleRate } = await decodeAudio(arrayBuffer)
|
| 40 |
return playSamples(audio, sampleRate)
|
|
@@ -47,6 +47,24 @@ async function postSynthWav(base, text, voiceId) {
|
|
| 47 |
}
|
| 48 |
const postSynth = async (base, text, voiceId) => decodeAudio(await postSynthWav(base, text, voiceId))
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
const common = {
|
| 51 |
mode: 'pcm', needsDownload: false, networked: true,
|
| 52 |
listVoices: () => VOICES, defaultVoice: 'persona',
|
|
@@ -62,6 +80,7 @@ export const engine = {
|
|
| 62 |
available: () => true,
|
| 63 |
synth: (text, voiceId) => postSynth(ttsBase(), text, voiceId),
|
| 64 |
synthWav: (text, voiceId) => postSynthWav(ttsBase(), text, voiceId),
|
|
|
|
| 65 |
backendLabel: () => { const b = ttsBase(); try { return b ? '🖥 ' + new URL(b).host : '☁ DashScope' } catch { return '☁ DashScope' } },
|
| 66 |
}
|
| 67 |
|
|
|
|
| 47 |
}
|
| 48 |
const postSynth = async (base, text, voiceId) => decodeAudio(await postSynthWav(base, text, voiceId))
|
| 49 |
|
| 50 |
+
// Voice CLONE: synth `text` using a reference WAV (the last created voice) so the timbre
|
| 51 |
+
// stays identical — only the words change. ref is an ArrayBuffer; sent as base64.
|
| 52 |
+
// Base64-encode an ArrayBuffer. The bytes are turned into a binary string in
// 0x8000-byte slices so String.fromCharCode.apply never receives a huge argument list.
function abToB64(ab) {
  const bytes = new Uint8Array(ab)
  const CHUNK = 0x8000
  const pieces = []
  for (let start = 0; start < bytes.length; start += CHUNK) {
    const slice = bytes.subarray(start, start + CHUNK)
    pieces.push(String.fromCharCode.apply(null, slice))
  }
  return btoa(pieces.join(''))
}
|
| 57 |
+
// POST a clone request to `${base}/qwen-tts` and resolve with the response body as an
// ArrayBuffer. Throws an Error carrying the HTTP status and the first 140 characters
// of the response text when the request is not ok.
async function postClone(base, text, refAb, refText, instruct) {
  // instruct lets prod (DashScope, no clone model) gracefully re-design from the
  // description instead of cloning; local mode uses ref_audio to clone the timbre.
  const payload = {
    text,
    ref_audio: abToB64(refAb),
    ref_text: refText || '',
    instruct: instruct || '',
    language: 'English',
  }
  const resp = await fetch(`${base}/qwen-tts`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
  })
  if (resp.ok) return resp.arrayBuffer()
  const detail = (await resp.text()).slice(0, 140)
  throw new Error(`Qwen3-TTS clone ${resp.status}: ${detail}`)
}
|
| 67 |
+
|
| 68 |
const common = {
|
| 69 |
mode: 'pcm', needsDownload: false, networked: true,
|
| 70 |
listVoices: () => VOICES, defaultVoice: 'persona',
|
|
|
|
| 80 |
available: () => true,
|
| 81 |
synth: (text, voiceId) => postSynth(ttsBase(), text, voiceId),
|
| 82 |
synthWav: (text, voiceId) => postSynthWav(ttsBase(), text, voiceId),
|
| 83 |
+
cloneWav: (text, refAb, refText, instruct) => postClone(ttsBase(), text, refAb, refText, instruct),
|
| 84 |
backendLabel: () => { const b = ttsBase(); try { return b ? '🖥 ' + new URL(b).host : '☁ DashScope' } catch { return '☁ DashScope' } },
|
| 85 |
}
|
| 86 |
|