polats Claude Opus 4.8 (1M context) committed on
Commit
72160ec
·
1 Parent(s): 308478f

Persona: red section headings, icon buttons, edit-aware voice + clone-on-play

Browse files

- Result laid out in sections (line + red heading): About, Quote, Voice design.
- ▶ play icon sits on the Quote heading; 🎙 create/recreate sits on the Voice design
heading; both simplified to icons (play is neutral, not red).
- Editing the quote or voice marks the cached audio stale → a red badge on ▶.
- Tapping ▶ when badged does NOT redesign the voice: it CLONES the last voice file
(Qwen3-TTS Base model, keeping the exact timbre) for the new line, plays it, saves
over the old file, and clears the badge. Otherwise ▶ replays the cached file.
🎙 is the explicit "new voice from the description" path.
- Backend: TINY_TTS_MODE=local loads the Base model lazily and clones via
generate_voice_clone((numpy,sr) ref). Prod (DashScope) gets the voice description so
it re-designs gracefully (no clone model). Verified clone end-to-end on the GPU (5s).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

Files changed (5) hide show
  1. app.py +38 -12
  2. web/personaPanel.js +58 -29
  3. web/shell/persona.css +22 -12
  4. web/tts.js +5 -0
  5. web/ttsQwen3.js +19 -0
app.py CHANGED
@@ -246,26 +246,47 @@ _DASHSCOPE_URL = _DASHSCOPE_BASE + "/api/v1/services/audio/tts/customization"
246
  # origin so no CORS/cert dance — the LeLab pattern). Needs `pip install qwen-tts torch
247
  # soundfile`. Lazy-loaded; the Space (cpu-basic) leaves this unset and uses DashScope.
248
  TTS_MODE = os.environ.get("TINY_TTS_MODE", "").strip().lower()
249
- _local_tts = None
 
250
  _local_tts_lock = threading.Lock()
251
 
252
 
 
 
 
 
 
 
 
 
 
 
 
253
  def _local_voice_design(text, instruct, language="English"):
254
  global _local_tts
255
- import io
256
  with _local_tts_lock: # one GPU model can't decode in parallel
257
  if _local_tts is None:
258
- import torch
259
- from qwen_tts import Qwen3TTSModel
260
- mid = os.environ.get("QWEN_TTS_MODEL", "Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign")
261
- dev = "cuda:0" if torch.cuda.is_available() else "cpu"
262
- dt = torch.bfloat16 if dev != "cpu" else torch.float32
263
- _local_tts = Qwen3TTSModel.from_pretrained(mid, device_map=dev, dtype=dt)
264
- import soundfile as sf
265
  wavs, sr = _local_tts.generate_voice_design(
266
  text=text, language=language, instruct=instruct or "A clear, natural voice at a moderate pace.")
267
- out = io.BytesIO()
268
- sf.write(out, wavs[0], sr, format="WAV")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
  return out.getvalue()
270
 
271
 
@@ -303,11 +324,16 @@ async def qwen_tts(request: Request):
303
  text = (body.get("text") or "").strip()
304
  instruct = (body.get("instruct") or "").strip()
305
  language = body.get("language") or "English"
 
 
306
  if not text:
307
  return Response("text required", status_code=400)
308
  if TTS_MODE == "local": # in-process open weights (dev)
309
  try:
310
- wav = await asyncio.to_thread(_local_voice_design, text, instruct, language)
 
 
 
311
  except Exception as e: # noqa: BLE001 — surface a clear setup hint
312
  return Response(f"local TTS error (pip install qwen-tts torch soundfile?): {e}", status_code=500)
313
  return Response(wav, media_type="audio/wav", headers={"Cache-Control": "no-store"})
 
246
  # origin so no CORS/cert dance — the LeLab pattern). Needs `pip install qwen-tts torch
247
  # soundfile`. Lazy-loaded; the Space (cpu-basic) leaves this unset and uses DashScope.
248
  TTS_MODE = os.environ.get("TINY_TTS_MODE", "").strip().lower()
249
+ _local_tts = None # VoiceDesign model
250
+ _local_clone = None # Base model (voice clone) — lazy, only if a clone is requested
251
  _local_tts_lock = threading.Lock()
252
 
253
 
254
+ def _load(which):
255
+ import torch
256
+ from qwen_tts import Qwen3TTSModel
257
+ mid = os.environ.get(
258
+ "QWEN_TTS_MODEL" if which == "design" else "QWEN_TTS_CLONE_MODEL",
259
+ "Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign" if which == "design" else "Qwen/Qwen3-TTS-12Hz-1.7B-Base")
260
+ dev = "cuda:0" if torch.cuda.is_available() else "cpu"
261
+ dt = torch.bfloat16 if dev != "cpu" else torch.float32
262
+ return Qwen3TTSModel.from_pretrained(mid, device_map=dev, dtype=dt)
263
+
264
+
265
def _local_voice_design(text, instruct, language="English"):
    """Synthesize ``text`` in a voice designed from ``instruct`` and return WAV bytes.

    Args:
        text: The words to speak.
        instruct: Natural-language voice description; when empty a generic
            "clear, natural voice" description is substituted.
        language: Language name forwarded to the model (defaults to "English").

    Returns:
        The first generated waveform encoded as an in-memory WAV file (bytes).
    """
    global _local_tts
    import io
    import soundfile as sf

    voice_prompt = instruct or "A clear, natural voice at a moderate pace."
    with _local_tts_lock:  # one GPU model can't decode in parallel
        if _local_tts is None:
            # First request pays the model load; later calls reuse the instance.
            _local_tts = _load("design")
        wavs, sr = _local_tts.generate_voice_design(
            text=text, language=language, instruct=voice_prompt)
        # NOTE(review): the flattened source does not show whether encoding ran
        # inside or outside the lock; it is kept inside here — confirm.
        buf = io.BytesIO()
        sf.write(buf, wavs[0], sr, format="WAV")
    return buf.getvalue()
275
+
276
+
277
def _local_voice_clone(text, ref_audio_b64, ref_text, language="English"):
    """Speak ``text`` with the timbre of a reference recording; return WAV bytes.

    This is the "Voice Design -> Clone" workflow: a previously created voice file is
    used as the reference so only the words change.

    Args:
        text: The new words to speak.
        ref_audio_b64: Base64-encoded WAV file of the reference voice.
        ref_text: Transcript of the reference audio; ``None``/empty becomes ``""``.
        language: Language name forwarded to the model (defaults to "English").

    Returns:
        The first generated waveform encoded as an in-memory WAV file (bytes).
    """
    global _local_clone
    import io
    import soundfile as sf

    # qwen-tts wants ref_audio as a (numpy, sr) tuple — a raw base64 string gets
    # mistaken for a file path — so the WAV is decoded here, before taking the lock.
    ref_wav_bytes = base64.b64decode(ref_audio_b64)
    ref_np, ref_sr = sf.read(io.BytesIO(ref_wav_bytes))

    with _local_tts_lock:
        if _local_clone is None:
            # Base (clone) model is loaded lazily, only once a clone is requested.
            _local_clone = _load("clone")
        # NOTE(review): ref_text may be empty here; check whether
        # generate_voice_clone needs a speaker-embedding-only option in that
        # case — TODO confirm against the qwen-tts documentation.
        wavs, sr = _local_clone.generate_voice_clone(
            text=text,
            language=language,
            ref_audio=(ref_np, ref_sr),
            ref_text=ref_text or "",
        )
        buf = io.BytesIO()
        sf.write(buf, wavs[0], sr, format="WAV")
    return buf.getvalue()
291
 
292
 
 
324
  text = (body.get("text") or "").strip()
325
  instruct = (body.get("instruct") or "").strip()
326
  language = body.get("language") or "English"
327
+ ref_audio = body.get("ref_audio") # base64 WAV → clone (keep timbre, new words)
328
+ ref_text = body.get("ref_text") or ""
329
  if not text:
330
  return Response("text required", status_code=400)
331
  if TTS_MODE == "local": # in-process open weights (dev)
332
  try:
333
+ if ref_audio:
334
+ wav = await asyncio.to_thread(_local_voice_clone, text, ref_audio, ref_text, language)
335
+ else:
336
+ wav = await asyncio.to_thread(_local_voice_design, text, instruct, language)
337
  except Exception as e: # noqa: BLE001 — surface a clear setup hint
338
  return Response(f"local TTS error (pip install qwen-tts torch soundfile?): {e}", status_code=500)
339
  return Response(wav, media_type="audio/wav", headers={"Cache-Control": "no-store"})
web/personaPanel.js CHANGED
@@ -7,7 +7,7 @@ import { streamChat, ensureModel, currentModel, currentModelId, getEngineId, bac
7
  import { extractLivePersona } from '/web/personaStream.js'
8
  import { parsePersonaJson } from '/web/personaParse.js'
9
  import { PERSONA_SYSTEM, personaUserPrompt, stripThink, stripThinkFinal, noThink } from '/web/personaPrompts.js'
10
- import { createVoiceWav, playWav, stopPreview } from '/web/tts.js'
11
  import { listPersonas, savePersona, removePersona, onRosterChange, putAudio, getAudio } from '/web/personaStore.js'
12
 
13
  const CLASSES = ['Warrior', 'Ranger', 'Monk', 'Assassin', 'Mage', 'Paladin', 'Cleric', 'Knight']
@@ -36,30 +36,50 @@ export function mountPersonaPanel(host) {
36
  const tagsEl = el('div', { class: 'persona-tags' })
37
  const aboutEl = el('div', { class: 'persona-about persona-edit', 'data-ph': 'Their story…' })
38
  const quoteEl = el('blockquote', { class: 'persona-quote persona-edit', 'data-ph': 'A line they say…' })
39
- const replayBtn = el('button', { class: 'persona-replay', type: 'button', title: 'Replay voice', style: 'display:none' }, '▶')
40
- const quoteRow = el('div', { class: 'persona-quote-row' }, [quoteEl, replayBtn])
41
- const voiceLabel = el('div', { class: 'persona-voice-lbl' }, '🎙 Voice design')
42
  const voiceEl = el('div', { class: 'persona-voice-desc persona-edit', 'data-ph': 'How they sound…' })
43
- const createBtn = el('button', { class: 'persona-go persona-go-alt persona-create', type: 'button', style: 'display:none' }, '🎙 Create voice')
 
 
44
  const thinkEl = el('pre', { class: 'persona-think' })
45
  const copyBtn = el('button', { class: 'persona-copy', type: 'button' }, '📋 Copy debug')
46
  const thinkWrap = el('details', { class: 'persona-think-wrap' },
47
  [el('summary', {}, 'model output / debug (raw)'), copyBtn, thinkEl])
48
 
 
 
 
 
49
  const controls = el('aside', { class: 'persona-controls' }, [
50
  el('label', { class: 'persona-label' }, 'Class'), sel,
51
  el('label', { class: 'persona-label' }, 'Seed'), seed,
52
  btn, stats, status,
53
  el('label', { class: 'persona-label persona-roster-label' }, 'Barracks (saved)'), rosterEl,
54
  ])
55
- const result = el('div', { class: 'persona-result' },
56
- [nameEl, tagsEl, aboutEl, quoteRow, voiceLabel, voiceEl, el('div', { class: 'persona-actions' }, [createBtn]), thinkWrap])
 
 
 
 
 
57
  host.appendChild(el('div', { class: 'persona-view' }, [controls, result]))
58
 
59
  let lastPersona = null // the persona currently shown
60
  let savedId = null // its roster id (set the moment it's shown — always saved)
 
61
  let working = false
62
 
 
 
 
 
 
 
 
 
 
 
 
63
  function autosave() {
64
  if (!lastPersona) return
65
  const rec = savePersona({ ...lastPersona, id: savedId, unitClass: lastPersona.unitClass || sel.value, seed: lastPersona.seed || seed.value })
@@ -77,6 +97,7 @@ export function mountPersonaPanel(host) {
77
  if ((lastPersona[field] || '') === v) return
78
  lastPersona[field] = v
79
  autosave()
 
80
  })
81
  }
82
  editable(nameEl, 'name', { single: true })
@@ -97,42 +118,50 @@ export function mountPersonaPanel(host) {
97
  aboutEl.textContent = p.about || ''
98
  quoteEl.textContent = p.quote || ''
99
  voiceEl.textContent = p.voice || ''
100
- createBtn.style.display = ''
101
- // Show replay if we have a cached voice file for this saved persona.
102
- const has = savedId ? !!(await getAudio(savedId)) : false
103
- replayBtn.style.display = has ? '' : 'none'
104
- createBtn.textContent = has ? '🎙 Recreate voice' : '🎙 Create voice'
105
  }
106
 
107
- // 🎙 Create voice — synth the QUOTE in the designed voice, cache the WAV, play it.
108
  async function createVoice() {
109
  if (working || !lastPersona) return
110
- const line = (lastPersona.quote || '').trim() || (lastPersona.about || '').trim() || `${lastPersona.name} reporting for duty.`
111
  if (!lastPersona.voice) { status.textContent = 'add a voice design first'; return }
112
  autosave() // ensure an id to key the audio
113
- working = true; const prevTxt = createBtn.textContent; createBtn.textContent = '🎙 designing…'; createBtn.disabled = true
114
- const prev = status.textContent
 
115
  try {
116
  const wav = await createVoiceWav(lastPersona.voice, line)
117
  await putAudio(savedId, new Blob([wav], { type: 'audio/wav' }))
118
- try { await playWav(wav.slice(0)) } catch { /* autoplay blocked — replay button still works */ }
119
- replayBtn.style.display = ''
120
- createBtn.textContent = '🎙 Recreate voice'
121
  status.textContent = prev
122
- } catch (e) {
123
- status.textContent = `voice failed: ${e.message || e}`
124
- createBtn.textContent = prevTxt
125
- } finally { working = false; createBtn.disabled = false }
126
  }
127
  createBtn.addEventListener('click', createVoice)
128
 
129
- async function replay() {
130
- if (!savedId) return
 
 
131
  const blob = await getAudio(savedId)
132
- if (!blob) return createVoice()
133
- try { await playWav(await blob.arrayBuffer()) } catch { /* ignore */ }
 
 
 
 
 
 
 
 
 
 
 
134
  }
135
- replayBtn.addEventListener('click', replay)
136
 
137
  // ── Barracks roster (saved soldiers) ──────────────────────────────────────
138
  function renderRoster(personas) {
@@ -180,7 +209,7 @@ export function mountPersonaPanel(host) {
180
  if (window.innerWidth <= 768) result.scrollIntoView({ behavior: 'smooth', block: 'start' })
181
  nameEl.textContent = '…'; aboutEl.textContent = ''; tagsEl.replaceChildren()
182
  quoteEl.textContent = ''; voiceEl.textContent = ''
183
- createBtn.style.display = 'none'; replayBtn.style.display = 'none'; lastPersona = null; savedId = null
184
  stopPreview()
185
  thinkEl.textContent = ''; thinkWrap.open = true; stats.textContent = ''
186
  let acc = ''
 
7
  import { extractLivePersona } from '/web/personaStream.js'
8
  import { parsePersonaJson } from '/web/personaParse.js'
9
  import { PERSONA_SYSTEM, personaUserPrompt, stripThink, stripThinkFinal, noThink } from '/web/personaPrompts.js'
10
+ import { createVoiceWav, cloneVoiceWav, playWav, stopPreview } from '/web/tts.js'
11
  import { listPersonas, savePersona, removePersona, onRosterChange, putAudio, getAudio } from '/web/personaStore.js'
12
 
13
  const CLASSES = ['Warrior', 'Ranger', 'Monk', 'Assassin', 'Mage', 'Paladin', 'Cleric', 'Knight']
 
36
  const tagsEl = el('div', { class: 'persona-tags' })
37
  const aboutEl = el('div', { class: 'persona-about persona-edit', 'data-ph': 'Their story…' })
38
  const quoteEl = el('blockquote', { class: 'persona-quote persona-edit', 'data-ph': 'A line they say…' })
 
 
 
39
  const voiceEl = el('div', { class: 'persona-voice-desc persona-edit', 'data-ph': 'How they sound…' })
40
+ // play sits on the Quote heading; 🎙 create sits on the Voice design heading.
41
+ const playBtn = el('button', { class: 'persona-ico persona-play', type: 'button', title: 'Play voice', style: 'display:none' }, '▶')
42
+ const createBtn = el('button', { class: 'persona-ico persona-create', type: 'button', title: 'Create voice', style: 'display:none' }, '🎙')
43
  const thinkEl = el('pre', { class: 'persona-think' })
44
  const copyBtn = el('button', { class: 'persona-copy', type: 'button' }, '📋 Copy debug')
45
  const thinkWrap = el('details', { class: 'persona-think-wrap' },
46
  [el('summary', {}, 'model output / debug (raw)'), copyBtn, thinkEl])
47
 
48
+ // A section header: a top line + a small red heading, with an optional action on the right.
49
+ const secHead = (title, action) =>
50
+ el('div', { class: 'persona-sec' }, [el('div', { class: 'persona-sec-title' }, title), action || el('span')])
51
+
52
  const controls = el('aside', { class: 'persona-controls' }, [
53
  el('label', { class: 'persona-label' }, 'Class'), sel,
54
  el('label', { class: 'persona-label' }, 'Seed'), seed,
55
  btn, stats, status,
56
  el('label', { class: 'persona-label persona-roster-label' }, 'Barracks (saved)'), rosterEl,
57
  ])
58
+ const result = el('div', { class: 'persona-result' }, [
59
+ nameEl, tagsEl,
60
+ secHead('About'), aboutEl,
61
+ secHead('Quote', playBtn), quoteEl,
62
+ secHead('Voice design', createBtn), voiceEl,
63
+ thinkWrap,
64
+ ])
65
  host.appendChild(el('div', { class: 'persona-view' }, [controls, result]))
66
 
67
  let lastPersona = null // the persona currently shown
68
  let savedId = null // its roster id (set the moment it's shown — always saved)
69
+ let hasVoice = false // a cached voice file exists for this persona
70
  let working = false
71
 
72
+ // The line the voice actually says (quote, else about, else a fallback).
73
+ const lineFor = (p) => (p.quote || '').trim() || (p.about || '').trim() || `${p.name || 'A soldier'} reporting for duty.`
74
+ // Cached audio is stale if the line or the voice design changed since it was made.
75
+ const isDirty = () => hasVoice && lastPersona && (lineFor(lastPersona) !== lastPersona.voiceQuote || (lastPersona.voice || '') !== (lastPersona.voiceDesignUsed || ''))
76
+ function updateVoiceUI() {
77
+ playBtn.style.display = hasVoice ? '' : 'none'
78
+ playBtn.classList.toggle('badged', isDirty())
79
+ createBtn.style.display = lastPersona ? '' : 'none'
80
+ createBtn.title = hasVoice ? 'Recreate voice' : 'Create voice'
81
+ }
82
+
83
  function autosave() {
84
  if (!lastPersona) return
85
  const rec = savePersona({ ...lastPersona, id: savedId, unitClass: lastPersona.unitClass || sel.value, seed: lastPersona.seed || seed.value })
 
97
  if ((lastPersona[field] || '') === v) return
98
  lastPersona[field] = v
99
  autosave()
100
+ if (field === 'quote' || field === 'voice') updateVoiceUI() // may go stale → badge
101
  })
102
  }
103
  editable(nameEl, 'name', { single: true })
 
118
  aboutEl.textContent = p.about || ''
119
  quoteEl.textContent = p.quote || ''
120
  voiceEl.textContent = p.voice || ''
121
+ hasVoice = savedId ? !!(await getAudio(savedId)) : false
122
+ updateVoiceUI()
 
 
 
123
  }
124
 
125
+ // 🎙 Create / Recreate voice — DESIGN a fresh voice from the description and cache it.
126
  async function createVoice() {
127
  if (working || !lastPersona) return
 
128
  if (!lastPersona.voice) { status.textContent = 'add a voice design first'; return }
129
  autosave() // ensure an id to key the audio
130
+ const line = lineFor(lastPersona)
131
+ working = true; createBtn.classList.add('busy'); createBtn.disabled = true
132
+ const prev = status.textContent; status.textContent = 'designing the voice…'
133
  try {
134
  const wav = await createVoiceWav(lastPersona.voice, line)
135
  await putAudio(savedId, new Blob([wav], { type: 'audio/wav' }))
136
+ lastPersona.voiceQuote = line; lastPersona.voiceDesignUsed = lastPersona.voice
137
+ hasVoice = true; autosave()
138
+ try { await playWav(wav.slice(0)) } catch { /* autoplay blocked — ▶ still works */ }
139
  status.textContent = prev
140
+ } catch (e) { status.textContent = `voice failed: ${e.message || e}` }
141
+ finally { working = false; createBtn.classList.remove('busy'); createBtn.disabled = false; updateVoiceUI() }
 
 
142
  }
143
  createBtn.addEventListener('click', createVoice)
144
 
145
+ // Play — plays the cached file. If the quote/voice changed since (badge), re-render
146
+ // the new line by CLONING the last voice (keeps the same timbre), then save over it.
147
+ async function play() {
148
+ if (working || !hasVoice || !savedId) return
149
  const blob = await getAudio(savedId)
150
+ if (!blob) { hasVoice = false; updateVoiceUI(); return }
151
+ if (!isDirty()) { try { await playWav(await blob.arrayBuffer()) } catch { /* ignore */ } return }
152
+ working = true; playBtn.classList.add('busy'); playBtn.disabled = true
153
+ const prev = status.textContent; status.textContent = 'updating the voice…'
154
+ try {
155
+ const line = lineFor(lastPersona)
156
+ const newWav = await cloneVoiceWav(await blob.arrayBuffer(), lastPersona.voiceQuote || '', line, lastPersona.voice || '')
157
+ try { await playWav(newWav.slice(0)) } catch { /* ignore */ }
158
+ await putAudio(savedId, new Blob([newWav], { type: 'audio/wav' })) // save over
159
+ lastPersona.voiceQuote = line; lastPersona.voiceDesignUsed = lastPersona.voice
160
+ autosave(); status.textContent = prev
161
+ } catch (e) { status.textContent = `voice update failed: ${e.message || e}` }
162
+ finally { working = false; playBtn.classList.remove('busy'); playBtn.disabled = false; updateVoiceUI() }
163
  }
164
+ playBtn.addEventListener('click', play)
165
 
166
  // ── Barracks roster (saved soldiers) ──────────────────────────────────────
167
  function renderRoster(personas) {
 
209
  if (window.innerWidth <= 768) result.scrollIntoView({ behavior: 'smooth', block: 'start' })
210
  nameEl.textContent = '…'; aboutEl.textContent = ''; tagsEl.replaceChildren()
211
  quoteEl.textContent = ''; voiceEl.textContent = ''
212
+ createBtn.style.display = 'none'; playBtn.style.display = 'none'; lastPersona = null; savedId = null; hasVoice = false
213
  stopPreview()
214
  thinkEl.textContent = ''; thinkWrap.open = true; stats.textContent = ''
215
  let acc = ''
web/shell/persona.css CHANGED
@@ -62,29 +62,39 @@
62
  font-size: 17px; line-height: 1.6; max-width: 60ch; color: var(--p-ink);
63
  white-space: pre-wrap;
64
  }
65
- .persona-voice-lbl {
66
- font-family: var(--p-mono); font-size: 10px; letter-spacing: .14em; text-transform: uppercase;
67
- color: var(--p-muted); margin-top: 16px;
 
 
 
 
 
68
  }
69
  .persona-voice-desc {
70
  font-family: var(--p-mono); font-size: 12px; line-height: 1.5; color: var(--p-muted);
71
- max-width: 60ch; margin-top: 4px; font-style: italic;
72
  }
73
- .persona-quote-row { display: flex; align-items: flex-start; gap: 10px; margin-top: 16px; }
74
  .persona-quote {
75
- flex: 1; margin: 0; padding: 6px 0 6px 16px; border-left: 3px solid var(--p-transmit);
76
  font-family: 'Fraunces', Georgia, serif; font-size: 21px; font-style: italic;
77
  line-height: 1.35; color: var(--p-ink); max-width: 54ch;
78
  }
79
  .persona-quote:not(:empty)::before { content: '“'; }
80
  .persona-quote:not(:empty)::after { content: '”'; }
81
- .persona-replay {
82
- flex-shrink: 0; cursor: pointer; margin-top: 4px;
83
- font-size: 13px !important; color: var(--p-paper) !important; background: var(--p-transmit) !important;
84
- border: 1.5px solid var(--p-transmit) !important; border-radius: 0 !important; padding: 6px 11px !important; line-height: 1;
 
 
 
 
 
 
 
 
85
  }
86
- .persona-replay:hover { background: var(--p-ink) !important; border-color: var(--p-ink) !important; }
87
- .persona-actions { display: flex; flex-wrap: wrap; gap: 10px; margin-top: 14px; }
88
 
89
  /* Click-to-edit fields (name / about / quote / voice) — auto-saved on blur. */
90
  .persona-edit { cursor: text; border-radius: 0; outline: none; transition: background .12s; }
 
62
  font-size: 17px; line-height: 1.6; max-width: 60ch; color: var(--p-ink);
63
  white-space: pre-wrap;
64
  }
65
+ /* ── Section headers (line + red heading, action on the right) ─────────────── */
66
+ .persona-sec {
67
+ display: flex; align-items: center; justify-content: space-between; gap: 10px;
68
+ margin-top: 20px; padding-top: 9px; border-top: 1px solid var(--p-ink);
69
+ }
70
+ .persona-sec-title {
71
+ font-family: var(--p-mono); font-size: 11px; font-weight: 500; letter-spacing: .2em;
72
+ text-transform: uppercase; color: var(--p-transmit);
73
  }
74
  .persona-voice-desc {
75
  font-family: var(--p-mono); font-size: 12px; line-height: 1.5; color: var(--p-muted);
76
+ max-width: 60ch; margin-top: 8px; font-style: italic;
77
  }
 
78
  .persona-quote {
79
+ margin: 8px 0 0; padding: 4px 0 4px 16px; border-left: 3px solid var(--p-transmit);
80
  font-family: 'Fraunces', Georgia, serif; font-size: 21px; font-style: italic;
81
  line-height: 1.35; color: var(--p-ink); max-width: 54ch;
82
  }
83
  .persona-quote:not(:empty)::before { content: '“'; }
84
  .persona-quote:not(:empty)::after { content: '”'; }
85
+
86
+ /* Simple icon buttons on the section headers. */
87
+ .persona-ico {
88
+ position: relative; cursor: pointer; flex-shrink: 0; line-height: 1;
89
+ font-size: 13px !important; color: var(--p-ink) !important; background: var(--p-card) !important;
90
+ border: 1.5px solid var(--p-ink) !important; border-radius: 0 !important; padding: 5px 10px !important;
91
+ }
92
+ .persona-ico:hover { background: var(--p-paper-2) !important; }
93
+ .persona-ico.busy { opacity: .55; cursor: default; }
94
+ .persona-play.badged::after { /* "voice changed — tap to refresh" badge */
95
+ content: ''; position: absolute; top: -4px; right: -4px; width: 9px; height: 9px;
96
+ background: var(--p-transmit); border: 1.5px solid var(--p-card); border-radius: 50%;
97
  }
 
 
98
 
99
  /* Click-to-edit fields (name / about / quote / voice) — auto-saved on blur. */
100
  .persona-edit { cursor: text; border-radius: 0; outline: none; transition: background .12s; }
web/tts.js CHANGED
@@ -30,6 +30,11 @@ export async function createVoiceWav(desc, text) {
30
  qwen3.setDesc(desc)
31
  return qwen3.synthWav(text, 'persona')
32
  }
 
 
 
 
 
33
  export async function playWav(arrayBuffer) {
34
  const { audio, sampleRate } = await decodeAudio(arrayBuffer)
35
  return playSamples(audio, sampleRate)
 
30
  qwen3.setDesc(desc)
31
  return qwen3.synthWav(text, 'persona')
32
  }
33
// Re-speak `text` using a reference voice file so the timbre is kept and only the
// words change. `desc` (the voice design) is passed along as a fallback so prod, which
// has no clone model, can re-design instead. Resolves to the WAV returned by the engine.
export async function cloneVoiceWav(refArrayBuffer, refText, text, desc) {
  const wav = await qwen3.cloneWav(text, refArrayBuffer, refText, desc)
  return wav
}
38
  export async function playWav(arrayBuffer) {
39
  const { audio, sampleRate } = await decodeAudio(arrayBuffer)
40
  return playSamples(audio, sampleRate)
web/ttsQwen3.js CHANGED
@@ -47,6 +47,24 @@ async function postSynthWav(base, text, voiceId) {
47
  }
48
  const postSynth = async (base, text, voiceId) => decodeAudio(await postSynthWav(base, text, voiceId))
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  const common = {
51
  mode: 'pcm', needsDownload: false, networked: true,
52
  listVoices: () => VOICES, defaultVoice: 'persona',
@@ -62,6 +80,7 @@ export const engine = {
62
  available: () => true,
63
  synth: (text, voiceId) => postSynth(ttsBase(), text, voiceId),
64
  synthWav: (text, voiceId) => postSynthWav(ttsBase(), text, voiceId),
 
65
  backendLabel: () => { const b = ttsBase(); try { return b ? '🖥 ' + new URL(b).host : '☁ DashScope' } catch { return '☁ DashScope' } },
66
  }
67
 
 
47
  }
48
  const postSynth = async (base, text, voiceId) => decodeAudio(await postSynthWav(base, text, voiceId))
49
 
50
+ // Voice CLONE: synth `text` using a reference WAV (the last created voice) so the timbre
51
+ // stays identical — only the words change. ref is an ArrayBuffer; sent as base64.
52
// Base64-encode an ArrayBuffer. Bytes are converted to a binary string in 0x8000-byte
// slices so String.fromCharCode.apply never receives an oversized argument list.
function abToB64(ab) {
  const bytes = new Uint8Array(ab)
  const CHUNK = 0x8000
  const pieces = []
  for (let start = 0; start < bytes.length; start += CHUNK) {
    pieces.push(String.fromCharCode.apply(null, bytes.subarray(start, start + CHUNK)))
  }
  return btoa(pieces.join(''))
}
57
// POST a clone request to `${base}/qwen-tts` and resolve to the response body as an
// ArrayBuffer. Throws an Error carrying the HTTP status and the first 140 characters
// of the response text when the request is not OK.
async function postClone(base, text, refAb, refText, instruct) {
  // instruct lets prod (DashScope, no clone model) gracefully re-design from the
  // description instead of cloning; local mode uses ref_audio to clone the timbre.
  const payload = {
    text,
    ref_audio: abToB64(refAb),
    ref_text: refText || '',
    instruct: instruct || '',
    language: 'English',
  }
  const resp = await fetch(`${base}/qwen-tts`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
  })
  if (!resp.ok) {
    const detail = (await resp.text()).slice(0, 140)
    throw new Error(`Qwen3-TTS clone ${resp.status}: ${detail}`)
  }
  return resp.arrayBuffer()
}
67
+
68
  const common = {
69
  mode: 'pcm', needsDownload: false, networked: true,
70
  listVoices: () => VOICES, defaultVoice: 'persona',
 
80
  available: () => true,
81
  synth: (text, voiceId) => postSynth(ttsBase(), text, voiceId),
82
  synthWav: (text, voiceId) => postSynthWav(ttsBase(), text, voiceId),
83
+ cloneWav: (text, refAb, refText, instruct) => postClone(ttsBase(), text, refAb, refText, instruct),
84
  backendLabel: () => { const b = ttsBase(); try { return b ? '🖥 ' + new URL(b).host : '☁ DashScope' } catch { return '☁ DashScope' } },
85
  }
86