polats Claude Opus 4.8 (1M context) committed on
Commit
26cb5c0
·
1 Parent(s): 29474fc

Persona voice: cloud voice-clone, varied voice designs, animated class picker

Browse files

- app.py: DashScope cloud voice-clone for prod — two-call enroll
(qwen-voice-enrollment) → synthesize (qwen3-tts-vc-2026-01-22), then fetch
the returned OSS audio URL. Wired into /qwen-tts when ref_audio is present
(mirrors the local design→clone flow). Verified flow returns 200 + audio
when the account's paid tier is enabled.
- personaPrompts.js: voice field now asks for a DISTINCT, class-fit voice with
wide variety between heroes (was producing near-identical gruff voices).
- personaPanel.js/persona.css: class picker is now a custom dropdown showing
each class's looping idle sprite (front-right row, Web Animations API), wired
to characters.json; mirrors a native <select> (.value, 'change').

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

Files changed (4) hide show
  1. app.py +52 -1
  2. web/personaPanel.js +103 -1
  3. web/personaPrompts.js +5 -3
  4. web/shell/persona.css +29 -0
app.py CHANGED
@@ -318,6 +318,54 @@ def _dashscope_voice_design(text, instruct):
318
  return base64.b64decode(b64), None
319
 
320
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
321
  @fastapi_app.post("/qwen-tts")
322
  async def qwen_tts(request: Request):
323
  body = await request.json()
@@ -339,7 +387,10 @@ async def qwen_tts(request: Request):
339
  return Response(wav, media_type="audio/wav", headers={"Cache-Control": "no-store"})
340
  if not DASHSCOPE_KEY:
341
  return Response("DASHSCOPE_API_KEY not set (or run with TINY_TTS_MODE=local)", status_code=503)
342
- wav, err = await asyncio.to_thread(_dashscope_voice_design, text, instruct)
 
 
 
343
  if err:
344
  return Response(err, status_code=502)
345
  return Response(wav, media_type="audio/wav", headers={"Cache-Control": "no-store"})
 
318
  return base64.b64decode(b64), None
319
 
320
 
321
+ # Voice CLONE on the cloud is a TWO-CALL flow (mirrors the open-weights design→clone):
322
+ # 1. enroll the reference WAV (qwen-voice-enrollment) → a voice_id
323
+ # 2. synthesize new words in that timbre (qwen3-tts-vc-…) → an OSS-signed audio URL
324
+ # Synthesis returns the audio as a URL (not base64), so we fetch the bytes ourselves.
325
+ _DASHSCOPE_VC_MODEL = os.environ.get("DASHSCOPE_VC_MODEL", "qwen3-tts-vc-2026-01-22")
326
+ _DASHSCOPE_GEN_URL = _DASHSCOPE_BASE + "/api/v1/services/aigc/multimodal-generation/generation"
327
+
328
+
329
def _dashscope_post(url, payload):
    """POST ``payload`` as JSON to ``url`` and return the decoded JSON reply.

    The request carries the module's ``DASHSCOPE_KEY`` as a bearer token and
    waits at most 90 seconds. Nothing is caught here: ``urllib`` errors
    (including ``HTTPError``) and JSON decoding errors propagate to the caller.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {DASHSCOPE_KEY}",
    }
    encoded = _json.dumps(payload).encode()
    request = urllib.request.Request(url, data=encoded, method="POST", headers=headers)
    with urllib.request.urlopen(request, timeout=90) as resp:
        raw = resp.read()
    return _json.loads(raw.decode())
335
+
336
+
337
def _dashscope_voice_clone(text, ref_audio_b64, ref_text):
    """Clone the reference voice on DashScope and speak ``text`` with it.

    Two calls are made: the reference audio is enrolled to obtain a voice id,
    then ``text`` is synthesized with that voice. The synthesis reply carries
    an audio URL rather than the audio itself, so the bytes are fetched here.

    Args:
        text: Words to synthesize.
        ref_audio_b64: Base64 of the reference audio. It is sent as an
            ``audio/wav`` data URI (assumes bare base64, not an already
            prefixed data URI -- TODO confirm against the caller).
        ref_text: Accepted for signature parity with the caller; this cloud
            flow does not send it anywhere.

    Returns:
        ``(audio_bytes, None)`` on success, otherwise ``(None, message)``.
        The function reports failures through the tuple instead of raising.
    """
    try:
        enroll = _dashscope_post(_DASHSCOPE_URL, {
            "model": "qwen-voice-enrollment",
            "input": {
                "action": "create",
                "target_model": _DASHSCOPE_VC_MODEL,
                "preferred_name": "tinyarmy",
                "audio": {"data": "data:audio/wav;base64," + ref_audio_b64},
            },
        })
        voice_id = (enroll.get("output") or {}).get("voice")
        if not voice_id:
            return None, "no voice_id from enrollment: " + _json.dumps(enroll)[:200]
        gen = _dashscope_post(_DASHSCOPE_GEN_URL, {
            "model": _DASHSCOPE_VC_MODEL,
            "input": {"text": text, "voice": voice_id},
        })
        # Parsed inside the try so an unexpected reply shape becomes an error
        # tuple rather than an unhandled AttributeError in the request handler.
        url = ((gen.get("output") or {}).get("audio") or {}).get("url")
    except urllib.error.HTTPError as e:
        # errors="replace": a non-UTF-8 error body must not raise from inside
        # the handler and mask the HTTP status we are trying to report.
        detail = e.read().decode("utf-8", errors="replace")[:200]
        return None, f"dashscope clone {e.code}: {detail}"
    except Exception as e:  # noqa: BLE001
        return None, f"dashscope clone error: {e}"
    if not url:
        return None, "no audio url in response: " + _json.dumps(gen)[:200]
    # urlopen also accepts file: and other schemes; only follow web URLs.
    if not str(url).lower().startswith(("https://", "http://")):
        return None, "unexpected audio url scheme: " + str(url)[:200]
    try:
        with urllib.request.urlopen(url, timeout=90) as r:
            return r.read(), None
    except Exception as e:  # noqa: BLE001
        return None, f"dashscope clone fetch error: {e}"
367
+
368
+
369
  @fastapi_app.post("/qwen-tts")
370
  async def qwen_tts(request: Request):
371
  body = await request.json()
 
387
  return Response(wav, media_type="audio/wav", headers={"Cache-Control": "no-store"})
388
  if not DASHSCOPE_KEY:
389
  return Response("DASHSCOPE_API_KEY not set (or run with TINY_TTS_MODE=local)", status_code=503)
390
+ if ref_audio: # clone the prior voice's timbre (enroll → synthesize)
391
+ wav, err = await asyncio.to_thread(_dashscope_voice_clone, text, ref_audio, ref_text)
392
+ else:
393
+ wav, err = await asyncio.to_thread(_dashscope_voice_design, text, instruct)
394
  if err:
395
  return Response(err, status_code=502)
396
  return Response(wav, media_type="audio/wav", headers={"Cache-Control": "no-store"})
web/personaPanel.js CHANGED
@@ -13,6 +13,22 @@ import { listPersonas, savePersona, removePersona, onRosterChange, putAudio, get
13
  const CLASSES = ['Warrior', 'Ranger', 'Monk', 'Assassin', 'Mage', 'Paladin', 'Cleric', 'Knight']
14
  const MAX_TOKENS = 200 // persona JSON + a voice line + a quote
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  function el(tag, props = {}, kids = []) {
17
  const n = document.createElement(tag)
18
  for (const [k, v] of Object.entries(props)) {
@@ -24,8 +40,94 @@ function el(tag, props = {}, kids = []) {
24
  return n
25
  }
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  export function mountPersonaPanel(host) {
28
- const sel = el('select', { class: 'persona-input' }, CLASSES.map((c) => el('option', { value: c }, c)))
 
29
  const seed = el('input', { class: 'persona-input', type: 'text', placeholder: 'a word, a vibe… (optional)' })
30
  const stats = el('div', { class: 'persona-stats' })
31
  const status = el('div', { class: 'persona-status' }, 'Runs on your device — no cloud.')
 
13
  const CLASSES = ['Warrior', 'Ranger', 'Monk', 'Assassin', 'Mage', 'Paladin', 'Cleric', 'Knight']
14
  const MAX_TOKENS = 200 // persona JSON + a voice line + a quote
15
 
16
+ // Each class shows the idle pose of a fitting sprite (character slug → see
17
+ // characters.json). The sheet's front-right row is animated as a tiny looping
18
+ // icon beside the class name in the dropdown.
19
+ const CLASS_SLUG = {
20
+ Warrior: 'true-heroes-iii-fighter',
21
+ Ranger: 'true-heroes-iii-ranger',
22
+ Monk: 'true-heroes-ii-bard',
23
+ Assassin: 'true-heroes-iv-ninja-assassin',
24
+ Mage: 'true-heroes-iii-wizard',
25
+ Paladin: 'true-heroes-ii-paladin',
26
+ Cleric: 'true-heroes-ii-cleric',
27
+ Knight: 'rts-humans-knight',
28
+ }
29
+ const ICON_PX = 26
30
+ const spriteUrl = (u) => (u || '').replace('/assets/', '/sprites/') // sheets serve at /sprites
31
+
32
  function el(tag, props = {}, kids = []) {
33
  const n = document.createElement(tag)
34
  for (const [k, v] of Object.entries(props)) {
 
40
  return n
41
  }
42
 
43
+ // Idle sheets are 4 rows (facings) × N frame columns; cell = height/4. Show one
44
+ // front-right cell (row 0) scaled to ICON_PX and step across the columns to loop
45
+ // the idle animation — no canvas, just a sized background + the Web Animations API.
46
/**
 * Show the idle sheet at `idleUrl` inside `box` as a small looping icon.
 *
 * The sheet is treated as 4 rows of square cells (cell = height / 4). Row 0 is
 * scaled so one cell is ICON_PX wide and the background is stepped across the
 * columns with the Web Animations API.
 *
 * Any animation already running on `box` is cancelled first. If the function
 * is called again for the same box before the image has loaded, only the most
 * recent request is applied.
 *
 * @param {HTMLElement} box     element whose background becomes the icon
 * @param {string}      idleUrl URL of the idle sprite sheet
 */
function animateIdleIcon(box, idleUrl) {
  const stopAnimations = () => box.getAnimations?.().forEach((a) => a.cancel())
  stopAnimations()
  box.style.backgroundImage = ''
  // Record the latest request on the element: image loads finish in any order,
  // and an older sheet must not overwrite a newer one when its onload is late.
  box.__idleIconUrl = idleUrl
  const img = new Image()
  img.onload = () => {
    if (box.__idleIconUrl !== idleUrl) return // superseded by a newer call
    if (!img.naturalWidth || !img.naturalHeight) return // nothing drawable; avoids NaN sizes
    const cell = img.naturalHeight / 4
    const cols = Math.max(1, Math.round(img.naturalWidth / cell))
    const rows = Math.max(1, Math.round(img.naturalHeight / cell))
    // A repeat call for the same URL may already have started a loop.
    stopAnimations()
    box.style.backgroundImage = `url("${idleUrl}")`
    box.style.backgroundSize = `${cols * ICON_PX}px ${rows * ICON_PX}px`
    box.style.backgroundPosition = '0 0'
    // Guarded like getAnimations above: without the API the first frame stays static.
    box.animate?.(
      [{ backgroundPosition: '0 0' }, { backgroundPosition: `-${cols * ICON_PX}px 0` }],
      { duration: cols * 110, iterations: Infinity, easing: `steps(${cols})` },
    )
  }
  img.src = idleUrl
}
64
+
65
+ // A class picker that mirrors a native <select> API (.value get/set, 'change'
66
+ // event) but renders an animated idle icon beside each class. Icons attach once
67
+ // characters.json resolves (root.setIcons), so the menu is usable immediately.
68
/**
 * Build a class picker that stands in for a native <select>.
 *
 * The returned root element exposes:
 *   - `value` (get/set): the selected class; setting a name that is not in
 *     `classes` is ignored.
 *   - a bubbling 'change' event, fired only when the selection actually changes.
 *   - `setIcons(map)`: class -> idle sheet URL; animates the menu icons and the
 *     trigger icon for whichever classes the map covers.
 *   - `destroy()`: removes the document-level listeners this picker installed.
 *
 * @param {string[]} classes selectable class names; the first is the initial value
 * @returns {HTMLElement} the dropdown root
 */
function makeClassDropdown(classes) {
  const triggerIco = el('span', { class: 'persona-class-ico' })
  const triggerLabel = el('span', { class: 'persona-classdrop-label' })
  const trigger = el('button', { class: 'persona-input persona-classdrop-trigger', type: 'button' },
    [triggerIco, triggerLabel, el('span', { class: 'persona-classdrop-chev' }, '▾')])
  const menu = el('div', { class: 'persona-classdrop-menu' })
  const root = el('div', { class: 'persona-classdrop' }, [trigger, menu])

  let value = classes[0]
  let icons = {} // class → idle sheet URL (filled by setIcons)
  const optIco = {} // class → menu icon span

  // Single place that flips the open state, so the CSS class and the ARIA
  // state exposed on the trigger can never disagree.
  const setOpen = (open) => {
    root.classList.toggle('open', open)
    trigger.setAttribute('aria-expanded', String(open))
  }
  const close = () => setOpen(false)
  const markSelected = () => items.forEach((it, i) => it.classList.toggle('sel', classes[i] === value))

  function set(c) {
    if (!classes.includes(c)) return
    const changed = c !== value
    value = c
    triggerLabel.textContent = c
    markSelected()
    if (icons[c]) animateIdleIcon(triggerIco, icons[c])
    // Native <select> 'change' events bubble; match that so delegated
    // listeners on an ancestor see this picker too.
    if (changed) root.dispatchEvent(new Event('change', { bubbles: true }))
  }

  const items = classes.map((c) => {
    const ico = el('span', { class: 'persona-class-ico' })
    optIco[c] = ico
    const it = el('button', { class: 'persona-classdrop-opt', type: 'button' }, [ico, el('span', {}, c)])
    it.addEventListener('click', () => { set(c); close() })
    return it
  })
  menu.append(...items)

  trigger.addEventListener('click', (e) => {
    e.stopPropagation()
    setOpen(!root.classList.contains('open'))
  })
  // Named handlers so destroy() can detach them; anonymous listeners on
  // `document` would outlive the picker.
  const onDocClick = (e) => { if (!root.contains(e.target)) close() }
  const onDocKeydown = (e) => { if (e.key === 'Escape') close() }
  document.addEventListener('click', onDocClick)
  document.addEventListener('keydown', onDocKeydown)

  root.setIcons = (map) => {
    icons = map
    for (const c of classes) if (map[c]) animateIdleIcon(optIco[c], map[c])
    if (map[value]) animateIdleIcon(triggerIco, map[value])
  }
  root.destroy = () => {
    document.removeEventListener('click', onDocClick)
    document.removeEventListener('keydown', onDocKeydown)
  }
  Object.defineProperty(root, 'value', { get: () => value, set: (v) => set(v) })

  // Initial render: label and highlight the default without firing 'change'.
  triggerLabel.textContent = value
  markSelected()
  setOpen(false)
  return root
}
112
+
113
+ // Resolve each class's idle sheet via characters.json and light up the dropdown.
114
/**
 * Fetch /sprites/characters.json, look up each class's sprite slug from
 * CLASS_SLUG, and pass the resulting class -> idle-sheet-URL map to
 * `dropdown.setIcons`. Classes whose character has no `idle` entry are left
 * out of the map. Any failure is swallowed on purpose: the dropdown keeps
 * working with text labels only.
 *
 * @param {{ setIcons: (map: Object) => void }} dropdown picker to light up
 */
async function loadClassIcons(dropdown) {
  try {
    const response = await fetch('/sprites/characters.json')
    const manifest = await response.json()

    // Index every character of every pack by slug.
    const charactersBySlug = {}
    for (const pack of manifest.packs || []) {
      for (const character of pack.characters || []) {
        charactersBySlug[character.slug] = character
      }
    }

    const iconByClass = {}
    for (const className of Object.keys(CLASS_SLUG)) {
      const idleSheet = charactersBySlug[CLASS_SLUG[className]]?.idle
      if (!idleSheet) continue
      iconByClass[className] = spriteUrl(idleSheet)
    }
    dropdown.setIcons(iconByClass)
  } catch {
    /* no icons — the dropdown still works with labels only */
  }
}
127
+
128
  export function mountPersonaPanel(host) {
129
+ const sel = makeClassDropdown(CLASSES)
130
+ loadClassIcons(sel)
131
  const seed = el('input', { class: 'persona-input', type: 'text', placeholder: 'a word, a vibe… (optional)' })
132
  const stats = el('div', { class: 'persona-stats' })
133
  const status = el('div', { class: 'persona-status' }, 'Runs on your device — no cloud.')
web/personaPrompts.js CHANGED
@@ -10,9 +10,11 @@ export const PERSONA_SYSTEM =
10
  ' "specialty": a 1-3 word combat specialty,\n' +
11
  ' "personality": a 1-3 word personality tag,\n' +
12
  ' "vibe": a 1-3 word vibe,\n' +
13
- ' "voice": one sentence describing how they SOUND for a text-to-speech voice — gender, ' +
14
- 'age, pitch, accent, texture, pace and emotion (e.g. "a gravelly, battle-worn male ' +
15
- 'baritone, slow and weary, with a faint highland accent"),\n' +
 
 
16
  ' "quote": one short punchy line they say aloud — a battle-cry or wry remark, ' +
17
  'first person, under 15 words.\n' +
18
  'Output strictly valid JSON. No preamble, no code fences, no commentary.'
 
10
  ' "specialty": a 1-3 word combat specialty,\n' +
11
  ' "personality": a 1-3 word personality tag,\n' +
12
  ' "vibe": a 1-3 word vibe,\n' +
13
+ ' "voice": one sentence describing how THIS hero sounds for a text-to-speech voice — ' +
14
+ 'pick a gender, age, pitch, accent, texture, pace and emotion that FIT their class and ' +
15
+ 'personality, and make it DISTINCT from a generic gruff soldier. Vary it widely between ' +
16
+ 'heroes — e.g. a bright quick-tongued young woman, a wheezing ancient sage, a velvet-smooth ' +
17
+ 'rogue, a booming jolly giant, a cold precise duelist, a sing-song forest spirit,\n' +
18
  ' "quote": one short punchy line they say aloud — a battle-cry or wry remark, ' +
19
  'first person, under 15 words.\n' +
20
  'Output strictly valid JSON. No preamble, no code fences, no commentary.'
web/shell/persona.css CHANGED
@@ -30,6 +30,35 @@
30
  background: var(--p-card) !important; border: 1.5px solid var(--p-ink) !important;
31
  border-radius: 0 !important; padding: 7px 9px !important; width: 100%;
32
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  .persona-go {
34
  margin-top: 10px; font-family: var(--p-mono) !important; font-size: 12px !important;
35
  font-weight: 700 !important; letter-spacing: .04em; text-transform: uppercase;
 
30
  background: var(--p-card) !important; border: 1.5px solid var(--p-ink) !important;
31
  border-radius: 0 !important; padding: 7px 9px !important; width: 100%;
32
  }
33
+ /* ── Class picker — a custom dropdown with an animated idle-pose icon per class.
34
+ Mirrors a native <select> (.persona-input chrome) but each row shows the
35
+ character's looping idle sprite beside the name. ──────────────────────────── */
36
+ .persona-classdrop { position: relative; }
37
+ .persona-classdrop-trigger {
38
+ display: flex !important; align-items: center; gap: 8px; cursor: pointer; text-align: left;
39
+ }
40
+ .persona-classdrop-label { flex: 1; min-width: 0; }
41
+ .persona-classdrop-chev { color: var(--p-muted); font-size: 11px; transition: transform .12s; }
42
+ .persona-classdrop.open .persona-classdrop-chev { transform: rotate(180deg); }
43
+ .persona-class-ico {
44
+ width: 26px; height: 26px; flex-shrink: 0; display: inline-block;
45
+ background-repeat: no-repeat; background-position: 0 0; image-rendering: pixelated;
46
+ }
47
+ .persona-classdrop-menu {
48
+ display: none; position: absolute; top: calc(100% + 2px); left: 0; right: 0; z-index: 30;
49
+ background: var(--p-card); border: 1.5px solid var(--p-ink);
50
+ box-shadow: 3px 3px 0 var(--p-transmit); max-height: 300px; overflow-y: auto;
51
+ }
52
+ .persona-classdrop.open .persona-classdrop-menu { display: block; }
53
+ .persona-classdrop-opt {
54
+ display: flex; align-items: center; gap: 8px; width: 100%; cursor: pointer; text-align: left;
55
+ font-family: var(--p-sans); font-size: 14px; color: var(--p-ink);
56
+ background: transparent; border: 0; border-bottom: 1px solid var(--p-paper-2); padding: 5px 9px;
57
+ }
58
+ .persona-classdrop-opt:last-child { border-bottom: 0; }
59
+ .persona-classdrop-opt:hover { background: var(--p-paper-2); }
60
+ .persona-classdrop-opt.sel { background: var(--p-ink); color: var(--p-paper); }
61
+
62
  .persona-go {
63
  margin-top: 10px; font-family: var(--p-mono) !important; font-size: 12px !important;
64
  font-weight: 700 !important; letter-spacing: .04em; text-transform: uppercase;