Spaces:
Running
Persona voice: cloud voice-clone, varied voice designs, animated class picker
Browse files
- app.py: DashScope cloud voice-clone for prod — two-call enroll
(qwen-voice-enrollment) → synthesize (qwen3-tts-vc-2026-01-22), then fetch
the returned OSS audio URL. Wired into /qwen-tts when ref_audio is present
(mirrors the local design→clone flow). Verified flow returns 200 + audio
when the account's paid tier is enabled.
- personaPrompts.js: voice field now asks for a DISTINCT, class-fit voice with
wide variety between heroes (was producing near-identical gruff voices).
- personaPanel.js/persona.css: class picker is now a custom dropdown showing
each class's looping idle sprite (front-right row, Web Animations API), wired
to characters.json; mirrors a native <select> (.value, 'change').
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
- app.py +52 -1
- web/personaPanel.js +103 -1
- web/personaPrompts.js +5 -3
- web/shell/persona.css +29 -0
|
@@ -318,6 +318,54 @@ def _dashscope_voice_design(text, instruct):
|
|
| 318 |
return base64.b64decode(b64), None
|
| 319 |
|
| 320 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 321 |
@fastapi_app.post("/qwen-tts")
|
| 322 |
async def qwen_tts(request: Request):
|
| 323 |
body = await request.json()
|
|
@@ -339,7 +387,10 @@ async def qwen_tts(request: Request):
|
|
| 339 |
return Response(wav, media_type="audio/wav", headers={"Cache-Control": "no-store"})
|
| 340 |
if not DASHSCOPE_KEY:
|
| 341 |
return Response("DASHSCOPE_API_KEY not set (or run with TINY_TTS_MODE=local)", status_code=503)
|
| 342 |
-
|
|
|
|
|
|
|
|
|
|
| 343 |
if err:
|
| 344 |
return Response(err, status_code=502)
|
| 345 |
return Response(wav, media_type="audio/wav", headers={"Cache-Control": "no-store"})
|
|
|
|
| 318 |
return base64.b64decode(b64), None
|
| 319 |
|
| 320 |
|
| 321 |
+
# Voice CLONE on the cloud is a TWO-CALL flow (mirrors the open-weights design→clone):
|
| 322 |
+
# 1. enroll the reference WAV (qwen-voice-enrollment) → a voice_id
|
| 323 |
+
# 2. synthesize new words in that timbre (qwen3-tts-vc-…) → an OSS-signed audio URL
|
| 324 |
+
# Synthesis returns the audio as a URL (not base64), so we fetch the bytes ourselves.
|
| 325 |
+
_DASHSCOPE_VC_MODEL = os.environ.get("DASHSCOPE_VC_MODEL", "qwen3-tts-vc-2026-01-22")
|
| 326 |
+
_DASHSCOPE_GEN_URL = _DASHSCOPE_BASE + "/api/v1/services/aigc/multimodal-generation/generation"
|
| 327 |
+
|
| 328 |
+
|
| 329 |
+
def _dashscope_post(url, payload):
    """POST ``payload`` as JSON to ``url`` and return the decoded JSON reply.

    The request carries the module's DashScope key as a bearer token and
    waits up to 90 seconds. Nothing is caught here: ``urllib`` errors
    (including ``HTTPError``) and JSON decode errors propagate to the caller.
    """
    body = _json.dumps(payload).encode()
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {DASHSCOPE_KEY}",
    }
    request = urllib.request.Request(url, data=body, method="POST", headers=headers)
    with urllib.request.urlopen(request, timeout=90) as resp:
        raw = resp.read()
    return _json.loads(raw.decode())
|
| 335 |
+
|
| 336 |
+
|
| 337 |
+
def _dashscope_voice_clone(text, ref_audio_b64, ref_text):
    """Clone the reference voice on DashScope and speak ``text`` in it.

    Two API calls followed by a download:
      1. enroll ``ref_audio_b64`` (sent as a WAV data URI) with the
         ``qwen-voice-enrollment`` model to obtain a voice id;
      2. synthesize ``text`` with ``_DASHSCOPE_VC_MODEL`` using that voice;
      3. fetch the audio from the URL found at ``output.audio.url``.

    Args:
        text: The words to synthesize.
        ref_audio_b64: Base64 of the reference audio, without a ``data:`` prefix.
        ref_text: Accepted so the signature matches the caller; this function
            does not send it to the API.

    Returns:
        ``(audio_bytes, None)`` on success, or ``(None, error_message)`` when
        any step fails. Failures are reported through the tuple rather than
        raised.
    """
    # NOTE(review): every call enrolls a fresh voice and nothing here deletes
    # it afterwards — confirm whether the account's enrolled-voice quota needs
    # an explicit cleanup step.
    try:
        enroll = _dashscope_post(_DASHSCOPE_URL, {
            "model": "qwen-voice-enrollment",
            "input": {
                "action": "create",
                "target_model": _DASHSCOPE_VC_MODEL,
                "preferred_name": "tinyarmy",
                "audio": {"data": "data:audio/wav;base64," + ref_audio_b64},
            },
        })
        voice_id = (enroll.get("output") or {}).get("voice")
        if not voice_id:
            return None, "no voice_id from enrollment: " + _json.dumps(enroll)[:200]
        gen = _dashscope_post(_DASHSCOPE_GEN_URL, {
            "model": _DASHSCOPE_VC_MODEL,
            "input": {"text": text, "voice": voice_id},
        })
    except urllib.error.HTTPError as e:
        # Decode leniently: an error body that is not valid UTF-8 must not
        # raise out of this handler and break the (audio, error) contract.
        detail = e.read().decode("utf-8", errors="replace")[:200]
        return None, f"dashscope clone {e.code}: {detail}"
    except Exception as e:  # noqa: BLE001
        return None, f"dashscope clone error: {e}"

    # The reply is untrusted JSON: a non-dict at any level would make .get()
    # raise, so treat that the same as a missing URL.
    try:
        url = ((gen.get("output") or {}).get("audio") or {}).get("url")
    except AttributeError:
        url = None
    if not url or not isinstance(url, str):
        return None, "no audio url in response: " + _json.dumps(gen)[:200]
    # urlopen also understands file:, ftp: and data: URLs; only follow web URLs.
    if not url.lower().startswith(("http://", "https://")):
        return None, "dashscope clone fetch error: unsupported audio url scheme"

    try:
        with urllib.request.urlopen(url, timeout=90) as r:
            return r.read(), None
    except Exception as e:  # noqa: BLE001
        return None, f"dashscope clone fetch error: {e}"
|
| 367 |
+
|
| 368 |
+
|
| 369 |
@fastapi_app.post("/qwen-tts")
|
| 370 |
async def qwen_tts(request: Request):
|
| 371 |
body = await request.json()
|
|
|
|
| 387 |
return Response(wav, media_type="audio/wav", headers={"Cache-Control": "no-store"})
|
| 388 |
if not DASHSCOPE_KEY:
|
| 389 |
return Response("DASHSCOPE_API_KEY not set (or run with TINY_TTS_MODE=local)", status_code=503)
|
| 390 |
+
if ref_audio: # clone the prior voice's timbre (enroll → synthesize)
|
| 391 |
+
wav, err = await asyncio.to_thread(_dashscope_voice_clone, text, ref_audio, ref_text)
|
| 392 |
+
else:
|
| 393 |
+
wav, err = await asyncio.to_thread(_dashscope_voice_design, text, instruct)
|
| 394 |
if err:
|
| 395 |
return Response(err, status_code=502)
|
| 396 |
return Response(wav, media_type="audio/wav", headers={"Cache-Control": "no-store"})
|
|
@@ -13,6 +13,22 @@ import { listPersonas, savePersona, removePersona, onRosterChange, putAudio, get
|
|
| 13 |
const CLASSES = ['Warrior', 'Ranger', 'Monk', 'Assassin', 'Mage', 'Paladin', 'Cleric', 'Knight']
|
| 14 |
const MAX_TOKENS = 200 // persona JSON + a voice line + a quote
|
| 15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
function el(tag, props = {}, kids = []) {
|
| 17 |
const n = document.createElement(tag)
|
| 18 |
for (const [k, v] of Object.entries(props)) {
|
|
@@ -24,8 +40,94 @@ function el(tag, props = {}, kids = []) {
|
|
| 24 |
return n
|
| 25 |
}
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
export function mountPersonaPanel(host) {
|
| 28 |
-
const sel =
|
|
|
|
| 29 |
const seed = el('input', { class: 'persona-input', type: 'text', placeholder: 'a word, a vibe… (optional)' })
|
| 30 |
const stats = el('div', { class: 'persona-stats' })
|
| 31 |
const status = el('div', { class: 'persona-status' }, 'Runs on your device — no cloud.')
|
|
|
|
| 13 |
const CLASSES = ['Warrior', 'Ranger', 'Monk', 'Assassin', 'Mage', 'Paladin', 'Cleric', 'Knight']
|
| 14 |
const MAX_TOKENS = 200 // persona JSON + a voice line + a quote
|
| 15 |
|
| 16 |
+
// Each class shows the idle pose of a fitting sprite (character slug → see
// characters.json). The sheet's front-right row is animated as a tiny looping
// icon beside the class name in the dropdown.
// Keys are the class names listed in CLASSES; values are character slugs.
const CLASS_SLUG = {
  Warrior: 'true-heroes-iii-fighter',
  Ranger: 'true-heroes-iii-ranger',
  Monk: 'true-heroes-ii-bard',
  Assassin: 'true-heroes-iv-ninja-assassin',
  Mage: 'true-heroes-iii-wizard',
  Paladin: 'true-heroes-ii-paladin',
  Cleric: 'true-heroes-ii-cleric',
  Knight: 'rts-humans-knight',
}
// Edge length of one rendered icon cell, in CSS pixels. The .persona-class-ico
// rule in persona.css is sized 26px to match — keep the two in sync.
const ICON_PX = 26
// Rewrites the first '/assets/' in a sheet path to '/sprites/'; a missing path
// yields an empty string.
const spriteUrl = (u) => (u || '').replace('/assets/', '/sprites/') // sheets serve at /sprites
|
| 31 |
+
|
| 32 |
function el(tag, props = {}, kids = []) {
|
| 33 |
const n = document.createElement(tag)
|
| 34 |
for (const [k, v] of Object.entries(props)) {
|
|
|
|
| 40 |
return n
|
| 41 |
}
|
| 42 |
|
| 43 |
+
// Idle sheets are 4 rows (facings) × N frame columns; cell = height/4. Show one
// front-right cell (row 0) scaled to ICON_PX and step across the columns to loop
// the idle animation — no canvas, just a sized background + the Web Animations API.

// Most recent sheet load requested for each icon box. A load that finishes
// after a newer request was made for the same box is ignored, so a slow earlier
// image can neither overwrite the newer sheet nor stack a second animation.
const idleIconLoads = new WeakMap()

/**
 * Start (or restart) the looping idle animation on an icon box.
 *
 * @param {HTMLElement} box      element whose background shows the sprite
 * @param {string}      idleUrl  URL of the idle sprite sheet
 */
function animateIdleIcon(box, idleUrl) {
  // Reset first so the box is blank while the new sheet loads.
  box.getAnimations?.().forEach((a) => a.cancel())
  box.style.backgroundImage = ''
  const img = new Image()
  idleIconLoads.set(box, img)
  img.onload = () => {
    if (idleIconLoads.get(box) !== img) return // superseded by a newer call
    const cell = (img.naturalHeight / 4) || img.naturalHeight
    // No intrinsic height means the column count below would be non-finite
    // and steps() would throw; leave the box blank instead.
    if (!(cell > 0)) return
    const cols = Math.max(1, Math.round(img.naturalWidth / cell))
    const rows = Math.max(1, Math.round(img.naturalHeight / cell))
    box.style.backgroundImage = `url("${idleUrl}")`
    box.style.backgroundSize = `${cols * ICON_PX}px ${rows * ICON_PX}px`
    box.style.backgroundPosition = '0 0'
    // steps(cols) holds each frame for an equal slice of the loop, moving the
    // background one cell to the left per step.
    box.animate(
      [{ backgroundPosition: '0 0' }, { backgroundPosition: `-${cols * ICON_PX}px 0` }],
      { duration: cols * 110, iterations: Infinity, easing: `steps(${cols})` },
    )
  }
  img.src = idleUrl
}
|
| 64 |
+
|
| 65 |
+
// A class picker that mirrors a native <select> API (.value get/set, 'change'
// event) but renders an animated idle icon beside each class. Icons attach once
// characters.json resolves (root.setIcons), so the menu is usable immediately.
/**
 * @param {string[]} classes  selectable class names; the first is the initial value
 * @returns {HTMLElement} root element exposing `.value` (get/set), a 'change'
 *   event fired when the value actually changes, and `.setIcons(map)` where
 *   `map` is class name → idle sheet URL
 */
function makeClassDropdown(classes) {
  const triggerIco = el('span', { class: 'persona-class-ico' })
  const triggerLabel = el('span', { class: 'persona-classdrop-label' })
  const trigger = el('button', { class: 'persona-input persona-classdrop-trigger', type: 'button' },
    [triggerIco, triggerLabel, el('span', { class: 'persona-classdrop-chev' }, '▾')])
  // Expose the open/closed state to assistive tech.
  trigger.setAttribute('aria-haspopup', 'true')
  trigger.setAttribute('aria-expanded', 'false')
  const menu = el('div', { class: 'persona-classdrop-menu' })
  const root = el('div', { class: 'persona-classdrop' }, [trigger, menu])

  let value = classes[0]
  let icons = {}    // class → idle sheet URL (filled by setIcons)
  const optIco = {} // class → menu icon span

  const items = classes.map((c) => {
    const ico = el('span', { class: 'persona-class-ico' }); optIco[c] = ico
    const it = el('button', { class: 'persona-classdrop-opt', type: 'button' }, [ico, el('span', {}, c)])
    it.addEventListener('click', () => { set(c); closeMenu() })
    return it
  })
  menu.append(...items)

  // Animate `url` in `box`, or blank the box when there is no sheet for it —
  // otherwise the trigger would keep showing the previously selected class.
  function showIcon(box, url) {
    if (url) { animateIdleIcon(box, url); return }
    box.getAnimations?.().forEach((a) => a.cancel())
    box.style.backgroundImage = ''
  }

  function set(c) {
    if (!classes.includes(c)) return
    const changed = c !== value
    value = c
    triggerLabel.textContent = c
    items.forEach((it, i) => it.classList.toggle('sel', classes[i] === c))
    showIcon(triggerIco, icons[c])
    if (changed) root.dispatchEvent(new Event('change'))
  }

  // Document-level listeners exist only while the menu is open, so a dropdown
  // that is closed (or has been discarded) leaves nothing attached to `document`.
  const onDocClick = (e) => { if (!root.contains(e.target)) closeMenu() }
  const onDocKey = (e) => { if (e.key === 'Escape') closeMenu() }
  function openMenu() {
    root.classList.add('open')
    trigger.setAttribute('aria-expanded', 'true')
    document.addEventListener('click', onDocClick)
    document.addEventListener('keydown', onDocKey)
  }
  function closeMenu() {
    root.classList.remove('open')
    trigger.setAttribute('aria-expanded', 'false')
    document.removeEventListener('click', onDocClick)
    document.removeEventListener('keydown', onDocKey)
  }
  trigger.addEventListener('click', (e) => {
    e.stopPropagation()
    if (root.classList.contains('open')) closeMenu()
    else openMenu()
  })

  root.setIcons = (map) => {
    icons = map || {} // tolerate a missing map: labels-only is a valid state
    for (const c of classes) showIcon(optIco[c], icons[c])
    showIcon(triggerIco, icons[value])
  }
  Object.defineProperty(root, 'value', { get: () => value, set: (v) => set(v) })
  triggerLabel.textContent = value
  items.forEach((it, i) => it.classList.toggle('sel', classes[i] === value))
  return root
}
|
| 112 |
+
|
| 113 |
+
// Look up every class's idle sheet in characters.json and hand the resulting
// class → URL map to the dropdown. Any failure is swallowed on purpose: the
// dropdown then simply stays labels-only.
async function loadClassIcons(dropdown) {
  try {
    const response = await fetch('/sprites/characters.json')
    const catalog = await response.json()

    // Index every character of every pack by slug (a later duplicate wins).
    const bySlug = {}
    for (const pack of catalog.packs || []) {
      for (const character of pack.characters || []) {
        bySlug[character.slug] = character
      }
    }

    // Keep only the classes whose character actually has an idle sheet.
    const map = {}
    for (const cls of Object.keys(CLASS_SLUG)) {
      const sheet = bySlug[CLASS_SLUG[cls]]?.idle
      if (!sheet) continue
      map[cls] = spriteUrl(sheet)
    }

    dropdown.setIcons(map)
  } catch { /* no icons — the dropdown still works with labels only */ }
}
|
| 127 |
+
|
| 128 |
export function mountPersonaPanel(host) {
|
| 129 |
+
const sel = makeClassDropdown(CLASSES)
|
| 130 |
+
loadClassIcons(sel)
|
| 131 |
const seed = el('input', { class: 'persona-input', type: 'text', placeholder: 'a word, a vibe… (optional)' })
|
| 132 |
const stats = el('div', { class: 'persona-stats' })
|
| 133 |
const status = el('div', { class: 'persona-status' }, 'Runs on your device — no cloud.')
|
|
@@ -10,9 +10,11 @@ export const PERSONA_SYSTEM =
|
|
| 10 |
' "specialty": a 1-3 word combat specialty,\n' +
|
| 11 |
' "personality": a 1-3 word personality tag,\n' +
|
| 12 |
' "vibe": a 1-3 word vibe,\n' +
|
| 13 |
-
' "voice": one sentence describing how
|
| 14 |
-
'age, pitch, accent, texture, pace and emotion
|
| 15 |
-
'
|
|
|
|
|
|
|
| 16 |
' "quote": one short punchy line they say aloud — a battle-cry or wry remark, ' +
|
| 17 |
'first person, under 15 words.\n' +
|
| 18 |
'Output strictly valid JSON. No preamble, no code fences, no commentary.'
|
|
|
|
| 10 |
' "specialty": a 1-3 word combat specialty,\n' +
|
| 11 |
' "personality": a 1-3 word personality tag,\n' +
|
| 12 |
' "vibe": a 1-3 word vibe,\n' +
|
| 13 |
+
' "voice": one sentence describing how THIS hero sounds for a text-to-speech voice — ' +
|
| 14 |
+
'pick a gender, age, pitch, accent, texture, pace and emotion that FIT their class and ' +
|
| 15 |
+
'personality, and make it DISTINCT from a generic gruff soldier. Vary it widely between ' +
|
| 16 |
+
'heroes — e.g. a bright quick-tongued young woman, a wheezing ancient sage, a velvet-smooth ' +
|
| 17 |
+
'rogue, a booming jolly giant, a cold precise duelist, a sing-song forest spirit,\n' +
|
| 18 |
' "quote": one short punchy line they say aloud — a battle-cry or wry remark, ' +
|
| 19 |
'first person, under 15 words.\n' +
|
| 20 |
'Output strictly valid JSON. No preamble, no code fences, no commentary.'
|
|
@@ -30,6 +30,35 @@
|
|
| 30 |
background: var(--p-card) !important; border: 1.5px solid var(--p-ink) !important;
|
| 31 |
border-radius: 0 !important; padding: 7px 9px !important; width: 100%;
|
| 32 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
.persona-go {
|
| 34 |
margin-top: 10px; font-family: var(--p-mono) !important; font-size: 12px !important;
|
| 35 |
font-weight: 700 !important; letter-spacing: .04em; text-transform: uppercase;
|
|
|
|
| 30 |
background: var(--p-card) !important; border: 1.5px solid var(--p-ink) !important;
|
| 31 |
border-radius: 0 !important; padding: 7px 9px !important; width: 100%;
|
| 32 |
}
|
| 33 |
+
/* ── Class picker — a custom dropdown with an animated idle-pose icon per class.
   Mirrors a native <select> (.persona-input chrome) but each row shows the
   character's looping idle sprite beside the name. ──────────────────────────── */
/* Root is the positioning context for the absolutely-positioned menu. */
.persona-classdrop { position: relative; }
.persona-classdrop-trigger {
  display: flex !important; align-items: center; gap: 8px; cursor: pointer; text-align: left;
}
.persona-classdrop-label { flex: 1; min-width: 0; }
.persona-classdrop-chev { color: var(--p-muted); font-size: 11px; transition: transform .12s; }
/* .open is toggled on the root by the trigger's click handler in personaPanel.js. */
.persona-classdrop.open .persona-classdrop-chev { transform: rotate(180deg); }
/* Icon cell: 26px square, matching ICON_PX in personaPanel.js — keep in sync. */
.persona-class-ico {
  width: 26px; height: 26px; flex-shrink: 0; display: inline-block;
  background-repeat: no-repeat; background-position: 0 0; image-rendering: pixelated;
}
/* Hidden until the root carries .open; scrolls once taller than 300px. */
.persona-classdrop-menu {
  display: none; position: absolute; top: calc(100% + 2px); left: 0; right: 0; z-index: 30;
  background: var(--p-card); border: 1.5px solid var(--p-ink);
  box-shadow: 3px 3px 0 var(--p-transmit); max-height: 300px; overflow-y: auto;
}
.persona-classdrop.open .persona-classdrop-menu { display: block; }
.persona-classdrop-opt {
  display: flex; align-items: center; gap: 8px; width: 100%; cursor: pointer; text-align: left;
  font-family: var(--p-sans); font-size: 14px; color: var(--p-ink);
  background: transparent; border: 0; border-bottom: 1px solid var(--p-paper-2); padding: 5px 9px;
}
.persona-classdrop-opt:last-child { border-bottom: 0; }
.persona-classdrop-opt:hover { background: var(--p-paper-2); }
/* Declared after :hover (same specificity) so the selected row keeps its
   inverted colours while hovered. */
.persona-classdrop-opt.sel { background: var(--p-ink); color: var(--p-paper); }
|
| 61 |
+
|
| 62 |
.persona-go {
|
| 63 |
margin-top: 10px; font-family: var(--p-mono) !important; font-size: 12px !important;
|
| 64 |
font-weight: 700 !important; letter-spacing: .04em; text-transform: uppercase;
|