File size: 2,707 Bytes
3aafe4e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8000b92
 
 
 
 
 
 
 
 
 
 
 
 
 
3aafe4e
 
 
 
 
 
 
 
 
 
 
 
8000b92
3aafe4e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
// TTS engine: VoxCPM2 via a ZeroGPU sidecar, proxied through /voxcpm-tts so
// tokens and sidecar details stay server-side. Like Qwen3, this designs a voice
// from each hero's free-form voice description.
import { decodeAudio } from '/web/ttsAudio.js'

// Description text backing the "persona" voice. It starts out empty and is read
// lazily by that voice's desc() on every call, so any reassignment of _desc is
// picked up by the next lookup.
let _desc = ''

// Voice catalogue. Every entry has the shape { id, label, desc }, where desc()
// returns the free-form description text for that voice. The persona entry comes
// first and therefore doubles as the fallback used by get().
const VOICES = [
  { id: 'persona', label: '✨ Persona voice (designed)', desc: () => _desc },
  // Fixed presets, written as [id, label, description] rows.
  ...[
    ['veteran', 'Gruff veteran', 'A gravelly, battle-worn male baritone - slow, deliberate, weary, with a wry edge.'],
    ['herald', 'Bright herald', 'A clear, bright young male voice - brisk, energetic, projecting and confident.'],
    ['medic', 'Steady medic', 'A calm, warm female voice - measured pace, clear articulation, reassuring.'],
    ['rogue', 'Sly rogue', 'A low, smooth voice with a sly, amused lilt - unhurried, with a dangerous edge.'],
  ].map(([id, label, text]) => ({ id, label, desc: () => text })),
]

// Resolve a voice entry by id; an unknown id resolves to the first catalogue entry.
const get = (id) => {
  const match = VOICES.find((voice) => voice.id === id)
  return match || VOICES[0]
}

// POST a synthesis request to /voxcpm-tts and resolve with the raw response body.
//   text    - text to speak; forwarded unchanged in the JSON payload
//   voiceId - catalogue id resolved through get(); that voice's description is
//             trimmed and sent as `instruct`
// Resolves with the response's ArrayBuffer. On a non-OK status it throws an Error
// carrying the status code and the first 140 characters of the response text.
async function postSynthWav(text, voiceId) {
  const voice = get(voiceId)
  const instruct = (voice.desc() || '').trim()
  const payload = { text, instruct, language: 'English' }

  const resp = await fetch('/voxcpm-tts', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
  })
  if (resp.ok) return resp.arrayBuffer()

  // Cap the echoed body so the error message stays short.
  const detail = (await resp.text()).slice(0, 140)
  throw new Error(`VoxCPM ${resp.status}: ${detail}`)
}
// Synthesize speech for `text` with the given voice, then hand the raw bytes to
// decodeAudio (imported from /web/ttsAudio.js) and resolve with whatever it returns.
// NOTE(review): presumably decoded PCM, given the engine's mode: 'pcm' - confirm in ttsAudio.js.
const postSynth = async (text, voiceId) => {
  const wav = await postSynthWav(text, voiceId)
  return decodeAudio(wav)
}

// Base64-encode the bytes of `ab` (anything the Uint8Array constructor accepts,
// typically an ArrayBuffer) and return the encoded string.
function abToB64(ab) {
  const bytes = new Uint8Array(ab)
  const CHUNK = 0x8000
  const pieces = []
  // Convert in slices so no single fromCharCode call receives more than CHUNK
  // arguments; very long argument lists can exceed engine limits.
  for (let start = 0; start < bytes.length; start += CHUNK) {
    const slice = bytes.subarray(start, start + CHUNK)
    pieces.push(String.fromCharCode.apply(null, slice))
  }
  return btoa(pieces.join(''))
}
// POST a voice-clone request to /voxcpm-clone and resolve with the raw response body.
//   text     - text to speak; forwarded unchanged
//   refAb    - reference audio bytes, sent base64-encoded as `ref_audio`
//   refText  - sent as `ref_text`; a falsy value is sent as ''
//   instruct - sent as `instruct`; a falsy value is sent as ''
// Resolves with the response's ArrayBuffer. On a non-OK status it throws an Error
// carrying the status code and the first 140 characters of the response text.
async function postCloneWav(text, refAb, refText, instruct) {
  const payload = {
    text,
    ref_audio: abToB64(refAb),
    ref_text: refText || '',
    instruct: instruct || '',
    language: 'English',
  }

  const resp = await fetch('/voxcpm-clone', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
  })
  if (resp.ok) return resp.arrayBuffer()

  // Cap the echoed body so the error message stays short.
  const detail = (await resp.text()).slice(0, 140)
  throw new Error(`VoxCPM clone ${resp.status}: ${detail}`)
}

// Engine descriptor exported for the VoxCPM2 backend.
// NOTE(review): the flag fields (mode, needsDownload, networked, design,
// experimental) are presumably read by a host TTS registry - confirm their
// meaning against the consumer; this file only declares them.
export const engine = {
  mode: 'pcm',
  needsDownload: false,
  networked: true,
  design: true,
  id: 'voxcpm',
  label: 'VoxCPM2 · Voice Design (ZeroGPU)',
  experimental: true,

  // Always reports the engine as available.
  available() { return true },
  // Exposes the module's voice catalogue (the same array instance, not a copy).
  listVoices() { return VOICES },
  defaultVoice: 'persona',
  // Nothing to prepare: resolves immediately.
  async ensure() {},
  // Store the description used by the "persona" voice; falsy input becomes ''.
  setDesc(d) { _desc = (d || '').trim() },

  // Thin delegates to the module-level request helpers.
  synth(text, voiceId) { return postSynth(text, voiceId) },
  synthWav(text, voiceId) { return postSynthWav(text, voiceId) },
  cloneWav(text, refAb, refText, instruct) { return postCloneWav(text, refAb, refText, instruct) },

  backendLabel() { return 'ZeroGPU VoxCPM2' },
}