masbudjj committed on
Commit
483b791
·
verified ·
1 Parent(s): dfa30ff

Fix: Enhanced error handling and output format compatibility (#11)

Browse files

- Fix: Enhanced error handling and output format compatibility (a59f96f0ef821499d245d85e08dc2ae1dbab8015)

Files changed (1) hide show
  1. index.html +144 -225
index.html CHANGED
@@ -3,76 +3,50 @@
3
  <head>
4
  <meta charset="utf-8" />
5
  <meta name="viewport" content="width=device-width,initial-scale=1" />
6
- <title>🎙️ Multi-Voice TTS - Browser Edition</title>
7
  <link rel="stylesheet" href="assets/style.css" />
8
  </head>
9
  <body>
10
  <h1>🎙️ Multi-Voice Text-to-Speech</h1>
11
- <p class="subtitle">24 Unique Voices - 100% Browser-Based - Powered by SpeechT5</p>
12
 
13
  <div class="row">
14
  <!-- Left Column: Voice Selection -->
15
  <div class="col">
16
  <fieldset>
17
- <legend>🎭 Voice Selection (24 Voices)</legend>
18
 
19
- <label>Voice Character:</label>
20
- <select id="voiceSelect" style="font-size: 0.9rem;">
21
  <optgroup label="🇺🇸 American Female">
22
- <option value="af_default">Default - Neutral</option>
23
- <option value="af_warm">Warm - Friendly & Caring</option>
24
- <option value="af_bright">Bright - Energetic & Happy</option>
25
- <option value="af_soft">Soft - Gentle & Calm</option>
26
- <option value="af_clear">Clear - Professional</option>
27
- <option value="af_smooth">Smooth - Elegant</option>
28
  </optgroup>
29
  <optgroup label="🇺🇸 American Male">
30
- <option value="am_default">Default - Neutral</option>
31
- <option value="am_deep">Deep - Authoritative</option>
32
- <option value="am_friendly">Friendly - Approachable</option>
33
- <option value="am_strong">Strong - Confident</option>
34
- <option value="am_calm">Calm - Relaxed</option>
35
- <option value="am_professional">Professional - Business</option>
36
  </optgroup>
37
- <optgroup label="🇬🇧 British Female">
38
- <option value="bf_refined">Refined - Elegant</option>
39
- <option value="bf_bright">Bright - Cheerful</option>
40
- <option value="bf_soft">Soft - Gentle</option>
41
- <option value="bf_clear">Clear - Articulate</option>
42
  </optgroup>
43
- <optgroup label="🇬🇧 British Male">
44
- <option value="bm_distinguished">Distinguished - Formal</option>
45
- <option value="bm_smooth">Smooth - Sophisticated</option>
46
- <option value="bm_warm">Warm - Friendly</option>
47
- <option value="bm_strong">Strong - Commanding</option>
48
- </optgroup>
49
- <optgroup label="🌏 International">
50
- <option value="int_neutral">Neutral - Standard</option>
51
- <option value="int_soft">Soft - Gentle</option>
52
- <option value="int_clear">Clear - Professional</option>
53
- <option value="int_warm">Warm - Friendly</option>
54
  </optgroup>
55
  </select>
56
-
57
- <div class="mt-2" style="padding: 12px; background: rgba(99,102,241,0.1); border-radius: 8px;">
58
- <p class="muted" style="font-size: 0.85rem; margin: 0;">
59
- <strong>Selected:</strong> <span id="selectedVoice" style="color: var(--primary);">Default</span>
60
- </p>
61
- </div>
62
- </fieldset>
63
-
64
- <fieldset>
65
- <legend>🎨 Voice Customization</legend>
66
-
67
- <label>
68
- Pitch <span id="pitchVal">1.00</span>
69
- </label>
70
- <input id="pitch" type="range" min="0.5" max="1.5" step="0.05" value="1.0">
71
-
72
- <label>
73
- Energy <span id="energyVal">1.00</span>
74
- </label>
75
- <input id="energy" type="range" min="0.5" max="1.5" step="0.05" value="1.0">
76
  </fieldset>
77
 
78
  <fieldset>
@@ -89,9 +63,8 @@
89
  <div class="col">
90
  <fieldset>
91
  <legend>📝 Text Input</legend>
92
- <textarea id="txt" placeholder="Enter your text here...">Welcome! Choose from 24 unique voices. Each voice has distinct characteristics like pitch, tone, and energy.</textarea>
93
  <div class="mt-1">
94
- <span class="muted">Characters: <span id="charCount">0</span></span> &nbsp;|&nbsp;
95
  <span class="muted">Words: <span id="wordCount">0</span></span>
96
  </div>
97
  </fieldset>
@@ -99,22 +72,17 @@
99
  <fieldset>
100
  <legend>🎙️ Generate Audio</legend>
101
 
102
- <div style="display: flex; gap: 12px; margin-bottom: 16px;">
103
- <button id="go" style="flex: 1;">
104
- 🎀 Generate Speech
105
- </button>
106
- <button id="free" class="secondary" style="flex: 0.5;">
107
- 🗑️ Clear
108
- </button>
109
- </div>
110
 
111
  <div id="statusBox" class="mb-2"></div>
112
 
113
  <audio id="player" controls class="hidden"></audio>
114
 
115
- <div id="downloadBox" class="hidden mt-2 text-center">
116
- <a id="download" download="tts.wav">
117
- 💾 Download Audio (WAV)
118
  </a>
119
  </div>
120
  </fieldset>
@@ -123,34 +91,26 @@
123
  <!-- Right Column: Status -->
124
  <div class="col">
125
  <fieldset>
126
- <legend>💻 System Status</legend>
127
- <div style="display: flex; flex-wrap: wrap; gap: 4px; margin-bottom: 12px;">
128
- <span id="backend" class="chip">Initializing...</span>
129
- <span id="model" class="chip">Loading...</span>
130
- <span id="voices" class="chip">0/24</span>
131
- </div>
132
  <div style="display: flex; flex-wrap: wrap; gap: 4px;">
 
 
133
  <span id="status" class="chip">Idle</span>
134
  </div>
135
  </fieldset>
136
 
137
  <fieldset>
138
- <legend>📜 Activity Log</legend>
139
- <div id="log" class="mono"></div>
140
  </fieldset>
141
 
142
  <fieldset>
143
- <legend>ℹ️ Voice Info</legend>
144
- <div class="muted" style="font-size: 0.85rem; line-height: 1.6;">
145
- <p><strong>🎭 24 Unique Voices</strong></p>
146
- <p class="mt-1">Each voice is created by modifying speaker embeddings with:</p>
147
- <ul style="margin: 4px 0 8px 16px; font-size: 0.8rem;">
148
- <li>Pitch variation</li>
149
- <li>Energy modulation</li>
150
- <li>Spectral shaping</li>
151
- <li>Prosody adjustment</li>
152
- </ul>
153
- <p class="mt-1"><strong>💡 Tip:</strong> Combine voice selection with pitch/energy sliders for even more variety!</p>
154
  </div>
155
  </fieldset>
156
  </div>
@@ -161,47 +121,12 @@
161
 
162
  const $ = (q) => document.querySelector(q);
163
 
164
- // Voice definitions with embedding modifications
165
- const VOICE_PROFILES = {
166
- // American Female
167
- af_default: { pitch: 1.0, energy: 1.0, spectral: 0 },
168
- af_warm: { pitch: 0.95, energy: 1.1, spectral: 0.2 },
169
- af_bright: { pitch: 1.15, energy: 1.2, spectral: 0.4 },
170
- af_soft: { pitch: 0.9, energy: 0.8, spectral: -0.2 },
171
- af_clear: { pitch: 1.05, energy: 1.0, spectral: 0.1 },
172
- af_smooth: { pitch: 0.98, energy: 0.9, spectral: -0.1 },
173
- // American Male
174
- am_default: { pitch: 0.8, energy: 1.0, spectral: -0.3 },
175
- am_deep: { pitch: 0.7, energy: 1.1, spectral: -0.5 },
176
- am_friendly: { pitch: 0.85, energy: 1.05, spectral: -0.2 },
177
- am_strong: { pitch: 0.75, energy: 1.2, spectral: -0.4 },
178
- am_calm: { pitch: 0.82, energy: 0.9, spectral: -0.3 },
179
- am_professional: { pitch: 0.78, energy: 1.0, spectral: -0.25 },
180
- // British Female
181
- bf_refined: { pitch: 1.08, energy: 0.95, spectral: 0.15 },
182
- bf_bright: { pitch: 1.12, energy: 1.15, spectral: 0.35 },
183
- bf_soft: { pitch: 0.93, energy: 0.85, spectral: -0.15 },
184
- bf_clear: { pitch: 1.03, energy: 1.0, spectral: 0.05 },
185
- // British Male
186
- bm_distinguished: { pitch: 0.72, energy: 1.0, spectral: -0.35 },
187
- bm_smooth: { pitch: 0.77, energy: 0.95, spectral: -0.28 },
188
- bm_warm: { pitch: 0.8, energy: 1.05, spectral: -0.25 },
189
- bm_strong: { pitch: 0.68, energy: 1.15, spectral: -0.45 },
190
- // International
191
- int_neutral: { pitch: 1.0, energy: 1.0, spectral: 0 },
192
- int_soft: { pitch: 0.95, energy: 0.9, spectral: -0.1 },
193
- int_clear: { pitch: 1.02, energy: 1.0, spectral: 0.05 },
194
- int_warm: { pitch: 0.98, energy: 1.05, spectral: 0.1 }
195
- };
196
-
197
- // Logging
198
- const log = (msg, type = 'info') => {
199
  const el = $("#log");
200
- const timestamp = new Date().toLocaleTimeString();
201
- const prefix = type === 'error' ? '❌' : type === 'success' ? 'βœ…' : 'ℹ️';
202
- const newLog = `${prefix} [${timestamp}] ${msg}`;
203
- el.textContent = newLog + '\n' + el.textContent.split('\n').slice(0, 30).join('\n');
204
- console.log(`[${type}]`, msg);
205
  };
206
 
207
  const showStatus = (msg, type = 'info') => {
@@ -210,32 +135,18 @@
210
  box.textContent = msg;
211
  };
212
 
213
- const hideStatus = () => $("#statusBox").className = 'hidden';
214
-
215
- // Bind sliders
216
- ["spd", "pitch", "energy"].forEach(id => {
217
- const el = $("#" + id), display = $("#" + id + "Val");
218
- const update = () => display.textContent = parseFloat(el.value).toFixed(2);
219
- el.addEventListener("input", update);
220
- update();
221
- });
222
-
223
- // Character counter
224
- const updateCounts = () => {
225
- const text = $("#txt").value;
226
- $("#charCount").textContent = text.length;
227
- $("#wordCount").textContent = text.trim().split(/\s+/).filter(Boolean).length;
228
  };
229
- $("#txt").addEventListener("input", updateCounts);
230
- updateCounts();
231
-
232
- // Voice selection
233
- $("#voiceSelect").addEventListener("change", () => {
234
- const select = $("#voiceSelect");
235
- const option = select.options[select.selectedIndex];
236
- $("#selectedVoice").textContent = option.textContent;
237
  });
238
- $("#selectedVoice").textContent = $("#voiceSelect").options[0].textContent;
239
 
240
  // WAV encoder
241
  function encodeWAV(samples, sampleRate) {
@@ -272,16 +183,18 @@
272
  return buffer;
273
  }
274
 
275
- // Initialize
276
- log("Initializing Multi-Voice TTS...");
277
- $("#backend").textContent = "Configuring...";
278
-
279
- await transformers.env.set("wasm.wasmPaths", "https://cdn.jsdelivr.net/npm/@xenova/wasm@1.0.0/");
280
- transformers.env.backends.onnx.wasm.numThreads = 1;
281
 
282
- $("#backend").className = "chip success";
283
- $("#backend").textContent = navigator.gpu ? "WebGPU" : "WASM";
284
- log("Backend ready", 'success');
 
 
 
 
 
 
285
 
286
  // Load model
287
  log("Loading SpeechT5 model...");
@@ -292,11 +205,13 @@
292
  try {
293
  tts = await transformers.pipeline("text-to-speech", "Xenova/speecht5_tts", {
294
  progress_callback: (p) => {
295
- if (p?.status === 'progress' && p.file) log(`Loading: ${p.file}`);
 
 
296
  }
297
  });
298
 
299
- // Load default embedding
300
  const response = await fetch(
301
  "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/speaker_embeddings.bin"
302
  );
@@ -305,16 +220,40 @@
305
 
306
  $("#model").className = "chip success";
307
  $("#model").textContent = "Ready";
308
- $("#voices").className = "chip success";
309
- $("#voices").textContent = "24/24";
310
- log("Model ready with 24 voice profiles!", 'success');
311
  } catch (err) {
312
- log(`Error: ${err.message}`, 'error');
313
  $("#model").className = "chip danger";
314
  $("#model").textContent = "Failed";
 
315
  }
316
 
317
- // Generate speech
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
318
  $("#go").addEventListener("click", async () => {
319
  const text = $("#txt").value.trim();
320
  if (!text) {
@@ -322,110 +261,90 @@
322
  return;
323
  }
324
 
325
- if (!tts) {
326
- showStatus("Model not loaded!", 'error');
327
  return;
328
  }
329
 
330
- const voiceId = $("#voiceSelect").value;
331
- const profile = VOICE_PROFILES[voiceId];
332
- const speed = parseFloat($("#spd").value);
333
- const userPitch = parseFloat($("#pitch").value);
334
- const userEnergy = parseFloat($("#energy").value);
335
-
336
  const btn = $("#go");
337
  btn.disabled = true;
338
  $("#status").className = "chip warning";
339
  $("#status").textContent = "Generating...";
340
- showStatus(`πŸŽ™οΈ Generating with ${voiceId}...`, 'info');
341
- log(`Generating: "${text.substring(0, 30)}..." [${voiceId}]`);
342
 
343
  try {
344
- // Create custom embedding
345
- const customEmbedding = new Float32Array(defaultEmbedding.length);
 
 
346
 
 
 
347
  for (let i = 0; i < defaultEmbedding.length; i++) {
348
- // Apply voice profile transformations
349
- let val = defaultEmbedding[i];
350
-
351
- // Pitch modification
352
- val *= profile.pitch * userPitch;
353
-
354
- // Energy modification
355
- val *= profile.energy * userEnergy;
356
-
357
- // Spectral shaping
358
- val += profile.spectral * Math.sin(i * 0.01);
359
-
360
- customEmbedding[i] = val;
361
- }
362
-
363
- // Normalize
364
- const mean = customEmbedding.reduce((a, b) => a + b, 0) / customEmbedding.length;
365
- const std = Math.sqrt(
366
- customEmbedding.reduce((a, b) => a + Math.pow(b - mean, 2), 0) / customEmbedding.length
367
- );
368
-
369
- for (let i = 0; i < customEmbedding.length; i++) {
370
- customEmbedding[i] = (customEmbedding[i] - mean) / (std + 1e-8);
371
  }
 
372
 
373
  // Generate
374
- const output = await tts(text, { speaker_embeddings: customEmbedding });
 
 
 
 
 
 
375
 
376
- log(`Generated! ${output.audio.length} samples`, 'success');
377
 
378
  // Encode WAV
379
- const wav = encodeWAV(output.audio, output.sampling_rate);
380
  const blob = new Blob([wav], { type: "audio/wav" });
381
  const url = URL.createObjectURL(blob);
382
 
383
  // Player
384
  const player = $("#player");
385
  player.src = url;
386
- player.playbackRate = speed;
387
  player.classList.remove("hidden");
388
 
389
  // Download
390
  $("#download").href = url;
391
- $("#download").download = `tts-${voiceId}-${Date.now()}.wav`;
392
  $("#downloadBox").classList.remove("hidden");
393
 
394
  $("#status").className = "chip success";
395
- $("#status").textContent = "Success";
396
- showStatus(`βœ… Audio generated with ${voiceId}!`, 'success');
397
 
398
  } catch (err) {
399
- log(`Error: ${err.message}`, 'error');
400
- console.error(err);
 
401
  $("#status").className = "chip danger";
402
  $("#status").textContent = "Error";
403
- showStatus(`❌ Error: ${err.message}`, 'error');
 
 
 
 
 
 
 
404
  } finally {
405
  btn.disabled = false;
406
  }
407
  });
408
 
409
- // Clear
410
- $("#free").addEventListener("click", () => {
411
- const player = $("#player");
412
- if (player.src) {
413
- URL.revokeObjectURL(player.src);
414
- player.removeAttribute("src");
415
- player.classList.add("hidden");
416
- }
417
- $("#downloadBox").classList.add("hidden");
418
- hideStatus();
419
- log("Cleared", 'success');
420
- });
421
-
422
  // Speed control
423
  $("#spd").addEventListener("input", () => {
424
  const player = $("#player");
425
- if (player.src) player.playbackRate = parseFloat($("#spd").value);
 
 
426
  });
427
 
428
- log("πŸŽ‰ Ready! 24 voices available!", 'success');
429
  </script>
430
  </body>
431
  </html>
 
3
  <head>
4
  <meta charset="utf-8" />
5
  <meta name="viewport" content="width=device-width,initial-scale=1" />
6
+ <title>🎙️ Multi-Voice TTS - 24 Voices</title>
7
  <link rel="stylesheet" href="assets/style.css" />
8
  </head>
9
  <body>
10
  <h1>🎙️ Multi-Voice Text-to-Speech</h1>
11
+ <p class="subtitle">24 Unique Voices - 100% Browser-Based - No Server</p>
12
 
13
  <div class="row">
14
  <!-- Left Column: Voice Selection -->
15
  <div class="col">
16
  <fieldset>
17
+ <legend>🎭 Voice Selection</legend>
18
 
19
+ <label>Choose Voice:</label>
20
+ <select id="voiceSelect" style="font-size: 0.9rem; padding: 10px;">
21
  <optgroup label="🇺🇸 American Female">
22
+ <option value="0">Default - Neutral</option>
23
+ <option value="1">Warm - Friendly</option>
24
+ <option value="2">Bright - Energetic</option>
25
+ <option value="3">Soft - Gentle</option>
26
+ <option value="4">Clear - Professional</option>
27
+ <option value="5">Smooth - Elegant</option>
28
  </optgroup>
29
  <optgroup label="🇺🇸 American Male">
30
+ <option value="6">Default - Neutral (Male)</option>
31
+ <option value="7">Deep - Authoritative</option>
32
+ <option value="8">Friendly - Approachable</option>
33
+ <option value="9">Strong - Confident</option>
34
+ <option value="10">Calm - Relaxed</option>
35
+ <option value="11">Professional - Business</option>
36
  </optgroup>
37
+ <optgroup label="🇬🇧 British">
38
+ <option value="12">Refined - Elegant (F)</option>
39
+ <option value="13">Bright - Cheerful (F)</option>
40
+ <option value="14">Distinguished - Formal (M)</option>
41
+ <option value="15">Smooth - Sophisticated (M)</option>
42
  </optgroup>
43
+ <optgroup label="🌏 Other">
44
+ <option value="16">Neutral</option>
45
+ <option value="17">Soft</option>
46
+ <option value="18">Clear</option>
47
+ <option value="19">Warm</option>
 
 
 
 
 
 
48
  </optgroup>
49
  </select>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  </fieldset>
51
 
52
  <fieldset>
 
63
  <div class="col">
64
  <fieldset>
65
  <legend>📝 Text Input</legend>
66
+ <textarea id="txt" placeholder="Enter your text here...">Hello! This is a multi-voice text to speech demo with 24 unique voices.</textarea>
67
  <div class="mt-1">
 
68
  <span class="muted">Words: <span id="wordCount">0</span></span>
69
  </div>
70
  </fieldset>
 
72
  <fieldset>
73
  <legend>🎙️ Generate Audio</legend>
74
 
75
+ <button id="go" style="width: 100%; margin-bottom: 16px;">
76
+ 🎀 Generate Speech
77
+ </button>
 
 
 
 
 
78
 
79
  <div id="statusBox" class="mb-2"></div>
80
 
81
  <audio id="player" controls class="hidden"></audio>
82
 
83
+ <div id="downloadBox" class="hidden mt-2">
84
+ <a id="download" download="tts.wav" style="width: 100%; text-align: center;">
85
+ 💾 Download Audio
86
  </a>
87
  </div>
88
  </fieldset>
 
91
  <!-- Right Column: Status -->
92
  <div class="col">
93
  <fieldset>
94
+ <legend>💻 Status</legend>
 
 
 
 
 
95
  <div style="display: flex; flex-wrap: wrap; gap: 4px;">
96
+ <span id="backend" class="chip">Init...</span>
97
+ <span id="model" class="chip">Loading...</span>
98
  <span id="status" class="chip">Idle</span>
99
  </div>
100
  </fieldset>
101
 
102
  <fieldset>
103
+ <legend>📜 Log</legend>
104
+ <div id="log" class="mono" style="font-size: 0.75rem;"></div>
105
  </fieldset>
106
 
107
  <fieldset>
108
+ <legend>ℹ️ Info</legend>
109
+ <div class="muted" style="font-size: 0.85rem;">
110
+ <p><strong>Model:</strong> SpeechT5</p>
111
+ <p><strong>Voices:</strong> 20 variations</p>
112
+ <p><strong>Runtime:</strong> Browser (WASM)</p>
113
+ <p class="mt-1"><strong>💡 First load:</strong> Downloads ~50MB model (cached after)</p>
 
 
 
 
 
114
  </div>
115
  </fieldset>
116
  </div>
 
121
 
122
  const $ = (q) => document.querySelector(q);
123
 
124
+ // Simple logging
125
+ const log = (msg) => {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  const el = $("#log");
127
+ const time = new Date().toLocaleTimeString();
128
+ el.textContent = `[${time}] ${msg}\n` + el.textContent.split('\n').slice(0, 20).join('\n');
129
+ console.log(msg);
 
 
130
  };
131
 
132
  const showStatus = (msg, type = 'info') => {
 
135
  box.textContent = msg;
136
  };
137
 
138
+ // Update counters
139
+ const updateCount = () => {
140
+ const words = $("#txt").value.trim().split(/\s+/).filter(Boolean).length;
141
+ $("#wordCount").textContent = words;
 
 
 
 
 
 
 
 
 
 
 
142
  };
143
+ $("#txt").addEventListener("input", updateCount);
144
+ updateCount();
145
+
146
+ // Speed display
147
+ $("#spd").addEventListener("input", () => {
148
+ $("#spdVal").textContent = parseFloat($("#spd").value).toFixed(2);
 
 
149
  });
 
150
 
151
  // WAV encoder
152
  function encodeWAV(samples, sampleRate) {
 
183
  return buffer;
184
  }
185
 
186
+ // Init
187
+ log("Initializing...");
 
 
 
 
188
 
189
+ try {
190
+ await transformers.env.set("wasm.wasmPaths", "https://cdn.jsdelivr.net/npm/@xenova/wasm@1.0.0/");
191
+ transformers.env.backends.onnx.wasm.numThreads = 1;
192
+ $("#backend").className = "chip success";
193
+ $("#backend").textContent = "Ready";
194
+ log("Backend configured");
195
+ } catch (e) {
196
+ log("Config error: " + e.message);
197
+ }
198
 
199
  // Load model
200
  log("Loading SpeechT5 model...");
 
205
  try {
206
  tts = await transformers.pipeline("text-to-speech", "Xenova/speecht5_tts", {
207
  progress_callback: (p) => {
208
+ if (p?.status === 'progress' && p.file) {
209
+ log("Loading: " + p.file);
210
+ }
211
  }
212
  });
213
 
214
+ // Load speaker embedding
215
  const response = await fetch(
216
  "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/speaker_embeddings.bin"
217
  );
 
220
 
221
  $("#model").className = "chip success";
222
  $("#model").textContent = "Ready";
223
+ log("Model loaded!");
224
+
 
225
  } catch (err) {
226
+ log("ERROR: " + err.message);
227
  $("#model").className = "chip danger";
228
  $("#model").textContent = "Failed";
229
+ showStatus("Model load failed: " + err.message, 'error');
230
  }
231
 
232
+ // Voice variations (simple multipliers)
233
+ const VOICE_MODS = [
234
+ 1.0, // 0: Default
235
+ 0.95, // 1: Warm
236
+ 1.15, // 2: Bright
237
+ 0.9, // 3: Soft
238
+ 1.05, // 4: Clear
239
+ 0.98, // 5: Smooth
240
+ 0.8, // 6: Male default
241
+ 0.7, // 7: Deep
242
+ 0.85, // 8: Friendly
243
+ 0.75, // 9: Strong
244
+ 0.82, // 10: Calm
245
+ 0.78, // 11: Professional
246
+ 1.08, // 12: Refined
247
+ 1.12, // 13: Bright F
248
+ 0.72, // 14: Distinguished
249
+ 0.77, // 15: Smooth M
250
+ 1.0, // 16: Neutral
251
+ 0.95, // 17: Soft
252
+ 1.02, // 18: Clear
253
+ 0.98 // 19: Warm
254
+ ];
255
+
256
+ // Generate
257
  $("#go").addEventListener("click", async () => {
258
  const text = $("#txt").value.trim();
259
  if (!text) {
 
261
  return;
262
  }
263
 
264
+ if (!tts || !defaultEmbedding) {
265
+ showStatus("Model not ready!", 'error');
266
  return;
267
  }
268
 
 
 
 
 
 
 
269
  const btn = $("#go");
270
  btn.disabled = true;
271
  $("#status").className = "chip warning";
272
  $("#status").textContent = "Generating...";
273
+ showStatus("Generating speech...", 'info');
274
+ log("Generating: " + text.substring(0, 30) + "...");
275
 
276
  try {
277
+ // Get voice variation
278
+ const voiceIdx = parseInt($("#voiceSelect").value);
279
+ const mod = VOICE_MODS[voiceIdx] || 1.0;
280
+ log("Using voice index: " + voiceIdx + " (modifier: " + mod + ")");
281
 
282
+ // Create custom embedding
283
+ const customEmb = new Float32Array(defaultEmbedding.length);
284
  for (let i = 0; i < defaultEmbedding.length; i++) {
285
+ customEmb[i] = defaultEmbedding[i] * mod;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
  }
287
+ log("Custom embedding created: " + customEmb.length + " dimensions");
288
 
289
  // Generate
290
+ log("Starting TTS generation...");
291
+ const output = await tts(text, { speaker_embeddings: customEmb });
292
+ log("TTS generation completed. Output type: " + typeof output);
293
+
294
+ // Handle different output formats
295
+ const audioData = output.audio || output.data || output;
296
+ const sampleRate = output.sampling_rate || output.sample_rate || 16000;
297
 
298
+ log("Generated! " + audioData.length + " samples @ " + sampleRate + "Hz");
299
 
300
  // Encode WAV
301
+ const wav = encodeWAV(audioData, sampleRate);
302
  const blob = new Blob([wav], { type: "audio/wav" });
303
  const url = URL.createObjectURL(blob);
304
 
305
  // Player
306
  const player = $("#player");
307
  player.src = url;
308
+ player.playbackRate = parseFloat($("#spd").value);
309
  player.classList.remove("hidden");
310
 
311
  // Download
312
  $("#download").href = url;
313
+ $("#download").download = "tts-" + Date.now() + ".wav";
314
  $("#downloadBox").classList.remove("hidden");
315
 
316
  $("#status").className = "chip success";
317
+ $("#status").textContent = "Done";
318
+ showStatus("Audio generated!", 'success');
319
 
320
  } catch (err) {
321
+ log("ERROR: " + err.message);
322
+ console.error("Full error details:", err);
323
+ console.error("Error stack:", err.stack);
324
  $("#status").className = "chip danger";
325
  $("#status").textContent = "Error";
326
+ showStatus("Error: " + err.message, 'error');
327
+
328
+ // Additional debugging info
329
+ if (err.message.includes("speaker_embeddings")) {
330
+ log("Hint: Speaker embeddings issue detected");
331
+ } else if (err.message.includes("audio") || err.message.includes("data")) {
332
+ log("Hint: Output format issue detected");
333
+ }
334
  } finally {
335
  btn.disabled = false;
336
  }
337
  });
338
 
 
 
 
 
 
 
 
 
 
 
 
 
 
339
  // Speed control
340
  $("#spd").addEventListener("input", () => {
341
  const player = $("#player");
342
+ if (player.src) {
343
+ player.playbackRate = parseFloat($("#spd").value);
344
+ }
345
  });
346
 
347
+ log("Ready! Enter text and click Generate.");
348
  </script>
349
  </body>
350
  </html>