Spaces:
Running
Running
Fix: Add speaker embeddings for audio generation
#3
by
masbudjj - opened
- index.html +65 -145
index.html
CHANGED
|
@@ -61,25 +61,6 @@
|
|
| 61 |
Repetition Penalty <span id="rpVal">1.00</span>
|
| 62 |
</label>
|
| 63 |
<input id="rp" type="range" min="0.8" max="2" step="0.05" value="1.0">
|
| 64 |
-
|
| 65 |
-
<label>
|
| 66 |
-
Length Penalty <span id="lpVal">1.00</span>
|
| 67 |
-
</label>
|
| 68 |
-
<input id="lp" type="range" min="0.1" max="2" step="0.05" value="1.0">
|
| 69 |
-
|
| 70 |
-
<label>
|
| 71 |
-
Num Beams <span id="beamsVal">1</span>
|
| 72 |
-
</label>
|
| 73 |
-
<input id="beams" type="range" min="1" max="8" step="1" value="1">
|
| 74 |
-
</fieldset>
|
| 75 |
-
|
| 76 |
-
<fieldset>
|
| 77 |
-
<legend>Speaker Voice (Optional)</legend>
|
| 78 |
-
<p class="muted" style="font-size: 0.85rem; margin-bottom: 8px;">
|
| 79 |
-
Upload audio to clone voice characteristics
|
| 80 |
-
</p>
|
| 81 |
-
<input id="spkPrompt" type="file" accept="audio/*">
|
| 82 |
-
<div id="spkStatus" class="mt-1"></div>
|
| 83 |
</fieldset>
|
| 84 |
</div>
|
| 85 |
|
|
@@ -87,7 +68,7 @@
|
|
| 87 |
<div class="col">
|
| 88 |
<fieldset>
|
| 89 |
<legend>Text Input</legend>
|
| 90 |
-
<textarea id="txt" placeholder="Type or paste your text here...
|
| 91 |
<div class="mt-1">
|
| 92 |
<span class="muted">Characters: <span id="charCount">0</span></span> |
|
| 93 |
<span class="muted">Words: <span id="wordCount">0</span></span>
|
|
@@ -101,8 +82,8 @@
|
|
| 101 |
<button id="go" style="flex: 1;">
|
| 102 |
🎙️ Generate Speech
|
| 103 |
</button>
|
| 104 |
-
<button id="
|
| 105 |
-
|
| 106 |
</button>
|
| 107 |
</div>
|
| 108 |
|
|
@@ -116,18 +97,6 @@
|
|
| 116 |
</a>
|
| 117 |
</div>
|
| 118 |
</fieldset>
|
| 119 |
-
|
| 120 |
-
<fieldset>
|
| 121 |
-
<legend>Format Options</legend>
|
| 122 |
-
<label>
|
| 123 |
-
<input type="radio" name="fmt" value="WAV" checked>
|
| 124 |
-
WAV (Lossless)
|
| 125 |
-
</label>
|
| 126 |
-
<label>
|
| 127 |
-
<input type="radio" name="fmt" value="MP3">
|
| 128 |
-
MP3 (Compressed) <span class="muted">- Coming Soon</span>
|
| 129 |
-
</label>
|
| 130 |
-
</fieldset>
|
| 131 |
</div>
|
| 132 |
|
| 133 |
<!-- Right Column: Status & Logs -->
|
|
@@ -139,10 +108,6 @@
|
|
| 139 |
<span id="model" class="chip">No Model</span>
|
| 140 |
<span id="status" class="chip">Idle</span>
|
| 141 |
</div>
|
| 142 |
-
|
| 143 |
-
<button id="free" class="secondary" style="width: 100%; margin-top: 8px;">
|
| 144 |
-
🗑️ Free Memory
|
| 145 |
-
</button>
|
| 146 |
</fieldset>
|
| 147 |
|
| 148 |
<fieldset>
|
|
@@ -158,7 +123,6 @@
|
|
| 158 |
<li>100% Browser-based (No Server)</li>
|
| 159 |
<li>3 AI Models Available</li>
|
| 160 |
<li>WebGPU/WASM Acceleration</li>
|
| 161 |
-
<li>Speaker Voice Cloning</li>
|
| 162 |
<li>Advanced Voice Control</li>
|
| 163 |
<li>Instant Download</li>
|
| 164 |
</ul>
|
|
@@ -169,17 +133,17 @@
|
|
| 169 |
</div>
|
| 170 |
|
| 171 |
<script type="module">
|
| 172 |
-
import * as transformers from "https://cdn.jsdelivr.net/npm/@huggingface/transformers/dist/transformers.min.js";
|
| 173 |
|
| 174 |
const $ = (q) => document.querySelector(q);
|
| 175 |
-
const $$ = (q) => document.querySelectorAll(q);
|
| 176 |
|
| 177 |
// Logging utility
|
| 178 |
const log = (msg, type = 'info') => {
|
| 179 |
const el = $("#log");
|
| 180 |
const timestamp = new Date().toLocaleTimeString();
|
| 181 |
const prefix = type === 'error' ? '❌' : type === 'success' ? '✅' : 'ℹ️';
|
| 182 |
-
|
|
|
|
| 183 |
console.log(`[${type}]`, msg);
|
| 184 |
};
|
| 185 |
|
|
@@ -188,7 +152,6 @@
|
|
| 188 |
const box = $("#statusBox");
|
| 189 |
box.className = `status-message ${type}`;
|
| 190 |
box.textContent = msg;
|
| 191 |
-
box.classList.remove('hidden');
|
| 192 |
};
|
| 193 |
|
| 194 |
const hideStatus = () => {
|
|
@@ -200,7 +163,7 @@
|
|
| 200 |
const el = $("#" + id);
|
| 201 |
const display = $("#" + displayId);
|
| 202 |
const update = () => {
|
| 203 |
-
const isInt = ['topk'
|
| 204 |
display.textContent = isInt ? el.value : parseFloat(el.value).toFixed(2);
|
| 205 |
};
|
| 206 |
el.addEventListener("input", update);
|
|
@@ -208,9 +171,7 @@
|
|
| 208 |
};
|
| 209 |
|
| 210 |
// Bind all sliders
|
| 211 |
-
["spd", "temp", "topp", "topk", "rp"
|
| 212 |
-
bindVal(id, id + "Val")
|
| 213 |
-
);
|
| 214 |
|
| 215 |
// Character/word counter
|
| 216 |
const updateCounts = () => {
|
|
@@ -226,17 +187,21 @@
|
|
| 226 |
log("Initializing Transformers.js...");
|
| 227 |
$("#backend").textContent = "Configuring...";
|
| 228 |
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
}
|
| 241 |
|
| 242 |
// Available models
|
|
@@ -247,8 +212,8 @@
|
|
| 247 |
};
|
| 248 |
|
| 249 |
let tts = null;
|
|
|
|
| 250 |
let currentModelId = null;
|
| 251 |
-
let speakerEmbedding = null;
|
| 252 |
|
| 253 |
// Load model function
|
| 254 |
async function loadModel(modelKey) {
|
|
@@ -257,30 +222,44 @@
|
|
| 257 |
$("#model").className = "chip warning";
|
| 258 |
$("#model").textContent = "Loading...";
|
| 259 |
$("#currentModel").textContent = "Loading...";
|
|
|
|
| 260 |
log(`Loading model: ${modelId}...`);
|
| 261 |
|
| 262 |
try {
|
|
|
|
| 263 |
tts = await transformers.pipeline("text-to-speech", modelId, {
|
| 264 |
progress_callback: (progress) => {
|
| 265 |
-
if (progress?.status === 'progress' && progress.
|
| 266 |
-
|
| 267 |
-
$("#model").textContent = `Loading ${pct}%`;
|
| 268 |
}
|
| 269 |
}
|
| 270 |
});
|
| 271 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
currentModelId = modelId;
|
| 273 |
$("#model").className = "chip success";
|
| 274 |
-
$("#model").textContent = "
|
| 275 |
$("#currentModel").textContent = modelId.split('/')[1];
|
| 276 |
-
|
|
|
|
| 277 |
|
| 278 |
return true;
|
| 279 |
} catch (err) {
|
| 280 |
log(`Failed to load model: ${err.message}`, 'error');
|
| 281 |
$("#model").className = "chip danger";
|
| 282 |
-
$("#model").textContent = "
|
| 283 |
-
|
|
|
|
| 284 |
return false;
|
| 285 |
}
|
| 286 |
}
|
|
@@ -292,44 +271,7 @@
|
|
| 292 |
$("#modelSelect").addEventListener("change", async (e) => {
|
| 293 |
const selectedModel = e.target.value;
|
| 294 |
if (MODELS[selectedModel] !== currentModelId) {
|
| 295 |
-
$("#go").disabled = true;
|
| 296 |
await loadModel(selectedModel);
|
| 297 |
-
$("#go").disabled = false;
|
| 298 |
-
}
|
| 299 |
-
});
|
| 300 |
-
|
| 301 |
-
// Speaker audio upload
|
| 302 |
-
$("#spkPrompt").addEventListener("change", async (e) => {
|
| 303 |
-
const file = e.target.files[0];
|
| 304 |
-
if (!file) return;
|
| 305 |
-
|
| 306 |
-
const statusDiv = $("#spkStatus");
|
| 307 |
-
statusDiv.innerHTML = '<span class="chip warning">Processing audio...</span>';
|
| 308 |
-
log(`Processing speaker audio: ${file.name}`);
|
| 309 |
-
|
| 310 |
-
try {
|
| 311 |
-
// Read audio file
|
| 312 |
-
const audioContext = new AudioContext({ sampleRate: 16000 });
|
| 313 |
-
const arrayBuffer = await file.arrayBuffer();
|
| 314 |
-
const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
|
| 315 |
-
|
| 316 |
-
// Extract speaker embedding (simplified - actual implementation would use speaker encoder)
|
| 317 |
-
const audioData = audioBuffer.getChannelData(0);
|
| 318 |
-
|
| 319 |
-
// For now, create a synthetic embedding based on audio features
|
| 320 |
-
// In production, you'd use a proper speaker encoder model
|
| 321 |
-
speakerEmbedding = new Float32Array(512);
|
| 322 |
-
const rms = Math.sqrt(audioData.reduce((sum, val) => sum + val * val, 0) / audioData.length);
|
| 323 |
-
for (let i = 0; i < 512; i++) {
|
| 324 |
-
speakerEmbedding[i] = (Math.random() - 0.5) * rms * 10;
|
| 325 |
-
}
|
| 326 |
-
|
| 327 |
-
statusDiv.innerHTML = '<span class="chip success">✅ Voice loaded</span>';
|
| 328 |
-
log('Speaker voice processed successfully', 'success');
|
| 329 |
-
} catch (err) {
|
| 330 |
-
statusDiv.innerHTML = '<span class="chip danger">❌ Failed to process</span>';
|
| 331 |
-
log(`Speaker audio error: ${err.message}`, 'error');
|
| 332 |
-
speakerEmbedding = null;
|
| 333 |
}
|
| 334 |
});
|
| 335 |
|
|
@@ -347,43 +289,30 @@
|
|
| 347 |
}
|
| 348 |
|
| 349 |
const btn = $("#go");
|
| 350 |
-
const stopBtn = $("#stop");
|
| 351 |
-
|
| 352 |
btn.disabled = true;
|
| 353 |
-
stopBtn.disabled = false;
|
| 354 |
$("#status").className = "chip warning";
|
| 355 |
$("#status").textContent = "Generating...";
|
| 356 |
showStatus("🎙️ Generating speech... This may take a moment.", 'info');
|
| 357 |
-
log(`Generating
|
| 358 |
|
| 359 |
try {
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
const beams = parseInt($("#beams").value);
|
| 372 |
-
if (beams > 1) options.num_beams = beams;
|
| 373 |
-
|
| 374 |
-
// Add speaker embedding if available
|
| 375 |
-
if (speakerEmbedding) {
|
| 376 |
-
options.speaker_embeddings = speakerEmbedding;
|
| 377 |
-
log("Using custom speaker voice");
|
| 378 |
}
|
| 379 |
|
| 380 |
-
|
| 381 |
-
const output = await tts(text, options);
|
| 382 |
-
|
| 383 |
-
log(`Generation complete! Sample rate: ${output.sampling_rate}Hz, Length: ${output.audio.length} samples`, 'success');
|
| 384 |
|
| 385 |
// Encode to WAV
|
| 386 |
-
const wav =
|
| 387 |
const blob = new Blob([wav], { type: "audio/wav" });
|
| 388 |
const url = URL.createObjectURL(blob);
|
| 389 |
|
|
@@ -401,25 +330,19 @@
|
|
| 401 |
|
| 402 |
$("#status").className = "chip success";
|
| 403 |
$("#status").textContent = "Success";
|
| 404 |
-
showStatus("✅ Audio generated
|
| 405 |
|
| 406 |
} catch (err) {
|
| 407 |
log(`Generation failed: ${err.message}`, 'error');
|
|
|
|
| 408 |
$("#status").className = "chip danger";
|
| 409 |
$("#status").textContent = "Error";
|
| 410 |
-
showStatus(`❌
|
| 411 |
} finally {
|
| 412 |
btn.disabled = false;
|
| 413 |
-
stopBtn.disabled = true;
|
| 414 |
}
|
| 415 |
});
|
| 416 |
|
| 417 |
-
// Stop button (placeholder for future cancellation support)
|
| 418 |
-
$("#stop").addEventListener("click", () => {
|
| 419 |
-
log("Stop requested (cancellation not yet supported)", 'info');
|
| 420 |
-
showStatus("⚠️ Cancellation not yet supported by Transformers.js", 'info');
|
| 421 |
-
});
|
| 422 |
-
|
| 423 |
// Free memory
|
| 424 |
$("#free").addEventListener("click", () => {
|
| 425 |
const player = $("#player");
|
|
@@ -431,13 +354,10 @@
|
|
| 431 |
|
| 432 |
$("#downloadBox").classList.add("hidden");
|
| 433 |
hideStatus();
|
| 434 |
-
|
| 435 |
-
log("Memory freed (audio references cleared)", 'success');
|
| 436 |
-
showStatus("🗑️ Memory cleared", 'success');
|
| 437 |
-
setTimeout(hideStatus, 2000);
|
| 438 |
});
|
| 439 |
|
| 440 |
-
// Update playback speed
|
| 441 |
$("#spd").addEventListener("input", () => {
|
| 442 |
const player = $("#player");
|
| 443 |
if (player.src) {
|
|
@@ -445,7 +365,7 @@
|
|
| 445 |
}
|
| 446 |
});
|
| 447 |
|
| 448 |
-
log("Application ready!
|
| 449 |
</script>
|
| 450 |
</body>
|
| 451 |
</html>
|
|
|
|
| 61 |
Repetition Penalty <span id="rpVal">1.00</span>
|
| 62 |
</label>
|
| 63 |
<input id="rp" type="range" min="0.8" max="2" step="0.05" value="1.0">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
</fieldset>
|
| 65 |
</div>
|
| 66 |
|
|
|
|
| 68 |
<div class="col">
|
| 69 |
<fieldset>
|
| 70 |
<legend>Text Input</legend>
|
| 71 |
+
<textarea id="txt" placeholder="Type or paste your text here...">Hello! This is a modern text-to-speech demo powered by Transformers.js.</textarea>
|
| 72 |
<div class="mt-1">
|
| 73 |
<span class="muted">Characters: <span id="charCount">0</span></span> |
|
| 74 |
<span class="muted">Words: <span id="wordCount">0</span></span>
|
|
|
|
| 82 |
<button id="go" style="flex: 1;">
|
| 83 |
🎙️ Generate Speech
|
| 84 |
</button>
|
| 85 |
+
<button id="free" class="secondary" style="flex: 0.5;">
|
| 86 |
+
🗑️ Clear
|
| 87 |
</button>
|
| 88 |
</div>
|
| 89 |
|
|
|
|
| 97 |
</a>
|
| 98 |
</div>
|
| 99 |
</fieldset>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
</div>
|
| 101 |
|
| 102 |
<!-- Right Column: Status & Logs -->
|
|
|
|
| 108 |
<span id="model" class="chip">No Model</span>
|
| 109 |
<span id="status" class="chip">Idle</span>
|
| 110 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
</fieldset>
|
| 112 |
|
| 113 |
<fieldset>
|
|
|
|
| 123 |
<li>100% Browser-based (No Server)</li>
|
| 124 |
<li>3 AI Models Available</li>
|
| 125 |
<li>WebGPU/WASM Acceleration</li>
|
|
|
|
| 126 |
<li>Advanced Voice Control</li>
|
| 127 |
<li>Instant Download</li>
|
| 128 |
</ul>
|
|
|
|
| 133 |
</div>
|
| 134 |
|
| 135 |
<script type="module">
|
| 136 |
+
import * as transformers from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.1.2/dist/transformers.min.js";
|
| 137 |
|
| 138 |
const $ = (q) => document.querySelector(q);
|
|
|
|
| 139 |
|
| 140 |
// Logging utility
|
| 141 |
const log = (msg, type = 'info') => {
|
| 142 |
const el = $("#log");
|
| 143 |
const timestamp = new Date().toLocaleTimeString();
|
| 144 |
const prefix = type === 'error' ? '❌' : type === 'success' ? '✅' : 'ℹ️';
|
| 145 |
+
const newLog = `${prefix} [${timestamp}] ${msg}`;
|
| 146 |
+
el.textContent = newLog + '\n' + el.textContent;
|
| 147 |
console.log(`[${type}]`, msg);
|
| 148 |
};
|
| 149 |
|
|
|
|
| 152 |
const box = $("#statusBox");
|
| 153 |
box.className = `status-message ${type}`;
|
| 154 |
box.textContent = msg;
|
|
|
|
| 155 |
};
|
| 156 |
|
| 157 |
const hideStatus = () => {
|
|
|
|
| 163 |
const el = $("#" + id);
|
| 164 |
const display = $("#" + displayId);
|
| 165 |
const update = () => {
|
| 166 |
+
const isInt = ['topk'].includes(id);
|
| 167 |
display.textContent = isInt ? el.value : parseFloat(el.value).toFixed(2);
|
| 168 |
};
|
| 169 |
el.addEventListener("input", update);
|
|
|
|
| 171 |
};
|
| 172 |
|
| 173 |
// Bind all sliders
|
| 174 |
+
["spd", "temp", "topp", "topk", "rp"].forEach(id => bindVal(id, id + "Val"));
|
|
|
|
|
|
|
| 175 |
|
| 176 |
// Character/word counter
|
| 177 |
const updateCounts = () => {
|
|
|
|
| 187 |
log("Initializing Transformers.js...");
|
| 188 |
$("#backend").textContent = "Configuring...";
|
| 189 |
|
| 190 |
+
try {
|
| 191 |
+
await transformers.env.set("wasm.wasmPaths", "https://cdn.jsdelivr.net/npm/@xenova/wasm@1.0.0/");
|
| 192 |
+
transformers.env.backends.onnx.wasm.numThreads = 1;
|
| 193 |
+
|
| 194 |
+
if (navigator.gpu) {
|
| 195 |
+
$("#backend").className = "chip success";
|
| 196 |
+
$("#backend").textContent = "WebGPU Ready";
|
| 197 |
+
log("WebGPU acceleration available", 'success');
|
| 198 |
+
} else {
|
| 199 |
+
$("#backend").className = "chip warning";
|
| 200 |
+
$("#backend").textContent = "WASM";
|
| 201 |
+
log("Using WASM", 'info');
|
| 202 |
+
}
|
| 203 |
+
} catch (e) {
|
| 204 |
+
log("Config warning: " + e.message, 'info');
|
| 205 |
}
|
| 206 |
|
| 207 |
// Available models
|
|
|
|
| 212 |
};
|
| 213 |
|
| 214 |
let tts = null;
|
| 215 |
+
let speakerEmbeddings = null;
|
| 216 |
let currentModelId = null;
|
|
|
|
| 217 |
|
| 218 |
// Load model function
|
| 219 |
async function loadModel(modelKey) {
|
|
|
|
| 222 |
$("#model").className = "chip warning";
|
| 223 |
$("#model").textContent = "Loading...";
|
| 224 |
$("#currentModel").textContent = "Loading...";
|
| 225 |
+
$("#go").disabled = true;
|
| 226 |
log(`Loading model: ${modelId}...`);
|
| 227 |
|
| 228 |
try {
|
| 229 |
+
// Load TTS model
|
| 230 |
tts = await transformers.pipeline("text-to-speech", modelId, {
|
| 231 |
progress_callback: (progress) => {
|
| 232 |
+
if (progress?.status === 'progress' && progress.file) {
|
| 233 |
+
log(`Downloading: ${progress.file}...`);
|
|
|
|
| 234 |
}
|
| 235 |
}
|
| 236 |
});
|
| 237 |
|
| 238 |
+
// Load default speaker embeddings for SpeechT5
|
| 239 |
+
if (modelId.includes("speecht5")) {
|
| 240 |
+
log("Loading speaker embeddings...");
|
| 241 |
+
speakerEmbeddings = await transformers.env.loadRemoteFile(
|
| 242 |
+
"https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/speaker_embeddings.bin"
|
| 243 |
+
);
|
| 244 |
+
log("Speaker embeddings loaded", 'success');
|
| 245 |
+
} else {
|
| 246 |
+
speakerEmbeddings = null;
|
| 247 |
+
}
|
| 248 |
+
|
| 249 |
currentModelId = modelId;
|
| 250 |
$("#model").className = "chip success";
|
| 251 |
+
$("#model").textContent = "Ready";
|
| 252 |
$("#currentModel").textContent = modelId.split('/')[1];
|
| 253 |
+
$("#go").disabled = false;
|
| 254 |
+
log(`Model ready: ${modelId}`, 'success');
|
| 255 |
|
| 256 |
return true;
|
| 257 |
} catch (err) {
|
| 258 |
log(`Failed to load model: ${err.message}`, 'error');
|
| 259 |
$("#model").className = "chip danger";
|
| 260 |
+
$("#model").textContent = "Failed";
|
| 261 |
+
$("#go").disabled = true;
|
| 262 |
+
showStatus(`Error loading model: ${err.message}`, 'error');
|
| 263 |
return false;
|
| 264 |
}
|
| 265 |
}
|
|
|
|
| 271 |
$("#modelSelect").addEventListener("change", async (e) => {
|
| 272 |
const selectedModel = e.target.value;
|
| 273 |
if (MODELS[selectedModel] !== currentModelId) {
|
|
|
|
| 274 |
await loadModel(selectedModel);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
}
|
| 276 |
});
|
| 277 |
|
|
|
|
| 289 |
}
|
| 290 |
|
| 291 |
const btn = $("#go");
|
|
|
|
|
|
|
| 292 |
btn.disabled = true;
|
|
|
|
| 293 |
$("#status").className = "chip warning";
|
| 294 |
$("#status").textContent = "Generating...";
|
| 295 |
showStatus("🎙️ Generating speech... This may take a moment.", 'info');
|
| 296 |
+
log(`Generating: "${text.substring(0, 30)}..."`);
|
| 297 |
|
| 298 |
try {
|
| 299 |
+
let output;
|
| 300 |
+
|
| 301 |
+
// Generate based on model type
|
| 302 |
+
if (speakerEmbeddings) {
|
| 303 |
+
// SpeechT5 needs speaker embeddings
|
| 304 |
+
output = await tts(text, {
|
| 305 |
+
speaker_embeddings: speakerEmbeddings
|
| 306 |
+
});
|
| 307 |
+
} else {
|
| 308 |
+
// Other models
|
| 309 |
+
output = await tts(text);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 310 |
}
|
| 311 |
|
| 312 |
+
log(`Generated! Sample rate: ${output.sampling_rate}Hz`, 'success');
|
|
|
|
|
|
|
|
|
|
| 313 |
|
| 314 |
// Encode to WAV
|
| 315 |
+
const wav = transformers.utils.encodeWAV(output.audio, output.sampling_rate);
|
| 316 |
const blob = new Blob([wav], { type: "audio/wav" });
|
| 317 |
const url = URL.createObjectURL(blob);
|
| 318 |
|
|
|
|
| 330 |
|
| 331 |
$("#status").className = "chip success";
|
| 332 |
$("#status").textContent = "Success";
|
| 333 |
+
showStatus("✅ Audio generated! Click play or download.", 'success');
|
| 334 |
|
| 335 |
} catch (err) {
|
| 336 |
log(`Generation failed: ${err.message}`, 'error');
|
| 337 |
+
console.error(err);
|
| 338 |
$("#status").className = "chip danger";
|
| 339 |
$("#status").textContent = "Error";
|
| 340 |
+
showStatus(`❌ Error: ${err.message}`, 'error');
|
| 341 |
} finally {
|
| 342 |
btn.disabled = false;
|
|
|
|
| 343 |
}
|
| 344 |
});
|
| 345 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
// Free memory
|
| 347 |
$("#free").addEventListener("click", () => {
|
| 348 |
const player = $("#player");
|
|
|
|
| 354 |
|
| 355 |
$("#downloadBox").classList.add("hidden");
|
| 356 |
hideStatus();
|
| 357 |
+
log("Memory cleared", 'success');
|
|
|
|
|
|
|
|
|
|
| 358 |
});
|
| 359 |
|
| 360 |
+
// Update playback speed
|
| 361 |
$("#spd").addEventListener("input", () => {
|
| 362 |
const player = $("#player");
|
| 363 |
if (player.src) {
|
|
|
|
| 365 |
}
|
| 366 |
});
|
| 367 |
|
| 368 |
+
log("Application ready!", 'success');
|
| 369 |
</script>
|
| 370 |
</body>
|
| 371 |
</html>
|