Spaces:
Running
Running
| <html lang="en"> | |
<!-- Document metadata: encoding, responsive viewport, title and the shared stylesheet. -->
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width,initial-scale=1">
  <title>🎙️ Ultimate TTS - 900+ Premium Voices</title>
  <link rel="stylesheet" href="assets/style.css">
</head>
| <body> | |
<h1>🎙️ Ultimate Text-to-Speech Studio</h1>
| <p class="subtitle">3 Premium Engines - 900+ Voices - Voice Cloning - Unlimited Text</p> | |
| <div class="row"> | |
| <!-- Left Column: Engine & Voice Selection --> | |
| <div class="col"> | |
<!-- Engine picker. The script reads #engineSelect and rewrites the <p> inside #engineInfo. -->
<fieldset>
  <legend>🔊 TTS Engine</legend>
  <label for="engineSelect">Choose Engine:</label>
  <select id="engineSelect" style="font-size: 0.9rem; padding: 10px; margin-bottom: 16px;">
    <option value="piper">🎯 Piper TTS - 904 Voices (Premium Quality)</option>
    <option value="kokoro">✨ Kokoro TTS - 21 Expressive Voices (Highest Quality)</option>
    <option value="kitten">⚡ Kitten TTS - 8 Voices (Fastest, Lightweight)</option>
    <option value="clone">🎤 Voice Cloning (Upload Your Voice)</option>
  </select>
  <div id="engineInfo" style="padding: 12px; background: rgba(99,102,241,0.1); border-radius: 8px; margin-bottom: 16px;">
    <p class="muted" style="font-size: 0.85rem; margin: 0;">
      <strong>Piper TTS:</strong> 904 voices, 50+ languages, 3-5x realtime speed
    </p>
  </div>
</fieldset>
<!--
  Voice selection. Exactly one of #piperVoices, #kokoroVoices, #kittenVoices and
  #clonePanel is shown at a time; the script toggles the "hidden" class on them.
  Every label is bound to its control via for/id so clicking the label focuses
  the control and assistive technology announces it.
-->
<fieldset id="voicePanel">
  <legend>🎤 Voice Selection</legend>

  <!-- Piper Voices -->
  <div id="piperVoices">
    <label for="piperQuality">Quality Level:</label>
    <select id="piperQuality" style="margin-bottom: 12px;">
      <option value="high">High Quality (22kHz)</option>
      <option value="medium" selected>Medium Quality (16kHz)</option>
      <option value="low">Low Quality (Fast)</option>
    </select>

    <!--
      NOTE(review): the script builds the model URL from the option value. The
      "Other Languages" values are bare locales without a voice name, so they
      presumably cannot resolve to a model file. Confirm and replace them with
      concrete voice ids.
    -->
    <label for="piperLang">Language/Accent:</label>
    <select id="piperLang" style="margin-bottom: 12px;">
      <optgroup label="🇺🇸 English - American">
        <option value="en_US-lessac" selected>Lessac - Professional (High Quality)</option>
        <option value="en_US-ryan">Ryan - Authoritative (High Quality)</option>
        <option value="en_US-ljspeech">LJSpeech - Female, Clear</option>
        <option value="en_US-amy">Amy - Friendly Female</option>
        <option value="en_US-danny">Danny - Young Male</option>
        <option value="en_US-joe">Joe - Mature Male</option>
        <option value="en_US-kristin">Kristin - Professional Female</option>
        <option value="en_US-kathleen">Kathleen - Warm Female</option>
      </optgroup>
      <optgroup label="🇬🇧 English - British">
        <option value="en_GB-cori">Cori - Refined British (High Quality)</option>
        <option value="en_GB-alan">Alan - Distinguished Male</option>
        <option value="en_GB-alba">Alba - Scottish Female</option>
        <option value="en_GB-northern_english_male">Northern English Male</option>
        <option value="en_GB-southern_english_female">Southern English Female</option>
      </optgroup>
      <optgroup label="🌍 Other Languages (900+ total)">
        <option value="es_ES">Spanish - Spain (Multiple voices)</option>
        <option value="fr_FR">French - France (Multiple voices)</option>
        <option value="de_DE">German - Germany (Multiple voices)</option>
        <option value="it_IT">Italian - Italy (Multiple voices)</option>
        <option value="pt_BR">Portuguese - Brazil (Multiple voices)</option>
        <option value="zh_CN">Chinese - Mandarin (Multiple voices)</option>
        <option value="ja_JP">Japanese (Multiple voices)</option>
        <option value="ko_KR">Korean (Multiple voices)</option>
      </optgroup>
    </select>
    <div class="muted" style="font-size: 0.85rem; margin-top: 8px;">
      <p>💡 <strong>Tip:</strong> "Lessac" and "Ryan" offer the best quality for English.</p>
    </div>
  </div>

  <!-- Kokoro Voices -->
  <div id="kokoroVoices" class="hidden">
    <label for="kokoroVoice">Choose Voice:</label>
    <select id="kokoroVoice" style="margin-bottom: 12px;">
      <optgroup label="🇺🇸 American Female">
        <option value="af" selected>Default - Neutral &amp; Professional</option>
        <option value="af_bella">Bella - Elegant &amp; Sophisticated</option>
        <option value="af_nicole">Nicole - Clear &amp; Articulate</option>
        <option value="af_sarah">Sarah - Warm &amp; Friendly</option>
        <option value="af_sky">Sky - Light &amp; Energetic</option>
      </optgroup>
      <optgroup label="🇺🇸 American Male">
        <option value="am_adam">Adam - Natural &amp; Relaxed</option>
        <option value="am_michael">Michael - Deep &amp; Authoritative</option>
      </optgroup>
      <optgroup label="🇬🇧 British Female">
        <option value="bf">British Default - Refined</option>
        <option value="bf_emma">Emma - Elegant &amp; Polished</option>
        <option value="bf_isabella">Isabella - Sophisticated</option>
      </optgroup>
      <optgroup label="🇬🇧 British Male">
        <option value="bm">British Male - Distinguished</option>
        <option value="bm_george">George - Commanding</option>
        <option value="bm_lewis">Lewis - Smooth &amp; Confident</option>
      </optgroup>
    </select>
    <div class="muted" style="font-size: 0.85rem; margin-top: 8px;">
      <p>⭐ <strong>Kokoro:</strong> Highest quality, most expressive voices. 24kHz audio.</p>
    </div>
  </div>

  <!-- Kitten Voices -->
  <div id="kittenVoices" class="hidden">
    <label for="kittenVoice">Choose Voice:</label>
    <select id="kittenVoice" style="margin-bottom: 12px;">
      <option value="0" selected>Voice 0 - Neutral</option>
      <option value="1">Voice 1 - Warm</option>
      <option value="2">Voice 2 - Bright</option>
      <option value="3">Voice 3 - Soft</option>
      <option value="4">Voice 4 - Clear</option>
      <option value="5">Voice 5 - Deep</option>
      <option value="6">Voice 6 - Friendly</option>
      <option value="7">Voice 7 - Professional</option>
    </select>
    <div class="muted" style="font-size: 0.85rem; margin-top: 8px;">
      <p>⚡ <strong>Kitten:</strong> Fastest generation, only 24MB model. Perfect for quick tasks.</p>
    </div>
  </div>

  <!-- Voice Cloning -->
  <div id="clonePanel" class="hidden">
    <label for="voiceFile">Upload Voice Sample (Max 1 min):</label>
    <input type="file" id="voiceFile" accept="audio/wav,audio/mp3,audio/mpeg" style="margin-bottom: 12px;">
    <div class="muted" style="font-size: 0.85rem; margin-bottom: 12px;">
      <p>📋 Requirements:</p>
      <ul style="margin: 4px 0; padding-left: 20px;">
        <li>Format: WAV or MP3</li>
        <li>Duration: Max 60 seconds</li>
        <li>Quality: Clear voice, minimal noise</li>
      </ul>
    </div>
    <!-- Enabled by the script once a file has been chosen. -->
    <button type="button" id="processVoice" class="secondary" style="width: 100%;" disabled>
      🔄 Process Voice Sample
    </button>
    <div id="voiceStatus" class="mt-2"></div>
  </div>
</fieldset>
<!-- Playback settings. The script mirrors the slider value into #spdVal. -->
<fieldset>
  <legend>⚙️ Settings</legend>
  <label for="spd">
    Speed <span id="spdVal">1.00</span>x
  </label>
  <input id="spd" type="range" min="0.5" max="2" step="0.05" value="1.0">
</fieldset>
| </div> | |
| <!-- Middle Column: Text & Generation --> | |
| <div class="col"> | |
<!-- Text to synthesize plus live statistics; the three counters are filled in by the script. -->
<fieldset>
  <legend>📝 Text Input</legend>
  <!-- aria-label gives the textarea an accessible name; the placeholder alone is not a label. -->
  <textarea id="txt" aria-label="Text to convert to speech" placeholder="Enter your text here... (unlimited length supported)">Welcome to the ultimate text-to-speech studio! With access to over 900 premium voices from Piper, Kokoro, and Kitten TTS, you can create professional-quality audio in any language and accent. Try our advanced voice cloning feature to use your own voice!</textarea>
  <div class="mt-1">
    <span class="muted">Characters: <span id="charCount">0</span></span> |
    <span class="muted">Words: <span id="wordCount">0</span></span> |
    <span class="muted">Chunks: <span id="chunkCount">0</span></span>
  </div>
</fieldset>
<!-- Generation controls: trigger button, status message, progress bar, player and download link. -->
<fieldset>
  <legend>🎙️ Generate Audio</legend>
  <button type="button" id="go" style="width: 100%; margin-bottom: 16px;">
    🎤 Generate Speech
  </button>
  <!-- Live region so status text written by the script is announced by screen readers. -->
  <div id="statusBox" class="mb-2" role="status" aria-live="polite"></div>
  <!-- Progress Bar -->
  <div id="progressBox" class="hidden mb-2">
    <div style="background: rgba(255,255,255,0.1); border-radius: 8px; overflow: hidden; height: 24px;">
      <div id="progressBar" style="background: linear-gradient(90deg, var(--primary), var(--secondary)); height: 100%; width: 0%; transition: width 0.3s; display: flex; align-items: center; justify-content: center;">
        <span id="progressText" style="font-size: 0.75rem; font-weight: 600;">0%</span>
      </div>
    </div>
  </div>
  <audio id="player" controls class="hidden"></audio>
  <div id="downloadBox" class="hidden mt-2">
    <!-- href and the final file name are assigned by the script after generation. -->
    <a id="download" download="tts.wav" style="width: 100%; text-align: center;">
      💾 Download Audio
    </a>
  </div>
</fieldset>
| </div> | |
| <!-- Right Column: Status & Info --> | |
| <div class="col"> | |
<!-- Status chips; the script updates their text and class names. -->
<fieldset>
  <legend>💻 System Status</legend>
  <div style="display: flex; flex-wrap: wrap; gap: 4px;">
    <span id="backend" class="chip">Init...</span>
    <span id="model" class="chip">Ready</span>
    <span id="engine" class="chip">Piper</span>
    <span id="status" class="chip">Idle</span>
  </div>
</fieldset>
<!-- Activity log; the script prepends timestamped lines to #log. -->
<fieldset>
  <legend>📋 Activity Log</legend>
  <div id="log" class="mono" style="font-size: 0.75rem;"></div>
</fieldset>
<!-- Static comparison of the three engines with a short recommendation list. -->
<fieldset>
  <legend>ℹ️ Engine Comparison</legend>
  <div class="muted" style="font-size: 0.85rem;">
    <table style="width: 100%; border-collapse: collapse;">
      <thead>
        <tr style="border-bottom: 1px solid rgba(255,255,255,0.1);">
          <th scope="col" style="text-align: left; padding: 4px;">Engine</th>
          <th scope="col" style="text-align: center; padding: 4px;">Voices</th>
          <th scope="col" style="text-align: center; padding: 4px;">Quality</th>
        </tr>
      </thead>
      <tbody>
        <tr>
          <td style="padding: 4px;"><strong>Piper</strong></td>
          <td style="text-align: center; padding: 4px;">904</td>
          <td style="text-align: center; padding: 4px;">⭐⭐⭐⭐</td>
        </tr>
        <tr>
          <td style="padding: 4px;"><strong>Kokoro</strong></td>
          <td style="text-align: center; padding: 4px;">21</td>
          <td style="text-align: center; padding: 4px;">⭐⭐⭐⭐⭐</td>
        </tr>
        <tr>
          <td style="padding: 4px;"><strong>Kitten</strong></td>
          <td style="text-align: center; padding: 4px;">8</td>
          <td style="text-align: center; padding: 4px;">⭐⭐⭐</td>
        </tr>
      </tbody>
    </table>
    <p class="mt-1"><strong>💡 Recommendation:</strong></p>
    <ul style="margin: 8px 0; padding-left: 20px; line-height: 1.8;">
      <li><strong>Best Quality:</strong> Kokoro (if English)</li>
      <li><strong>Most Voices:</strong> Piper (904 options)</li>
      <li><strong>Fastest:</strong> Kitten (lightweight)</li>
      <li><strong>Custom:</strong> Voice Cloning</li>
    </ul>
  </div>
</fieldset>
| </div> | |
| </div> | |
| <script type="module"> | |
| // Import onnx-tts-web library | |
| import { createSession } from 'https://cdn.jsdelivr.net/npm/onnx-tts-web@latest/dist/index.js'; | |
// Shorthand for document.querySelector: returns the first element matching the CSS selector.
const $ = (selector) => document.querySelector(selector);
| // ===== UTILITIES ===== | |
/**
 * Prepend a timestamped line to the #log element and echo the raw message to
 * the console. Only the first 25 lines of the existing log text are retained.
 *
 * @param {string} msg Message to record.
 */
const log = (msg) => {
  const logEl = $("#log");
  const stamp = new Date().toLocaleTimeString();
  const retained = logEl.textContent.split('\n').slice(0, 25).join('\n');
  logEl.textContent = `[${stamp}] ${msg}\n` + retained;
  console.log(msg);
};
/**
 * Display a message in #statusBox.
 *
 * @param {string} msg  Text to show.
 * @param {string} type Appended to the "status-message" class name; defaults
 *                      to 'info'. Replaces whatever classes the box had.
 */
const showStatus = (msg, type = 'info') => {
  const statusEl = $("#statusBox");
  statusEl.textContent = msg;
  statusEl.className = `status-message ${type}`;
};
/**
 * Set the progress bar width and label, and show or hide the bar.
 *
 * @param {number} percent Width of the bar in percent; any value that is not
 *                         greater than 0 hides #progressBox.
 * @param {?string} text   Label to display; when falsy, the rounded
 *                         percentage is shown instead.
 */
const updateProgress = (percent, text = null) => {
  const label = text || (Math.round(percent) + "%");
  $("#progressBar").style.width = percent + "%";
  $("#progressText").textContent = label;
  // force === true adds "hidden", force === false removes it.
  $("#progressBox").classList.toggle("hidden", !(percent > 0));
};
| // ===== TEXT STATS ===== | |
/**
 * Refresh the character, word and chunk counters under the text box.
 *
 * The chunk count comes from chunkText() applied to the trimmed text with the
 * same 200-character limit the Generate handler uses, so the number shown
 * matches the number of chunks that will actually be synthesized (a plain
 * ceil(chars / 200) estimate does not, because chunks break at sentences).
 */
const updateCounts = () => {
  const text = $("#txt").value;
  const chars = text.length;
  const words = text.trim().split(/\s+/).filter(Boolean).length;
  const chunks = chunkText(text.trim(), 200).length;
  $("#charCount").textContent = chars;
  $("#wordCount").textContent = words;
  $("#chunkCount").textContent = chunks;
};
// Keep the counters live while typing, and populate them for the initial text.
$("#txt").addEventListener("input", updateCounts);
updateCounts();
| // ===== SPEED DISPLAY ===== | |
// Mirror the slider into the "Speed N.NNx" label and apply it to the player
// right away, so audio that is already loaded follows the slider instead of
// keeping the rate it had when it was generated.
$("#spd").addEventListener("input", () => {
  const rate = parseFloat($("#spd").value);
  $("#spdVal").textContent = rate.toFixed(2);
  $("#player").playbackRate = rate;
});
| // ===== ENGINE SWITCHING ===== | |
// Engine currently selected in the UI; read when loading a model and when generating.
let currentEngine = 'piper';
// Session object returned by createSession(), or null when none is loaded.
let ttsSession = null;
// True while initTTSSession() is running, to prevent overlapping loads.
let isInitializing = false;
// One-line descriptions shown in #engineInfo, keyed by the <option> values of #engineSelect.
const engineInfo = {
  piper: "Piper TTS: 904 voices, 50+ languages, 3-5x realtime speed",
  kokoro: "Kokoro TTS: 21 expressive voices, highest quality, 24kHz audio",
  kitten: "Kitten TTS: 8 voices, fastest generation, only 24MB model",
  clone: "Voice Cloning: Upload your own voice sample for custom TTS"
};

/**
 * Handle a change of #engineSelect: remember the engine, refresh the info box
 * and engine chip, show the matching voice panel, and load the model for every
 * engine except 'clone'.
 */
const switchEngine = async () => {
  const engine = $("#engineSelect").value;
  currentEngine = engine;

  // Render "<strong>Name:</strong> details", the same shape as the static
  // markup, using DOM nodes rather than innerHTML.
  const blurb = engineInfo[engine] ?? "";
  const headingEnd = blurb.indexOf(":") + 1;
  const heading = document.createElement("strong");
  heading.textContent = blurb.slice(0, headingEnd);
  $("#engineInfo").querySelector("p").replaceChildren(heading, blurb.slice(headingEnd));
  $("#engine").textContent = engine.charAt(0).toUpperCase() + engine.slice(1);

  // Show only the sub-panel of the selected engine. #voicePanel itself must
  // stay visible: #clonePanel is one of its children, so hiding the fieldset
  // for the 'clone' engine would hide the upload controls as well.
  $("#piperVoices").classList.toggle("hidden", engine !== "piper");
  $("#kokoroVoices").classList.toggle("hidden", engine !== "kokoro");
  $("#kittenVoices").classList.toggle("hidden", engine !== "kitten");
  $("#clonePanel").classList.toggle("hidden", engine !== "clone");
  $("#voicePanel").classList.remove("hidden");

  log(`Switched to ${engine.toUpperCase()} engine`);
  if (engine !== 'clone') {
    await initTTSSession();
  }
};

$("#engineSelect").addEventListener("change", switchEngine);
// Changing the Piper voice or quality selects a different model file, so reload.
$("#piperLang").addEventListener("change", () => { if (currentEngine === 'piper') initTTSSession(); });
$("#piperQuality").addEventListener("change", () => { if (currentEngine === 'piper') initTTSSession(); });
| // ===== TTS SESSION INITIALIZATION ===== | |
// Set when a load is requested while another one is still running, so the most
// recent selection is loaded as soon as the running load settles.
let reinitRequested = false;

/**
 * Build the model and config URLs for a Piper voice.
 *
 * Files in the rhasspy/piper-voices repository live under
 * <lang>/<lang_REGION>/<voice>/<quality>/<lang_REGION>-<voice>-<quality>.onnx,
 * so the option value ("en_US-lessac") has to be split into its parts.
 *
 * @param {string} voice   Option value such as "en_US-lessac".
 * @param {string} quality "low", "medium" or "high".
 * @returns {{modelUrl: string, configUrl: string}}
 * @throws {Error} When the value does not include a voice name (e.g. "es_ES").
 */
function resolvePiperUrls(voice, quality) {
  const parts = /^([a-z]{2,3})_([A-Za-z]{2,})-(.+)$/.exec(voice);
  if (!parts) {
    throw new Error(`Piper voice "${voice}" does not name a specific voice model.`);
  }
  const [, family, region, name] = parts;
  const locale = `${family}_${region}`;
  const baseUrl = `https://huggingface.co/rhasspy/piper-voices/resolve/main/${family}/${locale}/${name}/${quality}/`;
  const stem = `${locale}-${name}-${quality}`;
  return { modelUrl: `${baseUrl}${stem}.onnx`, configUrl: `${baseUrl}${stem}.onnx.json` };
}

/**
 * Load the model for the currently selected engine and store the resulting
 * session in `ttsSession`.
 *
 * The Generate button is disabled for the duration of the load and the #model
 * chip reflects progress ("Loading…", "Ready", "Failed"). Errors are logged
 * and reported through the return value; the function does not throw.
 *
 * @returns {Promise<boolean>} true when a session was created; false when the
 *   load failed or another load was already running (in which case a reload
 *   is queued for when that one finishes).
 */
async function initTTSSession() {
  if (isInitializing) {
    // Do not drop the request: the selection changed mid-load, so reload afterwards.
    reinitRequested = true;
    log("Initialization already in progress. Please wait.");
    return false;
  }
  isInitializing = true;
  $("#go").disabled = true;
  try {
    $("#model").textContent = "Loading...";
    $("#model").className = "chip warning";

    // Dispose the previous session first. This frees memory and guarantees
    // that a failed switch never leaves the old engine's session in place
    // while the UI shows a different engine.
    if (ttsSession) {
      const previous = ttsSession;
      ttsSession = null;
      await previous.dispose();
      log("Previous session disposed.");
    }

    let modelUrl, configUrl;
    if (currentEngine === 'piper') {
      const voice = $("#piperLang").value;
      const quality = $("#piperQuality").value;
      // NOTE(review): not every Piper voice is published at every quality
      // level; a missing combination presumably surfaces as a load error.
      ({ modelUrl, configUrl } = resolvePiperUrls(voice, quality));
      log(`Initializing Piper: ${voice} (${quality})`);
    } else if (currentEngine === 'kokoro') {
      const baseUrl = `https://huggingface.co/therealtimex/kokoro-tts-web/resolve/main/`;
      modelUrl = `${baseUrl}model.onnx`;
      configUrl = `${baseUrl}config.json`;
      log(`Initializing Kokoro TTS`);
    } else if (currentEngine === 'kitten') {
      const baseUrl = `https://huggingface.co/therealtimex/kitten-tts-web/resolve/main/`;
      modelUrl = `${baseUrl}model.onnx`;
      configUrl = `${baseUrl}config.json`;
      log(`Initializing Kitten TTS`);
    }
    if (!modelUrl || !configUrl) {
      throw new Error("Invalid engine configuration.");
    }

    ttsSession = await createSession({
      modelUrl: modelUrl,
      configUrl: configUrl,
      // Prefer WebGPU when the browser exposes it, with WASM as the fallback.
      executionProviders: navigator.gpu ? ['webgpu', 'wasm'] : ['wasm'],
      // Loading-progress callback; assumes p.progress is a 0..1 fraction (TODO confirm).
      onprogress: (p) => {
        const percent = Math.round(p.progress * 100);
        $("#model").textContent = `Loading ${percent}%`;
      }
    });
    $("#model").textContent = "Ready";
    $("#model").className = "chip success";
    return true;
  } catch (err) {
    log(`ERROR initializing: ${err.message}`);
    $("#model").textContent = "Failed";
    $("#model").className = "chip danger";
    return false;
  } finally {
    isInitializing = false;
    $("#go").disabled = false;
    if (reinitRequested) {
      reinitRequested = false;
      // The 'clone' engine has no model to load, so only reload for real engines.
      if (currentEngine !== 'clone') {
        void initTTSSession();
      }
    }
  }
}
| // ===== VOICE CLONING (from previous implementation) ===== | |
// Most recent voice embedding (512 unit-normalized floats), or null when no
// sample has been processed successfully.
let clonedEmbedding = null;

// The Process button is only usable while a file is selected.
$("#voiceFile").addEventListener("change", () => {
  const file = $("#voiceFile").files[0];
  $("#processVoice").disabled = !file;
  if (file) {
    log("Voice file selected: " + file.name);
  }
});

/**
 * Decode the chosen audio file, trim it to 60 s, resample it to 16 kHz mono
 * and reduce it to a 512-value embedding stored in `clonedEmbedding`.
 *
 * The embedding is a simple per-segment statistic (mean ± standard deviation
 * of 512 equal slices), normalized to unit length. Failures are reported in
 * the status box and the log; `clonedEmbedding` is only replaced on success.
 */
$("#processVoice").addEventListener("click", async () => {
  const file = $("#voiceFile").files[0];
  if (!file) {
    showStatus("Please select a voice file!", 'error');
    return;
  }
  $("#processVoice").disabled = true;
  showStatus("Processing voice sample...", 'info');
  log("Processing: " + file.name);

  const EMBEDDING_SIZE = 512;
  const TARGET_RATE = 16000;
  const MAX_SECONDS = 60;
  let audioContext = null;
  try {
    const arrayBuffer = await file.arrayBuffer();
    audioContext = new (window.AudioContext || window.webkitAudioContext)();
    let audioBuffer = await audioContext.decodeAudioData(arrayBuffer);

    if (audioBuffer.duration > MAX_SECONDS) {
      showStatus("⚠️ Trimming to 60s...", 'warning');
      // Use the buffer's own rate so the frame count matches the data being cut.
      const newLength = Math.min(audioBuffer.length, Math.floor(audioBuffer.sampleRate * MAX_SECONDS));
      const trimmedBuffer = audioContext.createBuffer(1, newLength, audioBuffer.sampleRate);
      trimmedBuffer.copyToChannel(audioBuffer.getChannelData(0).slice(0, newLength), 0);
      audioBuffer = trimmedBuffer;
    }

    if (audioBuffer.sampleRate !== TARGET_RATE) {
      // The frame count must be a positive integer.
      const frames = Math.max(1, Math.ceil(audioBuffer.duration * TARGET_RATE));
      const offlineContext = new OfflineAudioContext(1, frames, TARGET_RATE);
      const source = offlineContext.createBufferSource();
      source.buffer = audioBuffer;
      source.connect(offlineContext.destination);
      source.start();
      audioBuffer = await offlineContext.startRendering();
    }

    const audioData = audioBuffer.getChannelData(0);
    // With fewer samples than slices every slice would be empty and the
    // statistics below would divide by zero.
    if (audioData.length < EMBEDDING_SIZE) {
      throw new Error("Voice sample is too short to process.");
    }

    // Create embedding
    const embedding = new Float32Array(EMBEDDING_SIZE);
    const chunkSize = Math.floor(audioData.length / EMBEDDING_SIZE);
    for (let i = 0; i < EMBEDDING_SIZE; i++) {
      const start = i * chunkSize;
      const end = Math.min(start + chunkSize, audioData.length);
      let sum = 0, sumSq = 0;
      for (let j = start; j < end; j++) {
        sum += audioData[j];
        sumSq += audioData[j] * audioData[j];
      }
      const mean = sum / (end - start);
      const variance = (sumSq / (end - start)) - (mean * mean);
      // Alternate the sign of the deviation term between neighbouring slices.
      embedding[i] = mean + Math.sqrt(Math.abs(variance)) * (i % 2 === 0 ? 1 : -1);
    }

    // Normalize to unit length; an all-zero (silent) sample cannot be normalized.
    let norm = 0;
    for (let i = 0; i < EMBEDDING_SIZE; i++) norm += embedding[i] * embedding[i];
    norm = Math.sqrt(norm);
    if (!(norm > 0)) {
      throw new Error("Voice sample appears to be silent.");
    }
    for (let i = 0; i < EMBEDDING_SIZE; i++) embedding[i] /= norm;

    clonedEmbedding = embedding;
    showStatus("✅ Voice processed!", 'success');
    log("Voice embedding created");
    $("#voiceStatus").innerHTML = '<div class="status-message success">✅ Voice ready!</div>';
  } catch (err) {
    log("ERROR: " + err.message);
    showStatus("Error: " + err.message, 'error');
    $("#voiceStatus").innerHTML = '<div class="status-message error">❌ Failed</div>';
  } finally {
    $("#processVoice").disabled = false;
    // Release the audio context; browsers limit how many may be open at once.
    if (audioContext && typeof audioContext.close === "function") {
      Promise.resolve(audioContext.close()).catch(() => {});
    }
  }
});
| // ===== TEXT CHUNKING & AUDIO CONCATENATION ===== | |
/**
 * Split text into chunks of at most `maxChars` characters.
 *
 * Chunks are built from whole sentences (text up to and including a run of
 * '.', '!' or '?'). Text after the last terminator is kept as a final
 * sentence rather than dropped. A single sentence longer than `maxChars` is
 * broken at the last whitespace that fits, or cut hard when it contains none,
 * so every returned chunk respects the limit.
 *
 * @param {string} text       Text to split.
 * @param {number} [maxChars] Maximum chunk length; values below 1 are treated as 1.
 * @returns {string[]} Trimmed, non-empty chunks in original order; an empty
 *   array for empty or whitespace-only text.
 */
function chunkText(text, maxChars = 200) {
  const limit = Math.max(1, Math.floor(maxChars) || 1);
  // First alternative: a sentence body plus optional terminators (this also
  // captures an unterminated tail). Second alternative: terminators at the
  // very start of the text. Together they consume every character.
  const sentences = text.match(/[^.!?]+[.!?]*|[.!?]+/g) || [];
  const chunks = [];
  let current = "";

  const flush = () => {
    const trimmed = current.trim();
    if (trimmed) chunks.push(trimmed);
    current = "";
  };

  for (const sentence of sentences) {
    if ((current + sentence).length <= limit) {
      current += sentence;
      continue;
    }
    flush();
    let rest = sentence.trim();
    while (rest.length > limit) {
      // Index of the last whitespace within the first limit + 1 characters;
      // -1 when the window holds one unbroken run.
      const lastSpace = rest.slice(0, limit + 1).search(/\s\S*$/);
      const cut = lastSpace > 0 ? lastSpace : limit;
      chunks.push(rest.slice(0, cut).trim());
      rest = rest.slice(cut).trim();
    }
    current = rest;
  }
  flush();
  return chunks;
}
/**
 * Join several sample arrays end to end into one Float32Array.
 *
 * @param {ArrayLike<number>[]} audioArrays Sample arrays, in playback order.
 * @returns {Float32Array} All samples of all inputs, concatenated.
 */
function concatenateAudio(audioArrays) {
  let totalLength = 0;
  for (const part of audioArrays) {
    totalLength += part.length;
  }
  const merged = new Float32Array(totalLength);
  // The accumulator is the write position for the next part.
  audioArrays.reduce((writeAt, part) => {
    merged.set(part, writeAt);
    return writeAt + part.length;
  }, 0);
  return merged;
}
| // ===== GENERATION ===== | |
// Object URL of the most recently generated clip. It is revoked when a newer
// clip replaces it so superseded WAV blobs can be released.
let lastAudioUrl = null;

/**
 * Generate speech for the text in #txt with the loaded session.
 *
 * The text is split with chunkText(), each chunk is synthesized in order, the
 * audio is concatenated and encoded as WAV, and the result is wired to the
 * player and the download link. Progress and errors are shown in the status
 * box, the #status chip and the activity log.
 */
$("#go").addEventListener("click", async () => {
  const text = $("#txt").value.trim();
  if (!text) {
    showStatus("Please enter text!", 'error');
    return;
  }
  const btn = $("#go");
  btn.disabled = true;
  $("#status").className = "chip warning";
  $("#status").textContent = "Generating...";
  updateProgress(0);
  try {
    if (currentEngine === 'clone') {
      // Voice cloning is complex and requires a separate model (like SpeechT5).
      // This is a placeholder for that logic.
      showStatus("Voice cloning not implemented in this version.", 'error');
      throw new Error("Voice cloning is a placeholder feature.");
    }
    if (!ttsSession) {
      showStatus("TTS session not ready. Please wait or re-select engine.", 'error');
      throw new Error("TTS session not initialized.");
    }

    const chunks = chunkText(text, 200);
    log(`Processing ${chunks.length} chunk(s)...`);
    showStatus(`Processing ${chunks.length} chunk(s)...`, 'info');

    // Piper selects its voice through the model file, so voiceId stays
    // undefined for it; only Kokoro and Kitten pass one.
    let voiceId;
    if (currentEngine === 'kokoro') {
      voiceId = $("#kokoroVoice").value;
    } else if (currentEngine === 'kitten') {
      voiceId = parseInt($("#kittenVoice").value, 10);
    }

    const audioChunks = [];
    let sampleRate;
    for (let i = 0; i < chunks.length; i++) {
      const chunk = chunks[i];
      const progress = ((i + 1) / chunks.length) * 100;
      updateProgress(progress, `Chunk ${i + 1}/${chunks.length}`);
      log(`Generating chunk ${i + 1}: "${chunk.substring(0, 30)}..."`);
      const result = await ttsSession.run({
        text: chunk,
        voiceId: voiceId, // Only used by Kokoro/Kitten
      });
      audioChunks.push(result.audio);
      // All chunks come from the same session; the value from the latest
      // result is kept.
      sampleRate = result.sampleRate;
    }

    log("Concatenating audio chunks...");
    updateProgress(100, "Finalizing...");
    const finalAudio = concatenateAudio(audioChunks);
    if (finalAudio.length === 0) {
      throw new Error("The engine returned no audio.");
    }
    log(`Generated ${finalAudio.length} samples (${(finalAudio.length / sampleRate).toFixed(1)}s)`);

    // Create a WAV blob.
    // NOTE(review): encodeWAV is given only the samples; presumably it takes
    // the sample rate from the session. Confirm against the library.
    const blob = new Blob([ttsSession.encodeWAV(finalAudio)], { type: "audio/wav" });
    const url = URL.createObjectURL(blob);

    // Player
    const player = $("#player");
    player.src = url;
    player.playbackRate = parseFloat($("#spd").value);
    player.classList.remove("hidden");

    // Download
    $("#download").href = url;
    $("#download").download = `tts-${currentEngine}-${Date.now()}.wav`;
    $("#downloadBox").classList.remove("hidden");

    // The player and link now point at the new URL, so the old one can go.
    if (lastAudioUrl) {
      URL.revokeObjectURL(lastAudioUrl);
    }
    lastAudioUrl = url;

    $("#status").className = "chip success";
    $("#status").textContent = "Success";
    showStatus("✅ Audio generated successfully!", 'success');
    updateProgress(0);
  } catch (err) {
    log(`ERROR: ${err.message}`);
    console.error(err);
    $("#status").className = "chip danger";
    $("#status").textContent = "Error";
    showStatus(`Error: ${err.message}`, 'error');
    updateProgress(0);
  } finally {
    // Stay disabled while a model load is running; that load re-enables the
    // button itself when it finishes.
    btn.disabled = isInitializing;
  }
});
| // ===== INITIALIZATION ===== | |
// Startup: announce the app in the activity log, show which backend the
// browser offers (WebGPU when navigator.gpu exists, WASM otherwise), then load
// the model for the default engine.
log("🚀 Ultimate TTS Studio Ready!");
$("#backend").className = "chip success";
$("#backend").textContent = navigator.gpu ? "WebGPU" : "WASM";
// Initial load
await initTTSSession();
| </script> | |
| </body> | |
| </html> | |