| <!DOCTYPE html> |
| <html lang="en"> |
| <head> |
| <meta charset="UTF-8"> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| <title>IndexTTS2 - Test Console</title> |
| <style> |
| /* Reset: border-box sizing and no default spacing on every element and pseudo-element. */ |
| *, |
| *::before, |
| *::after { |
|   margin: 0; |
|   padding: 0; |
|   box-sizing: border-box; |
| } |
| /* Page shell: dark background, light text, at least one viewport tall. */ |
| body { |
|   min-height: 100vh; |
|   padding: 2rem; |
|   color: #e2e0eb; |
|   background: #0f0d1a; |
|   font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; |
| } |
| /* Centered content column capped at 720px. */ |
| .container { |
|   max-width: 720px; |
|   margin: 0 auto; |
| } |
| /* Page title rendered as gradient-filled text. */ |
| h1 { |
|   font-size: 1.75rem; |
|   font-weight: 700; |
|   background: linear-gradient(135deg, #a78bfa, #7c3aed); |
|   /* Prefixed form first, then the standard property, so the gradient is clipped to the glyphs under either name. */ |
|   -webkit-background-clip: text; |
|   background-clip: text; |
|   -webkit-text-fill-color: transparent; |
|   margin-bottom: 0.25rem; |
| } |
| /* Muted tagline shown directly under the page title. */ |
| .subtitle { |
|   margin-bottom: 2rem; |
|   font-size: 0.875rem; |
|   color: #9490a8; |
| } |
| /* Rounded panel that groups one section of the console. */ |
| .card { |
|   margin-bottom: 1.25rem; |
|   padding: 1.5rem; |
|   border: 1px solid #2d2a3a; |
|   border-radius: 12px; |
|   background: #1a1726; |
| } |
| /* Small uppercase caption at the top of each card. */ |
| .card-title { |
|   margin-bottom: 1rem; |
|   color: #a78bfa; |
|   font-size: 0.8rem; |
|   font-weight: 600; |
|   letter-spacing: 0.05em; |
|   text-transform: uppercase; |
| } |
| /* Field labels sit on their own line above the control they describe. */ |
| label { |
|   display: block; |
|   margin-bottom: 0.35rem; |
|   color: #b0adc0; |
|   font-size: 0.8rem; |
|   font-weight: 500; |
| } |
| /* Shared look for the text-like form controls. */ |
| textarea, input[type="text"], input[type="number"], select { |
|   width: 100%; |
|   background: #12101e; |
|   border: 1px solid #2d2a3a; |
|   border-radius: 8px; |
|   color: #e2e0eb; |
|   padding: 0.6rem 0.75rem; |
|   font-size: 0.875rem; |
|   margin-bottom: 1rem; |
|   /* The default ring is removed here; the :focus and :focus-visible rules below replace it. */ |
|   outline: none; |
|   transition: border-color 0.2s; |
| } |
| textarea:focus, input:focus, select:focus { |
|   border-color: #7c3aed; |
| } |
| /* A 1px border tint alone is easy to miss, so focused controls also get a clearly visible ring. */ |
| /* Must stay after the base rule: it has the same specificity as input[type="..."] and wins by order. */ |
| textarea:focus-visible, input:focus-visible, select:focus-visible { |
|   outline: 2px solid #a78bfa; |
|   outline-offset: 2px; |
| } |
| textarea { resize: vertical; min-height: 80px; } |
| /* Two-column layout: children of a row share the width equally. */ |
| .row { |
|   display: flex; |
|   gap: 1rem; |
| } |
| .row > * { |
|   flex: 1; |
| } |
| /* Full-width gradient call-to-action button. */ |
| button.primary { |
|   width: 100%; |
|   padding: 0.75rem; |
|   border: none; |
|   border-radius: 8px; |
|   color: white; |
|   background: linear-gradient(135deg, #7c3aed, #6d28d9); |
|   font-size: 0.95rem; |
|   font-weight: 600; |
|   cursor: pointer; |
|   transition: opacity 0.2s; |
| } |
| button.primary:hover { |
|   opacity: 0.9; |
| } |
| /* Declared after :hover so a disabled button stays dimmed even while hovered. */ |
| button.primary:disabled { |
|   cursor: not-allowed; |
|   opacity: 0.5; |
| } |
| /* Status banner: hidden until the script assigns one of the state classes below. */ |
| #status { |
|   display: none; |
|   margin-top: 1rem; |
|   padding: 0.75rem; |
|   border-radius: 8px; |
|   font-size: 0.85rem; |
| } |
| /* Any state class makes the banner visible; each state only differs in colors. */ |
| #status.error, |
| #status.success, |
| #status.loading { |
|   display: block; |
| } |
| #status.error { |
|   color: #f87171; |
|   border: 1px solid #5c2338; |
|   background: #2d1520; |
| } |
| #status.success { |
|   color: #4ade80; |
|   border: 1px solid #235c2d; |
|   background: #152d1a; |
| } |
| #status.loading { |
|   color: #a78bfa; |
|   border: 1px solid #2d2a3a; |
|   background: #1a1726; |
| } |
| /* Result area: hidden by default; the script toggles its inline display. */ |
| #audioResult { |
|   display: none; |
|   margin-top: 1rem; |
| } |
| #audioResult audio { |
|   width: 100%; |
|   margin-top: 0.5rem; |
| } |
| /* Small helper text; the negative top margin pulls it up under the preceding control. */ |
| .info { |
|   margin-top: -0.5rem; |
|   margin-bottom: 1rem; |
|   color: #706d82; |
|   font-size: 0.75rem; |
| } |
| </style> |
| </head> |
| <body> |
| <div class="container"> |
| <!-- Page banner: title and tagline grouped in a header landmark. --> |
| <header> |
|   <h1>IndexTTS2</h1> |
|   <p class="subtitle">Emotionally expressive zero-shot voice cloning TTS — Test Console</p> |
| </header> |
|
|
| <!-- Voice Reference: the audio sample whose timbre is cloned. --> |
| <div class="card"> |
|   <!-- Real heading (styled by .card-title) so the section shows up in the document outline. --> |
|   <h2 class="card-title">Voice Reference</h2> |
|   <label for="voiceFile">Upload reference audio (WAV, 6-15 seconds recommended)</label> |
|   <!-- aria-describedby ties the hint paragraph to the control for assistive technology. --> |
|   <input type="file" id="voiceFile" accept="audio/*" aria-describedby="voiceFileHint" style="margin-bottom:1rem"> |
|   <p class="info" id="voiceFileHint">IndexTTS2 clones the timbre from your reference audio for zero-shot voice synthesis.</p> |
| </div> |
|
|
| <!-- Text & emotion: what to say and how it should sound. --> |
| <div class="card"> |
|   <!-- Real heading (styled by .card-title); the ampersand is escaped as a character reference. --> |
|   <h2 class="card-title">Text &amp; Emotion</h2> |
|   <label for="inputText">Text to synthesize</label> |
|   <textarea id="inputText" rows="4" placeholder="Enter text to convert to speech..."></textarea> |
| |
|   <label for="emotion">Emotion</label> |
|   <select id="emotion"> |
|     <option value="neutral" selected>Neutral</option> |
|     <option value="happy">Happy</option> |
|     <option value="sad">Sad</option> |
|     <option value="angry">Angry</option> |
|     <option value="fear">Fear</option> |
|     <option value="surprise">Surprise</option> |
|     <option value="disgust">Disgust</option> |
|     <option value="excited">Excited</option> |
|     <option value="calm">Calm</option> |
|     <option value="anxious">Anxious</option> |
|     <option value="hopeful">Hopeful</option> |
|     <option value="melancholy">Melancholy</option> |
|     <option value="tender">Tender</option> |
|     <option value="proud">Proud</option> |
|   </select> |
| |
|   <!-- Integer controls, each limited to 1-100 by its min/max attributes. --> |
|   <div class="row"> |
|     <div> |
|       <label for="intensity">Intensity (1-100)</label> |
|       <input type="number" id="intensity" value="50" min="1" max="100"> |
|     </div> |
|     <div> |
|       <label for="volume">Volume (1-100)</label> |
|       <input type="number" id="volume" value="75" min="1" max="100"> |
|     </div> |
|   </div> |
| |
|   <!-- Fractional adjustments in 0.1 steps, limited to -5..5; 0 is the default. --> |
|   <div class="row"> |
|     <div> |
|       <label for="speed">Speed adjust</label> |
|       <input type="number" id="speed" value="0" min="-5" max="5" step="0.1"> |
|     </div> |
|     <div> |
|       <label for="pitch">Pitch adjust</label> |
|       <input type="number" id="pitch" value="0" min="-5" max="5" step="0.1"> |
|     </div> |
|   </div> |
| </div> |
|
|
| <!-- Authentication: optional bearer token sent with the request. --> |
| <div class="card"> |
|   <!-- Real heading (styled by .card-title) so the section shows up in the document outline. --> |
|   <h2 class="card-title">Authentication</h2> |
|   <label for="apiKey">API Key (if set on server)</label> |
|   <!-- A secret should not be remembered by autofill, spell-checked, or auto-capitalized. --> |
|   <input type="text" id="apiKey" placeholder="Leave empty if no auth required" autocomplete="off" autocapitalize="off" spellcheck="false"> |
| </div> |
|
|
| <!-- Explicit type="button": a bare button defaults to submit if it ever ends up inside a form. --> |
| <button type="button" class="primary" id="generateBtn" onclick="generate()">Generate Speech</button> |
|
|
| <!-- Live region: the script writes loading, success and error messages here. --> |
| <div id="status" role="status" aria-live="polite"></div> |
| <!-- Player for the generated audio; revealed by the script after a successful request. --> |
| <div id="audioResult"> |
|   <audio id="audioPlayer" controls aria-label="Generated speech"></audio> |
| </div> |
| </div> |
|
|
| <script> |
| async function fileToBase64(file) { |
| return new Promise((resolve, reject) => { |
| const reader = new FileReader(); |
| reader.onload = () => { |
| const base64 = reader.result.split(',')[1]; |
| resolve(base64); |
| }; |
| reader.onerror = reject; |
| reader.readAsDataURL(file); |
| }); |
| } |
| |
| async function generate() { |
| const status = document.getElementById('status'); |
| const btn = document.getElementById('generateBtn'); |
| const audioResult = document.getElementById('audioResult'); |
| const audioPlayer = document.getElementById('audioPlayer'); |
| |
| const voiceFile = document.getElementById('voiceFile').files[0]; |
| const text = document.getElementById('inputText').value.trim(); |
| const emotion = document.getElementById('emotion').value; |
| const intensity = parseInt(document.getElementById('intensity').value); |
| const volume = parseInt(document.getElementById('volume').value); |
| const speed = parseFloat(document.getElementById('speed').value); |
| const pitch = parseFloat(document.getElementById('pitch').value); |
| const apiKey = document.getElementById('apiKey').value.trim(); |
| |
| if (!voiceFile) { |
| status.className = 'error'; |
| status.textContent = 'Please upload a reference voice audio file.'; |
| return; |
| } |
| if (!text) { |
| status.className = 'error'; |
| status.textContent = 'Please enter text to synthesize.'; |
| return; |
| } |
| |
| btn.disabled = true; |
| status.className = 'loading'; |
| status.textContent = 'Generating speech... (this may take a moment)'; |
| audioResult.style.display = 'none'; |
| |
| try { |
| const voiceBase64 = await fileToBase64(voiceFile); |
| |
| const headers = { 'Content-Type': 'application/json' }; |
| if (apiKey) headers['Authorization'] = `Bearer ${apiKey}`; |
| |
| const resp = await fetch('/ConvertTextToSpeech', { |
| method: 'POST', |
| headers, |
| body: JSON.stringify({ |
| input_text: text, |
| voice_to_clone_sample: voiceBase64, |
| emotion_set: [emotion], |
| intensity, |
| volume, |
| speed_adjust: speed, |
| pitch_adjust: pitch, |
| }), |
| }); |
| |
| if (!resp.ok) { |
| const err = await resp.json(); |
| throw new Error(err.error || `HTTP ${resp.status}`); |
| } |
| |
| const blob = await resp.blob(); |
| const url = URL.createObjectURL(blob); |
| audioPlayer.src = url; |
| audioResult.style.display = 'block'; |
| status.className = 'success'; |
| status.textContent = 'Speech generated successfully!'; |
| } catch (e) { |
| status.className = 'error'; |
| status.textContent = `Error: ${e.message}`; |
| } finally { |
| btn.disabled = false; |
| } |
| } |
| </script> |
| </body> |
| </html> |
|
|