Spaces:
Running
Running
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>KOKORO TTS Studio</title>
<!-- Open connections to the Google Fonts hosts early, before the stylesheet below is requested -->
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
<!-- Lucide icon library. Loaded synchronously because the inline script at the end of <body> calls lucide.createIcons() -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/lucide/0.263.1/lucide.min.js"></script>
| <style> | |
| :root { | |
| --bg-dark: #0f172a; | |
| --bg-card: #1e293b; | |
| --bg-input: #334155; | |
| --primary: #6366f1; | |
| --primary-hover: #4f46e5; | |
| --accent: #ec4899; | |
| --text-main: #f8fafc; | |
| --text-muted: #94a3b8; | |
| --border: #475569; | |
| --success: #10b981; | |
| } | |
| * { | |
| margin: 0; | |
| padding: 0; | |
| box-sizing: border-box; | |
| font-family: 'Inter', sans-serif; | |
| } | |
| body { | |
| background-color: var(--bg-dark); | |
| color: var(--text-main); | |
| min-height: 100vh; | |
| background-image: | |
| radial-gradient(at 0% 0%, rgba(99, 102, 241, 0.15) 0px, transparent 50%), | |
| radial-gradient(at 100% 100%, rgba(236, 72, 153, 0.15) 0px, transparent 50%); | |
| } | |
/* Header: sticky, translucent bar that blurs whatever scrolls underneath it */
header {
    display: flex;
    justify-content: space-between;
    align-items: center;
    padding: 1.5rem 2rem;
    border-bottom: 1px solid var(--border);
    background: rgba(15, 23, 42, 0.8);
    /* Older Safari releases only recognise the prefixed property; the standard one follows so it wins where supported */
    -webkit-backdrop-filter: blur(12px);
    backdrop-filter: blur(12px);
    position: sticky;
    top: 0;
    z-index: 100;
}
| .logo { | |
| display: flex; | |
| align-items: center; | |
| gap: 0.75rem; | |
| font-weight: 700; | |
| font-size: 1.25rem; | |
| letter-spacing: -0.02em; | |
| } | |
| .logo i { | |
| color: var(--primary); | |
| } | |
| .anycoder-link { | |
| font-size: 0.875rem; | |
| color: var(--text-muted); | |
| text-decoration: none; | |
| display: flex; | |
| align-items: center; | |
| gap: 0.5rem; | |
| padding: 0.5rem 1rem; | |
| border-radius: 999px; | |
| background: rgba(255, 255, 255, 0.05); | |
| transition: all 0.2s ease; | |
| } | |
| .anycoder-link:hover { | |
| background: rgba(255, 255, 255, 0.1); | |
| color: var(--text-main); | |
| } | |
| /* Main Layout */ | |
| main { | |
| max-width: 1200px; | |
| margin: 2rem auto; | |
| padding: 0 1.5rem; | |
| display: grid; | |
| grid-template-columns: 1.5fr 1fr; | |
| gap: 2rem; | |
| } | |
| @media (max-width: 900px) { | |
| main { | |
| grid-template-columns: 1fr; | |
| } | |
| } | |
| /* Cards */ | |
| .card { | |
| background: var(--bg-card); | |
| border: 1px solid var(--border); | |
| border-radius: 1.25rem; | |
| padding: 1.5rem; | |
| box-shadow: 0 10px 30px -10px rgba(0, 0, 0, 0.5); | |
| } | |
| h2 { | |
| font-size: 1.1rem; | |
| margin-bottom: 1.25rem; | |
| color: var(--text-main); | |
| display: flex; | |
| align-items: center; | |
| gap: 0.5rem; | |
| } | |
| /* Form Elements */ | |
| .form-group { | |
| margin-bottom: 1.25rem; | |
| } | |
| label { | |
| display: block; | |
| font-size: 0.875rem; | |
| font-weight: 500; | |
| color: var(--text-muted); | |
| margin-bottom: 0.5rem; | |
| } | |
| textarea, select, input[type="text"] { | |
| width: 100%; | |
| background: var(--bg-input); | |
| border: 1px solid var(--border); | |
| border-radius: 0.75rem; | |
| padding: 0.875rem; | |
| color: var(--text-main); | |
| font-size: 1rem; | |
| transition: border-color 0.2s; | |
| } | |
| textarea:focus, select:focus, input:focus { | |
| outline: none; | |
| border-color: var(--primary); | |
| } | |
| textarea { | |
| min-height: 150px; | |
| resize: vertical; | |
| } | |
| .row { | |
| display: grid; | |
| grid-template-columns: 1fr 1fr; | |
| gap: 1rem; | |
| } | |
/* Range Slider */
/* Strip the native look so the track and thumb rules below fully define the control */
input[type=range] {
    width: 100%;
    background: transparent;
    -webkit-appearance: none;
    appearance: none;
    margin: 10px 0;
}
/* WebKit / Blink track */
input[type=range]::-webkit-slider-runnable-track {
    width: 100%;
    height: 6px;
    background: var(--bg-input);
    border-radius: 3px;
}
/* WebKit / Blink thumb; the negative margin centres the 18px thumb on the 6px track */
input[type=range]::-webkit-slider-thumb {
    height: 18px;
    width: 18px;
    border-radius: 50%;
    background: var(--primary);
    -webkit-appearance: none;
    appearance: none;
    margin-top: -6px;
    cursor: pointer;
    transition: transform 0.1s;
}
input[type=range]::-webkit-slider-thumb:hover {
    transform: scale(1.2);
}
/* Firefox equivalents: without these the slider falls back to the browser's default styling */
input[type=range]::-moz-range-track {
    width: 100%;
    height: 6px;
    background: var(--bg-input);
    border-radius: 3px;
}
input[type=range]::-moz-range-thumb {
    height: 18px;
    width: 18px;
    border: none;
    border-radius: 50%;
    background: var(--primary);
    cursor: pointer;
    transition: transform 0.1s;
}
input[type=range]::-moz-range-thumb:hover {
    transform: scale(1.2);
}
/* The shared input:focus rule removes the outline, so give keyboard users a ring on the thumb instead */
input[type=range]:focus-visible::-webkit-slider-thumb {
    box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.4);
}
input[type=range]:focus-visible::-moz-range-thumb {
    box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.4);
}
| .range-header { | |
| display: flex; | |
| justify-content: space-between; | |
| margin-bottom: 0.25rem; | |
| } | |
| .range-value { | |
| font-size: 0.8rem; | |
| color: var(--primary); | |
| font-weight: 600; | |
| } | |
| /* Drag and Drop Zone */ | |
| .drop-zone { | |
| border: 2px dashed var(--border); | |
| border-radius: 0.75rem; | |
| padding: 2rem; | |
| text-align: center; | |
| transition: all 0.2s; | |
| cursor: pointer; | |
| position: relative; | |
| background: rgba(51, 65, 85, 0.3); | |
| } | |
| .drop-zone:hover, .drop-zone.drag-over { | |
| border-color: var(--primary); | |
| background: rgba(99, 102, 241, 0.1); | |
| } | |
| .drop-zone-icon { | |
| font-size: 2rem; | |
| color: var(--text-muted); | |
| margin-bottom: 0.5rem; | |
| } | |
| .drop-zone-text { | |
| color: var(--text-muted); | |
| font-size: 0.9rem; | |
| } | |
| .drop-zone-text span { | |
| color: var(--primary); | |
| font-weight: 600; | |
| } | |
| .file-info { | |
| margin-top: 0.75rem; | |
| font-size: 0.85rem; | |
| color: var(--success); | |
| display: none; | |
| align-items: center; | |
| justify-content: center; | |
| gap: 0.5rem; | |
| } | |
| /* Toggle Switch */ | |
| .toggle-container { | |
| display: flex; | |
| align-items: center; | |
| justify-content: space-between; | |
| background: var(--bg-input); | |
| padding: 0.75rem 1rem; | |
| border-radius: 0.75rem; | |
| margin-bottom: 1rem; | |
| } | |
| .toggle-label { | |
| display: flex; | |
| flex-direction: column; | |
| } | |
| .toggle-label span:first-child { | |
| font-weight: 500; | |
| font-size: 0.95rem; | |
| } | |
| .toggle-label span:last-child { | |
| font-size: 0.8rem; | |
| color: var(--text-muted); | |
| } | |
| .switch { | |
| position: relative; | |
| display: inline-block; | |
| width: 50px; | |
| height: 26px; | |
| } | |
| .switch input { | |
| opacity: 0; | |
| width: 0; | |
| height: 0; | |
| } | |
| .slider { | |
| position: absolute; | |
| cursor: pointer; | |
| top: 0; | |
| left: 0; | |
| right: 0; | |
| bottom: 0; | |
| background-color: var(--bg-dark); | |
| transition: .4s; | |
| border-radius: 34px; | |
| border: 1px solid var(--border); | |
| } | |
| .slider:before { | |
| position: absolute; | |
| content: ""; | |
| height: 18px; | |
| width: 18px; | |
| left: 3px; | |
| bottom: 3px; | |
| background-color: var(--text-muted); | |
| transition: .4s; | |
| border-radius: 50%; | |
| } | |
/* Checked state: tint the track and slide the knob to the right-hand side */
input:checked + .slider {
    background-color: var(--primary);
    border-color: var(--primary);
}
input:checked + .slider:before {
    transform: translateX(24px);
    background-color: white;
}
/* The real checkbox is invisible (opacity 0, zero size), so draw keyboard focus on the styled track */
.switch input:focus-visible + .slider {
    outline: 2px solid #a5b4fc;
    outline-offset: 2px;
}
| /* Button */ | |
| .btn-generate { | |
| width: 100%; | |
| padding: 1rem; | |
| border: none; | |
| border-radius: 0.75rem; | |
| background: linear-gradient(135deg, var(--primary), var(--accent)); | |
| color: white; | |
| font-weight: 600; | |
| font-size: 1rem; | |
| cursor: pointer; | |
| transition: all 0.2s; | |
| display: flex; | |
| justify-content: center; | |
| align-items: center; | |
| gap: 0.5rem; | |
| box-shadow: 0 4px 15px rgba(99, 102, 241, 0.3); | |
| } | |
| .btn-generate:hover { | |
| transform: translateY(-2px); | |
| box-shadow: 0 8px 20px rgba(99, 102, 241, 0.4); | |
| } | |
| .btn-generate:active { | |
| transform: translateY(0); | |
| } | |
| .btn-generate:disabled { | |
| opacity: 0.7; | |
| cursor: not-allowed; | |
| transform: none; | |
| } | |
| /* Right Panel: Output */ | |
| .audio-player-container { | |
| background: var(--bg-input); | |
| border-radius: 1rem; | |
| padding: 1.5rem; | |
| display: none; /* Hidden by default */ | |
| flex-direction: column; | |
| gap: 1rem; | |
| } | |
| .audio-player-container.active { | |
| display: flex; | |
| animation: fadeIn 0.3s ease; | |
| } | |
| @keyframes fadeIn { | |
| from { opacity: 0; transform: translateY(10px); } | |
| to { opacity: 1; transform: translateY(0); } | |
| } | |
| audio { | |
| width: 100%; | |
| height: 40px; | |
| border-radius: 0.5rem; | |
| } | |
| /* Customizing audio player vaguely */ | |
| audio::-webkit-media-controls-panel { | |
| background-color: var(--bg-card); | |
| } | |
| audio::-webkit-media-controls-play-button { | |
| background-color: var(--primary); | |
| border-radius: 50%; | |
| } | |
| .status-message { | |
| font-size: 0.85rem; | |
| color: var(--text-muted); | |
| text-align: center; | |
| margin-top: 0.5rem; | |
| min-height: 1.2em; | |
| } | |
| .voice-tag { | |
| display: inline-block; | |
| background: rgba(99, 102, 241, 0.2); | |
| color: #a5b4fc; | |
| padding: 0.25rem 0.5rem; | |
| border-radius: 4px; | |
| font-size: 0.75rem; | |
| margin-top: 0.5rem; | |
| } | |
| /* Loading Spinner */ | |
| .spinner { | |
| width: 20px; | |
| height: 20px; | |
| border: 2px solid rgba(255,255,255,0.3); | |
| border-radius: 50%; | |
| border-top-color: #fff; | |
| animation: spin 1s ease-in-out infinite; | |
| display: none; | |
| } | |
| @keyframes spin { | |
| to { transform: rotate(360deg); } | |
| } | |
| .loading .spinner { | |
| display: block; | |
| } | |
| </style> | |
| </head> | |
| <body> | |
<header>
    <div class="logo">
        <i data-lucide="waveform"></i>
        <span>KOKORO TTS STUDIO</span>
    </div>
    <!-- Opens in a new tab; rel="noopener noreferrer" stops that tab from reaching this page through window.opener -->
    <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" rel="noopener noreferrer" class="anycoder-link">
        Built with <span>anycoder</span>
    </a>
</header>
| <main> | |
<!-- Left Column: Controls -->
<div class="left-col">
    <div class="card">
        <h2><i data-lucide="mic-2"></i> Input Configuration</h2>
        <!-- Clone Toggle -->
        <!-- The switch's wrapping label has no text, so the checkbox takes its name and description from the caption spans -->
        <div class="toggle-container">
            <div class="toggle-label">
                <span id="cloneToggleLabel">Voice Cloning Mode</span>
                <span id="cloneToggleHint">Upload reference audio to clone voice</span>
            </div>
            <label class="switch">
                <input type="checkbox" id="cloneToggle" aria-labelledby="cloneToggleLabel" aria-describedby="cloneToggleHint">
                <span class="slider"></span>
            </label>
        </div>
        <!-- Reference Audio Upload (Hidden by default) -->
        <div class="form-group" id="referenceGroup" style="display: none;">
            <label>Reference Voice Audio (.wav / .mp3)</label>
            <div class="drop-zone" id="dropZone">
                <div class="drop-zone-icon">📁</div>
                <div class="drop-zone-text">
                    <span>Click to upload</span> or drag and drop
                </div>
                <div class="file-info" id="fileInfo">
                    <i data-lucide="check-circle" size="14"></i>
                    <span id="fileName">audio.wav</span>
                </div>
                <input type="file" id="fileInput" accept="audio/*" hidden>
            </div>
        </div>
        <!-- Voice Selection (Hidden when cloning) -->
        <div class="form-group" id="voiceSelectGroup">
            <!-- One visible caption covers two controls, so each select also carries its own accessible name -->
            <label for="langSelect">Select Language &amp; Voice</label>
            <div class="row">
                <select id="langSelect" aria-label="Language">
                    <option value="en">English</option>
                    <option value="es">Español</option>
                    <option value="fr">Français</option>
                    <option value="pt">Português</option>
                    <option value="ja">日本語</option>
                    <option value="zh">中文</option>
                </select>
                <select id="voiceSelect" aria-label="Voice">
                    <!-- Populated by JS -->
                </select>
            </div>
        </div>
        <!-- Text Input -->
        <div class="form-group">
            <label for="textInput">Text to Synthesize</label>
            <textarea id="textInput" placeholder="Enter your text here..."></textarea>
        </div>
        <!-- Advanced Settings -->
        <div class="row">
            <div class="form-group">
                <div class="range-header">
                    <label for="speedInput">Speed</label>
                    <span class="range-value" id="speedValue">1.0</span>
                </div>
                <input type="range" id="speedInput" min="0.5" max="2.0" step="0.1" value="1.0">
            </div>
            <div class="form-group">
                <div class="range-header">
                    <label for="pitchInput">Pitch</label>
                    <span class="range-value" id="pitchValue">0</span>
                </div>
                <input type="range" id="pitchInput" min="-10" max="10" step="1" value="0">
            </div>
        </div>
    </div>
</div>
<!-- Right Column: Output -->
<div class="right-col">
    <div class="card">
        <h2><i data-lucide="audio-waveform"></i> Output</h2>
        <!-- The spinner is a <span> because <button> may only contain phrasing content; CSS switches its display -->
        <button type="button" class="btn-generate" id="generateBtn">
            <span class="spinner" aria-hidden="true"></span>
            <span id="btnText">Generate Audio</span>
        </button>
        <!-- Live region: the script rewrites this text to report validation errors, progress and completion -->
        <div class="status-message" id="statusMessage" role="status" aria-live="polite">Ready to synthesize</div>
        <!-- Audio Player -->
        <div class="audio-player-container" id="audioContainer">
            <div style="display:flex; justify-content:space-between; align-items:flex-end;">
                <label>Result</label>
                <span class="voice-tag" id="activeVoiceTag">af_heart</span>
            </div>
            <audio id="audioPlayer" controls aria-label="Generated audio"></audio>
        </div>
    </div>
    <!-- Tips -->
    <div class="card" style="margin-top: 1.5rem; background: rgba(99, 102, 241, 0.05); border-color: rgba(99, 102, 241, 0.2);">
        <h3 style="font-size: 0.95rem; color: #a5b4fc; margin-bottom: 0.5rem;">
            <i data-lucide="info" size="16"></i> Tips for Voice Cloning
        </h3>
        <p style="font-size: 0.85rem; color: var(--text-muted); line-height: 1.5;">
            For best results, upload a clear WAV file (5-15 seconds) without background noise. The AI will try to mimic the tone and style of the reference audio.
        </p>
    </div>
</div>
| </main> | |
| <script> | |
// Initialize Icons
// The icon library comes from a CDN. If that request failed, `lucide` is undefined and an
// unguarded call would throw here and stop every listener below from being registered.
if (typeof lucide !== 'undefined') {
    lucide.createIcons();
}
// DOM Elements
// Looked up once; the handlers further down share these references.
const cloneToggle = document.getElementById('cloneToggle');
const referenceGroup = document.getElementById('referenceGroup');
const voiceSelectGroup = document.getElementById('voiceSelectGroup');
const langSelect = document.getElementById('langSelect');
const voiceSelect = document.getElementById('voiceSelect');
const textInput = document.getElementById('textInput');
const speedInput = document.getElementById('speedInput');
const pitchInput = document.getElementById('pitchInput');
const speedValue = document.getElementById('speedValue');
const pitchValue = document.getElementById('pitchValue');
const dropZone = document.getElementById('dropZone');
const fileInput = document.getElementById('fileInput');
const fileInfo = document.getElementById('fileInfo');
const fileName = document.getElementById('fileName');
const generateBtn = document.getElementById('generateBtn');
const btnText = document.getElementById('btnText');
const statusMessage = document.getElementById('statusMessage');
const audioContainer = document.getElementById('audioContainer');
const audioPlayer = document.getElementById('audioPlayer');
const activeVoiceTag = document.getElementById('activeVoiceTag');
// KOKORO Voice Data
// Voice identifiers offered for each language; the keys match the <option> values of #langSelect.
// NOTE(review): the identifiers are sent to the backend as-is - confirm they exist there.
const voices = {
    en: ['af_heart', 'af_mer', 'af_sarah', 'af_sky', 'bf_emma', 'bf_george', 'bm_george', 'bm_lewis'],
    es: ['es_palpha', 'es_salpha', 'es_spanish'],
    fr: ['fr_m1', 'fr_m2', 'fr_f1', 'fr_f2'],
    pt: ['pt_b1', 'pt_b2', 'pt_c1', 'pt_c2'],
    ja: ['ja_1', 'ja_2', 'ja_3', 'ja_4', 'ja_5'],
    zh: ['zh_1', 'zh_2', 'zh_3', 'zh_4', 'zh_5']
};
// Reference clip accepted for voice cloning, or null until the user supplies one.
let uploadedFile = null;
| // --- Event Listeners --- | |
/**
 * Rebuild the voice dropdown so it lists the voices of the currently selected language.
 *
 * Reads `langSelect.value`, empties `voiceSelect`, then appends one <option> per voice
 * found in `voices` for that language. A language with no voice array leaves the
 * dropdown empty instead of throwing.
 */
function updateVoiceList() {
    const lang = langSelect.value;
    // Array.isArray also rejects inherited keys such as "constructor", not just missing ones
    const available = Array.isArray(voices[lang]) ? voices[lang] : [];
    voiceSelect.innerHTML = '';
    for (const voice of available) {
        const option = document.createElement('option');
        option.value = voice;
        // textContent (not innerHTML) so a voice name is never parsed as markup
        option.textContent = voice;
        voiceSelect.appendChild(option);
    }
}
// Keep the voice dropdown in step with the chosen language, and fill it once at startup
langSelect.addEventListener('change', updateVoiceList);
updateVoiceList(); // Init
// Toggle Clone Mode
// Cloning shows the reference-audio upload and takes the preset voice pickers out of play.
cloneToggle.addEventListener('change', (e) => {
    const cloning = e.target.checked;
    referenceGroup.style.display = cloning ? 'block' : 'none';
    voiceSelectGroup.style.opacity = cloning ? '0.5' : '1';
    // 'auto' is the reset value for HTML elements; 'all' is an SVG-only keyword
    voiceSelectGroup.style.pointerEvents = cloning ? 'none' : 'auto';
    // pointer-events only blocks the mouse, so disable the selects to block keyboard use as well
    langSelect.disabled = cloning;
    voiceSelect.disabled = cloning;
});
// Range Sliders
// A range input reports 1.0 as "1"; fix one decimal so the read-out keeps the format of its initial "1.0"
speedInput.addEventListener('input', (e) => {
    speedValue.textContent = Number(e.target.value).toFixed(1);
});
pitchInput.addEventListener('input', (e) => {
    pitchValue.textContent = e.target.value;
});
// Drag and Drop Logic
// The drop zone is a plain <div>, so expose it as a focusable button for keyboard users.
dropZone.setAttribute('role', 'button');
dropZone.setAttribute('tabindex', '0');
dropZone.addEventListener('click', () => fileInput.click());
dropZone.addEventListener('keydown', (e) => {
    if (e.key === 'Enter' || e.key === ' ') {
        e.preventDefault(); // keep Space from scrolling the page
        fileInput.click();
    }
});
dropZone.addEventListener('dragover', (e) => {
    e.preventDefault(); // required, otherwise the browser never fires 'drop' here
    dropZone.classList.add('drag-over');
});
dropZone.addEventListener('dragleave', (e) => {
    // dragleave also fires when the pointer moves onto a child element; ignore those so the
    // highlight does not flicker. A null relatedTarget falls through and clears the highlight.
    if (dropZone.contains(e.relatedTarget)) {
        return;
    }
    dropZone.classList.remove('drag-over');
});
dropZone.addEventListener('drop', (e) => {
    e.preventDefault(); // stop the browser from navigating to the dropped file
    dropZone.classList.remove('drag-over');
    if (e.dataTransfer.files.length) {
        handleFile(e.dataTransfer.files[0]);
    }
});
fileInput.addEventListener('change', (e) => {
    if (e.target.files.length) {
        handleFile(e.target.files[0]);
    }
    // Clear the picker so choosing the same file again still fires 'change'
    e.target.value = '';
});
/**
 * Validate a user-supplied file and, when it is audio, keep it as the cloning reference.
 *
 * On success the file is stored in `uploadedFile`, its name is shown in the drop zone and a
 * success status is displayed. On rejection an error status is displayed and any previously
 * accepted file is left in place.
 *
 * @param {File} file - File taken from the picker or from a drop event.
 */
function handleFile(file) {
    const mimeType = file ? file.type : '';
    const hasAudioMime = typeof mimeType === 'string' && mimeType.startsWith('audio/');
    // Browsers report an empty type when they cannot identify a file, so in that case
    // fall back to the extension rather than rejecting a genuine audio clip.
    const hasAudioExtension = !mimeType
        && /\.(?:wav|mp3|m4a|aac|ogg|oga|opus|flac|weba)$/i.test(file ? String(file.name) : '');
    if (!hasAudioMime && !hasAudioExtension) {
        statusMessage.textContent = "Error: Please upload an audio file.";
        statusMessage.style.color = "#ef4444";
        return;
    }
    uploadedFile = file;
    fileName.textContent = file.name;
    fileInfo.style.display = 'flex'; // hidden by default in the stylesheet
    statusMessage.textContent = "Audio loaded successfully.";
    statusMessage.style.color = "#10b981";
}
// Generate Button Logic
// Validates the form, puts the button into its loading state, assembles the request payload
// and then simulates the backend response with a timer (no real request is sent).
generateBtn.addEventListener('click', () => {
    const text = textInput.value.trim();
    if (!text) {
        statusMessage.textContent = "Please enter some text.";
        statusMessage.style.color = "#ef4444";
        return;
    }
    // Read the mode once so the payload and the result label always describe the same request
    const isCloning = cloneToggle.checked;
    // Validate before entering the loading state; otherwise a rejected click would leave the
    // "Processing audio..." status on screen. Reported in the status line like the text check.
    if (isCloning && !uploadedFile) {
        statusMessage.textContent = "Please upload a reference audio file.";
        statusMessage.style.color = "#ef4444";
        return;
    }
    // Start Loading
    generateBtn.classList.add('loading');
    generateBtn.disabled = true;
    btnText.textContent = "Synthesizing...";
    statusMessage.textContent = "Processing audio... this may take a moment.";
    statusMessage.style.color = "#94a3b8";
    // Prepare Form Data
    const formData = new FormData();
    formData.append('text', text);
    formData.append('speed', speedInput.value);
    formData.append('pitch', pitchInput.value);
    let endpoint = '/v1/tts';
    // Label for the result tag, captured now so later UI changes cannot mislabel this output
    let resultLabel;
    if (isCloning) {
        formData.append('reference_audio', uploadedFile);
        endpoint = '/v1/tts/clone'; // Hypothetical endpoint
        resultLabel = "Cloned Voice";
    } else {
        resultLabel = voiceSelect.value;
        formData.append('voice', resultLabel);
    }
    // --- API SIMULATION ---
    // Since we don't have a live backend here, we simulate the API call
    // and return a placeholder audio.
    console.log("Sending request to:", endpoint, Object.fromEntries(formData));
    setTimeout(() => {
        // Mock Response
        audioPlayer.src = "https://www2.cs.uic.edu/~i101/SoundFiles/BabyElephantWalk60.wav"; // Placeholder audio
        audioContainer.classList.add('active');
        activeVoiceTag.textContent = resultLabel;
        statusMessage.textContent = "Audio generated successfully!";
        statusMessage.style.color = "#10b981";
        resetBtn();
    }, 2500); // Fake delay
});
/**
 * Put the Generate button back into its idle state: restore the default caption,
 * make it clickable again and drop the `loading` class that reveals the spinner.
 */
function resetBtn() {
    btnText.textContent = "Generate Audio";
    generateBtn.disabled = false;
    generateBtn.classList.remove('loading');
}
| </script> | |
| </body> | |
| </html> |