anycoder-a670717a / index.html
JymNils's picture
Upload folder using huggingface_hub
6466b07 verified
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>KOKORO TTS Studio</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
<script src="https://cdnjs.cloudflare.com/ajax/libs/lucide/0.263.1/lucide.min.js"></script>
<style>
/* Design tokens shared by every rule below. */
:root {
  /* Surfaces and outlines */
  --bg-dark: #0f172a;
  --bg-card: #1e293b;
  --bg-input: #334155;
  --border: #475569;

  /* Brand and state colors */
  --primary: #6366f1;
  --primary-hover: #4f46e5;
  --accent: #ec4899;
  --success: #10b981;

  /* Text */
  --text-main: #f8fafc;
  --text-muted: #94a3b8;
}

/* Global reset: border-box sizing, no default spacing, one font family everywhere. */
* {
  box-sizing: border-box;
  margin: 0;
  padding: 0;
  font-family: 'Inter', sans-serif;
}

/* Page canvas: dark base with a faint primary glow top-left and accent glow bottom-right. */
body {
  min-height: 100vh;
  color: var(--text-main);
  background-color: var(--bg-dark);
  background-image:
    radial-gradient(at 0% 0%, rgba(99, 102, 241, 0.15) 0px, transparent 50%),
    radial-gradient(at 100% 100%, rgba(236, 72, 153, 0.15) 0px, transparent 50%);
}
/* Header: translucent bar that stays pinned to the top while the page scrolls. */
header {
  position: sticky;
  top: 0;
  z-index: 100;
  display: flex;
  align-items: center;
  justify-content: space-between;
  padding: 1.5rem 2rem;
  background: rgba(15, 23, 42, 0.8);
  backdrop-filter: blur(12px);
  border-bottom: 1px solid var(--border);
}

/* Brand lockup: icon and product name on one line. */
.logo {
  display: flex;
  align-items: center;
  gap: 0.75rem;
  font-size: 1.25rem;
  font-weight: 700;
  letter-spacing: -0.02em;
}

.logo i {
  color: var(--primary);
}

/* Pill-shaped attribution link on the right side of the header. */
.anycoder-link {
  display: flex;
  align-items: center;
  gap: 0.5rem;
  padding: 0.5rem 1rem;
  border-radius: 999px;
  font-size: 0.875rem;
  color: var(--text-muted);
  text-decoration: none;
  background: rgba(255, 255, 255, 0.05);
  transition: all 0.2s ease;
}

/* Hover brightens both the pill background and its text. */
.anycoder-link:hover {
  color: var(--text-main);
  background: rgba(255, 255, 255, 0.1);
}
/* Main Layout: centered two-column grid, controls column wider than the output column. */
main {
  display: grid;
  grid-template-columns: 1.5fr 1fr;
  gap: 2rem;
  max-width: 1200px;
  margin: 2rem auto;
  padding: 0 1.5rem;
}

/* At 900px and below the two columns stack into one. */
@media (max-width: 900px) {
  main {
    grid-template-columns: 1fr;
  }
}
/* Cards: raised, rounded panels that hold each group of controls. */
.card {
  padding: 1.5rem;
  background: var(--bg-card);
  border: 1px solid var(--border);
  border-radius: 1.25rem;
  box-shadow: 0 10px 30px -10px rgba(0, 0, 0, 0.5);
}

/* Card titles: icon and text aligned on one row. */
h2 {
  display: flex;
  align-items: center;
  gap: 0.5rem;
  margin-bottom: 1.25rem;
  font-size: 1.1rem;
  color: var(--text-main);
}
/* Form Elements */
.form-group {
  margin-bottom: 1.25rem;
}

/* Field captions sit on their own line above the control. */
label {
  display: block;
  font-size: 0.875rem;
  font-weight: 500;
  color: var(--text-muted);
  margin-bottom: 0.5rem;
}

/* Shared look for the bordered, text-like controls. */
textarea,
select,
input[type="text"] {
  width: 100%;
  background: var(--bg-input);
  border: 1px solid var(--border);
  border-radius: 0.75rem;
  padding: 0.875rem;
  color: var(--text-main);
  font-size: 1rem;
  transition: border-color 0.2s;
}

/*
 * Focus is signalled by recoloring the border, so the default outline is
 * dropped only on controls that actually have this border. Other inputs
 * (for example the range sliders) keep the browser's native focus ring;
 * a blanket `input:focus` would leave them with no focus indicator at all.
 */
textarea:focus,
select:focus,
input[type="text"]:focus {
  outline: none;
  border-color: var(--primary);
}

textarea {
  min-height: 150px;
  resize: vertical;
}

/* Two equal columns for side-by-side controls. */
.row {
  display: grid;
  grid-template-columns: 1fr 1fr;
  gap: 1rem;
}
/* Range Slider */
input[type=range] {
  width: 100%;
  background: transparent;
  /* Drop the native look so the custom track/thumb below take over. */
  -webkit-appearance: none;
  appearance: none;
  margin: 10px 0;
}

/* WebKit/Blink track and thumb. */
input[type=range]::-webkit-slider-runnable-track {
  width: 100%;
  height: 6px;
  background: var(--bg-input);
  border-radius: 3px;
}
input[type=range]::-webkit-slider-thumb {
  height: 18px;
  width: 18px;
  border-radius: 50%;
  background: var(--primary);
  -webkit-appearance: none;
  /* (track height 6px - thumb height 18px) / 2: centers the thumb on the track. */
  margin-top: -6px;
  cursor: pointer;
  transition: transform 0.1s;
}
input[type=range]::-webkit-slider-thumb:hover {
  transform: scale(1.2);
}

/*
 * Firefox equivalents. These must stay in separate rules: a selector list
 * containing a pseudo-element the browser does not recognize is discarded
 * as a whole, so they cannot be merged with the -webkit- selectors above.
 */
input[type=range]::-moz-range-track {
  width: 100%;
  height: 6px;
  background: var(--bg-input);
  border-radius: 3px;
}
input[type=range]::-moz-range-thumb {
  height: 18px;
  width: 18px;
  border: none;
  border-radius: 50%;
  background: var(--primary);
  cursor: pointer;
  transition: transform 0.1s;
}
input[type=range]::-moz-range-thumb:hover {
  transform: scale(1.2);
}

/* Caption on the left, current value on the right, above each slider. */
.range-header {
  display: flex;
  justify-content: space-between;
  margin-bottom: 0.25rem;
}
.range-value {
  font-size: 0.8rem;
  color: var(--primary);
  font-weight: 600;
}
/* Drag and Drop Zone: dashed, clickable target for the reference audio file. */
.drop-zone {
  position: relative;
  padding: 2rem;
  text-align: center;
  cursor: pointer;
  background: rgba(51, 65, 85, 0.3);
  border: 2px dashed var(--border);
  border-radius: 0.75rem;
  transition: all 0.2s;
}

/* Same highlight for pointer hover and for the .drag-over state class. */
.drop-zone:hover,
.drop-zone.drag-over {
  background: rgba(99, 102, 241, 0.1);
  border-color: var(--primary);
}

.drop-zone-icon {
  margin-bottom: 0.5rem;
  font-size: 2rem;
  color: var(--text-muted);
}

.drop-zone-text {
  font-size: 0.9rem;
  color: var(--text-muted);
}

/* The emphasized part of the prompt text. */
.drop-zone-text span {
  font-weight: 600;
  color: var(--primary);
}

/* Selected-file row: hidden until something switches its display on. */
.file-info {
  display: none;
  align-items: center;
  justify-content: center;
  gap: 0.5rem;
  margin-top: 0.75rem;
  font-size: 0.85rem;
  color: var(--success);
}
/* Toggle Switch */
/* Row holding the two-line caption on the left and the switch on the right. */
.toggle-container {
  display: flex;
  align-items: center;
  justify-content: space-between;
  background: var(--bg-input);
  padding: 0.75rem 1rem;
  border-radius: 0.75rem;
  margin-bottom: 1rem;
}
.toggle-label {
  display: flex;
  flex-direction: column;
}
/* First span is the title, last span is the smaller hint line. */
.toggle-label span:first-child {
  font-weight: 500;
  font-size: 0.95rem;
}
.toggle-label span:last-child {
  font-size: 0.8rem;
  color: var(--text-muted);
}

/* The switch itself: a label wrapping an invisible checkbox and a styled track. */
.switch {
  position: relative;
  display: inline-block;
  width: 50px;
  height: 26px;
}
/* The real checkbox stays in the DOM (and focusable) but is visually hidden. */
.switch input {
  opacity: 0;
  width: 0;
  height: 0;
}
/* Track */
.slider {
  position: absolute;
  cursor: pointer;
  top: 0;
  left: 0;
  right: 0;
  bottom: 0;
  background-color: var(--bg-dark);
  transition: .4s;
  border-radius: 34px;
  border: 1px solid var(--border);
}
/* Knob */
.slider:before {
  position: absolute;
  content: "";
  height: 18px;
  width: 18px;
  left: 3px;
  bottom: 3px;
  background-color: var(--text-muted);
  transition: .4s;
  border-radius: 50%;
}
/* Checked state: colored track, knob slides right and turns white. */
input:checked + .slider {
  background-color: var(--primary);
  border-color: var(--primary);
}
input:checked + .slider:before {
  transform: translateX(24px);
  background-color: white;
}
/*
 * Because the checkbox is invisible, keyboard focus would otherwise leave no
 * trace on screen; draw the focus ring on the visible track instead.
 */
.switch input:focus-visible + .slider {
  outline: 2px solid var(--text-main);
  outline-offset: 2px;
}
/* Button: full-width gradient call-to-action with centered content. */
.btn-generate {
  display: flex;
  align-items: center;
  justify-content: center;
  gap: 0.5rem;
  width: 100%;
  padding: 1rem;
  font-size: 1rem;
  font-weight: 600;
  color: white;
  cursor: pointer;
  background: linear-gradient(135deg, var(--primary), var(--accent));
  border: none;
  border-radius: 0.75rem;
  box-shadow: 0 4px 15px rgba(99, 102, 241, 0.3);
  transition: all 0.2s;
}

/* Hover lifts the button slightly and deepens its glow. */
.btn-generate:hover {
  box-shadow: 0 8px 20px rgba(99, 102, 241, 0.4);
  transform: translateY(-2px);
}

/* Pressing settles it back to its resting position. */
.btn-generate:active {
  transform: translateY(0);
}

/* Disabled: dimmed, no lift, "not allowed" cursor. */
.btn-generate:disabled {
  cursor: not-allowed;
  opacity: 0.7;
  transform: none;
}
/* Right Panel: Output */
/* Result panel stays out of the layout until the .active class is added. */
.audio-player-container {
  display: none; /* Hidden by default */
  flex-direction: column;
  gap: 1rem;
  padding: 1.5rem;
  background: var(--bg-input);
  border-radius: 1rem;
}

.audio-player-container.active {
  display: flex;
  animation: fadeIn 0.3s ease;
}

/* Fade in while sliding up 10px. */
@keyframes fadeIn {
  0% {
    opacity: 0;
    transform: translateY(10px);
  }
  100% {
    opacity: 1;
    transform: translateY(0);
  }
}

audio {
  width: 100%;
  height: 40px;
  border-radius: 0.5rem;
}

/* Customizing audio player vaguely (vendor-prefixed pseudo-elements). */
audio::-webkit-media-controls-panel {
  background-color: var(--bg-card);
}

audio::-webkit-media-controls-play-button {
  background-color: var(--primary);
  border-radius: 50%;
}

/* One centered line of status text; min-height reserves its row. */
.status-message {
  min-height: 1.2em;
  margin-top: 0.5rem;
  font-size: 0.85rem;
  color: var(--text-muted);
  text-align: center;
}

/* Small badge naming the voice used for the current result. */
.voice-tag {
  display: inline-block;
  margin-top: 0.5rem;
  padding: 0.25rem 0.5rem;
  font-size: 0.75rem;
  color: #a5b4fc;
  background: rgba(99, 102, 241, 0.2);
  border-radius: 4px;
}
/* Loading Spinner: faint ring with a solid white top arc, rotated continuously. */
.spinner {
  display: none;
  width: 20px;
  height: 20px;
  /* Order matters: the shorthand sets all four sides, then the top is overridden. */
  border: 2px solid rgba(255,255,255,0.3);
  border-top-color: #fff;
  border-radius: 50%;
  animation: spin 1s ease-in-out infinite;
}

@keyframes spin {
  100% {
    transform: rotate(360deg);
  }
}

/* The spinner shows only inside an ancestor carrying the .loading class. */
.loading .spinner {
  display: block;
}
</style>
</head>
<body>
<header>
  <!--
    The product name is the page's top-level heading. The .logo class supplies
    its display, size, and weight, and the global reset removes heading margins.
  -->
  <h1 class="logo">
    <i data-lucide="waveform" aria-hidden="true"></i>
    <span>KOKORO TTS STUDIO</span>
  </h1>
  <!-- rel="noopener noreferrer": the new tab gets no window.opener handle back to this page. -->
  <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" rel="noopener noreferrer" class="anycoder-link">
    Built with <span>anycoder</span>
  </a>
</header>
<main>
<!-- Left Column: Controls -->
<div class="left-col">
  <div class="card">
    <h2><i data-lucide="mic-2" aria-hidden="true"></i> Input Configuration</h2>
    <!-- Clone Toggle -->
    <div class="toggle-container">
      <div class="toggle-label">
        <span id="cloneToggleLabel">Voice Cloning Mode</span>
        <span id="cloneToggleHint">Upload reference audio to clone voice</span>
      </div>
      <!--
        The wrapping label has no text of its own, so the checkbox takes its
        accessible name and description from the two caption spans above.
      -->
      <label class="switch">
        <input type="checkbox" id="cloneToggle" aria-labelledby="cloneToggleLabel" aria-describedby="cloneToggleHint">
        <span class="slider"></span>
      </label>
    </div>
    <!-- Reference Audio Upload (Hidden by default) -->
    <div class="form-group" id="referenceGroup" style="display: none;">
      <label>Reference Voice Audio (.wav / .mp3)</label>
      <div class="drop-zone" id="dropZone">
        <div class="drop-zone-icon" aria-hidden="true">📁</div>
        <div class="drop-zone-text">
          <span>Click to upload</span> or drag and drop
        </div>
        <div class="file-info" id="fileInfo">
          <i data-lucide="check-circle" size="14" aria-hidden="true"></i>
          <span id="fileName">audio.wav</span>
        </div>
        <input type="file" id="fileInput" accept="audio/*" hidden>
      </div>
    </div>
    <!-- Voice Selection (Hidden when cloning) -->
    <div class="form-group" id="voiceSelectGroup">
      <!-- The visible caption names the language picker; the voice picker carries its own name. -->
      <label for="langSelect">Select Language &amp; Voice</label>
      <div class="row">
        <select id="langSelect">
          <option value="en">English</option>
          <option value="es" lang="es">Español</option>
          <option value="fr" lang="fr">Français</option>
          <option value="pt" lang="pt">Português</option>
          <option value="ja" lang="ja">日本語</option>
          <option value="zh" lang="zh">中文</option>
        </select>
        <select id="voiceSelect" aria-label="Voice">
          <!-- Populated by JS -->
        </select>
      </div>
    </div>
    <!-- Text Input -->
    <div class="form-group">
      <label for="textInput">Text to Synthesize</label>
      <textarea id="textInput" placeholder="Enter your text here..."></textarea>
    </div>
    <!-- Advanced Settings -->
    <div class="row">
      <div class="form-group">
        <div class="range-header">
          <label for="speedInput">Speed</label>
          <span class="range-value" id="speedValue">1.0</span>
        </div>
        <input type="range" id="speedInput" min="0.5" max="2.0" step="0.1" value="1.0">
      </div>
      <div class="form-group">
        <div class="range-header">
          <label for="pitchInput">Pitch</label>
          <span class="range-value" id="pitchValue">0</span>
        </div>
        <input type="range" id="pitchInput" min="-10" max="10" step="1" value="0">
      </div>
    </div>
  </div>
</div>
<!-- Right Column: Output -->
<div class="right-col">
  <div class="card">
    <h2><i data-lucide="audio-waveform" aria-hidden="true"></i> Output</h2>
    <!-- Explicit type: stays a plain action button even if it is ever placed inside a form. -->
    <button type="button" class="btn-generate" id="generateBtn">
      <!-- A span, because a button may only contain phrasing content; .spinner sets its own display. -->
      <span class="spinner" aria-hidden="true"></span>
      <span id="btnText">Generate Audio</span>
    </button>
    <!-- role="status": text written here by the script is announced politely by assistive technology. -->
    <div class="status-message" id="statusMessage" role="status">Ready to synthesize</div>
    <!-- Audio Player -->
    <div class="audio-player-container" id="audioContainer">
      <div style="display:flex; justify-content:space-between; align-items:end;">
        <label>Result</label>
        <span class="voice-tag" id="activeVoiceTag">af_heart</span>
      </div>
      <audio id="audioPlayer" controls></audio>
    </div>
  </div>
  <!-- Tips -->
  <div class="card" style="margin-top: 1.5rem; background: rgba(99, 102, 241, 0.05); border-color: rgba(99, 102, 241, 0.2);">
    <h3 style="font-size: 0.95rem; color: #a5b4fc; margin-bottom: 0.5rem;">
      <i data-lucide="info" size="16" aria-hidden="true"></i> Tips for Voice Cloning
    </h3>
    <p style="font-size: 0.85rem; color: var(--text-muted); line-height: 1.5;">
      For best results, upload a clear WAV file (5-15 seconds) without background noise. The AI will try to mimic the tone and style of the reference audio.
    </p>
  </div>
</div>
</main>
<script>
// Initialize the Lucide icon library (presumably it renders the data-lucide
// placeholders in the markup; confirm against the Lucide docs).
lucide.createIcons();

// DOM Elements
// Every constant below is named after the id of the element it refers to, so
// each group is resolved by mapping a list of ids through getElementById.

// Clone-mode switch and the two groups it shows or dims.
const [cloneToggle, referenceGroup, voiceSelectGroup] =
  ['cloneToggle', 'referenceGroup', 'voiceSelectGroup']
    .map((id) => document.getElementById(id));

// Language and voice pickers.
const [langSelect, voiceSelect] =
  ['langSelect', 'voiceSelect']
    .map((id) => document.getElementById(id));

// Text box, sliders, and the readouts next to the sliders.
const [textInput, speedInput, pitchInput, speedValue, pitchValue] =
  ['textInput', 'speedInput', 'pitchInput', 'speedValue', 'pitchValue']
    .map((id) => document.getElementById(id));

// Reference-audio upload widgets.
const [dropZone, fileInput, fileInfo, fileName] =
  ['dropZone', 'fileInput', 'fileInfo', 'fileName']
    .map((id) => document.getElementById(id));

// Generate button, status line, and result player.
const [generateBtn, btnText, statusMessage, audioContainer, audioPlayer, activeVoiceTag] =
  ['generateBtn', 'btnText', 'statusMessage', 'audioContainer', 'audioPlayer', 'activeVoiceTag']
    .map((id) => document.getElementById(id));
// KOKORO Voice Data
// Voice ids offered for each language; keys match the option values of #langSelect.
const voices = {
  en: [
    'af_heart', 'af_mer', 'af_sarah', 'af_sky',
    'bf_emma', 'bf_george', 'bm_george', 'bm_lewis'
  ],
  es: ['es_palpha', 'es_salpha', 'es_spanish'],
  fr: ['fr_m1', 'fr_m2', 'fr_f1', 'fr_f2'],
  pt: ['pt_b1', 'pt_b2', 'pt_c1', 'pt_c2'],
  ja: ['ja_1', 'ja_2', 'ja_3', 'ja_4', 'ja_5'],
  zh: ['zh_1', 'zh_2', 'zh_3', 'zh_4', 'zh_5']
};

// Reference clip for clone mode; null until handleFile accepts an audio file.
let uploadedFile = null;

// --- Event Listeners ---

/**
 * Rebuild the voice dropdown so it lists exactly the voices of the language
 * currently selected in the language dropdown. Each voice id is used as both
 * the option's value and its visible text.
 */
function updateVoiceList() {
  voiceSelect.innerHTML = '';
  for (const voiceId of voices[langSelect.value]) {
    const entry = document.createElement('option');
    entry.textContent = voiceId;
    entry.value = voiceId;
    voiceSelect.appendChild(entry);
  }
}

updateVoiceList(); // Init
langSelect.addEventListener('change', updateVoiceList);
// Toggle Clone Mode
// Cloning on: reveal the reference-audio upload and take the preset voice
// pickers out of play. Cloning off: hide the upload and restore the pickers.
cloneToggle.addEventListener('change', (e) => {
  const cloning = e.target.checked;

  referenceGroup.style.display = cloning ? 'block' : 'none';

  // Dimming and pointer-events only affect sighted mouse users; `disabled`
  // is what actually stops keyboard focus and changes on the two selects.
  langSelect.disabled = cloning;
  voiceSelect.disabled = cloning;
  voiceSelectGroup.style.opacity = cloning ? '0.5' : '1';
  // An empty string clears the inline override so the element falls back to
  // its stylesheet value ('all' is an SVG-only keyword).
  voiceSelectGroup.style.pointerEvents = cloning ? 'none' : '';
});
// Range Sliders
// Keep each slider's numeric readout in step with the slider while it moves.
[[speedInput, speedValue], [pitchInput, pitchValue]].forEach(([slider, readout]) => {
  slider.addEventListener('input', () => {
    readout.textContent = slider.value;
  });
});
// Drag and Drop Logic
// The drop zone is a plain <div>, so on its own it can only be operated with a
// pointer. Expose it as a button and give it a tab stop so keyboard users can
// reach it; the keydown handler below supplies the matching activation keys.
dropZone.setAttribute('role', 'button');
dropZone.tabIndex = 0;

// Clicking anywhere in the zone opens the file picker. The hidden file input
// is a child of the zone, so the click dispatched on it bubbles back up here;
// ignore that one instead of asking the input to click itself again.
dropZone.addEventListener('click', (e) => {
  if (e.target !== fileInput) {
    fileInput.click();
  }
});

// Enter or Space on the focused zone behaves like a click.
dropZone.addEventListener('keydown', (e) => {
  if (e.target !== dropZone) return;
  if (e.key === 'Enter' || e.key === ' ') {
    e.preventDefault(); // keep Space from scrolling the page
    fileInput.click();
  }
});

// preventDefault on dragover is what marks the zone as a valid drop target.
dropZone.addEventListener('dragover', (e) => {
  e.preventDefault();
  dropZone.classList.add('drag-over');
});
dropZone.addEventListener('dragleave', () => {
  dropZone.classList.remove('drag-over');
});

// Only the first dropped file is used.
dropZone.addEventListener('drop', (e) => {
  e.preventDefault();
  dropZone.classList.remove('drag-over');
  if (e.dataTransfer.files.length) {
    handleFile(e.dataTransfer.files[0]);
  }
});

// Same path for a file chosen through the native picker.
fileInput.addEventListener('change', (e) => {
  if (e.target.files.length) {
    handleFile(e.target.files[0]);
  }
});
/**
 * Validate a file picked or dropped by the user and, if its MIME type starts
 * with "audio/", remember it as the cloning reference and show its name.
 * Either way the outcome is reported in the status line (green on success,
 * red on rejection). A rejected file leaves any previously stored file as is.
 *
 * @param {File} file - The candidate reference clip.
 */
function handleFile(file) {
  const isAudio = file.type.startsWith('audio/');

  if (isAudio) {
    uploadedFile = file;
    fileName.textContent = file.name;
    fileInfo.style.display = 'flex';
  }

  statusMessage.textContent = isAudio
    ? "Audio loaded successfully."
    : "Error: Please upload an audio file.";
  statusMessage.style.color = isAudio ? "#10b981" : "#ef4444";
}
// Generate Button Logic
// Validates the inputs, switches the UI into its loading state, assembles the
// request payload, and then simulates the backend call with a timer.
generateBtn.addEventListener('click', async () => {
  // All validation failures are reported the same way: red text in the status line.
  const showError = (message) => {
    statusMessage.textContent = message;
    statusMessage.style.color = "#ef4444";
  };

  const text = textInput.value.trim();
  if (!text) {
    showError("Please enter some text.");
    return;
  }

  // Validate clone mode BEFORE entering the loading state; otherwise bailing
  // out would leave the "Processing audio..." message on screen.
  const cloning = cloneToggle.checked;
  if (cloning && !uploadedFile) {
    showError("Please upload a reference audio file.");
    return;
  }

  // Snapshot the label now so the result tag describes the request that was
  // sent, even if the user changes the voice or toggle while it is pending.
  const voiceLabel = cloning ? "Cloned Voice" : voiceSelect.value;

  // Start Loading
  generateBtn.classList.add('loading');
  generateBtn.disabled = true;
  btnText.textContent = "Synthesizing...";
  statusMessage.textContent = "Processing audio... this may take a moment.";
  statusMessage.style.color = "#94a3b8";

  // Prepare Form Data
  const formData = new FormData();
  formData.append('text', text);
  formData.append('speed', speedInput.value);
  formData.append('pitch', pitchInput.value);

  let endpoint = '/v1/tts';
  if (cloning) {
    formData.append('reference_audio', uploadedFile);
    endpoint = '/v1/tts/clone'; // Hypothetical endpoint
  } else {
    formData.append('voice', voiceSelect.value);
  }

  // --- API SIMULATION ---
  // Since we don't have a live backend here, we simulate the API call
  // and return a placeholder audio.
  console.log("Sending request to:", endpoint, Object.fromEntries(formData));
  setTimeout(() => {
    // Mock Response
    audioPlayer.src = "https://www2.cs.uic.edu/~i101/SoundFiles/BabyElephantWalk60.wav"; // Placeholder audio
    audioContainer.classList.add('active');
    activeVoiceTag.textContent = voiceLabel;
    statusMessage.textContent = "Audio generated successfully!";
    statusMessage.style.color = "#10b981";
    resetBtn();
  }, 2500); // Fake delay
});
/**
 * Return the generate button to its idle state: original caption restored,
 * clickable again, and the `loading` class removed.
 */
function resetBtn() {
  btnText.textContent = "Generate Audio";
  generateBtn.disabled = false;
  generateBtn.classList.remove('loading');
}
</script>
</body>
</html>