<!--
vlengine-indextts2 / index.html
CherithCutestory's picture
Moved files to right place
d8290d9
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>IndexTTS2 - Test Console</title>
<style>
/* Reset: border-box sizing everywhere and no user-agent margins/padding. */
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }

/* Page shell: dark background, light text, system font stack. */
body {
  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
  background: #0f0d1a;
  color: #e2e0eb;
  min-height: 100vh;
  padding: 2rem;
}

/* Centered single-column layout. */
.container { max-width: 720px; margin: 0 auto; }

/* Title rendered as gradient-filled text: the gradient is the background,
   clipped to the glyphs, and the text fill is made transparent. */
h1 {
  font-size: 1.75rem;
  font-weight: 700;
  background: linear-gradient(135deg, #a78bfa, #7c3aed);
  -webkit-background-clip: text;
  background-clip: text; /* standard property alongside the prefixed one */
  -webkit-text-fill-color: transparent;
  margin-bottom: 0.25rem;
}
.subtitle { color: #9490a8; font-size: 0.875rem; margin-bottom: 2rem; }

/* Card: a bordered panel grouping related controls. */
.card {
  background: #1a1726;
  border: 1px solid #2d2a3a;
  border-radius: 12px;
  padding: 1.5rem;
  margin-bottom: 1.25rem;
}
.card-title {
  font-size: 0.8rem;
  font-weight: 600;
  text-transform: uppercase;
  letter-spacing: 0.05em;
  color: #a78bfa;
  margin-bottom: 1rem;
}

/* Form controls. */
label {
  display: block;
  font-size: 0.8rem;
  font-weight: 500;
  color: #b0adc0;
  margin-bottom: 0.35rem;
}
textarea, input[type="text"], input[type="number"], select {
  width: 100%;
  background: #12101e;
  border: 1px solid #2d2a3a;
  border-radius: 8px;
  color: #e2e0eb;
  padding: 0.6rem 0.75rem;
  font-size: 0.875rem;
  margin-bottom: 1rem;
  transition: border-color 0.2s;
}
textarea:focus, input:focus, select:focus {
  border-color: #7c3aed;
}
/* Explicit focus ring for every interactive control (including the button
   and the file input). The native outline is no longer suppressed, so the
   focused control stays clearly identifiable for keyboard users. */
textarea:focus-visible,
input:focus-visible,
select:focus-visible,
button:focus-visible {
  outline: 2px solid #a78bfa;
  outline-offset: 2px;
}
textarea { resize: vertical; min-height: 80px; }

/* Two equal-width columns for paired numeric inputs. */
.row { display: flex; gap: 1rem; }
.row > * { flex: 1; }

/* Primary action button. */
button.primary {
  width: 100%;
  padding: 0.75rem;
  background: linear-gradient(135deg, #7c3aed, #6d28d9);
  color: white;
  border: none;
  border-radius: 8px;
  font-size: 0.95rem;
  font-weight: 600;
  cursor: pointer;
  transition: opacity 0.2s;
}
button.primary:hover { opacity: 0.9; }
button.primary:disabled { opacity: 0.5; cursor: not-allowed; }

/* Status banner: hidden until the script assigns one of the state classes
   (error / success / loading) as the element's className. */
#status {
  margin-top: 1rem;
  padding: 0.75rem;
  border-radius: 8px;
  font-size: 0.85rem;
  display: none;
}
#status.error { display: block; background: #2d1520; border: 1px solid #5c2338; color: #f87171; }
#status.success { display: block; background: #152d1a; border: 1px solid #235c2d; color: #4ade80; }
#status.loading { display: block; background: #1a1726; border: 1px solid #2d2a3a; color: #a78bfa; }

/* Result player: hidden by default; the script sets style.display to show it. */
#audioResult { margin-top: 1rem; display: none; }
#audioResult audio { width: 100%; margin-top: 0.5rem; }

/* Small hint text placed directly under a control (negative top margin
   pulls it up against the control's bottom margin). */
.info {
  font-size: 0.75rem;
  color: #706d82;
  margin-top: -0.5rem;
  margin-bottom: 1rem;
}
</style>
</head>
<body>
<main class="container">
  <h1>IndexTTS2</h1>
  <p class="subtitle">Emotionally expressive zero-shot voice cloning TTS &mdash; Test Console</p>

  <!-- Reference clip: generate() reads the first file selected in #voiceFile. -->
  <section class="card">
    <h2 class="card-title">Voice Reference</h2>
    <label for="voiceFile">Upload reference audio (WAV, 6-15 seconds recommended)</label>
    <input type="file" id="voiceFile" accept="audio/*" style="margin-bottom:1rem" aria-describedby="voiceHint">
    <p class="info" id="voiceHint">IndexTTS2 clones the timbre from your reference audio for zero-shot voice synthesis.</p>
  </section>

  <!-- Text and synthesis parameters; every id below is read by generate(). -->
  <section class="card">
    <h2 class="card-title">Text &amp; Emotion</h2>
    <label for="inputText">Text to synthesize</label>
    <textarea id="inputText" rows="4" placeholder="Enter text to convert to speech..."></textarea>
    <label for="emotion">Emotion</label>
    <select id="emotion">
      <option value="neutral" selected>Neutral</option>
      <option value="happy">Happy</option>
      <option value="sad">Sad</option>
      <option value="angry">Angry</option>
      <option value="fear">Fear</option>
      <option value="surprise">Surprise</option>
      <option value="disgust">Disgust</option>
      <option value="excited">Excited</option>
      <option value="calm">Calm</option>
      <option value="anxious">Anxious</option>
      <option value="hopeful">Hopeful</option>
      <option value="melancholy">Melancholy</option>
      <option value="tender">Tender</option>
      <option value="proud">Proud</option>
    </select>
    <div class="row">
      <div>
        <label for="intensity">Intensity (1-100)</label>
        <input type="number" id="intensity" value="50" min="1" max="100">
      </div>
      <div>
        <label for="volume">Volume (1-100)</label>
        <input type="number" id="volume" value="75" min="1" max="100">
      </div>
    </div>
    <div class="row">
      <div>
        <label for="speed">Speed adjust</label>
        <input type="number" id="speed" value="0" min="-5" max="5" step="0.1">
      </div>
      <div>
        <label for="pitch">Pitch adjust</label>
        <input type="number" id="pitch" value="0" min="-5" max="5" step="0.1">
      </div>
    </div>
  </section>

  <!-- Optional credential; generate() sends it as a Bearer token when non-empty.
       Autofill, spellcheck and auto-capitalisation are disabled so the key is
       neither remembered by the browser nor altered while typing. -->
  <section class="card">
    <h2 class="card-title">Authentication</h2>
    <label for="apiKey">API Key (if set on server)</label>
    <input type="text" id="apiKey" placeholder="Leave empty if no auth required" autocomplete="off" autocapitalize="off" spellcheck="false">
  </section>

  <!-- Explicit type keeps this a plain action button even if it is ever
       placed inside a form. The inline handler calls the global generate(). -->
  <button class="primary" id="generateBtn" type="button" onclick="generate()">Generate Speech</button>

  <!-- Live region: progress, success and error text written here by the
       script is exposed to assistive technology as a status message. -->
  <div id="status" role="status"></div>

  <div id="audioResult">
    <audio id="audioPlayer" controls aria-label="Generated speech"></audio>
  </div>
</main>
<script>
/**
 * Read a File/Blob and return its contents as a bare base64 string.
 *
 * The file is read as a data URL and everything up to and including the
 * first comma (the "data:<type>;base64," header) is stripped.
 *
 * @param {Blob} file - File selected by the user.
 * @returns {Promise<string>} Resolves with the base64 payload.
 *   Rejects with an Error (carrying a readable `message`) when the read
 *   fails or when the result contains no payload, e.g. an empty file.
 */
function fileToBase64(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();

    reader.onload = () => {
      const dataUrl = typeof reader.result === 'string' ? reader.result : '';
      const commaIndex = dataUrl.indexOf(',');
      const base64 = commaIndex === -1 ? '' : dataUrl.slice(commaIndex + 1);
      if (!base64) {
        // Without this guard the caller would receive undefined/empty data,
        // and JSON.stringify would silently drop an undefined field.
        reject(new Error('The selected file is empty or could not be encoded.'));
        return;
      }
      resolve(base64);
    };

    // Reject with a real error object instead of the raw ProgressEvent so
    // that callers reading `.message` get meaningful text.
    reader.onerror = () => {
      reject(reader.error || new Error('Failed to read the selected file.'));
    };

    reader.readAsDataURL(file);
  });
}
/**
 * Click handler for the "Generate Speech" button.
 *
 * Collects the form values, validates them, POSTs a JSON request to
 * `/ConvertTextToSpeech` and, on success, loads the response body into the
 * audio player. Progress and failures are reported through the #status
 * element by setting its class name (loading / success / error) and text.
 *
 * The request body fields (input_text, voice_to_clone_sample, emotion_set,
 * intensity, volume, speed_adjust, pitch_adjust) are sent exactly as named
 * here; the server-side schema is not visible from this file.
 *
 * @returns {Promise<void>}
 */
async function generate() {
  const status = document.getElementById('status');
  const btn = document.getElementById('generateBtn');
  const audioResult = document.getElementById('audioResult');
  const audioPlayer = document.getElementById('audioPlayer');

  // Show a failure message in the status banner.
  const showError = (message) => {
    status.className = 'error';
    status.textContent = message;
  };

  const voiceFile = document.getElementById('voiceFile').files[0];
  const text = document.getElementById('inputText').value.trim();
  const emotion = document.getElementById('emotion').value;
  const intensity = parseInt(document.getElementById('intensity').value, 10);
  const volume = parseInt(document.getElementById('volume').value, 10);
  const speed = parseFloat(document.getElementById('speed').value);
  const pitch = parseFloat(document.getElementById('pitch').value);
  const apiKey = document.getElementById('apiKey').value.trim();

  if (!voiceFile) {
    showError('Please upload a reference voice audio file.');
    return;
  }
  if (!text) {
    showError('Please enter text to synthesize.');
    return;
  }

  // An empty or malformed numeric field parses to NaN, which JSON.stringify
  // would serialize as null; reject it here with a clear message instead.
  const numericFields = [
    ['Intensity', intensity],
    ['Volume', volume],
    ['Speed adjust', speed],
    ['Pitch adjust', pitch],
  ];
  for (const [label, value] of numericFields) {
    if (!Number.isFinite(value)) {
      showError(`${label} must be a number.`);
      return;
    }
  }

  btn.disabled = true;
  status.className = 'loading';
  status.textContent = 'Generating speech... (this may take a moment)';
  audioResult.style.display = 'none';

  try {
    const voiceBase64 = await fileToBase64(voiceFile);
    const headers = { 'Content-Type': 'application/json' };
    if (apiKey) headers['Authorization'] = `Bearer ${apiKey}`;

    const resp = await fetch('/ConvertTextToSpeech', {
      method: 'POST',
      headers,
      body: JSON.stringify({
        input_text: text,
        voice_to_clone_sample: voiceBase64,
        emotion_set: [emotion], // the single selected emotion, sent as a one-item list
        intensity,
        volume,
        speed_adjust: speed,
        pitch_adjust: pitch,
      }),
    });

    if (!resp.ok) {
      // Prefer the server's `error` field, but fall back to the HTTP status
      // when the error body is not JSON (e.g. an HTML page from a proxy).
      let message = `HTTP ${resp.status}`;
      try {
        const err = await resp.json();
        if (err && err.error) message = err.error;
      } catch (parseError) {
        // Body was not valid JSON; keep the status-based message.
      }
      throw new Error(message);
    }

    const blob = await resp.blob();
    const previousUrl = audioPlayer.src;
    audioPlayer.src = URL.createObjectURL(blob);
    // Release the blob backing the previous result so repeated generations
    // do not accumulate audio data in memory.
    if (previousUrl.startsWith('blob:')) URL.revokeObjectURL(previousUrl);

    audioResult.style.display = 'block';
    status.className = 'success';
    status.textContent = 'Speech generated successfully!';
  } catch (e) {
    // Some rejections (e.g. raw events) carry no message; avoid "undefined".
    const detail = e && e.message ? e.message : 'Request failed.';
    showError(`Error: ${detail}`);
  } finally {
    btn.disabled = false;
  }
}
</script>
</body>
</html>