Kokoro-TTS-FastAPI / static /index.html
xxparthparekhxx's picture
added the app
64801d5
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Kokoro TTS</title>
<link
href="https://fonts.googleapis.com/css2?family=Syne:wght@400;600;700;800&family=DM+Mono:wght@300;400;500&display=swap"
rel="stylesheet" />
<style>
:root {
--bg: #0a0a0f;
--surface: #111118;
--surface2: #1a1a24;
--surface3: #22222f;
--border: #2a2a3a;
--border2: #333345;
--accent: #7c6af7;
--accent2: #a78bfa;
--accent-glow: rgba(124, 106, 247, 0.25);
--green: #34d399;
--red: #f87171;
--yellow: #fbbf24;
--text: #e8e8f0;
--text-dim: #888898;
--text-muted: #555565;
--radius: 12px;
--radius-sm: 8px;
}
*,
*::before,
*::after {
box-sizing: border-box;
margin: 0;
padding: 0;
}
html {
background: var(--bg);
}
body {
font-family: 'DM Mono', monospace;
background: var(--bg);
color: var(--text);
min-height: 100vh;
display: grid;
grid-template-rows: auto 1fr auto;
overflow-x: hidden;
}
/* Subtle noise texture */
body::before {
content: '';
position: fixed;
inset: 0;
background-image: url("data:image/svg+xml,%3Csvg viewBox='0 0 200 200' xmlns='http://www.w3.org/2000/svg'%3E%3Cfilter id='n'%3E%3CfeTurbulence type='fractalNoise' baseFrequency='0.9' numOctaves='4' stitchTiles='stitch'/%3E%3C/filter%3E%3Crect width='100%25' height='100%25' filter='url(%23n)' opacity='0.03'/%3E%3C/svg%3E");
pointer-events: none;
z-index: 0;
}
/* ── Header ── */
header {
position: relative;
z-index: 10;
padding: 24px 40px;
border-bottom: 1px solid var(--border);
display: flex;
align-items: center;
justify-content: space-between;
background: rgba(10, 10, 15, 0.8);
backdrop-filter: blur(12px);
}
.logo {
font-family: 'Syne', sans-serif;
font-size: 22px;
font-weight: 800;
letter-spacing: -0.5px;
display: flex;
align-items: center;
gap: 10px;
}
.logo-icon {
width: 32px;
height: 32px;
background: linear-gradient(135deg, var(--accent), #c084fc);
border-radius: 8px;
display: grid;
place-items: center;
font-size: 16px;
}
.logo-text span {
color: var(--accent2);
}
.status-badge {
display: flex;
align-items: center;
gap: 8px;
padding: 6px 14px;
background: var(--surface2);
border: 1px solid var(--border);
border-radius: 100px;
font-size: 11px;
color: var(--text-dim);
font-family: 'DM Mono', monospace;
}
.status-dot {
width: 7px;
height: 7px;
border-radius: 50%;
background: var(--text-muted);
transition: background 0.3s, box-shadow 0.3s;
}
.status-dot.ready {
background: var(--green);
box-shadow: 0 0 8px var(--green);
}
.status-dot.loading {
background: var(--yellow);
animation: pulse 1s infinite;
}
.status-dot.error {
background: var(--red);
}
@keyframes pulse {
0%,
100% {
opacity: 1
}
50% {
opacity: 0.4
}
}
/* ── Tabs ── */
.tabs {
display: flex;
gap: 2px;
padding: 0 40px;
border-bottom: 1px solid var(--border);
background: var(--surface);
position: relative;
z-index: 9;
}
.tab-btn {
font-family: 'Syne', sans-serif;
font-size: 13px;
font-weight: 600;
letter-spacing: 0.5px;
padding: 14px 20px;
border: none;
background: transparent;
color: var(--text-muted);
cursor: pointer;
position: relative;
transition: color 0.2s;
display: flex;
align-items: center;
gap: 7px;
}
.tab-btn:hover {
color: var(--text-dim);
}
.tab-btn.active {
color: var(--accent2);
}
.tab-btn.active::after {
content: '';
position: absolute;
bottom: -1px;
left: 0;
right: 0;
height: 2px;
background: var(--accent);
border-radius: 2px 2px 0 0;
}
/* ── Main Layout ── */
main {
position: relative;
z-index: 5;
max-width: 1200px;
width: 100%;
margin: 0 auto;
padding: 32px 40px;
}
.tab-panel {
display: none;
}
.tab-panel.active {
display: block;
}
/* ── Generate Panel ── */
.generate-grid {
display: grid;
grid-template-columns: 1fr 420px;
gap: 24px;
align-items: start;
}
/* ── Card ── */
.card {
background: var(--surface);
border: 1px solid var(--border);
border-radius: var(--radius);
overflow: hidden;
}
.card-header {
padding: 16px 20px;
border-bottom: 1px solid var(--border);
display: flex;
align-items: center;
justify-content: space-between;
}
.card-title {
font-family: 'Syne', sans-serif;
font-size: 12px;
font-weight: 700;
letter-spacing: 1.5px;
text-transform: uppercase;
color: var(--text-dim);
}
.card-body {
padding: 20px;
}
/* ── Textarea ── */
textarea {
width: 100%;
min-height: 200px;
background: var(--surface2);
border: 1px solid var(--border);
border-radius: var(--radius-sm);
color: var(--text);
font-family: 'DM Mono', monospace;
font-size: 13px;
line-height: 1.7;
padding: 14px 16px;
resize: vertical;
transition: border-color 0.2s, box-shadow 0.2s;
outline: none;
}
textarea:focus {
border-color: var(--accent);
box-shadow: 0 0 0 3px var(--accent-glow);
}
.char-count {
font-size: 11px;
color: var(--text-muted);
text-align: right;
margin-top: 6px;
}
.char-count.warn {
color: var(--yellow);
}
.char-count.danger {
color: var(--red);
}
/* ── Examples ── */
.examples-section {
margin-top: 16px;
}
.examples-label {
font-size: 11px;
color: var(--text-muted);
letter-spacing: 1px;
text-transform: uppercase;
margin-bottom: 10px;
}
.examples-grid {
display: flex;
flex-wrap: wrap;
gap: 8px;
}
.example-chip {
font-family: 'DM Mono', monospace;
font-size: 11px;
padding: 5px 12px;
background: var(--surface2);
border: 1px solid var(--border);
border-radius: 100px;
color: var(--text-dim);
cursor: pointer;
transition: all 0.18s;
white-space: nowrap;
}
.example-chip:hover {
border-color: var(--accent);
color: var(--accent2);
background: rgba(124, 106, 247, 0.08);
}
/* ── Voice Selector ── */
.voice-search {
width: 100%;
background: var(--surface2);
border: 1px solid var(--border);
border-radius: var(--radius-sm);
color: var(--text);
font-family: 'DM Mono', monospace;
font-size: 12px;
padding: 9px 14px;
outline: none;
transition: border-color 0.2s;
margin-bottom: 12px;
}
.voice-search:focus {
border-color: var(--accent);
}
.voice-list {
max-height: 220px;
overflow-y: auto;
display: flex;
flex-direction: column;
gap: 4px;
}
.voice-list::-webkit-scrollbar {
width: 4px;
}
.voice-list::-webkit-scrollbar-track {
background: transparent;
}
.voice-list::-webkit-scrollbar-thumb {
background: var(--border2);
border-radius: 4px;
}
.voice-item {
display: flex;
align-items: center;
gap: 10px;
padding: 9px 12px;
border-radius: var(--radius-sm);
cursor: pointer;
border: 1px solid transparent;
transition: all 0.15s;
background: transparent;
}
.voice-item:hover {
background: var(--surface2);
border-color: var(--border);
}
.voice-item.selected {
background: rgba(124, 106, 247, 0.12);
border-color: var(--accent);
}
.voice-flag {
font-size: 16px;
line-height: 1;
}
.voice-info {
flex: 1;
}
.voice-name {
font-family: 'Syne', sans-serif;
font-size: 13px;
font-weight: 600;
color: var(--text);
}
.voice-meta {
font-size: 10px;
color: var(--text-muted);
margin-top: 1px;
}
.voice-gender {
font-size: 10px;
padding: 2px 8px;
border-radius: 100px;
font-family: 'DM Mono', monospace;
}
.voice-gender.female {
background: rgba(251, 113, 133, 0.15);
color: #fb7185;
border: 1px solid rgba(251, 113, 133, 0.2);
}
.voice-gender.male {
background: rgba(96, 165, 250, 0.15);
color: #60a5fa;
border: 1px solid rgba(96, 165, 250, 0.2);
}
/* ── Controls ── */
.control-row {
display: flex;
align-items: center;
justify-content: space-between;
gap: 12px;
margin-top: 16px;
}
.control-group {
display: flex;
flex-direction: column;
gap: 6px;
flex: 1;
}
.control-label {
font-size: 10px;
letter-spacing: 1.2px;
text-transform: uppercase;
color: var(--text-muted);
}
.control-value {
font-size: 12px;
color: var(--accent2);
font-weight: 500;
}
/* Range slider track. The native look is removed in every engine
   (prefixed + unprefixed) so the custom track/thumb styles apply. */
input[type=range] {
  width: 100%;
  -webkit-appearance: none;
  appearance: none;
  height: 4px;
  background: var(--surface3);
  border-radius: 4px;
  outline: none;
  cursor: pointer;
}
/* The default outline is removed above, so keyboard focus needs a visible replacement. */
input[type=range]:focus-visible {
  box-shadow: 0 0 0 3px var(--accent-glow);
}
/* Thumb: WebKit/Blink */
input[type=range]::-webkit-slider-thumb {
  -webkit-appearance: none;
  appearance: none;
  width: 14px;
  height: 14px;
  background: var(--accent);
  border-radius: 50%;
  box-shadow: 0 0 6px var(--accent-glow);
  cursor: pointer;
}
/* Thumb: Firefox (must be a separate rule; an unknown pseudo-element
   in a selector list would invalidate the whole rule). */
input[type=range]::-moz-range-thumb {
  width: 14px;
  height: 14px;
  background: var(--accent);
  border: none;
  border-radius: 50%;
  box-shadow: 0 0 6px var(--accent-glow);
  cursor: pointer;
}
.format-toggle {
display: flex;
gap: 4px;
background: var(--surface2);
border: 1px solid var(--border);
border-radius: var(--radius-sm);
padding: 3px;
}
.format-btn {
font-family: 'DM Mono', monospace;
font-size: 11px;
padding: 5px 14px;
border: none;
border-radius: 6px;
cursor: pointer;
background: transparent;
color: var(--text-dim);
transition: all 0.15s;
}
.format-btn.active {
background: var(--accent);
color: #fff;
box-shadow: 0 2px 8px var(--accent-glow);
}
/* ── Generate Button ── */
.generate-btn {
width: 100%;
margin-top: 20px;
padding: 14px;
font-family: 'Syne', sans-serif;
font-size: 14px;
font-weight: 700;
letter-spacing: 0.5px;
border: none;
border-radius: var(--radius-sm);
background: linear-gradient(135deg, var(--accent) 0%, #a855f7 100%);
color: #fff;
cursor: pointer;
position: relative;
overflow: hidden;
transition: opacity 0.2s, transform 0.1s;
box-shadow: 0 4px 20px rgba(124, 106, 247, 0.35);
}
.generate-btn:hover {
opacity: 0.9;
transform: translateY(-1px);
}
.generate-btn:active {
transform: translateY(0);
}
.generate-btn:disabled {
opacity: 0.4;
cursor: not-allowed;
transform: none;
}
.generate-btn .spinner {
display: none;
width: 16px;
height: 16px;
border: 2px solid rgba(255, 255, 255, 0.3);
border-top-color: #fff;
border-radius: 50%;
animation: spin 0.7s linear infinite;
margin-right: 8px;
}
.generate-btn.loading .spinner {
display: inline-block;
}
.generate-btn.loading .btn-text::before {
content: 'Generating...';
}
.generate-btn:not(.loading) .btn-text::before {
content: '⚡ Generate Audio';
}
@keyframes spin {
to {
transform: rotate(360deg);
}
}
/* ── Output Panel ── */
.output-card {
position: sticky;
top: 24px;
}
.waveform-placeholder {
height: 80px;
background: var(--surface2);
border-radius: var(--radius-sm);
display: flex;
align-items: center;
justify-content: center;
color: var(--text-muted);
font-size: 12px;
border: 1px dashed var(--border);
margin-bottom: 16px;
position: relative;
overflow: hidden;
}
.waveform-bars {
display: flex;
align-items: center;
gap: 3px;
height: 50px;
}
.wbar {
width: 3px;
background: var(--accent);
border-radius: 2px;
opacity: 0.4;
}
audio {
width: 100%;
border-radius: var(--radius-sm);
accent-color: var(--accent);
background: var(--surface2);
}
/* Custom audio player */
.audio-player {
display: none;
flex-direction: column;
gap: 12px;
}
.audio-player.visible {
display: flex;
}
.player-controls {
display: flex;
align-items: center;
gap: 12px;
}
.play-btn {
width: 42px;
height: 42px;
border-radius: 50%;
background: var(--accent);
border: none;
cursor: pointer;
display: grid;
place-items: center;
color: #fff;
font-size: 16px;
flex-shrink: 0;
transition: all 0.15s;
box-shadow: 0 2px 12px var(--accent-glow);
}
.play-btn:hover {
transform: scale(1.08);
}
.progress-wrap {
flex: 1;
display: flex;
flex-direction: column;
gap: 4px;
}
.progress-bar-bg {
height: 4px;
background: var(--surface3);
border-radius: 4px;
cursor: pointer;
position: relative;
}
.progress-bar-fill {
height: 100%;
background: var(--accent);
border-radius: 4px;
width: 0%;
transition: width 0.1s;
position: relative;
}
.progress-bar-fill::after {
content: '';
position: absolute;
right: -5px;
top: -4px;
width: 12px;
height: 12px;
background: var(--accent2);
border-radius: 50%;
box-shadow: 0 0 6px var(--accent-glow);
}
.time-row {
display: flex;
justify-content: space-between;
font-size: 10px;
color: var(--text-muted);
}
.download-row {
display: flex;
gap: 8px;
}
.action-btn {
flex: 1;
padding: 8px;
font-family: 'DM Mono', monospace;
font-size: 11px;
border-radius: var(--radius-sm);
border: 1px solid var(--border);
background: var(--surface2);
color: var(--text-dim);
cursor: pointer;
transition: all 0.15s;
display: flex;
align-items: center;
justify-content: center;
gap: 6px;
}
.action-btn:hover {
border-color: var(--accent);
color: var(--accent2);
background: rgba(124, 106, 247, 0.08);
}
/* Meta badges */
.meta-row {
display: flex;
gap: 8px;
flex-wrap: wrap;
}
.meta-badge {
font-size: 10px;
padding: 3px 10px;
background: var(--surface3);
border: 1px solid var(--border);
border-radius: 100px;
color: var(--text-dim);
font-family: 'DM Mono', monospace;
}
/* ── History Panel ── */
.history-toolbar {
display: flex;
align-items: center;
justify-content: space-between;
margin-bottom: 20px;
}
.hist-btn {
font-family: 'DM Mono', monospace;
font-size: 12px;
padding: 8px 16px;
border-radius: var(--radius-sm);
border: 1px solid var(--border);
background: var(--surface2);
color: var(--text-dim);
cursor: pointer;
transition: all 0.15s;
display: flex;
align-items: center;
gap: 6px;
}
.hist-btn:hover {
border-color: var(--accent);
color: var(--accent2);
}
.hist-btn.danger:hover {
border-color: var(--red);
color: var(--red);
}
.hist-list {
display: flex;
flex-direction: column;
gap: 6px;
}
.hist-item {
display: flex;
align-items: center;
gap: 14px;
padding: 14px 16px;
background: var(--surface);
border: 1px solid var(--border);
border-radius: var(--radius-sm);
cursor: pointer;
transition: all 0.15s;
}
.hist-item:hover {
border-color: var(--border2);
background: var(--surface2);
}
.hist-item.selected {
border-color: var(--accent);
background: rgba(124, 106, 247, 0.06);
}
.hist-icon {
width: 36px;
height: 36px;
background: var(--surface2);
border: 1px solid var(--border);
border-radius: 8px;
display: grid;
place-items: center;
font-size: 16px;
flex-shrink: 0;
}
.hist-info {
flex: 1;
min-width: 0;
}
.hist-name {
font-size: 13px;
color: var(--text);
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
.hist-sub {
font-size: 11px;
color: var(--text-muted);
margin-top: 2px;
}
.hist-actions {
display: flex;
gap: 6px;
}
.hist-action-btn {
width: 30px;
height: 30px;
border-radius: 6px;
border: 1px solid var(--border);
background: var(--surface2);
color: var(--text-dim);
cursor: pointer;
display: grid;
place-items: center;
font-size: 13px;
transition: all 0.15s;
}
.hist-action-btn:hover {
border-color: var(--accent);
color: var(--accent2);
}
.hist-action-btn.del:hover {
border-color: var(--red);
color: var(--red);
}
.hist-preview {
margin-top: 16px;
padding: 16px;
background: var(--surface);
border: 1px solid var(--border);
border-radius: var(--radius);
display: none;
}
.hist-preview.visible {
display: block;
}
/* ── API Docs ── */
.api-grid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 20px;
}
.endpoint-card {
background: var(--surface);
border: 1px solid var(--border);
border-radius: var(--radius);
overflow: hidden;
}
.endpoint-header {
padding: 14px 18px;
border-bottom: 1px solid var(--border);
display: flex;
align-items: center;
gap: 10px;
}
.method-badge {
font-family: 'DM Mono', monospace;
font-size: 11px;
font-weight: 500;
padding: 3px 10px;
border-radius: 6px;
}
.method-badge.POST {
background: rgba(52, 211, 153, 0.15);
color: var(--green);
border: 1px solid rgba(52, 211, 153, 0.25);
}
.method-badge.GET {
background: rgba(96, 165, 250, 0.15);
color: #60a5fa;
border: 1px solid rgba(96, 165, 250, 0.25);
}
.method-badge.DEL {
background: rgba(248, 113, 113, 0.15);
color: var(--red);
border: 1px solid rgba(248, 113, 113, 0.25);
}
.endpoint-path {
font-family: 'DM Mono', monospace;
font-size: 13px;
color: var(--text);
}
.endpoint-body {
padding: 16px 18px;
}
pre {
background: var(--surface2);
border: 1px solid var(--border);
border-radius: var(--radius-sm);
padding: 14px;
font-family: 'DM Mono', monospace;
font-size: 11px;
color: var(--text-dim);
overflow-x: auto;
line-height: 1.6;
}
.code-key {
color: #a78bfa;
}
.code-str {
color: #34d399;
}
.code-num {
color: #fbbf24;
}
.code-comment {
color: var(--text-muted);
}
/* ── Empty State ── */
.empty-state {
text-align: center;
padding: 60px 20px;
color: var(--text-muted);
}
.empty-icon {
font-size: 40px;
margin-bottom: 12px;
opacity: 0.5;
}
.empty-title {
font-family: 'Syne', sans-serif;
font-size: 15px;
color: var(--text-dim);
margin-bottom: 6px;
}
.empty-sub {
font-size: 12px;
}
/* ── Toast ── */
.toast-container {
position: fixed;
bottom: 24px;
right: 24px;
z-index: 1000;
display: flex;
flex-direction: column;
gap: 8px;
}
.toast {
padding: 12px 18px;
background: var(--surface2);
border: 1px solid var(--border);
border-radius: var(--radius-sm);
font-size: 13px;
color: var(--text);
backdrop-filter: blur(8px);
animation: slideIn 0.3s ease;
display: flex;
align-items: center;
gap: 8px;
max-width: 320px;
}
.toast.success {
border-left: 3px solid var(--green);
}
.toast.error {
border-left: 3px solid var(--red);
}
@keyframes slideIn {
from {
transform: translateX(20px);
opacity: 0;
}
to {
transform: translateX(0);
opacity: 1;
}
}
@keyframes slideOut {
from {
opacity: 1;
}
to {
opacity: 0;
transform: translateX(20px);
}
}
/* ── Footer ── */
footer {
position: relative;
z-index: 10;
padding: 16px 40px;
border-top: 1px solid var(--border);
display: flex;
align-items: center;
justify-content: space-between;
font-size: 11px;
color: var(--text-muted);
}
footer a {
color: var(--accent2);
text-decoration: none;
}
/* ── Responsive ── */
@media (max-width: 900px) {
header,
.tabs,
main,
footer {
padding-left: 20px;
padding-right: 20px;
}
.generate-grid {
grid-template-columns: 1fr;
}
.api-grid {
grid-template-columns: 1fr;
}
.output-card {
position: static;
}
}
</style>
</head>
<body>
<!-- ── Header ── -->
<header>
<div class="logo">
<div class="logo-icon">🔊</div>
<span>Kokoro <span>TTS</span></span>
</div>
<div class="status-badge">
<div class="status-dot loading" id="statusDot"></div>
<span id="statusText">Loading model…</span>
</div>
</header>
<!-- ── Tabs ── -->
<div class="tabs">
<button class="tab-btn active" onclick="switchTab('generate', this)">
<span></span> Generate
</button>
<button class="tab-btn" onclick="switchTab('api', this)">
<span>📡</span> API Docs
</button>
</div>
<!-- ── Main ── -->
<main>
<!-- Generate Tab -->
<div class="tab-panel active" id="tab-generate">
<div class="generate-grid">
<!-- Left: Input -->
<div>
<div class="card">
<div class="card-header">
<span class="card-title">Input Text</span>
<span class="char-count" id="charCount">0 / 5000</span>
</div>
<div class="card-body">
<!-- aria-label gives the field an accessible name; the placeholder alone is not a label. -->
<textarea id="textInput" placeholder="Enter text to synthesize…" aria-label="Text to synthesize"
oninput="updateCharCount()"></textarea>
</div>
</div>
<!-- Examples -->
<div class="examples-section">
<div class="examples-label">Quick Examples</div>
<div class="examples-grid" id="examplesGrid"></div>
</div>
</div>
<!-- Right: Controls + Output -->
<div>
<!-- Voice Selector -->
<div class="card" style="margin-bottom:16px">
<div class="card-header">
<span class="card-title">Voice</span>
<span id="selectedVoiceLabel" style="font-size:12px;color:var(--accent2)"></span>
</div>
<div class="card-body" style="padding-bottom:12px">
<!-- aria-label gives the filter box an accessible name; the placeholder alone is not a label. -->
<input class="voice-search" type="text" placeholder="Search voices…" aria-label="Search voices"
oninput="filterVoices(this.value)" />
<div class="voice-list" id="voiceList">
<div class="empty-state" style="padding:20px">
<div class="empty-sub">Loading voices…</div>
</div>
</div>
</div>
</div>
<!-- Settings -->
<div class="card" style="margin-bottom:16px">
<div class="card-header">
<span class="card-title">Settings</span>
</div>
<div class="card-body">
<div class="control-row">
<div class="control-group">
<div style="display:flex;justify-content:space-between">
<span class="control-label">Speed</span>
<span class="control-value" id="speedVal">1.0×</span>
</div>
<!-- The visible "Speed" text is a span, not a label, so the slider is named explicitly. -->
<input type="range" id="speedSlider" min="0.5" max="2.0" step="0.1" value="1.0" aria-label="Speed"
oninput="document.getElementById('speedVal').textContent = parseFloat(this.value).toFixed(1) + '×'" />
</div>
<div style="display:flex;flex-direction:column;gap:6px;margin-left:20px">
<span class="control-label">Format</span>
<div class="format-toggle">
<button class="format-btn active" id="fmtWav"
onclick="setFormat('wav')">WAV</button>
<button class="format-btn" id="fmtMp3" onclick="setFormat('mp3')">MP3</button>
</div>
</div>
</div>
</div>
</div>
<!-- Generate Button -->
<button class="generate-btn" id="generateBtn" onclick="generateAudio()">
<span class="spinner" id="btnSpinner"></span>
<span class="btn-text" id="btnText"></span>
</button>
<!-- Output -->
<div class="card output-card" style="margin-top:16px">
<div class="card-header">
<span class="card-title">Output</span>
<div class="meta-row" id="metaRow"></div>
</div>
<div class="card-body">
<div class="waveform-placeholder" id="waveformPlaceholder">
<div id="waveformBars" class="waveform-bars" style="display:none"></div>
<span id="waveformHint" style="font-size:12px">Generate audio to see output</span>
</div>
<div class="audio-player" id="audioPlayer">
<audio id="audioEl" preload="auto"></audio>
<div class="player-controls">
<!-- Starts with the play glyph (the script swaps it between ▶ and ⏸) and carries a stable accessible name. -->
<button class="play-btn" id="playBtn" type="button" aria-label="Play or pause" onclick="togglePlay()">▶</button>
<div class="progress-wrap">
<div class="progress-bar-bg" id="progressBg" onclick="seekAudio(event)">
<div class="progress-bar-fill" id="progressFill"></div>
</div>
<div class="time-row">
<span id="timeEl">0:00</span>
<span id="durationEl">0:00</span>
</div>
</div>
</div>
<div class="download-row">
<button class="action-btn" id="downloadBtn" onclick="downloadAudio()">
Download</button>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
<!-- API Tab -->
<div class="tab-panel" id="tab-api">
<div style="display:flex;justify-content:flex-end;margin-bottom:20px;gap:8px">
<a href="/docs" target="_blank" class="hist-btn" style="text-decoration:none">
<span>📋</span> Swagger UI
</a>
</div>
<div class="api-grid">
<div class="endpoint-card">
<div class="endpoint-header">
<span class="method-badge POST">POST</span>
<span class="endpoint-path">/tts</span>
</div>
<div class="endpoint-body">
<p style="font-size:12px;color:var(--text-dim);margin-bottom:12px">Synthesize text to audio.
Returns raw audio bytes.</p>
<pre><span class="code-comment">// Request body</span>
{
<span class="code-key">"text"</span>: <span class="code-str">"Hello world!"</span>,
<span class="code-key">"voice"</span>: <span class="code-str">"af_heart"</span>,
<span class="code-key">"speed"</span>: <span class="code-num">1.0</span>,
<span class="code-key">"output_format"</span>: <span class="code-str">"wav"</span>
}
<span class="code-comment">// Response: audio/wav or audio/mpeg stream</span>
<span class="code-comment">// Headers:</span>
<span class="code-comment">// X-Duration-Seconds: 3.45</span>
<span class="code-comment">// Content-Disposition: attachment; filename="kokoro_af_heart.wav"</span></pre>
</div>
</div>
<div class="endpoint-card">
<div class="endpoint-header">
<span class="method-badge GET">GET</span>
<span class="endpoint-path">/voices</span>
</div>
<div class="endpoint-body">
<p style="font-size:12px;color:var(--text-dim);margin-bottom:12px">List all available voices.
</p>
<pre><span class="code-comment">// Response</span>
{
<span class="code-key">"voices"</span>: {
<span class="code-key">"af_heart"</span>: {
<span class="code-key">"label"</span>: <span class="code-str">"Heart"</span>,
<span class="code-key">"lang"</span>: <span class="code-str">"en-US"</span>,
<span class="code-key">"gender"</span>: <span class="code-str">"female"</span>,
<span class="code-key">"flag"</span>: <span class="code-str">"🇺🇸"</span>
},
...
},
<span class="code-key">"total"</span>: <span class="code-num">42</span>
}</pre>
</div>
</div>
<div class="endpoint-card">
<div class="endpoint-header">
<span class="method-badge GET">GET</span>
<span class="endpoint-path">/health</span>
</div>
<div class="endpoint-body">
<p style="font-size:12px;color:var(--text-dim);margin-bottom:12px">Model and device status.</p>
<pre><span class="code-comment">// Response</span>
{
<span class="code-key">"status"</span>: <span class="code-str">"ok"</span>,
<span class="code-key">"model_loaded"</span>: <span class="code-num">true</span>,
<span class="code-key">"device"</span>: <span class="code-str">"cuda"</span>,
<span class="code-key">"cuda"</span>: <span class="code-num">true</span>,
<span class="code-key">"pipelines"</span>: <span class="code-str">["a","b","e","f","h","i","j","p","z"]</span>
}</pre>
</div>
</div>
</div>
<div style="margin-top:24px">
<div class="card">
<div class="card-header"><span class="card-title">Quick Start (Python)</span></div>
<div class="card-body">
<pre><span class="code-comment"># pip install requests</span>
import requests
resp = requests.post(<span class="code-str">"http://localhost:7860/tts"</span>, json={
<span class="code-str">"text"</span>: <span class="code-str">"Hello from Kokoro TTS!"</span>,
<span class="code-str">"voice"</span>: <span class="code-str">"af_heart"</span>,
<span class="code-str">"speed"</span>: <span class="code-num">1.0</span>,
<span class="code-str">"output_format"</span>: <span class="code-str">"wav"</span>
})
with open(<span class="code-str">"output.wav"</span>, <span class="code-str">"wb"</span>) as f:
f.write(resp.content)
duration = resp.headers.get(<span class="code-str">"X-Duration-Seconds"</span>)
print(<span class="code-str">f"Duration: {duration}s"</span>)</pre>
</div>
</div>
</div>
</div>
</main>
<!-- Footer -->
<footer>
<!-- External link opened in a new tab: rel prevents the opened page from reaching window.opener. -->
<span>Kokoro TTS · Powered by <a href="https://huggingface.co/hexgrad/Kokoro-82M"
target="_blank" rel="noopener noreferrer">hexgrad/Kokoro-82M</a></span>
<span id="deviceInfo"></span>
</footer>
<!-- Toast Container -->
<div class="toast-container" id="toastContainer"></div>
<script>
// ── State ──────────────────────────────────────────────────────────────────────
// Voice catalogue keyed by voice id; loadVoices() replaces it with the `voices` object from GET /voices.
let allVoices = {};
// Id of the voice sent with the next POST /tts request; changed by selectVoice() and loadVoices().
let selectedVoice = 'af_heart';
// Output format sent with the next POST /tts request; changed by setFormat().
let selectedFormat = 'wav';
// Read by copyAudioUrl().
// NOTE(review): nothing in the visible part of this script ever assigns it — confirm it is still needed.
let currentAudioUrl = null;
// Filename most recently passed to selectHist().
let selectedHistFile = null;
// ── Examples ──────────────────────────────────────────────────────────────────
// Quick-example presets rendered as chips by buildExamples().
// `label` is the chip caption; `text` is what setExample() writes into the input textarea.
const EXAMPLES = [
{ label: "🎙️ Welcome", text: "Welcome to Kokoro TTS! A powerful text-to-speech system running locally on your machine." },
{ label: "📢 Announcement", text: "Attention please! The meeting has been rescheduled to 3 PM in conference room B." },
{ label: "🤖 Tech Demo", text: "This system uses a lightweight 82 million parameter model to generate high-quality speech in multiple languages." },
{ label: "🌍 Multilingual", text: "Bonjour tout le monde! Hola amigos! नमस्ते दुनिया!" },
{ label: "📖 Narration", text: "In a distant galaxy, beyond the reach of ordinary telescopes, a civilization had mastered the art of turning thought into sound." },
{ label: "💼 Business", text: "Q3 results exceeded expectations, with revenue growing 24% year-over-year. The board has approved a special dividend." },
{ label: "🔢 Numbers", text: "The event starts at 9:30 AM on January 15th, 2025. Please contact us at extension 4200 for more information." },
{ label: "😄 Casual", text: "Hey! What's up? Just wanted to let you know that the pizza arrived and it's absolutely amazing!" },
];
/**
 * Fill #examplesGrid with one chip button per EXAMPLES entry.
 * Any existing chips are discarded first; clicking a chip hands that
 * entry's text to setExample().
 */
function buildExamples() {
  const container = document.getElementById('examplesGrid');
  container.innerHTML = '';
  for (const example of EXAMPLES) {
    const chip = document.createElement('button');
    chip.className = 'example-chip';
    chip.textContent = example.label;
    chip.onclick = () => setExample(example.text);
    container.appendChild(chip);
  }
}
/**
 * Replace the contents of the input textarea with `text` and refresh the
 * character counter (setting .value programmatically fires no input event).
 */
function setExample(text) {
  const input = document.getElementById('textInput');
  input.value = text;
  updateCharCount();
}
// ── Char Count ─────────────────────────────────────────────────────────────────
/**
 * Refresh the "N / 5000" counter from the textarea's current length.
 * Above 4000 characters the counter gets the `warn` class, above 4500 the
 * `danger` class.
 */
function updateCharCount() {
  const length = document.getElementById('textInput').value.length;
  const counter = document.getElementById('charCount');

  let severity = '';
  if (length > 4500) {
    severity = ' danger';
  } else if (length > 4000) {
    severity = ' warn';
  }

  counter.textContent = `${length} / 5000`;
  counter.className = 'char-count' + severity;
}
// ── Tab Switch ─────────────────────────────────────────────────────────────────
/**
 * Show the panel with id `tab-<name>` and mark `btn` as the active tab button.
 * All other panels and tab buttons lose their `active` class first.
 * Switching to the 'history' tab also reloads the history list.
 */
function switchTab(name, btn) {
  for (const panel of document.querySelectorAll('.tab-panel')) {
    panel.classList.remove('active');
  }
  for (const tabButton of document.querySelectorAll('.tab-btn')) {
    tabButton.classList.remove('active');
  }

  const target = document.getElementById(`tab-${name}`);
  target.classList.add('active');
  btn.classList.add('active');

  if (name === 'history') {
    loadHistory();
  }
}
// ── Format Toggle ──────────────────────────────────────────────────────────────
/**
 * Remember `fmt` as the output format for the next request and highlight
 * the matching toggle button (WAV or MP3).
 */
function setFormat(fmt) {
  selectedFormat = fmt;
  const wavButton = document.getElementById('fmtWav');
  wavButton.classList.toggle('active', fmt === 'wav');
  const mp3Button = document.getElementById('fmtMp3');
  mp3Button.classList.toggle('active', fmt === 'mp3');
}
// ── Voices ─────────────────────────────────────────────────────────────────────
/**
 * Fetch the voice catalogue from GET /voices, store it in `allVoices`,
 * render the list and update the header label.
 *
 * If the default `selectedVoice` is not in the catalogue, the first
 * available voice is selected instead. This is decided before rendering so
 * the fallback voice is the one highlighted in the list. An empty catalogue
 * leaves `selectedVoice` untouched.
 *
 * On any failure (network error, non-2xx status, invalid JSON) the error is
 * logged and the "Loading voices…" placeholder is replaced with an error
 * message rather than being left on screen.
 */
async function loadVoices() {
  try {
    const res = await fetch('/voices');
    if (!res.ok) {
      throw new Error(`GET /voices failed with HTTP ${res.status}`);
    }
    const data = await res.json();
    allVoices = (data && data.voices) || {};

    const ids = Object.keys(allVoices);
    if (!allVoices[selectedVoice] && ids.length > 0) {
      selectedVoice = ids[0];
    }

    renderVoices(Object.entries(allVoices));
    updateSelectedVoiceLabel(selectedVoice);
  } catch (e) {
    console.error('Voice load failed:', e);
    const list = document.getElementById('voiceList');
    if (list) {
      list.innerHTML = '<div style="padding:16px;font-size:12px;color:var(--text-muted);text-align:center">Could not load voices</div>';
    }
  }
}
/**
 * Render the voice list into #voiceList.
 *
 * @param {Array<[string, object]>} entries  [id, info] pairs; `info` is read
 *   for `flag`, `label`, `lang` and `gender`.
 *
 * Rows are built with DOM APIs and textContent rather than an HTML string,
 * so values coming from the server are never parsed as markup. The click
 * handler closes over `id` instead of embedding it in an inline `onclick`
 * attribute, so ids containing quotes cannot break or inject script.
 * Each row keeps the id `vi_<id>`, which selectVoice() looks up.
 */
function renderVoices(entries) {
  const list = document.getElementById('voiceList');
  if (!entries.length) {
    list.innerHTML = '<div style="padding:16px;font-size:12px;color:var(--text-muted);text-align:center">No voices found</div>';
    return;
  }

  const fragment = document.createDocumentFragment();
  for (const [id, info] of entries) {
    const item = document.createElement('div');
    item.className = id === selectedVoice ? 'voice-item selected' : 'voice-item';
    item.id = `vi_${id}`;
    item.addEventListener('click', () => selectVoice(id));

    const flag = document.createElement('span');
    flag.className = 'voice-flag';
    flag.textContent = info.flag || '';

    const name = document.createElement('div');
    name.className = 'voice-name';
    name.textContent = info.label || '';

    const meta = document.createElement('div');
    meta.className = 'voice-meta';
    meta.textContent = info.lang || '';

    const details = document.createElement('div');
    details.className = 'voice-info';
    details.appendChild(name);
    details.appendChild(meta);

    const gender = document.createElement('span');
    // The gender value doubles as the CSS modifier class (.female / .male).
    gender.className = `voice-gender ${info.gender || ''}`;
    gender.textContent = info.gender || '';

    item.appendChild(flag);
    item.appendChild(details);
    item.appendChild(gender);
    fragment.appendChild(item);
  }

  list.innerHTML = '';
  list.appendChild(fragment);
}
/**
 * Re-render the voice list with only the voices whose label, language or id
 * contains `q` (case-insensitive).
 */
function filterVoices(q) {
  const needle = q.toLowerCase();
  const matches = [];
  for (const entry of Object.entries(allVoices)) {
    const [id, info] = entry;
    const isMatch =
      info.label.toLowerCase().includes(needle) ||
      info.lang.toLowerCase().includes(needle) ||
      id.toLowerCase().includes(needle);
    if (isMatch) {
      matches.push(entry);
    }
  }
  renderVoices(matches);
}
/**
 * Make `id` the active voice: store it, move the `selected` highlight to the
 * row with id `vi_<id>` (if it is currently rendered) and refresh the header label.
 */
function selectVoice(id) {
  selectedVoice = id;

  for (const row of document.querySelectorAll('.voice-item')) {
    row.classList.remove('selected');
  }

  const chosenRow = document.getElementById(`vi_${id}`);
  if (chosenRow) {
    chosenRow.classList.add('selected');
  }

  updateSelectedVoiceLabel(id);
}
/**
 * Show "<flag> <label>" for voice `id` in the Voice card header.
 * Does nothing when `id` is not present in `allVoices`.
 */
function updateSelectedVoiceLabel(id) {
  const voice = allVoices[id];
  if (!voice) {
    return;
  }
  const labelEl = document.getElementById('selectedVoiceLabel');
  labelEl.textContent = `${voice.flag} ${voice.label}`;
}
// Blob holding the most recent /tts response body; assigned in generateAudio().
let currentBlob = null;
// Object URL created for currentBlob; generateAudio() revokes the previous one before replacing it.
let currentBlobUrl = null;
/**
 * Send the current text, voice, speed and format to POST /tts and show the
 * returned audio in the output panel.
 *
 * The generate button is put into its loading/disabled state for the
 * duration of the request and always restored afterwards.
 *
 * Error handling:
 *  - An error body that is not JSON (e.g. a proxy HTML page) no longer
 *    surfaces a JSON parse error; the toast reports the HTTP status instead.
 *  - `detail` is shown verbatim when it is a string; any other shape (such
 *    as a list of validation errors) is serialised rather than rendered as
 *    "[object Object]".
 *  - A missing X-Duration-Seconds header no longer produces "nulls" in the
 *    success toast.
 */
async function generateAudio() {
  const text = document.getElementById('textInput').value.trim();
  if (!text) { toast('Please enter some text', 'error'); return; }

  const btn = document.getElementById('generateBtn');
  btn.classList.add('loading');
  btn.disabled = true;
  try {
    const resp = await fetch('/tts', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        text,
        voice: selectedVoice,
        speed: parseFloat(document.getElementById('speedSlider').value),
        output_format: selectedFormat,
      })
    });

    if (!resp.ok) {
      let message = 'Generation failed';
      try {
        const err = await resp.json();
        if (typeof err.detail === 'string') {
          message = err.detail;
        } else if (err.detail) {
          message = JSON.stringify(err.detail);
        }
      } catch (parseError) {
        // Body was not usable JSON; fall back to the status code.
        message = `Generation failed (HTTP ${resp.status})`;
      }
      throw new Error(message);
    }

    const duration = resp.headers.get('X-Duration-Seconds');
    currentBlob = await resp.blob();
    // Revoke previous blob URL to free memory
    if (currentBlobUrl) URL.revokeObjectURL(currentBlobUrl);
    currentBlobUrl = URL.createObjectURL(currentBlob);
    showAudioOutput(currentBlobUrl, duration);
    toast(duration ? `Audio generated · ${duration}s` : 'Audio generated', 'success');
  } catch (e) {
    toast(e.message, 'error');
  } finally {
    btn.classList.remove('loading');
    btn.disabled = false;
  }
}
/**
 * Download the most recently generated clip as `kokoro_<voice>.<ext>`.
 *
 * The extension is taken from the blob's MIME type when it identifies the
 * format, so toggling WAV/MP3 after generating no longer mislabels the
 * file. It falls back to the current format toggle when the type is
 * missing or unrecognised.
 *
 * NOTE(review): the voice part of the filename still comes from the
 * currently selected voice, which may differ from the one used to generate
 * the clip.
 */
function downloadAudio() {
  if (!currentBlobUrl) return;

  const mime = ((currentBlob && currentBlob.type) || '').toLowerCase();
  let ext = selectedFormat;
  if (mime.includes('mpeg') || mime.includes('mp3')) {
    ext = 'mp3';
  } else if (mime.includes('wav')) {
    ext = 'wav';
  }

  const a = document.createElement('a');
  a.href = currentBlobUrl;
  a.download = `kokoro_${selectedVoice}.${ext}`;
  // Attach briefly: some browsers ignore click() on a detached anchor.
  a.style.display = 'none';
  document.body.appendChild(a);
  a.click();
  a.remove();
}
/**
 * Load `audioUrl` into the player and refresh the output panel.
 *
 * @param {string} audioUrl        URL assigned to the <audio> element.
 * @param {string|null} duration   Duration text for the meta badge; the
 *                                 badge is omitted when this is empty/null.
 *
 * Compared with a plain source swap, this also resets the play button,
 * progress bar and time labels, because replacing the source stops
 * playback and the previous clip's state would otherwise stay on screen.
 * Meta badges are built with textContent so voice data from the server is
 * never parsed as HTML.
 */
function showAudioOutput(audioUrl, duration) {
  const audioEl = document.getElementById('audioEl');
  const playBtn = document.getElementById('playBtn');
  const progressFill = document.getElementById('progressFill');
  const timeEl = document.getElementById('timeEl');
  const durationEl = document.getElementById('durationEl');

  // Register handlers before loading so no event from the new source is missed.
  audioEl.ontimeupdate = () => {
    const pct = (audioEl.currentTime / (audioEl.duration || 1)) * 100;
    progressFill.style.width = pct + '%';
    timeEl.textContent = fmtTime(audioEl.currentTime);
  };
  audioEl.onloadedmetadata = () => {
    durationEl.textContent = fmtTime(audioEl.duration);
  };
  audioEl.onended = () => {
    playBtn.textContent = '▶';
  };

  audioEl.src = audioUrl;
  audioEl.load();

  // Reset transport UI left over from the previous clip.
  playBtn.textContent = '▶';
  progressFill.style.width = '0%';
  timeEl.textContent = '0:00';
  durationEl.textContent = '0:00';

  // Waveform bars (decorative: heights and opacities are random, not derived from the audio)
  const bars = document.getElementById('waveformBars');
  const hint = document.getElementById('waveformHint');
  bars.innerHTML = '';
  for (let i = 0; i < 40; i++) {
    const bar = document.createElement('div');
    bar.className = 'wbar';
    bar.style.height = (8 + Math.random() * 40) + 'px';
    bar.style.opacity = 0.3 + Math.random() * 0.6;
    bars.appendChild(bar);
  }
  bars.style.display = 'flex';
  hint.style.display = 'none';

  // Meta badges: voice, duration (when known), format.
  const voiceInfo = allVoices[selectedVoice] || {};
  const badgeTexts = [
    `${voiceInfo.flag || ''} ${voiceInfo.label || selectedVoice}`,
    duration ? `${duration}s` : null,
    selectedFormat.toUpperCase(),
  ];
  const metaRow = document.getElementById('metaRow');
  metaRow.innerHTML = '';
  for (const text of badgeTexts) {
    if (text === null) continue;
    const badge = document.createElement('span');
    badge.className = 'meta-badge';
    badge.textContent = text;
    metaRow.appendChild(badge);
  }

  document.getElementById('audioPlayer').classList.add('visible');
}
/**
 * Toggle playback of the generated audio and keep the play button glyph in
 * sync with the element's state.
 *
 * `play()` returns a promise in current browsers and can reject (for example
 * when no playable source is loaded or playback is not allowed). In that case
 * the button is switched back to the play glyph instead of being left on the
 * pause glyph, and the rejection is consumed so it does not surface as an
 * unhandled promise rejection.
 */
function togglePlay() {
  const audio = document.getElementById('audioEl');
  const btn = document.getElementById('playBtn');

  if (!audio.paused) {
    audio.pause();
    btn.textContent = '▶';
    return;
  }

  btn.textContent = '⏸';
  const started = audio.play();
  // Older engines return undefined from play(); only chain when a promise exists.
  if (started && typeof started.catch === 'function') {
    started.catch(() => {
      if (audio.paused) btn.textContent = '▶';
    });
  }
}
/**
 * Seek the generated audio to the position clicked on the progress bar.
 *
 * The horizontal click position is converted to a 0..1 fraction of the bar's
 * width and applied to the clip duration. Nothing happens while the duration
 * is unknown or non-finite, or the bar has no width: assigning a non-finite
 * value to `currentTime` throws on a media element.
 *
 * @param {MouseEvent} e Click event; only `clientX` is read.
 */
function seekAudio(e) {
  const audio = document.getElementById('audioEl');
  const bg = document.getElementById('progressBg');
  const rect = bg.getBoundingClientRect();
  const total = audio.duration;
  if (!Number.isFinite(total) || !(rect.width > 0)) return;

  // Clamp so a coordinate just outside the bar cannot seek before 0 or past the end.
  const pct = Math.min(1, Math.max(0, (e.clientX - rect.left) / rect.width));
  audio.currentTime = pct * total;
}
/**
 * Format a number of seconds as `m:ss`.
 *
 * @param {number} s Seconds; fractional parts are truncated.
 * @returns {string} The formatted time, or '0:00' for anything that is not a
 *   positive finite number (0, NaN, undefined, null, Infinity, negatives).
 */
function fmtTime(s) {
  const secs = Number(s);
  // Infinity would otherwise render as "Infinity:NaN" and negatives as "-1:-5".
  if (!Number.isFinite(secs) || secs <= 0) return '0:00';
  const whole = Math.floor(secs);
  const m = Math.floor(whole / 60), sec = whole % 60;
  return `${m}:${sec.toString().padStart(2, '0')}`;
}
/**
 * Copy the absolute URL of the current audio to the clipboard.
 *
 * The success toast is shown only once the clipboard write has actually
 * succeeded; a rejected write, or a browser/context without the async
 * Clipboard API, produces an error toast instead.
 */
function copyAudioUrl() {
  // NOTE(review): the generation code visible in this file stores its result
  // in `currentBlobUrl`; confirm that `currentAudioUrl` is still declared and
  // assigned elsewhere, and that it holds a server-relative path (prefixing
  // `location.origin` to a blob: URL would not give a usable link).
  if (!currentAudioUrl) return;
  const url = location.origin + currentAudioUrl;

  const clipboard = navigator.clipboard;
  if (!clipboard || typeof clipboard.writeText !== 'function') {
    toast('Copy failed', 'error');
    return;
  }
  clipboard.writeText(url).then(
    () => toast('URL copied!', 'success'),
    () => toast('Copy failed', 'error')
  );
}
// ── History ────────────────────────────────────────────────────────────────────
/**
 * Fetch the history listing from `/history` and hand its `files` entry to
 * `renderHistory`. Any failure along the way is logged to the console and
 * otherwise ignored.
 */
async function loadHistory() {
  try {
    const response = await fetch('/history');
    const { files } = await response.json();
    renderHistory(files);
  } catch (err) {
    console.error(err);
  }
}
/**
 * Mark a history entry as selected and load it into the preview player.
 *
 * @param {string} filename Name remembered in `selectedHistFile`.
 * @param {string} url Audio URL assigned to the preview <audio> element.
 */
function selectHist(filename, url) {
  const el = (id) => document.getElementById(id);

  selectedHistFile = filename;
  el('histAudio').src = url;
  el('histPreview').classList.add('visible');
  el('deleteBtn').style.display = 'flex';

  // Re-fetch and re-render the list (presumably so the new selection is
  // reflected in it).
  loadHistory();
}
/**
 * Download a history file by clicking a temporary <a download> element.
 *
 * @param {string} url Location of the audio file.
 * @param {string} filename Suggested name for the saved file.
 */
function histDownload(url, filename) {
  const link = document.createElement('a');
  link.href = url;
  link.download = filename;
  link.click();
}
/**
 * Delete a history file on the server and refresh the history list.
 *
 * The file name is URL-encoded before being placed in the request path, and
 * the deletion only counts as successful when the server answers with a 2xx
 * status. Any failure (network error or non-2xx response) shows a
 * "Delete failed" toast and leaves the current selection untouched.
 *
 * @param {string} filename Name of the history file to delete.
 */
async function deleteFile(filename) {
  try {
    const res = await fetch(`/history/${encodeURIComponent(filename)}`, { method: 'DELETE' });
    // fetch() resolves for 4xx/5xx responses too, so check the status explicitly.
    if (!res.ok) throw new Error(`Delete failed (HTTP ${res.status})`);

    toast('Deleted', 'success');
    if (selectedHistFile === filename) {
      // The previewed file is gone: clear the selection and hide its controls.
      selectedHistFile = null;
      document.getElementById('histPreview').classList.remove('visible');
      document.getElementById('deleteBtn').style.display = 'none';
    }
    loadHistory();
  } catch (e) { toast('Delete failed', 'error'); }
}
/**
 * Delete the currently selected history file; does nothing when no file is
 * selected.
 */
function deleteSelected() {
  if (!selectedHistFile) return;
  deleteFile(selectedHistFile);
}
// ── Health Check ──────────────────────────────────────────────────────────────
/**
 * Poll `/health` and reflect the result in the header status indicator.
 *
 * - model loaded: status "Ready", polling stops.
 * - model not loaded yet: status "Loading…", checked again after 3 s.
 * - request or parsing failure: status "Offline", checked again after 5 s.
 *
 * At most one follow-up check is scheduled per call. The retry is decided
 * first and scheduled last, so an exception raised after the "loading" branch
 * cannot leave two timers running (which would multiply the polling loops).
 */
async function checkHealth() {
  let retryDelayMs = 0;
  try {
    const res = await fetch('/health');
    const data = await res.json();
    const dot = document.getElementById('statusDot');
    const txt = document.getElementById('statusText');
    const dev = document.getElementById('deviceInfo');
    if (data.model_loaded) {
      dot.className = 'status-dot ready';
      txt.textContent = 'Ready';
    } else {
      dot.className = 'status-dot loading';
      txt.textContent = 'Loading…';
      retryDelayMs = 3000;
    }
    // Tolerate a payload without `device` or `pipelines` instead of throwing.
    const device = String(data.device || 'unknown').toUpperCase();
    const langCount = Array.isArray(data.pipelines) ? data.pipelines.length : 0;
    dev.textContent = `${device} · ${langCount} langs`;
  } catch (e) {
    document.getElementById('statusDot').className = 'status-dot error';
    document.getElementById('statusText').textContent = 'Offline';
    retryDelayMs = 5000;
  }
  if (retryDelayMs > 0) setTimeout(checkHealth, retryDelayMs);
}
// ── Toast ──────────────────────────────────────────────────────────────────────
/**
 * Show a transient notification in the toast container.
 *
 * The toast starts its slide-out animation after 3 s and is removed from the
 * page 300 ms later.
 *
 * @param {string} msg Message text. It is inserted as plain text, never parsed
 *   as HTML, because callers pass error messages that can contain
 *   server-supplied content.
 * @param {string} [type='success'] Added as a CSS class; 'success' shows a
 *   check mark, any other value shows a cross.
 */
function toast(msg, type = 'success') {
  const c = document.getElementById('toastContainer');
  const el = document.createElement('div');
  el.className = `toast ${type}`;

  const icon = document.createElement('span');
  icon.textContent = type === 'success' ? '✓' : '✕';
  el.appendChild(icon);
  // Text node instead of innerHTML so markup inside msg is shown literally.
  el.appendChild(document.createTextNode(` ${msg}`));

  c.appendChild(el);
  setTimeout(() => {
    el.style.animation = 'slideOut 0.3s ease forwards';
    setTimeout(() => el.remove(), 300);
  }, 3000);
}
// ── Init ──────────────────────────────────────────────────────────────────────
// Start-up sequence: build the example list, request the voices and begin
// health polling.
buildExamples();
loadVoices();
checkHealth();
// Pre-fill the text box with a sample sentence, then refresh the character
// counter. The block scope keeps the helper name out of the global scope.
{
  const sampleText = "Hello! Welcome to Kokoro TTS. This is a powerful, open-source text-to-speech system running locally.";
  document.getElementById('textInput').value = sampleText;
}
updateCharCount();
</script>
</body>
</html>