Spaces:

ABAO77
/

kokoro

Sleeping

App Files Community

ABAO77 committed on Sep 4, 2025

Commit

693c106

1 Parent(s): 827e976

Refactor code structure for improved readability and maintainability

Browse files

Files changed (6) hide show

.dockerignore +72 -0
.gitignore +2 -0
Dockerfile +72 -20
__pycache__/ui.cpython-311.pyc +0 -0
app.py +222 -686
ui.py +609 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,72 @@

+# Git files
+.git
+.gitignore
+.gitattributes
+# Documentation
+README.md
+*.md
+docs/
+# Python cache
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+# Virtual environments
+venv/
+env/
+ENV/
+# IDE files
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+# OS files
+.DS_Store
+Thumbs.db
+# Logs
+*.log
+logs/
+# Test files
+test/
+tests/
+*_test.py
+test_*.py
+# Development files
+.env
+.env.local
+docker-compose.yml
+docker-compose.*.yml
+# Model files (will be downloaded in container)
+*.onnx
+*.bin
+# Temporary files
+tmp/
+temp/
+*.tmp

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ **.onnx
2	+ **.bin

Dockerfile CHANGED Viewed

@@ -1,37 +1,89 @@
-FROM python:3.12-slim
-# Set working directory
-WORKDIR /app
-# Install system dependencies
 RUN apt-get update && apt-get install -y \
     wget \
     curl \
     && rm -rf /var/lib/apt/lists/*
-# Copy requirements first for better caching
 COPY requirements.txt .
-# Install Python dependencies
-RUN pip install -r requirements.txt
-# Download model files to /app directory
-RUN wget -O kokoro-v1.0.onnx https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/kokoro-v1.0.onnx \
-    && wget -O voices-v1.0.bin https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/voices-v1.0.bin
-# Copy application code
-COPY app.py .
-# Create non-root user for security
-RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app
 USER appuser
 # Expose port
 EXPOSE 7860
-# Health check
-HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
-    CMD curl -f http://localhost:8000/ || exit 1
-# Run the application
-CMD ["python", "app.py"]

+# Use multi-stage build for smaller final image
+FROM python:3.12-slim as builder
+# Install build dependencies
 RUN apt-get update && apt-get install -y \
     wget \
     curl \
+    gcc \
+    g++ \
     && rm -rf /var/lib/apt/lists/*
+# Create virtual environment
+RUN python -m venv /opt/venv
+ENV PATH="/opt/venv/bin:$PATH"
+# Copy and install requirements
 COPY requirements.txt .
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt
+# Download and verify model files
+RUN wget -O kokoro-v1.0.onnx https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/kokoro-v1.0.onnx && \
+    wget -O voices-v1.0.bin https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/voices-v1.0.bin && \
+    # Verify both files exist and are non-empty (basic validation)
+    [ -s kokoro-v1.0.onnx ] && [ -s voices-v1.0.bin ] || (echo "Model download failed" && exit 1)
+# Production stage
+FROM python:3.12-slim as production
+# Install only runtime dependencies
+RUN apt-get update && apt-get install -y \
+    curl \
+    && rm -rf /var/lib/apt/lists/* \
+    && apt-get clean
+# Copy virtual environment from builder
+COPY --from=builder /opt/venv /opt/venv
+ENV PATH="/opt/venv/bin:$PATH"
+# Set working directory
+WORKDIR /app
+# Copy model files from builder
+COPY --from=builder /kokoro-v1.0.onnx ./kokoro-v1.0.onnx
+COPY --from=builder /voices-v1.0.bin ./voices-v1.0.bin
+# Create non-root user and directories
+RUN groupadd -r appgroup && useradd -r -g appgroup -u 1000 appuser && \
+    mkdir -p /app/cache /app/tmp && \
+    chown -R appuser:appgroup /app
+# Copy application files
+COPY --chown=appuser:appgroup app.py .
+COPY --chown=appuser:appgroup ui.py .
+# Switch to non-root user
 USER appuser
+# Set Python optimizations
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONHASHSEED=random \
+    PIP_NO_CACHE_DIR=1 \
+    PIP_DISABLE_PIP_VERSION_CHECK=1
+# Set the thread-count variables for OpenMP, MKL, NumExpr and OpenBLAS to 4
+ENV OMP_NUM_THREADS=4 \
+    MKL_NUM_THREADS=4 \
+    NUMEXPR_MAX_THREADS=4 \
+    OPENBLAS_NUM_THREADS=4
+# Cache directory for application
+ENV CACHE_DIR=/app/cache \
+    TMP_DIR=/app/tmp
 # Expose port
 EXPOSE 7860
+# Health check with proper endpoint and timing
+HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
+    CMD curl -f http://localhost:7860/languages || exit 1
+# Check that the models load in a separate Python process, then exec the app in place of the shell
+CMD python -c "from kokoro_onnx import Kokoro; from kokoro_onnx.tokenizer import Tokenizer; \
+    print('Preloading models...'); \
+    tokenizer = Tokenizer(); \
+    kokoro = Kokoro('kokoro-v1.0.onnx', 'voices-v1.0.bin'); \
+    print('Models loaded successfully')" && \
+    exec python app.py

__pycache__/ui.cpython-311.pyc ADDED Viewed

Binary file (25 kB). View file

app.py CHANGED Viewed

@@ -1,22 +1,42 @@
-from fastapi import FastAPI, HTTPException
 from fastapi.responses import StreamingResponse, HTMLResponse
 from fastapi.staticfiles import StaticFiles
-from pydantic import BaseModel
 import numpy as np
 import io
 import wave
 from kokoro_onnx import Kokoro
 from kokoro_onnx.tokenizer import Tokenizer
-from typing import Optional
 import uvicorn
 app = FastAPI(title="Kokoro TTS API", version="1.0.0")
-# Initialize models
 tokenizer = Tokenizer()
 kokoro = Kokoro("kokoro-v1.0.onnx", "voices-v1.0.bin")
 SUPPORTED_LANGUAGES = ["en-us"]
 class TTSRequest(BaseModel):
     text: str
@@ -25,704 +45,166 @@ class TTSRequest(BaseModel):
     blend_voice_name: Optional[str] = None
     speed: float = 1.0
 class TTSResponse(BaseModel):
     phonemes: str
     sample_rate: int
 def numpy_to_wav_bytes(audio_data: np.ndarray, sample_rate: int) -> bytes:
-    """Convert numpy array to WAV bytes"""
-    # Ensure audio is in the right format
     if audio_data.dtype != np.int16:
-        # Convert float to int16
         audio_data = (audio_data * 32767).astype(np.int16)
-    # Create WAV file in memory
     buffer = io.BytesIO()
     with wave.open(buffer, "wb") as wav_file:
-        wav_file.setnchannels(1)  # Mono
-        wav_file.setsampwidth(2)  # 2 bytes per sample (int16)
         wav_file.setframerate(sample_rate)
         wav_file.writeframes(audio_data.tobytes())
     buffer.seek(0)
     return buffer.getvalue()
 @app.get("/", response_class=HTMLResponse)
 async def get_home():
-    """Serve the main UI page"""
-    html_content = """
-    <!DOCTYPE html>
-    <html lang="en">
-    <head>
-        <meta charset="UTF-8">
-        <meta name="viewport" content="width=device-width, initial-scale=1.0">
-        <title>Kokoro TTS Test Interface</title>
-        <style>
-            * {
-                margin: 0;
-                padding: 0;
-                box-sizing: border-box;
-            }
-            body {
-                font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
-                background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-                min-height: 100vh;
-                padding: 20px;
-                color: #333;
-            }
-            .container {
-                max-width: 800px;
-                margin: 0 auto;
-                background: white;
-                border-radius: 15px;
-                box-shadow: 0 10px 30px rgba(0, 0, 0, 0.3);
-                overflow: hidden;
-            }
-            .header {
-                background: linear-gradient(45deg, #667eea, #764ba2);
-                color: white;
-                text-align: center;
-                padding: 30px;
-            }
-            .header h1 {
-                font-size: 2.5rem;
-                margin-bottom: 10px;
-                text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
-            }
-            .header p {
-                font-size: 1.1rem;
-                opacity: 0.9;
-            }
-            .content {
-                padding: 30px;
-            }
-            .form-group {
-                margin-bottom: 25px;
-            }
-            label {
-                display: block;
-                margin-bottom: 8px;
-                font-weight: 600;
-                color: #555;
-            }
-            input, select, textarea {
-                width: 100%;
-                padding: 12px 15px;
-                border: 2px solid #e1e5e9;
-                border-radius: 8px;
-                font-size: 16px;
-                transition: all 0.3s ease;
-                font-family: inherit;
-            }
-            input:focus, select:focus, textarea:focus {
-                outline: none;
-                border-color: #667eea;
-                box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
-            }
-            textarea {
-                resize: vertical;
-                min-height: 100px;
-            }
-            .range-container {
-                display: flex;
-                align-items: center;
-                gap: 15px;
-            }
-            .range-container input[type="range"] {
-                flex: 1;
-            }
-            .range-value {
-                background: #f8f9fa;
-                padding: 8px 12px;
-                border-radius: 6px;
-                font-weight: 600;
-                min-width: 60px;
-                text-align: center;
-                border: 2px solid #e1e5e9;
-            }
-            .button-group {
-                display: grid;
-                grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
-                gap: 15px;
-                margin-top: 30px;
-            }
-            .btn {
-                padding: 15px 25px;
-                border: none;
-                border-radius: 8px;
-                font-size: 16px;
-                font-weight: 600;
-                cursor: pointer;
-                transition: all 0.3s ease;
-                text-transform: uppercase;
-                letter-spacing: 0.5px;
-            }
-            .btn-primary {
-                background: linear-gradient(45deg, #667eea, #764ba2);
-                color: white;
-            }
-            .btn-secondary {
-                background: linear-gradient(45deg, #ffecd2, #fcb69f);
-                color: #8b4513;
-            }
-            .btn-info {
-                background: linear-gradient(45deg, #a8edea, #fed6e3);
-                color: #2c3e50;
-            }
-            .btn:hover {
-                transform: translateY(-2px);
-                box-shadow: 0 5px 15px rgba(0, 0, 0, 0.2);
-            }
-            .btn:disabled {
-                opacity: 0.6;
-                cursor: not-allowed;
-                transform: none;
-            }
-            .result-section {
-                margin-top: 30px;
-                padding: 25px;
-                background: #f8f9fa;
-                border-radius: 10px;
-                border-left: 5px solid #667eea;
-            }
-            .result-section h3 {
-                color: #667eea;
-                margin-bottom: 15px;
-                font-size: 1.3rem;
-            }
-            .info-display {
-                background: white;
-                padding: 15px;
-                border-radius: 8px;
-                margin: 10px 0;
-                border: 1px solid #e1e5e9;
-            }
-            .info-display strong {
-                color: #667eea;
-            }
-            .loading {
-                display: none;
-                text-align: center;
-                padding: 20px;
-                color: #667eea;
-            }
-            .loading.show {
-                display: block;
-            }
-            .spinner {
-                display: inline-block;
-                width: 30px;
-                height: 30px;
-                border: 3px solid #f3f3f3;
-                border-top: 3px solid #667eea;
-                border-radius: 50%;
-                animation: spin 1s linear infinite;
-                margin-right: 10px;
-            }
-            @keyframes spin {
-                0% { transform: rotate(0deg); }
-                100% { transform: rotate(360deg); }
-            }
-            .error {
-                background: #fee;
-                color: #c33;
-                padding: 15px;
-                border-radius: 8px;
-                border-left: 5px solid #c33;
-                margin: 15px 0;
-            }
-            .success {
-                background: #efe;
-                color: #363;
-                padding: 15px;
-                border-radius: 8px;
-                border-left: 5px solid #363;
-                margin: 15px 0;
-            }
-            audio {
-                width: 100%;
-                margin-top: 15px;
-            }
-            .checkbox-group {
-                display: flex;
-                align-items: center;
-                gap: 10px;
-                margin-top: 10px;
-            }
-            .checkbox-group input[type="checkbox"] {
-                width: auto;
-            }
-            .example-select {
-                background: #f8f9fa;
-                border: 2px dashed #667eea;
-                border-radius: 6px;
-                font-size: 14px;
-                color: #667eea;
-                margin-bottom: 10px;
-            }
-            .example-select:focus {
-                border-color: #764ba2;
-                background: white;
-            }
-            .example-label {
-                font-size: 0.9em;
-                color: #667eea;
-                font-weight: 500;
-                margin-bottom: 5px;
-            }
-        </style>
-    </head>
-    <body>
-        <div class="container">
-            <div class="header">
-                <h1>🎤 Kokoro TTS</h1>
-                <p>Text-to-Speech Testing Interface</p>
-            </div>
-            <div class="content">
-                <form id="ttsForm">
-                    <div class="form-group">
-                        <label for="text">Text to Convert:</label>
-                        <div style="margin-bottom: 10px;">
-                            <div class="example-label">📝 Quick Examples:</div>
-                            <select id="example-texts" class="example-select" onchange="loadExampleText()">
-                                <option value="">Choose an example...</option>
-                                <option value="Hello! This is a test of the Kokoro text-to-speech system.">Basic Greeting</option>
-                                <option value="The quick brown fox jumps over the lazy dog. This sentence contains every letter of the alphabet.">Alphabet Test</option>
-                                <option value="Welcome to our English tutoring platform! We're here to help you improve your pronunciation and speaking skills.">English Learning</option>
-                                <option value="In a hole in the ground there lived a hobbit. Not a nasty, dirty, wet hole filled with the ends of worms and an oozy smell.">Story Reading</option>
-                                <option value="To be or not to be, that is the question. Whether 'tis nobler in the mind to suffer the slings and arrows of outrageous fortune.">Shakespeare</option>
-                                <option value="Ladies and gentlemen, welcome to today's presentation. We will be discussing the latest developments in artificial intelligence.">Presentation</option>
-                                <option value="The weather today is sunny with a high of 75 degrees Fahrenheit. Perfect for outdoor activities and picnics in the park.">Weather Report</option>
-                                <option value="One, two, three, four, five. Ten, twenty, thirty, forty, fifty. One hundred, one thousand, one million.">Numbers Practice</option>
-                                <option value="How are you doing today? I hope you're having a wonderful time learning English with our voice synthesis technology.">Conversational</option>
-                                <option value="Science and technology have revolutionized the way we communicate, learn, and interact with the world around us.">Technical</option>
-                                <option value="Once upon a time, in a faraway kingdom, there lived a brave princess who could speak to animals and understand their language.">Fairy Tale</option>
-                                <option value="Please remember to wash your hands frequently, wear a mask when necessary, and maintain social distance for everyone's safety.">Instructions</option>
-                                <option value="The pronunciation of English can be challenging, but with practice and patience, you can master the sounds and rhythms of the language.">Educational</option>
-                                <option value="Artificial intelligence and machine learning are transforming industries from healthcare to transportation, creating new possibilities.">AI Topic</option>
-                                <option value="Thank you for using our text-to-speech service. We hope it helps you in your language learning journey. Have a great day!">Thank You Message</option>
-                            </select>
-                        </div>
-                        <textarea id="text" name="text" placeholder="Enter your text here or choose an example above..." required>Hello! This is a test of the Kokoro text-to-speech system.</textarea>
-                    </div>
-                    <div class="form-group">
-                        <label for="voice">Voice:</label>
-                        <select id="voice" name="voice" required>
-                            <option value="">Loading voices...</option>
-                        </select>
-                    </div>
-                    <div class="form-group">
-                        <label for="language">Language:</label>
-                        <select id="language" name="language" required>
-                            <option value="">Loading languages...</option>
-                        </select>
-                    </div>
-                    <div class="form-group">
-                        <label for="blend_voice">Blend Voice (Optional):</label>
-                        <select id="blend_voice" name="blend_voice_name">
-                            <option value="">No blending</option>
-                        </select>
-                    </div>
-                    <div class="form-group">
-                        <label for="speed">Speed:</label>
-                        <div class="range-container">
-                            <input type="range" id="speed" name="speed" min="0.5" max="2.0" step="0.1" value="1.0">
-                            <div class="range-value" id="speedValue">1.0</div>
-                        </div>
-                    </div>
-                    <div class="button-group">
-                        <button type="button" class="btn btn-primary" onclick="generateAudio()">
-                            🎵 Generate Audio
-                        </button>
-                        <button type="button" class="btn btn-secondary" onclick="generateInfo()">
-                            📝 Get Info Only
-                        </button>
-                        <button type="button" class="btn btn-info" onclick="generateBoth()">
-                            🎯 Generate Both
-                        </button>
-                    </div>
-                </form>
-                <div class="loading" id="loading">
-                    <div class="spinner"></div>
-                    Processing your request...
-                </div>
-                <div id="results"></div>
-            </div>
-        </div>
-        <script>
-            // Load voices and languages on page load
-            window.addEventListener('load', async function() {
-                await loadVoices();
-                await loadLanguages();
-                setupEventListeners();
-            });
-            function setupEventListeners() {
-                const speedSlider = document.getElementById('speed');
-                const speedValue = document.getElementById('speedValue');
-                speedSlider.addEventListener('input', function() {
-                    speedValue.textContent = this.value;
-                });
-            }
-            function loadExampleText() {
-                const exampleSelect = document.getElementById('example-texts');
-                const textArea = document.getElementById('text');
-                if (exampleSelect.value) {
-                    textArea.value = exampleSelect.value;
-                    // Reset the select to show "Choose an example..."
-                    exampleSelect.value = '';
-                }
-            }
-            async function loadVoices() {
-                try {
-                    const response = await fetch('/voices');
-                    const data = await response.json();
-                    const voiceSelect = document.getElementById('voice');
-                    const blendSelect = document.getElementById('blend_voice');
-                    voiceSelect.innerHTML = '';
-                    blendSelect.innerHTML = '<option value="">No blending</option>';
-                    data.voices.forEach(voice => {
-                        const option1 = new Option(voice, voice);
-                        const option2 = new Option(voice, voice);
-                        voiceSelect.add(option1);
-                        blendSelect.add(option2);
-                    });
-                    if (data.voices.length > 0) {
-                        voiceSelect.value = data.voices[0];
-                    }
-                } catch (error) {
-                    showError('Failed to load voices: ' + error.message);
-                }
-            }
-            async function loadLanguages() {
-                try {
-                    const response = await fetch('/languages');
-                    const data = await response.json();
-                    const languageSelect = document.getElementById('language');
-                    languageSelect.innerHTML = '';
-                    data.languages.forEach(lang => {
-                        const option = new Option(lang, lang);
-                        languageSelect.add(option);
-                    });
-                    if (data.languages.length > 0) {
-                        languageSelect.value = data.languages[0];
-                    }
-                } catch (error) {
-                    showError('Failed to load languages: ' + error.message);
-                }
-            }
-            function getFormData() {
-                return {
-                    text: document.getElementById('text').value,
-                    voice: document.getElementById('voice').value,
-                    language: document.getElementById('language').value,
-                    blend_voice_name: document.getElementById('blend_voice').value || null,
-                    speed: parseFloat(document.getElementById('speed').value)
-                };
-            }
-            function showLoading() {
-                document.getElementById('loading').classList.add('show');
-                document.getElementById('results').innerHTML = '';
-            }
-            function hideLoading() {
-                document.getElementById('loading').classList.remove('show');
-            }
-            function showError(message) {
-                hideLoading();
-                document.getElementById('results').innerHTML =
-                    `<div class="error"><strong>Error:</strong> ${message}</div>`;
-            }
-            function showSuccess(content) {
-                hideLoading();
-                document.getElementById('results').innerHTML = content;
-            }
-            async function generateAudio() {
-                showLoading();
-                try {
-                    const formData = getFormData();
-                    const response = await fetch('/tts/audio', {
-                        method: 'POST',
-                        headers: {
-                            'Content-Type': 'application/json'
-                        },
-                        body: JSON.stringify(formData)
-                    });
-                    if (!response.ok) {
-                        const error = await response.json();
-                        throw new Error(error.detail || 'Failed to generate audio');
-                    }
-                    const audioBlob = await response.blob();
-                    const audioUrl = URL.createObjectURL(audioBlob);
-                    showSuccess(`
-                        <div class="result-section">
-                            <h3>🎵 Generated Audio</h3>
-                            <div class="success">Audio generated successfully!</div>
-                            <audio controls>
-                                <source src="${audioUrl}" type="audio/wav">
-                                Your browser does not support the audio element.
-                            </audio>
-                            <p style="margin-top: 10px; font-size: 0.9em; color: #666;">
-                                Right-click on the audio player and select "Save audio as..." to download.
-                            </p>
-                        </div>
-                    `);
-                } catch (error) {
-                    showError(error.message);
-                }
-            }
-            async function generateInfo() {
-                showLoading();
-                try {
-                    const formData = getFormData();
-                    const response = await fetch('/tts/info', {
-                        method: 'POST',
-                        headers: {
-                            'Content-Type': 'application/json'
-                        },
-                        body: JSON.stringify(formData)
-                    });
-                    if (!response.ok) {
-                        const error = await response.json();
-                        throw new Error(error.detail || 'Failed to generate info');
-                    }
-                    const data = await response.json();
-                    showSuccess(`
-                        <div class="result-section">
-                            <h3>📝 Text Analysis</h3>
-                            <div class="success">Analysis completed successfully!</div>
-                            <div class="info-display">
-                                <strong>Original Text:</strong><br>
-                                ${formData.text}
-                            </div>
-                            <div class="info-display">
-                                <strong>Phonemes:</strong><br>
-                                ${data.phonemes}
-                            </div>
-                            <div class="info-display">
-                                <strong>Sample Rate:</strong> ${data.sample_rate} Hz
-                            </div>
-                            <div class="info-display">
-                                <strong>Voice:</strong> ${formData.voice}
-                            </div>
-                            <div class="info-display">
-                                <strong>Speed:</strong> ${formData.speed}x
-                            </div>
-                        </div>
-                    `);
-                } catch (error) {
-                    showError(error.message);
-                }
-            }
-            async function generateBoth() {
-                showLoading();
-                try {
-                    const formData = getFormData();
-                    const response = await fetch('/tts/both', {
-                        method: 'POST',
-                        headers: {
-                            'Content-Type': 'application/json'
-                        },
-                        body: JSON.stringify(formData)
-                    });
-                    if (!response.ok) {
-                        const error = await response.json();
-                        throw new Error(error.detail || 'Failed to generate audio and info');
-                    }
-                    const data = await response.json();
-                    // Convert base64 to blob for audio playback
-                    const audioBytes = atob(data.audio_base64);
-                    const audioArray = new Uint8Array(audioBytes.length);
-                    for (let i = 0; i < audioBytes.length; i++) {
-                        audioArray[i] = audioBytes.charCodeAt(i);
-                    }
-                    const audioBlob = new Blob([audioArray], { type: 'audio/wav' });
-                    const audioUrl = URL.createObjectURL(audioBlob);
-                    showSuccess(`
-                        <div class="result-section">
-                            <h3>🎯 Complete Analysis & Audio</h3>
-                            <div class="success">Generation completed successfully!</div>
-                            <h4 style="margin-top: 20px; color: #667eea;">📊 Analysis Information</h4>
-                            <div class="info-display">
-                                <strong>Original Text:</strong><br>
-                                ${formData.text}
-                            </div>
-                            <div class="info-display">
-                                <strong>Phonemes:</strong><br>
-                                ${data.phonemes}
-                            </div>
-                            <div class="info-display">
-                                <strong>Sample Rate:</strong> ${data.sample_rate} Hz
-                            </div>
-                            <div class="info-display">
-                                <strong>Voice:</strong> ${formData.voice}
-                                ${formData.blend_voice_name ? ` (blended with ${formData.blend_voice_name})` : ''}
-                            </div>
-                            <div class="info-display">
-                                <strong>Speed:</strong> ${formData.speed}x
-                            </div>
-                            <h4 style="margin-top: 20px; color: #667eea;">🎵 Generated Audio</h4>
-                            <audio controls>
-                                <source src="${audioUrl}" type="audio/wav">
-                                Your browser does not support the audio element.
-                            </audio>
-                            <p style="margin-top: 10px; font-size: 0.9em; color: #666;">
-                                Right-click on the audio player and select "Save audio as..." to download.
-                            </p>
-                        </div>
-                    `);
-                } catch (error) {
-                    showError(error.message);
-                }
-            }
-        </script>
-    </body>
-    </html>
-    """
     return HTMLResponse(content=html_content)
 @app.get("/voices")
 async def get_voices():
-    """Get list of available voices"""
-    return {"voices": sorted(kokoro.get_voices())}
 @app.get("/languages")
 async def get_languages():
-    """Get list of supported languages"""
     return {"languages": SUPPORTED_LANGUAGES}
 @app.post("/tts/audio")
 async def generate_audio(request: TTSRequest):
-    """Generate audio from text and return as WAV file"""
     try:
-        # Validate language
         if request.language not in SUPPORTED_LANGUAGES:
             raise HTTPException(
                 status_code=400, detail=f"Unsupported language: {request.language}"
             )
-        # Validate voice
         available_voices = kokoro.get_voices()
         if request.voice not in available_voices:
             raise HTTPException(
                 status_code=400, detail=f"Unsupported voice: {request.voice}"
             )
-        # Validate blend voice if provided
-        if (
-            request.blend_voice_name
-            and request.blend_voice_name not in available_voices
-        ):
             raise HTTPException(
-                status_code=400,
-                detail=f"Unsupported blend voice: {request.blend_voice_name}",
             )
-        # Convert text to phonemes
-        phonemes = tokenizer.phonemize(request.text, lang=request.language)
-        # Handle voice blending
-        voice = request.voice
-        if request.blend_voice_name:
-            first_voice = kokoro.get_voice_style(request.voice)
-            second_voice = kokoro.get_voice_style(request.blend_voice_name)
-            voice = np.add(first_voice * 0.5, second_voice * 0.5)
-        # Generate audio
-        samples, sample_rate = kokoro.create(
-            phonemes, voice=voice, speed=request.speed, is_phonemes=True
         )
-        # Convert to WAV bytes
-        wav_bytes = numpy_to_wav_bytes(samples, sample_rate)
-        # Return as streaming response
         return StreamingResponse(
             io.BytesIO(wav_bytes),
             media_type="audio/wav",
@@ -732,34 +214,70 @@ async def generate_audio(request: TTSRequest):
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 @app.post("/tts/info", response_model=TTSResponse)
 async def generate_info(request: TTSRequest):
-    """Generate phonemes and return metadata without audio"""
     try:
-        # Validate language
         if request.language not in SUPPORTED_LANGUAGES:
             raise HTTPException(
                 status_code=400, detail=f"Unsupported language: {request.language}"
             )
-        # Convert text to phonemes
-        phonemes = tokenizer.phonemize(request.text, lang=request.language)
-        # Get sample rate (standard for this model)
-        sample_rate = 24000  # Kokoro typically uses 24kHz
         return TTSResponse(phonemes=phonemes, sample_rate=sample_rate)
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 @app.post("/tts/both")
 async def generate_both(request: TTSRequest):
-    """Generate both audio and metadata"""
     try:
-        # Validate inputs (same as audio endpoint)
         if request.language not in SUPPORTED_LANGUAGES:
             raise HTTPException(
                 status_code=400, detail=f"Unsupported language: {request.language}"
@@ -771,34 +289,44 @@ async def generate_both(request: TTSRequest):
                 status_code=400, detail=f"Unsupported voice: {request.voice}"
             )
-        if (
-            request.blend_voice_name
-            and request.blend_voice_name not in available_voices
-        ):
             raise HTTPException(
-                status_code=400,
-                detail=f"Unsupported blend voice: {request.blend_voice_name}",
             )
-        # Convert text to phonemes
-        phonemes = tokenizer.phonemize(request.text, lang=request.language)
-        # Handle voice blending
-        voice = request.voice
-        if request.blend_voice_name:
-            first_voice = kokoro.get_voice_style(request.voice)
-            second_voice = kokoro.get_voice_style(request.blend_voice_name)
-            voice = np.add(first_voice * 0.5, second_voice * 0.5)
-        # Generate audio
-        samples, sample_rate = kokoro.create(
-            phonemes, voice=voice, speed=request.speed, is_phonemes=True
         )
-        # Convert to base64 for JSON response
-        wav_bytes = numpy_to_wav_bytes(samples, sample_rate)
         import base64
         audio_base64 = base64.b64encode(wav_bytes).decode()
         return {
@@ -811,6 +339,14 @@ async def generate_both(request: TTSRequest):
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=7860)

+from fastapi import FastAPI, HTTPException, BackgroundTasks
 from fastapi.responses import StreamingResponse, HTMLResponse
 from fastapi.staticfiles import StaticFiles
+from pydantic import BaseModel, validator
 import numpy as np
 import io
 import wave
 from kokoro_onnx import Kokoro
 from kokoro_onnx.tokenizer import Tokenizer
+from typing import Optional, Dict, Tuple
 import uvicorn
+from ui import html_content
+import asyncio
+import concurrent.futures
+from functools import lru_cache
+import threading
+from queue import Queue
+import time
+import hashlib
 app = FastAPI(title="Kokoro TTS API", version="1.0.0")
+# Thread pool for CPU-intensive tasks
+executor = concurrent.futures.ThreadPoolExecutor(max_workers=4)
+# Initialize models once
 tokenizer = Tokenizer()
 kokoro = Kokoro("kokoro-v1.0.onnx", "voices-v1.0.bin")
 SUPPORTED_LANGUAGES = ["en-us"]
+# Cache for phonemes and voice styles
+phoneme_cache: Dict[str, str] = {}
+voice_style_cache: Dict[str, np.ndarray] = {}
+audio_cache: Dict[str, Tuple[np.ndarray, int]] = {}
+# Request queue for batching
+request_queue = Queue()
+batch_size = 4
+batch_timeout = 0.1  # 100ms
 class TTSRequest(BaseModel):
     text: str
     blend_voice_name: Optional[str] = None
     speed: float = 1.0
 class TTSResponse(BaseModel):
     phonemes: str
     sample_rate: int
def get_cache_key(text: str, language: str, voice: str, blend_voice: Optional[str], speed: float) -> str:
    """Return a deterministic hex digest identifying one synthesis request.

    Args:
        text: Input text to synthesize.
        language: Language code of the request.
        voice: Name of the primary voice.
        blend_voice: Name of the optional second voice, or ``None``.
        speed: Playback speed multiplier.

    Returns:
        A 32-character lowercase hexadecimal MD5 digest.
    """
    # repr() of a tuple quotes and escapes every field, so a "|" inside the
    # text, or a voice literally named "None", cannot produce the same key as
    # a different request (the old "|"-joined string could).
    key_data = repr((text, language, voice, blend_voice, speed))
    # MD5 is used only as a fast in-memory cache fingerprint, not for security.
    return hashlib.md5(key_data.encode()).hexdigest()
@lru_cache(maxsize=1000)
def cached_phonemize(text: str, language: str) -> str:
    """Phonemize ``text`` for ``language``, memoizing up to 1000 distinct calls."""
    phoneme_string = tokenizer.phonemize(text, lang=language)
    return phoneme_string
def get_cached_voice_style(voice_name: str) -> np.ndarray:
    """Return the style for ``voice_name``, loading it from the model on first use.

    Loaded styles are kept in the module-level ``voice_style_cache`` so later
    calls for the same voice skip ``kokoro.get_voice_style``.
    """
    if voice_name in voice_style_cache:
        return voice_style_cache[voice_name]

    # First request for this voice: fetch it once and remember it.
    voice_style_cache[voice_name] = kokoro.get_voice_style(voice_name)
    return voice_style_cache[voice_name]
def process_voice_blend(voice: str, blend_voice_name: Optional[str]) -> np.ndarray:
    """Resolve the voice style to synthesize with, blending two voices if asked.

    With no ``blend_voice_name`` the plain style of ``voice`` is returned.
    Otherwise the two styles are mixed 50/50 and the mix is stored in
    ``voice_style_cache`` under the key ``"<voice>+<blend_voice_name>"``.
    """
    if blend_voice_name:
        cache_slot = f"{voice}+{blend_voice_name}"
        if cache_slot in voice_style_cache:
            return voice_style_cache[cache_slot]

        primary_style = get_cached_voice_style(voice)
        secondary_style = get_cached_voice_style(blend_voice_name)
        # Equal-weight mix of the two styles.
        voice_style_cache[cache_slot] = np.add(primary_style * 0.5, secondary_style * 0.5)
        return voice_style_cache[cache_slot]

    # No blending requested: hand back the style itself, not the voice name.
    return get_cached_voice_style(voice)
def batch_process_tts(requests: list) -> list:
    """Synthesize several TTS requests one after another.

    Args:
        requests: Request objects exposing ``text``, ``language``, ``voice``,
            ``blend_voice_name`` and ``speed`` attributes.

    Returns:
        One tuple per request, in input order:
        ``(samples, sample_rate, phonemes, None)`` on success, or
        ``(None, None, None, error_message)`` when that request failed.
        A failure in one request never aborts the others.
    """
    results = []
    for req in requests:
        try:
            # Phonemization sits inside the try so that a request whose text
            # cannot be phonemized yields a per-item error instead of failing
            # the whole batch.
            phonemes = cached_phonemize(req.text, req.language)
            voice = process_voice_blend(req.voice, req.blend_voice_name)
            samples, sample_rate = kokoro.create(
                phonemes, voice=voice, speed=req.speed, lang=None, is_phonemes=True
            )
        except Exception as e:
            # Fall back to repr() so the error slot is never an empty (falsy)
            # string, which callers would mistake for success.
            results.append((None, None, None, str(e) or repr(e)))
        else:
            results.append((samples, sample_rate, phonemes, None))
    return results
 def numpy_to_wav_bytes(audio_data: np.ndarray, sample_rate: int) -> bytes:
+    """Optimized WAV conversion with pre-allocated buffer"""
     if audio_data.dtype != np.int16:
         audio_data = (audio_data * 32767).astype(np.int16)
+    # Pre-calculate buffer size
+    buffer_size = len(audio_data) * 2 + 44  # audio data + WAV header
     buffer = io.BytesIO()
+    buffer.truncate(buffer_size)
+    buffer.seek(0)
     with wave.open(buffer, "wb") as wav_file:
+        wav_file.setnchannels(1)
+        wav_file.setsampwidth(2)
         wav_file.setframerate(sample_rate)
         wav_file.writeframes(audio_data.tobytes())
     buffer.seek(0)
     return buffer.getvalue()
async def run_in_executor(func, *args, **kwargs):
    """Run a blocking callable on the shared thread pool and await its result.

    Args:
        func: Callable to execute off the event loop.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func(*args, **kwargs)`` returns.
    """
    from functools import partial

    # This is always called from a coroutine, so the running loop is the
    # right one; get_event_loop() is deprecated for this use.
    loop = asyncio.get_running_loop()
    # loop.run_in_executor() only forwards positional arguments, so bind
    # everything up front; partial handles the no-kwargs case just as well.
    return await loop.run_in_executor(executor, partial(func, *args, **kwargs))
 @app.get("/", response_class=HTMLResponse)
 async def get_home():
     return HTMLResponse(content=html_content)
 @app.get("/voices")
 async def get_voices():
+    # Cache voice list
+    if not hasattr(get_voices, '_cached_voices'):
+        get_voices._cached_voices = {"voices": sorted(kokoro.get_voices())}
+    return get_voices._cached_voices
 @app.get("/languages")
 async def get_languages():
     return {"languages": SUPPORTED_LANGUAGES}
 @app.post("/tts/audio")
 async def generate_audio(request: TTSRequest):
+    """Optimized audio generation with caching"""
     try:
+        # Validate inputs
         if request.language not in SUPPORTED_LANGUAGES:
             raise HTTPException(
                 status_code=400, detail=f"Unsupported language: {request.language}"
             )
         available_voices = kokoro.get_voices()
         if request.voice not in available_voices:
             raise HTTPException(
                 status_code=400, detail=f"Unsupported voice: {request.voice}"
             )
+        if request.blend_voice_name and request.blend_voice_name not in available_voices:
             raise HTTPException(
+                status_code=400, detail=f"Unsupported blend voice: {request.blend_voice_name}"
             )
+        # Check cache first
+        cache_key = get_cache_key(
+            request.text, request.language, request.voice,
+            request.blend_voice_name, request.speed
         )
+        if cache_key in audio_cache:
+            samples, sample_rate = audio_cache[cache_key]
+        else:
+            # Generate phonemes (cached)
+            phonemes = cached_phonemize(request.text, request.language)
+            # Process voice (cached)
+            voice = process_voice_blend(request.voice, request.blend_voice_name)
+            # Generate audio in thread pool - Fixed parameter passing
+            samples, sample_rate = await run_in_executor(
+                kokoro.create,
+                phonemes,
+                voice=voice,
+                speed=request.speed,
+                lang=None,
+                is_phonemes=True
+            )
+            # Cache result (limit cache size)
+            if len(audio_cache) < 100:
+                audio_cache[cache_key] = (samples, sample_rate)
+        # Convert to WAV in thread pool
+        wav_bytes = await run_in_executor(numpy_to_wav_bytes, samples, sample_rate)
         return StreamingResponse(
             io.BytesIO(wav_bytes),
             media_type="audio/wav",
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 @app.post("/tts/info", response_model=TTSResponse)
 async def generate_info(request: TTSRequest):
+    """Optimized info generation with caching"""
     try:
         if request.language not in SUPPORTED_LANGUAGES:
             raise HTTPException(
                 status_code=400, detail=f"Unsupported language: {request.language}"
             )
+        # Use cached phonemization
+        phonemes = cached_phonemize(request.text, request.language)
+        sample_rate = 24000
         return TTSResponse(phonemes=phonemes, sample_rate=sample_rate)
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
@app.post("/tts/batch")
async def generate_batch(requests: list[TTSRequest]):
    """Synthesize several requests in one call.

    Every request is validated before any synthesis starts. Each result entry
    is either ``{"error": ...}`` or a dict with ``phonemes``, ``sample_rate``,
    ``audio_base64`` and ``audio_format``, in the same order as the input.

    Raises:
        HTTPException: 400 when any request names an unsupported language,
            voice or blend voice; 500 on unexpected failures.
    """
    import base64

    try:
        # Validate all requests first so a bad one fails fast, before any
        # synthesis work is scheduled.
        available_voices = kokoro.get_voices()
        for req in requests:
            if req.language not in SUPPORTED_LANGUAGES:
                raise HTTPException(
                    status_code=400, detail=f"Unsupported language: {req.language}"
                )
            if req.voice not in available_voices:
                raise HTTPException(
                    status_code=400, detail=f"Unsupported voice: {req.voice}"
                )
            # Same blend-voice check the single-request endpoints perform.
            if req.blend_voice_name and req.blend_voice_name not in available_voices:
                raise HTTPException(
                    status_code=400, detail=f"Unsupported blend voice: {req.blend_voice_name}"
                )

        # Process batch in thread pool
        results = await run_in_executor(batch_process_tts, requests)

        # Convert results
        response_data = []
        for samples, sample_rate, phonemes, error in results:
            # Compare against None: an empty error string still means failure
            # and must not reach the WAV encoder with samples=None.
            if error is not None:
                response_data.append({"error": error})
                continue

            wav_bytes = await run_in_executor(numpy_to_wav_bytes, samples, sample_rate)
            response_data.append({
                "phonemes": phonemes,
                "sample_rate": sample_rate,
                "audio_base64": base64.b64encode(wav_bytes).decode(),
                "audio_format": "wav"
            })

        return {"results": response_data}
    except HTTPException:
        # Keep the intended 400 status instead of re-wrapping it as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
 @app.post("/tts/both")
 async def generate_both(request: TTSRequest):
+    """Generate both audio and metadata with optimizations"""
     try:
+        # Validate inputs
         if request.language not in SUPPORTED_LANGUAGES:
             raise HTTPException(
                 status_code=400, detail=f"Unsupported language: {request.language}"
                 status_code=400, detail=f"Unsupported voice: {request.voice}"
             )
+        if request.blend_voice_name and request.blend_voice_name not in available_voices:
             raise HTTPException(
+                status_code=400, detail=f"Unsupported blend voice: {request.blend_voice_name}"
             )
+        # Check cache
+        cache_key = get_cache_key(
+            request.text, request.language, request.voice,
+            request.blend_voice_name, request.speed
         )
+        if cache_key in audio_cache:
+            samples, sample_rate = audio_cache[cache_key]
+            phonemes = cached_phonemize(request.text, request.language)
+        else:
+            # Generate phonemes
+            phonemes = cached_phonemize(request.text, request.language)
+            # Process voice
+            voice = process_voice_blend(request.voice, request.blend_voice_name)
+            # Generate audio - Fixed parameter passing
+            samples, sample_rate = await run_in_executor(
+                kokoro.create,
+                phonemes,
+                voice=voice,
+                speed=request.speed,
+                lang=None,
+                is_phonemes=True
+            )
+            # Cache result
+            if len(audio_cache) < 100:
+                audio_cache[cache_key] = (samples, sample_rate)
+        # Convert to base64
+        wav_bytes = await run_in_executor(numpy_to_wav_bytes, samples, sample_rate)
         import base64
         audio_base64 = base64.b64encode(wav_bytes).decode()
         return {
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
# Warm the voice-style cache when the application starts
@app.on_event("startup")
async def startup_event():
    """Preload the styles of a few commonly used voices into the cache.

    Voices that the loaded model does not provide are skipped silently.
    """
    common_voices = ["af_heart", "af_bella", "af_sarah"]
    # Ask the model for its voice list once rather than once per candidate.
    available_voices = kokoro.get_voices()
    for voice in common_voices:
        if voice in available_voices:
            get_cached_voice_style(voice)
 if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=7860, workers=1)

ui.py ADDED Viewed

	@@ -0,0 +1,609 @@

+html_content = """
+    <!DOCTYPE html>
+    <html lang="en">
+    <head>
+        <meta charset="UTF-8">
+        <meta name="viewport" content="width=device-width, initial-scale=1.0">
+        <title>Kokoro TTS Test Interface</title>
+        <style>
+            * {
+                margin: 0;
+                padding: 0;
+                box-sizing: border-box;
+            }
+            body {
+                font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+                background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+                min-height: 100vh;
+                padding: 20px;
+                color: #333;
+            }
+            .container {
+                max-width: 800px;
+                margin: 0 auto;
+                background: white;
+                border-radius: 15px;
+                box-shadow: 0 10px 30px rgba(0, 0, 0, 0.3);
+                overflow: hidden;
+            }
+            .header {
+                background: linear-gradient(45deg, #667eea, #764ba2);
+                color: white;
+                text-align: center;
+                padding: 30px;
+            }
+            .header h1 {
+                font-size: 2.5rem;
+                margin-bottom: 10px;
+                text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
+            }
+            .header p {
+                font-size: 1.1rem;
+                opacity: 0.9;
+            }
+            .content {
+                padding: 30px;
+            }
+            .form-group {
+                margin-bottom: 25px;
+            }
+            label {
+                display: block;
+                margin-bottom: 8px;
+                font-weight: 600;
+                color: #555;
+            }
+            input, select, textarea {
+                width: 100%;
+                padding: 12px 15px;
+                border: 2px solid #e1e5e9;
+                border-radius: 8px;
+                font-size: 16px;
+                transition: all 0.3s ease;
+                font-family: inherit;
+            }
+            input:focus, select:focus, textarea:focus {
+                outline: none;
+                border-color: #667eea;
+                box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
+            }
+            textarea {
+                resize: vertical;
+                min-height: 100px;
+            }
+            .range-container {
+                display: flex;
+                align-items: center;
+                gap: 15px;
+            }
+            .range-container input[type="range"] {
+                flex: 1;
+            }
+            .range-value {
+                background: #f8f9fa;
+                padding: 8px 12px;
+                border-radius: 6px;
+                font-weight: 600;
+                min-width: 60px;
+                text-align: center;
+                border: 2px solid #e1e5e9;
+            }
+            .button-group {
+                display: grid;
+                grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
+                gap: 15px;
+                margin-top: 30px;
+            }
+            .btn {
+                padding: 15px 25px;
+                border: none;
+                border-radius: 8px;
+                font-size: 16px;
+                font-weight: 600;
+                cursor: pointer;
+                transition: all 0.3s ease;
+                text-transform: uppercase;
+                letter-spacing: 0.5px;
+            }
+            .btn-primary {
+                background: linear-gradient(45deg, #667eea, #764ba2);
+                color: white;
+            }
+            .btn-secondary {
+                background: linear-gradient(45deg, #ffecd2, #fcb69f);
+                color: #8b4513;
+            }
+            .btn-info {
+                background: linear-gradient(45deg, #a8edea, #fed6e3);
+                color: #2c3e50;
+            }
+            .btn:hover {
+                transform: translateY(-2px);
+                box-shadow: 0 5px 15px rgba(0, 0, 0, 0.2);
+            }
+            .btn:disabled {
+                opacity: 0.6;
+                cursor: not-allowed;
+                transform: none;
+            }
+            .result-section {
+                margin-top: 30px;
+                padding: 25px;
+                background: #f8f9fa;
+                border-radius: 10px;
+                border-left: 5px solid #667eea;
+            }
+            .result-section h3 {
+                color: #667eea;
+                margin-bottom: 15px;
+                font-size: 1.3rem;
+            }
+            .info-display {
+                background: white;
+                padding: 15px;
+                border-radius: 8px;
+                margin: 10px 0;
+                border: 1px solid #e1e5e9;
+            }
+            .info-display strong {
+                color: #667eea;
+            }
+            .loading {
+                display: none;
+                text-align: center;
+                padding: 20px;
+                color: #667eea;
+            }
+            .loading.show {
+                display: block;
+            }
+            .spinner {
+                display: inline-block;
+                width: 30px;
+                height: 30px;
+                border: 3px solid #f3f3f3;
+                border-top: 3px solid #667eea;
+                border-radius: 50%;
+                animation: spin 1s linear infinite;
+                margin-right: 10px;
+            }
+            @keyframes spin {
+                0% { transform: rotate(0deg); }
+                100% { transform: rotate(360deg); }
+            }
+            .error {
+                background: #fee;
+                color: #c33;
+                padding: 15px;
+                border-radius: 8px;
+                border-left: 5px solid #c33;
+                margin: 15px 0;
+            }
+            .success {
+                background: #efe;
+                color: #363;
+                padding: 15px;
+                border-radius: 8px;
+                border-left: 5px solid #363;
+                margin: 15px 0;
+            }
+            audio {
+                width: 100%;
+                margin-top: 15px;
+            }
+            .checkbox-group {
+                display: flex;
+                align-items: center;
+                gap: 10px;
+                margin-top: 10px;
+            }
+            .checkbox-group input[type="checkbox"] {
+                width: auto;
+            }
+            .example-select {
+                background: #f8f9fa;
+                border: 2px dashed #667eea;
+                border-radius: 6px;
+                font-size: 14px;
+                color: #667eea;
+                margin-bottom: 10px;
+            }
+            .example-select:focus {
+                border-color: #764ba2;
+                background: white;
+            }
+            .example-label {
+                font-size: 0.9em;
+                color: #667eea;
+                font-weight: 500;
+                margin-bottom: 5px;
+            }
+        </style>
+    </head>
+    <body>
+        <div class="container">
+            <div class="header">
+                <h1>🎤 Kokoro TTS</h1>
+                <p>Text-to-Speech Testing Interface</p>
+            </div>
+            <div class="content">
+                <form id="ttsForm">
+                    <div class="form-group">
+                        <label for="text">Text to Convert:</label>
+                        <div style="margin-bottom: 10px;">
+                            <div class="example-label">📝 Quick Examples:</div>
+                            <select id="example-texts" class="example-select" onchange="loadExampleText()">
+                                <option value="">Choose an example...</option>
+                                <option value="Hello! This is a test of the Kokoro text-to-speech system.">Basic Greeting</option>
+                                <option value="The quick brown fox jumps over the lazy dog. This sentence contains every letter of the alphabet.">Alphabet Test</option>
+                                <option value="Welcome to our English tutoring platform! We're here to help you improve your pronunciation and speaking skills.">English Learning</option>
+                                <option value="In a hole in the ground there lived a hobbit. Not a nasty, dirty, wet hole filled with the ends of worms and an oozy smell.">Story Reading</option>
+                                <option value="To be or not to be, that is the question. Whether 'tis nobler in the mind to suffer the slings and arrows of outrageous fortune.">Shakespeare</option>
+                                <option value="Ladies and gentlemen, welcome to today's presentation. We will be discussing the latest developments in artificial intelligence.">Presentation</option>
+                                <option value="The weather today is sunny with a high of 75 degrees Fahrenheit. Perfect for outdoor activities and picnics in the park.">Weather Report</option>
+                                <option value="One, two, three, four, five. Ten, twenty, thirty, forty, fifty. One hundred, one thousand, one million.">Numbers Practice</option>
+                                <option value="How are you doing today? I hope you're having a wonderful time learning English with our voice synthesis technology.">Conversational</option>
+                                <option value="Science and technology have revolutionized the way we communicate, learn, and interact with the world around us.">Technical</option>
+                                <option value="Once upon a time, in a faraway kingdom, there lived a brave princess who could speak to animals and understand their language.">Fairy Tale</option>
+                                <option value="Please remember to wash your hands frequently, wear a mask when necessary, and maintain social distance for everyone's safety.">Instructions</option>
+                                <option value="The pronunciation of English can be challenging, but with practice and patience, you can master the sounds and rhythms of the language.">Educational</option>
+                                <option value="Artificial intelligence and machine learning are transforming industries from healthcare to transportation, creating new possibilities.">AI Topic</option>
+                                <option value="Thank you for using our text-to-speech service. We hope it helps you in your language learning journey. Have a great day!">Thank You Message</option>
+                            </select>
+                        </div>
+                        <textarea id="text" name="text" placeholder="Enter your text here or choose an example above..." required>Hello! This is a test of the Kokoro text-to-speech system.</textarea>
+                    </div>
+                    <div class="form-group">
+                        <label for="voice">Voice:</label>
+                        <select id="voice" name="voice" required>
+                            <option value="">Loading voices...</option>
+                        </select>
+                    </div>
+                    <div class="form-group">
+                        <label for="language">Language:</label>
+                        <select id="language" name="language" required>
+                            <option value="">Loading languages...</option>
+                        </select>
+                    </div>
+                    <div class="form-group">
+                        <label for="blend_voice">Blend Voice (Optional):</label>
+                        <select id="blend_voice" name="blend_voice_name">
+                            <option value="">No blending</option>
+                        </select>
+                    </div>
+                    <div class="form-group">
+                        <label for="speed">Speed:</label>
+                        <div class="range-container">
+                            <input type="range" id="speed" name="speed" min="0.5" max="2.0" step="0.1" value="1.0">
+                            <div class="range-value" id="speedValue">1.0</div>
+                        </div>
+                    </div>
+                    <div class="button-group">
+                        <button type="button" class="btn btn-primary" onclick="generateAudio()">
+                            🎵 Generate Audio
+                        </button>
+                        <button type="button" class="btn btn-secondary" onclick="generateInfo()">
+                            📝 Get Info Only
+                        </button>
+                        <button type="button" class="btn btn-info" onclick="generateBoth()">
+                            🎯 Generate Both
+                        </button>
+                    </div>
+                </form>
+                <div class="loading" id="loading">
+                    <div class="spinner"></div>
+                    Processing your request...
+                </div>
+                <div id="results"></div>
+            </div>
+        </div>
+        <script>
+            // Load voices and languages on page load
+            window.addEventListener('load', async function() {
+                await loadVoices();
+                await loadLanguages();
+                setupEventListeners();
+            });
+            function setupEventListeners() {
+                const speedSlider = document.getElementById('speed');
+                const speedValue = document.getElementById('speedValue');
+                speedSlider.addEventListener('input', function() {
+                    speedValue.textContent = this.value;
+                });
+            }
+            function loadExampleText() {
+                const exampleSelect = document.getElementById('example-texts');
+                const textArea = document.getElementById('text');
+                if (exampleSelect.value) {
+                    textArea.value = exampleSelect.value;
+                    // Reset the select to show "Choose an example..."
+                    exampleSelect.value = '';
+                }
+            }
+            async function loadVoices() {
+                try {
+                    const response = await fetch('/voices');
+                    const data = await response.json();
+                    const voiceSelect = document.getElementById('voice');
+                    const blendSelect = document.getElementById('blend_voice');
+                    voiceSelect.innerHTML = '';
+                    blendSelect.innerHTML = '<option value="">No blending</option>';
+                    data.voices.forEach(voice => {
+                        const option1 = new Option(voice, voice);
+                        const option2 = new Option(voice, voice);
+                        voiceSelect.add(option1);
+                        blendSelect.add(option2);
+                    });
+                    if (data.voices.length > 0) {
+                        voiceSelect.value = data.voices[0];
+                    }
+                } catch (error) {
+                    showError('Failed to load voices: ' + error.message);
+                }
+            }
+            async function loadLanguages() {
+                try {
+                    const response = await fetch('/languages');
+                    const data = await response.json();
+                    const languageSelect = document.getElementById('language');
+                    languageSelect.innerHTML = '';
+                    data.languages.forEach(lang => {
+                        const option = new Option(lang, lang);
+                        languageSelect.add(option);
+                    });
+                    if (data.languages.length > 0) {
+                        languageSelect.value = data.languages[0];
+                    }
+                } catch (error) {
+                    showError('Failed to load languages: ' + error.message);
+                }
+            }
+            function getFormData() {
+                return {
+                    text: document.getElementById('text').value,
+                    voice: document.getElementById('voice').value,
+                    language: document.getElementById('language').value,
+                    blend_voice_name: document.getElementById('blend_voice').value || null,
+                    speed: parseFloat(document.getElementById('speed').value)
+                };
+            }
+            function showLoading() {
+                document.getElementById('loading').classList.add('show');
+                document.getElementById('results').innerHTML = '';
+            }
+            function hideLoading() {
+                document.getElementById('loading').classList.remove('show');
+            }
+            function showError(message) {
+                hideLoading();
+                document.getElementById('results').innerHTML =
+                    `<div class="error"><strong>Error:</strong> ${message}</div>`;
+            }
+            function showSuccess(content) {
+                hideLoading();
+                document.getElementById('results').innerHTML = content;
+            }
+            async function generateAudio() {
+                showLoading();
+                try {
+                    const formData = getFormData();
+                    const response = await fetch('/tts/audio', {
+                        method: 'POST',
+                        headers: {
+                            'Content-Type': 'application/json'
+                        },
+                        body: JSON.stringify(formData)
+                    });
+                    if (!response.ok) {
+                        const error = await response.json();
+                        throw new Error(error.detail || 'Failed to generate audio');
+                    }
+                    const audioBlob = await response.blob();
+                    const audioUrl = URL.createObjectURL(audioBlob);
+                    showSuccess(`
+                        <div class="result-section">
+                            <h3>🎵 Generated Audio</h3>
+                            <div class="success">Audio generated successfully!</div>
+                            <audio controls>
+                                <source src="${audioUrl}" type="audio/wav">
+                                Your browser does not support the audio element.
+                            </audio>
+                            <p style="margin-top: 10px; font-size: 0.9em; color: #666;">
+                                Right-click on the audio player and select "Save audio as..." to download.
+                            </p>
+                        </div>
+                    `);
+                } catch (error) {
+                    showError(error.message);
+                }
+            }
+            async function generateInfo() {
+                showLoading();
+                try {
+                    const formData = getFormData();
+                    const response = await fetch('/tts/info', {
+                        method: 'POST',
+                        headers: {
+                            'Content-Type': 'application/json'
+                        },
+                        body: JSON.stringify(formData)
+                    });
+                    if (!response.ok) {
+                        const error = await response.json();
+                        throw new Error(error.detail || 'Failed to generate info');
+                    }
+                    const data = await response.json();
+                    showSuccess(`
+                        <div class="result-section">
+                            <h3>📝 Text Analysis</h3>
+                            <div class="success">Analysis completed successfully!</div>
+                            <div class="info-display">
+                                <strong>Original Text:</strong><br>
+                                ${formData.text}
+                            </div>
+                            <div class="info-display">
+                                <strong>Phonemes:</strong><br>
+                                ${data.phonemes}
+                            </div>
+                            <div class="info-display">
+                                <strong>Sample Rate:</strong> ${data.sample_rate} Hz
+                            </div>
+                            <div class="info-display">
+                                <strong>Voice:</strong> ${formData.voice}
+                            </div>
+                            <div class="info-display">
+                                <strong>Speed:</strong> ${formData.speed}x
+                            </div>
+                        </div>
+                    `);
+                } catch (error) {
+                    showError(error.message);
+                }
+            }
+            async function generateBoth() {
+                showLoading();
+                try {
+                    const formData = getFormData();
+                    const response = await fetch('/tts/both', {
+                        method: 'POST',
+                        headers: {
+                            'Content-Type': 'application/json'
+                        },
+                        body: JSON.stringify(formData)
+                    });
+                    if (!response.ok) {
+                        const error = await response.json();
+                        throw new Error(error.detail || 'Failed to generate audio and info');
+                    }
+                    const data = await response.json();
+                    // Convert base64 to blob for audio playback
+                    const audioBytes = atob(data.audio_base64);
+                    const audioArray = new Uint8Array(audioBytes.length);
+                    for (let i = 0; i < audioBytes.length; i++) {
+                        audioArray[i] = audioBytes.charCodeAt(i);
+                    }
+                    const audioBlob = new Blob([audioArray], { type: 'audio/wav' });
+                    const audioUrl = URL.createObjectURL(audioBlob);
+                    showSuccess(`
+                        <div class="result-section">
+                            <h3>🎯 Complete Analysis & Audio</h3>
+                            <div class="success">Generation completed successfully!</div>
+                            <h4 style="margin-top: 20px; color: #667eea;">📊 Analysis Information</h4>
+                            <div class="info-display">
+                                <strong>Original Text:</strong><br>
+                                ${formData.text}
+                            </div>
+                            <div class="info-display">
+                                <strong>Phonemes:</strong><br>
+                                ${data.phonemes}
+                            </div>
+                            <div class="info-display">
+                                <strong>Sample Rate:</strong> ${data.sample_rate} Hz
+                            </div>
+                            <div class="info-display">
+                                <strong>Voice:</strong> ${formData.voice}
+                                ${formData.blend_voice_name ? ` (blended with ${formData.blend_voice_name})` : ''}
+                            </div>
+                            <div class="info-display">
+                                <strong>Speed:</strong> ${formData.speed}x
+                            </div>
+                            <h4 style="margin-top: 20px; color: #667eea;">🎵 Generated Audio</h4>
+                            <audio controls>
+                                <source src="${audioUrl}" type="audio/wav">
+                                Your browser does not support the audio element.
+                            </audio>
+                            <p style="margin-top: 10px; font-size: 0.9em; color: #666;">
+                                Right-click on the audio player and select "Save audio as..." to download.
+                            </p>
+                        </div>
+                    `);
+                } catch (error) {
+                    showError(error.message);
+                }
+            }
+        </script>
+    </body>
+    </html>
+    """