ABAO77 commited on
Commit
693c106
Β·
1 Parent(s): 827e976

Refactor code structure for improved readability and maintainability

Browse files
Files changed (6) hide show
  1. .dockerignore +72 -0
  2. .gitignore +2 -0
  3. Dockerfile +72 -20
  4. __pycache__/ui.cpython-311.pyc +0 -0
  5. app.py +222 -686
  6. ui.py +609 -0
.dockerignore ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Git files
2
+ .git
3
+ .gitignore
4
+ .gitattributes
5
+
6
+ # Documentation
7
+ README.md
8
+ *.md
9
+ docs/
10
+
11
+ # Python cache
12
+ __pycache__/
13
+ *.py[cod]
14
+ *$py.class
15
+ *.so
16
+ .Python
17
+ build/
18
+ develop-eggs/
19
+ dist/
20
+ downloads/
21
+ eggs/
22
+ .eggs/
23
+ lib/
24
+ lib64/
25
+ parts/
26
+ sdist/
27
+ var/
28
+ wheels/
29
+ *.egg-info/
30
+ .installed.cfg
31
+ *.egg
32
+
33
+ # Virtual environments
34
+ venv/
35
+ env/
36
+ ENV/
37
+
38
+ # IDE files
39
+ .vscode/
40
+ .idea/
41
+ *.swp
42
+ *.swo
43
+ *~
44
+
45
+ # OS files
46
+ .DS_Store
47
+ Thumbs.db
48
+
49
+ # Logs
50
+ *.log
51
+ logs/
52
+
53
+ # Test files
54
+ test/
55
+ tests/
56
+ *_test.py
57
+ test_*.py
58
+
59
+ # Development files
60
+ .env
61
+ .env.local
62
+ docker-compose.yml
63
+ docker-compose.*.yml
64
+
65
+ # Model files (will be downloaded in container)
66
+ *.onnx
67
+ *.bin
68
+
69
+ # Temporary files
70
+ tmp/
71
+ temp/
72
+ *.tmp
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ **.onnx
2
+ **.bin
Dockerfile CHANGED
@@ -1,37 +1,89 @@
1
- FROM python:3.12-slim
 
2
 
3
- # Set working directory
4
- WORKDIR /app
5
-
6
- # Install system dependencies
7
  RUN apt-get update && apt-get install -y \
8
  wget \
9
  curl \
 
 
10
  && rm -rf /var/lib/apt/lists/*
11
 
12
- # Copy requirements first for better caching
 
 
 
 
13
  COPY requirements.txt .
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
- # Install Python dependencies
16
- RUN pip install -r requirements.txt
 
17
 
18
- # Download model files to /app directory
19
- RUN wget -O kokoro-v1.0.onnx https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/kokoro-v1.0.onnx \
20
- && wget -O voices-v1.0.bin https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/voices-v1.0.bin
 
21
 
22
- # Copy application code
23
- COPY app.py .
 
24
 
25
- # Create non-root user for security
26
- RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app
27
  USER appuser
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  # Expose port
30
  EXPOSE 7860
31
 
32
- # Health check
33
- HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
34
- CMD curl -f http://localhost:8000/ || exit 1
35
 
36
- # Run the application
37
- CMD ["python", "app.py"]
 
 
 
 
 
 
1
+ # Use multi-stage build for smaller final image
2
+ FROM python:3.12-slim as builder
3
 
4
+ # Install build dependencies
 
 
 
5
  RUN apt-get update && apt-get install -y \
6
  wget \
7
  curl \
8
+ gcc \
9
+ g++ \
10
  && rm -rf /var/lib/apt/lists/*
11
 
12
+ # Create virtual environment
13
+ RUN python -m venv /opt/venv
14
+ ENV PATH="/opt/venv/bin:$PATH"
15
+
16
+ # Copy and install requirements
17
  COPY requirements.txt .
18
+ RUN pip install --no-cache-dir --upgrade pip && \
19
+ pip install --no-cache-dir -r requirements.txt
20
+
21
+ # Download and verify model files
22
+ RUN wget -O kokoro-v1.0.onnx https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/kokoro-v1.0.onnx && \
23
+ wget -O voices-v1.0.bin https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/voices-v1.0.bin && \
24
+ # Verify file sizes (basic validation)
25
+ [ -s kokoro-v1.0.onnx ] && [ -s voices-v1.0.bin ] || (echo "Model download failed" && exit 1)
26
+
27
+ # Production stage
28
+ FROM python:3.12-slim as production
29
+
30
+ # Install only runtime dependencies
31
+ RUN apt-get update && apt-get install -y \
32
+ curl \
33
+ && rm -rf /var/lib/apt/lists/* \
34
+ && apt-get clean
35
+
36
+ # Copy virtual environment from builder
37
+ COPY --from=builder /opt/venv /opt/venv
38
+ ENV PATH="/opt/venv/bin:$PATH"
39
+
40
+ # Set working directory
41
+ WORKDIR /app
42
 
43
+ # Copy model files from builder
44
+ COPY --from=builder /kokoro-v1.0.onnx ./kokoro-v1.0.onnx
45
+ COPY --from=builder /voices-v1.0.bin ./voices-v1.0.bin
46
 
47
+ # Create non-root user and directories
48
+ RUN groupadd -r appgroup && useradd -r -g appgroup -u 1000 appuser && \
49
+ mkdir -p /app/cache /app/tmp && \
50
+ chown -R appuser:appgroup /app
51
 
52
+ # Copy application files
53
+ COPY --chown=appuser:appgroup app.py .
54
+ COPY --chown=appuser:appgroup ui.py .
55
 
56
+ # Switch to non-root user
 
57
  USER appuser
58
 
59
+ # Set Python optimizations
60
+ ENV PYTHONUNBUFFERED=1 \
61
+ PYTHONDONTWRITEBYTECODE=1 \
62
+ PYTHONHASHSEED=random \
63
+ PIP_NO_CACHE_DIR=1 \
64
+ PIP_DISABLE_PIP_VERSION_CHECK=1
65
+
66
+ # Set memory and performance optimizations
67
+ ENV OMP_NUM_THREADS=4 \
68
+ MKL_NUM_THREADS=4 \
69
+ NUMEXPR_MAX_THREADS=4 \
70
+ OPENBLAS_NUM_THREADS=4
71
+
72
+ # Cache directory for application
73
+ ENV CACHE_DIR=/app/cache \
74
+ TMP_DIR=/app/tmp
75
+
76
  # Expose port
77
  EXPOSE 7860
78
 
79
+ # Health check with proper endpoint and timing
80
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
81
+ CMD curl -f http://localhost:7860/languages || exit 1
82
 
83
+ # Preload models on startup and run with optimizations
84
+ CMD python -c "from kokoro_onnx import Kokoro; from kokoro_onnx.tokenizer import Tokenizer; \
85
+ print('Preloading models...'); \
86
+ tokenizer = Tokenizer(); \
87
+ kokoro = Kokoro('kokoro-v1.0.onnx', 'voices-v1.0.bin'); \
88
+ print('Models loaded successfully')" && \
89
+ exec python app.py
__pycache__/ui.cpython-311.pyc ADDED
Binary file (25 kB). View file
 
app.py CHANGED
@@ -1,22 +1,42 @@
1
- from fastapi import FastAPI, HTTPException
2
  from fastapi.responses import StreamingResponse, HTMLResponse
3
  from fastapi.staticfiles import StaticFiles
4
- from pydantic import BaseModel
5
  import numpy as np
6
  import io
7
  import wave
8
  from kokoro_onnx import Kokoro
9
  from kokoro_onnx.tokenizer import Tokenizer
10
- from typing import Optional
11
  import uvicorn
 
 
 
 
 
 
 
 
12
 
13
  app = FastAPI(title="Kokoro TTS API", version="1.0.0")
14
 
15
- # Initialize models
 
 
 
16
  tokenizer = Tokenizer()
17
  kokoro = Kokoro("kokoro-v1.0.onnx", "voices-v1.0.bin")
18
  SUPPORTED_LANGUAGES = ["en-us"]
19
 
 
 
 
 
 
 
 
 
 
20
 
21
  class TTSRequest(BaseModel):
22
  text: str
@@ -25,704 +45,166 @@ class TTSRequest(BaseModel):
25
  blend_voice_name: Optional[str] = None
26
  speed: float = 1.0
27
 
28
-
29
  class TTSResponse(BaseModel):
30
  phonemes: str
31
  sample_rate: int
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  def numpy_to_wav_bytes(audio_data: np.ndarray, sample_rate: int) -> bytes:
35
- """Convert numpy array to WAV bytes"""
36
- # Ensure audio is in the right format
37
  if audio_data.dtype != np.int16:
38
- # Convert float to int16
39
  audio_data = (audio_data * 32767).astype(np.int16)
40
 
41
- # Create WAV file in memory
 
42
  buffer = io.BytesIO()
 
 
 
43
  with wave.open(buffer, "wb") as wav_file:
44
- wav_file.setnchannels(1) # Mono
45
- wav_file.setsampwidth(2) # 2 bytes per sample (int16)
46
  wav_file.setframerate(sample_rate)
47
  wav_file.writeframes(audio_data.tobytes())
48
 
49
  buffer.seek(0)
50
  return buffer.getvalue()
51
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  @app.get("/", response_class=HTMLResponse)
54
  async def get_home():
55
- """Serve the main UI page"""
56
- html_content = """
57
- <!DOCTYPE html>
58
- <html lang="en">
59
- <head>
60
- <meta charset="UTF-8">
61
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
62
- <title>Kokoro TTS Test Interface</title>
63
- <style>
64
- * {
65
- margin: 0;
66
- padding: 0;
67
- box-sizing: border-box;
68
- }
69
-
70
- body {
71
- font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
72
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
73
- min-height: 100vh;
74
- padding: 20px;
75
- color: #333;
76
- }
77
-
78
- .container {
79
- max-width: 800px;
80
- margin: 0 auto;
81
- background: white;
82
- border-radius: 15px;
83
- box-shadow: 0 10px 30px rgba(0, 0, 0, 0.3);
84
- overflow: hidden;
85
- }
86
-
87
- .header {
88
- background: linear-gradient(45deg, #667eea, #764ba2);
89
- color: white;
90
- text-align: center;
91
- padding: 30px;
92
- }
93
-
94
- .header h1 {
95
- font-size: 2.5rem;
96
- margin-bottom: 10px;
97
- text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
98
- }
99
-
100
- .header p {
101
- font-size: 1.1rem;
102
- opacity: 0.9;
103
- }
104
-
105
- .content {
106
- padding: 30px;
107
- }
108
-
109
- .form-group {
110
- margin-bottom: 25px;
111
- }
112
-
113
- label {
114
- display: block;
115
- margin-bottom: 8px;
116
- font-weight: 600;
117
- color: #555;
118
- }
119
-
120
- input, select, textarea {
121
- width: 100%;
122
- padding: 12px 15px;
123
- border: 2px solid #e1e5e9;
124
- border-radius: 8px;
125
- font-size: 16px;
126
- transition: all 0.3s ease;
127
- font-family: inherit;
128
- }
129
-
130
- input:focus, select:focus, textarea:focus {
131
- outline: none;
132
- border-color: #667eea;
133
- box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
134
- }
135
-
136
- textarea {
137
- resize: vertical;
138
- min-height: 100px;
139
- }
140
-
141
- .range-container {
142
- display: flex;
143
- align-items: center;
144
- gap: 15px;
145
- }
146
-
147
- .range-container input[type="range"] {
148
- flex: 1;
149
- }
150
-
151
- .range-value {
152
- background: #f8f9fa;
153
- padding: 8px 12px;
154
- border-radius: 6px;
155
- font-weight: 600;
156
- min-width: 60px;
157
- text-align: center;
158
- border: 2px solid #e1e5e9;
159
- }
160
-
161
- .button-group {
162
- display: grid;
163
- grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
164
- gap: 15px;
165
- margin-top: 30px;
166
- }
167
-
168
- .btn {
169
- padding: 15px 25px;
170
- border: none;
171
- border-radius: 8px;
172
- font-size: 16px;
173
- font-weight: 600;
174
- cursor: pointer;
175
- transition: all 0.3s ease;
176
- text-transform: uppercase;
177
- letter-spacing: 0.5px;
178
- }
179
-
180
- .btn-primary {
181
- background: linear-gradient(45deg, #667eea, #764ba2);
182
- color: white;
183
- }
184
-
185
- .btn-secondary {
186
- background: linear-gradient(45deg, #ffecd2, #fcb69f);
187
- color: #8b4513;
188
- }
189
-
190
- .btn-info {
191
- background: linear-gradient(45deg, #a8edea, #fed6e3);
192
- color: #2c3e50;
193
- }
194
-
195
- .btn:hover {
196
- transform: translateY(-2px);
197
- box-shadow: 0 5px 15px rgba(0, 0, 0, 0.2);
198
- }
199
-
200
- .btn:disabled {
201
- opacity: 0.6;
202
- cursor: not-allowed;
203
- transform: none;
204
- }
205
-
206
- .result-section {
207
- margin-top: 30px;
208
- padding: 25px;
209
- background: #f8f9fa;
210
- border-radius: 10px;
211
- border-left: 5px solid #667eea;
212
- }
213
-
214
- .result-section h3 {
215
- color: #667eea;
216
- margin-bottom: 15px;
217
- font-size: 1.3rem;
218
- }
219
-
220
- .info-display {
221
- background: white;
222
- padding: 15px;
223
- border-radius: 8px;
224
- margin: 10px 0;
225
- border: 1px solid #e1e5e9;
226
- }
227
-
228
- .info-display strong {
229
- color: #667eea;
230
- }
231
-
232
- .loading {
233
- display: none;
234
- text-align: center;
235
- padding: 20px;
236
- color: #667eea;
237
- }
238
-
239
- .loading.show {
240
- display: block;
241
- }
242
-
243
- .spinner {
244
- display: inline-block;
245
- width: 30px;
246
- height: 30px;
247
- border: 3px solid #f3f3f3;
248
- border-top: 3px solid #667eea;
249
- border-radius: 50%;
250
- animation: spin 1s linear infinite;
251
- margin-right: 10px;
252
- }
253
-
254
- @keyframes spin {
255
- 0% { transform: rotate(0deg); }
256
- 100% { transform: rotate(360deg); }
257
- }
258
-
259
- .error {
260
- background: #fee;
261
- color: #c33;
262
- padding: 15px;
263
- border-radius: 8px;
264
- border-left: 5px solid #c33;
265
- margin: 15px 0;
266
- }
267
-
268
- .success {
269
- background: #efe;
270
- color: #363;
271
- padding: 15px;
272
- border-radius: 8px;
273
- border-left: 5px solid #363;
274
- margin: 15px 0;
275
- }
276
-
277
- audio {
278
- width: 100%;
279
- margin-top: 15px;
280
- }
281
-
282
- .checkbox-group {
283
- display: flex;
284
- align-items: center;
285
- gap: 10px;
286
- margin-top: 10px;
287
- }
288
-
289
- .checkbox-group input[type="checkbox"] {
290
- width: auto;
291
- }
292
-
293
- .example-select {
294
- background: #f8f9fa;
295
- border: 2px dashed #667eea;
296
- border-radius: 6px;
297
- font-size: 14px;
298
- color: #667eea;
299
- margin-bottom: 10px;
300
- }
301
-
302
- .example-select:focus {
303
- border-color: #764ba2;
304
- background: white;
305
- }
306
-
307
- .example-label {
308
- font-size: 0.9em;
309
- color: #667eea;
310
- font-weight: 500;
311
- margin-bottom: 5px;
312
- }
313
- </style>
314
- </head>
315
- <body>
316
- <div class="container">
317
- <div class="header">
318
- <h1>🎀 Kokoro TTS</h1>
319
- <p>Text-to-Speech Testing Interface</p>
320
- </div>
321
-
322
- <div class="content">
323
- <form id="ttsForm">
324
- <div class="form-group">
325
- <label for="text">Text to Convert:</label>
326
- <div style="margin-bottom: 10px;">
327
- <div class="example-label">πŸ“ Quick Examples:</div>
328
- <select id="example-texts" class="example-select" onchange="loadExampleText()">
329
- <option value="">Choose an example...</option>
330
- <option value="Hello! This is a test of the Kokoro text-to-speech system.">Basic Greeting</option>
331
- <option value="The quick brown fox jumps over the lazy dog. This sentence contains every letter of the alphabet.">Alphabet Test</option>
332
- <option value="Welcome to our English tutoring platform! We're here to help you improve your pronunciation and speaking skills.">English Learning</option>
333
- <option value="In a hole in the ground there lived a hobbit. Not a nasty, dirty, wet hole filled with the ends of worms and an oozy smell.">Story Reading</option>
334
- <option value="To be or not to be, that is the question. Whether 'tis nobler in the mind to suffer the slings and arrows of outrageous fortune.">Shakespeare</option>
335
- <option value="Ladies and gentlemen, welcome to today's presentation. We will be discussing the latest developments in artificial intelligence.">Presentation</option>
336
- <option value="The weather today is sunny with a high of 75 degrees Fahrenheit. Perfect for outdoor activities and picnics in the park.">Weather Report</option>
337
- <option value="One, two, three, four, five. Ten, twenty, thirty, forty, fifty. One hundred, one thousand, one million.">Numbers Practice</option>
338
- <option value="How are you doing today? I hope you're having a wonderful time learning English with our voice synthesis technology.">Conversational</option>
339
- <option value="Science and technology have revolutionized the way we communicate, learn, and interact with the world around us.">Technical</option>
340
- <option value="Once upon a time, in a faraway kingdom, there lived a brave princess who could speak to animals and understand their language.">Fairy Tale</option>
341
- <option value="Please remember to wash your hands frequently, wear a mask when necessary, and maintain social distance for everyone's safety.">Instructions</option>
342
- <option value="The pronunciation of English can be challenging, but with practice and patience, you can master the sounds and rhythms of the language.">Educational</option>
343
- <option value="Artificial intelligence and machine learning are transforming industries from healthcare to transportation, creating new possibilities.">AI Topic</option>
344
- <option value="Thank you for using our text-to-speech service. We hope it helps you in your language learning journey. Have a great day!">Thank You Message</option>
345
- </select>
346
- </div>
347
- <textarea id="text" name="text" placeholder="Enter your text here or choose an example above..." required>Hello! This is a test of the Kokoro text-to-speech system.</textarea>
348
- </div>
349
-
350
- <div class="form-group">
351
- <label for="voice">Voice:</label>
352
- <select id="voice" name="voice" required>
353
- <option value="">Loading voices...</option>
354
- </select>
355
- </div>
356
-
357
- <div class="form-group">
358
- <label for="language">Language:</label>
359
- <select id="language" name="language" required>
360
- <option value="">Loading languages...</option>
361
- </select>
362
- </div>
363
-
364
- <div class="form-group">
365
- <label for="blend_voice">Blend Voice (Optional):</label>
366
- <select id="blend_voice" name="blend_voice_name">
367
- <option value="">No blending</option>
368
- </select>
369
- </div>
370
-
371
- <div class="form-group">
372
- <label for="speed">Speed:</label>
373
- <div class="range-container">
374
- <input type="range" id="speed" name="speed" min="0.5" max="2.0" step="0.1" value="1.0">
375
- <div class="range-value" id="speedValue">1.0</div>
376
- </div>
377
- </div>
378
-
379
- <div class="button-group">
380
- <button type="button" class="btn btn-primary" onclick="generateAudio()">
381
- 🎡 Generate Audio
382
- </button>
383
- <button type="button" class="btn btn-secondary" onclick="generateInfo()">
384
- πŸ“ Get Info Only
385
- </button>
386
- <button type="button" class="btn btn-info" onclick="generateBoth()">
387
- 🎯 Generate Both
388
- </button>
389
- </div>
390
- </form>
391
-
392
- <div class="loading" id="loading">
393
- <div class="spinner"></div>
394
- Processing your request...
395
- </div>
396
-
397
- <div id="results"></div>
398
- </div>
399
- </div>
400
-
401
- <script>
402
- // Load voices and languages on page load
403
- window.addEventListener('load', async function() {
404
- await loadVoices();
405
- await loadLanguages();
406
- setupEventListeners();
407
- });
408
-
409
- function setupEventListeners() {
410
- const speedSlider = document.getElementById('speed');
411
- const speedValue = document.getElementById('speedValue');
412
-
413
- speedSlider.addEventListener('input', function() {
414
- speedValue.textContent = this.value;
415
- });
416
- }
417
-
418
- function loadExampleText() {
419
- const exampleSelect = document.getElementById('example-texts');
420
- const textArea = document.getElementById('text');
421
-
422
- if (exampleSelect.value) {
423
- textArea.value = exampleSelect.value;
424
- // Reset the select to show "Choose an example..."
425
- exampleSelect.value = '';
426
- }
427
- }
428
-
429
- async function loadVoices() {
430
- try {
431
- const response = await fetch('/voices');
432
- const data = await response.json();
433
-
434
- const voiceSelect = document.getElementById('voice');
435
- const blendSelect = document.getElementById('blend_voice');
436
-
437
- voiceSelect.innerHTML = '';
438
- blendSelect.innerHTML = '<option value="">No blending</option>';
439
-
440
- data.voices.forEach(voice => {
441
- const option1 = new Option(voice, voice);
442
- const option2 = new Option(voice, voice);
443
- voiceSelect.add(option1);
444
- blendSelect.add(option2);
445
- });
446
-
447
- if (data.voices.length > 0) {
448
- voiceSelect.value = data.voices[0];
449
- }
450
- } catch (error) {
451
- showError('Failed to load voices: ' + error.message);
452
- }
453
- }
454
-
455
- async function loadLanguages() {
456
- try {
457
- const response = await fetch('/languages');
458
- const data = await response.json();
459
-
460
- const languageSelect = document.getElementById('language');
461
- languageSelect.innerHTML = '';
462
-
463
- data.languages.forEach(lang => {
464
- const option = new Option(lang, lang);
465
- languageSelect.add(option);
466
- });
467
-
468
- if (data.languages.length > 0) {
469
- languageSelect.value = data.languages[0];
470
- }
471
- } catch (error) {
472
- showError('Failed to load languages: ' + error.message);
473
- }
474
- }
475
-
476
- function getFormData() {
477
- return {
478
- text: document.getElementById('text').value,
479
- voice: document.getElementById('voice').value,
480
- language: document.getElementById('language').value,
481
- blend_voice_name: document.getElementById('blend_voice').value || null,
482
- speed: parseFloat(document.getElementById('speed').value)
483
- };
484
- }
485
-
486
- function showLoading() {
487
- document.getElementById('loading').classList.add('show');
488
- document.getElementById('results').innerHTML = '';
489
- }
490
-
491
- function hideLoading() {
492
- document.getElementById('loading').classList.remove('show');
493
- }
494
-
495
- function showError(message) {
496
- hideLoading();
497
- document.getElementById('results').innerHTML =
498
- `<div class="error"><strong>Error:</strong> ${message}</div>`;
499
- }
500
-
501
- function showSuccess(content) {
502
- hideLoading();
503
- document.getElementById('results').innerHTML = content;
504
- }
505
-
506
- async function generateAudio() {
507
- showLoading();
508
-
509
- try {
510
- const formData = getFormData();
511
- const response = await fetch('/tts/audio', {
512
- method: 'POST',
513
- headers: {
514
- 'Content-Type': 'application/json'
515
- },
516
- body: JSON.stringify(formData)
517
- });
518
-
519
- if (!response.ok) {
520
- const error = await response.json();
521
- throw new Error(error.detail || 'Failed to generate audio');
522
- }
523
-
524
- const audioBlob = await response.blob();
525
- const audioUrl = URL.createObjectURL(audioBlob);
526
-
527
- showSuccess(`
528
- <div class="result-section">
529
- <h3>🎡 Generated Audio</h3>
530
- <div class="success">Audio generated successfully!</div>
531
- <audio controls>
532
- <source src="${audioUrl}" type="audio/wav">
533
- Your browser does not support the audio element.
534
- </audio>
535
- <p style="margin-top: 10px; font-size: 0.9em; color: #666;">
536
- Right-click on the audio player and select "Save audio as..." to download.
537
- </p>
538
- </div>
539
- `);
540
- } catch (error) {
541
- showError(error.message);
542
- }
543
- }
544
-
545
- async function generateInfo() {
546
- showLoading();
547
-
548
- try {
549
- const formData = getFormData();
550
- const response = await fetch('/tts/info', {
551
- method: 'POST',
552
- headers: {
553
- 'Content-Type': 'application/json'
554
- },
555
- body: JSON.stringify(formData)
556
- });
557
-
558
- if (!response.ok) {
559
- const error = await response.json();
560
- throw new Error(error.detail || 'Failed to generate info');
561
- }
562
-
563
- const data = await response.json();
564
-
565
- showSuccess(`
566
- <div class="result-section">
567
- <h3>πŸ“ Text Analysis</h3>
568
- <div class="success">Analysis completed successfully!</div>
569
- <div class="info-display">
570
- <strong>Original Text:</strong><br>
571
- ${formData.text}
572
- </div>
573
- <div class="info-display">
574
- <strong>Phonemes:</strong><br>
575
- ${data.phonemes}
576
- </div>
577
- <div class="info-display">
578
- <strong>Sample Rate:</strong> ${data.sample_rate} Hz
579
- </div>
580
- <div class="info-display">
581
- <strong>Voice:</strong> ${formData.voice}
582
- </div>
583
- <div class="info-display">
584
- <strong>Speed:</strong> ${formData.speed}x
585
- </div>
586
- </div>
587
- `);
588
- } catch (error) {
589
- showError(error.message);
590
- }
591
- }
592
-
593
- async function generateBoth() {
594
- showLoading();
595
-
596
- try {
597
- const formData = getFormData();
598
- const response = await fetch('/tts/both', {
599
- method: 'POST',
600
- headers: {
601
- 'Content-Type': 'application/json'
602
- },
603
- body: JSON.stringify(formData)
604
- });
605
-
606
- if (!response.ok) {
607
- const error = await response.json();
608
- throw new Error(error.detail || 'Failed to generate audio and info');
609
- }
610
-
611
- const data = await response.json();
612
-
613
- // Convert base64 to blob for audio playback
614
- const audioBytes = atob(data.audio_base64);
615
- const audioArray = new Uint8Array(audioBytes.length);
616
- for (let i = 0; i < audioBytes.length; i++) {
617
- audioArray[i] = audioBytes.charCodeAt(i);
618
- }
619
- const audioBlob = new Blob([audioArray], { type: 'audio/wav' });
620
- const audioUrl = URL.createObjectURL(audioBlob);
621
-
622
- showSuccess(`
623
- <div class="result-section">
624
- <h3>🎯 Complete Analysis & Audio</h3>
625
- <div class="success">Generation completed successfully!</div>
626
-
627
- <h4 style="margin-top: 20px; color: #667eea;">πŸ“Š Analysis Information</h4>
628
- <div class="info-display">
629
- <strong>Original Text:</strong><br>
630
- ${formData.text}
631
- </div>
632
- <div class="info-display">
633
- <strong>Phonemes:</strong><br>
634
- ${data.phonemes}
635
- </div>
636
- <div class="info-display">
637
- <strong>Sample Rate:</strong> ${data.sample_rate} Hz
638
- </div>
639
- <div class="info-display">
640
- <strong>Voice:</strong> ${formData.voice}
641
- ${formData.blend_voice_name ? ` (blended with ${formData.blend_voice_name})` : ''}
642
- </div>
643
- <div class="info-display">
644
- <strong>Speed:</strong> ${formData.speed}x
645
- </div>
646
-
647
- <h4 style="margin-top: 20px; color: #667eea;">🎡 Generated Audio</h4>
648
- <audio controls>
649
- <source src="${audioUrl}" type="audio/wav">
650
- Your browser does not support the audio element.
651
- </audio>
652
- <p style="margin-top: 10px; font-size: 0.9em; color: #666;">
653
- Right-click on the audio player and select "Save audio as..." to download.
654
- </p>
655
- </div>
656
- `);
657
- } catch (error) {
658
- showError(error.message);
659
- }
660
- }
661
- </script>
662
- </body>
663
- </html>
664
- """
665
  return HTMLResponse(content=html_content)
666
 
667
-
668
  @app.get("/voices")
669
  async def get_voices():
670
- """Get list of available voices"""
671
- return {"voices": sorted(kokoro.get_voices())}
672
-
 
673
 
674
  @app.get("/languages")
675
  async def get_languages():
676
- """Get list of supported languages"""
677
  return {"languages": SUPPORTED_LANGUAGES}
678
 
679
-
680
  @app.post("/tts/audio")
681
  async def generate_audio(request: TTSRequest):
682
- """Generate audio from text and return as WAV file"""
683
  try:
684
- # Validate language
685
  if request.language not in SUPPORTED_LANGUAGES:
686
  raise HTTPException(
687
  status_code=400, detail=f"Unsupported language: {request.language}"
688
  )
689
 
690
- # Validate voice
691
  available_voices = kokoro.get_voices()
692
  if request.voice not in available_voices:
693
  raise HTTPException(
694
  status_code=400, detail=f"Unsupported voice: {request.voice}"
695
  )
696
 
697
- # Validate blend voice if provided
698
- if (
699
- request.blend_voice_name
700
- and request.blend_voice_name not in available_voices
701
- ):
702
  raise HTTPException(
703
- status_code=400,
704
- detail=f"Unsupported blend voice: {request.blend_voice_name}",
705
  )
706
 
707
- # Convert text to phonemes
708
- phonemes = tokenizer.phonemize(request.text, lang=request.language)
709
-
710
- # Handle voice blending
711
- voice = request.voice
712
- if request.blend_voice_name:
713
- first_voice = kokoro.get_voice_style(request.voice)
714
- second_voice = kokoro.get_voice_style(request.blend_voice_name)
715
- voice = np.add(first_voice * 0.5, second_voice * 0.5)
716
-
717
- # Generate audio
718
- samples, sample_rate = kokoro.create(
719
- phonemes, voice=voice, speed=request.speed, is_phonemes=True
720
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
721
 
722
- # Convert to WAV bytes
723
- wav_bytes = numpy_to_wav_bytes(samples, sample_rate)
724
 
725
- # Return as streaming response
726
  return StreamingResponse(
727
  io.BytesIO(wav_bytes),
728
  media_type="audio/wav",
@@ -732,34 +214,70 @@ async def generate_audio(request: TTSRequest):
732
  except Exception as e:
733
  raise HTTPException(status_code=500, detail=str(e))
734
 
735
-
736
  @app.post("/tts/info", response_model=TTSResponse)
737
  async def generate_info(request: TTSRequest):
738
- """Generate phonemes and return metadata without audio"""
739
  try:
740
- # Validate language
741
  if request.language not in SUPPORTED_LANGUAGES:
742
  raise HTTPException(
743
  status_code=400, detail=f"Unsupported language: {request.language}"
744
  )
745
 
746
- # Convert text to phonemes
747
- phonemes = tokenizer.phonemize(request.text, lang=request.language)
748
-
749
- # Get sample rate (standard for this model)
750
- sample_rate = 24000 # Kokoro typically uses 24kHz
751
 
752
  return TTSResponse(phonemes=phonemes, sample_rate=sample_rate)
753
 
754
  except Exception as e:
755
  raise HTTPException(status_code=500, detail=str(e))
756
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
757
 
758
  @app.post("/tts/both")
759
  async def generate_both(request: TTSRequest):
760
- """Generate both audio and metadata"""
761
  try:
762
- # Validate inputs (same as audio endpoint)
763
  if request.language not in SUPPORTED_LANGUAGES:
764
  raise HTTPException(
765
  status_code=400, detail=f"Unsupported language: {request.language}"
@@ -771,34 +289,44 @@ async def generate_both(request: TTSRequest):
771
  status_code=400, detail=f"Unsupported voice: {request.voice}"
772
  )
773
 
774
- if (
775
- request.blend_voice_name
776
- and request.blend_voice_name not in available_voices
777
- ):
778
  raise HTTPException(
779
- status_code=400,
780
- detail=f"Unsupported blend voice: {request.blend_voice_name}",
781
  )
782
 
783
- # Convert text to phonemes
784
- phonemes = tokenizer.phonemize(request.text, lang=request.language)
785
-
786
- # Handle voice blending
787
- voice = request.voice
788
- if request.blend_voice_name:
789
- first_voice = kokoro.get_voice_style(request.voice)
790
- second_voice = kokoro.get_voice_style(request.blend_voice_name)
791
- voice = np.add(first_voice * 0.5, second_voice * 0.5)
792
-
793
- # Generate audio
794
- samples, sample_rate = kokoro.create(
795
- phonemes, voice=voice, speed=request.speed, is_phonemes=True
796
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
797
 
798
- # Convert to base64 for JSON response
799
- wav_bytes = numpy_to_wav_bytes(samples, sample_rate)
800
  import base64
801
-
802
  audio_base64 = base64.b64encode(wav_bytes).decode()
803
 
804
  return {
@@ -811,6 +339,14 @@ async def generate_both(request: TTSRequest):
811
  except Exception as e:
812
  raise HTTPException(status_code=500, detail=str(e))
813
 
 
 
 
 
 
 
 
 
814
 
815
  if __name__ == "__main__":
816
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
+ from fastapi import FastAPI, HTTPException, BackgroundTasks
2
  from fastapi.responses import StreamingResponse, HTMLResponse
3
  from fastapi.staticfiles import StaticFiles
4
+ from pydantic import BaseModel, validator
5
  import numpy as np
6
  import io
7
  import wave
8
  from kokoro_onnx import Kokoro
9
  from kokoro_onnx.tokenizer import Tokenizer
10
+ from typing import Optional, Dict, Tuple
11
  import uvicorn
12
+ from ui import html_content
13
+ import asyncio
14
+ import concurrent.futures
15
+ from functools import lru_cache
16
+ import threading
17
+ from queue import Queue
18
+ import time
19
+ import hashlib
20
 
21
  app = FastAPI(title="Kokoro TTS API", version="1.0.0")
22
 
23
# Thread pool used by run_in_executor() for CPU-bound synthesis work.
executor = concurrent.futures.ThreadPoolExecutor(max_workers=4)

# Model assets are loaded once at import time and shared by every request.
tokenizer = Tokenizer()
kokoro = Kokoro("kokoro-v1.0.onnx", "voices-v1.0.bin")
SUPPORTED_LANGUAGES = ["en-us"]

# In-process caches (valid because the server runs with a single worker).
phoneme_cache: Dict[str, str] = {}
voice_style_cache: Dict[str, np.ndarray] = {}
audio_cache: Dict[str, Tuple[np.ndarray, int]] = {}

# Batching knobs.
# NOTE(review): request_queue, batch_size and batch_timeout are not read
# anywhere in this module — /tts/batch processes its own request list.
request_queue = Queue()
batch_size = 4
batch_timeout = 0.1  # 100ms
40
 
41
  class TTSRequest(BaseModel):
42
  text: str
 
45
  blend_voice_name: Optional[str] = None
46
  speed: float = 1.0
47
 
 
48
class TTSResponse(BaseModel):
    """Response payload for /tts/info: phoneme string plus output sample rate."""
    phonemes: str
    sample_rate: int
51
 
52
def get_cache_key(text: str, language: str, voice: str, blend_voice: Optional[str], speed: float) -> str:
    """Derive a deterministic cache key covering every synthesis parameter."""
    fingerprint = "|".join(str(part) for part in (text, language, voice, blend_voice, speed))
    return hashlib.md5(fingerprint.encode()).hexdigest()
56
+
57
@lru_cache(maxsize=1000)
def cached_phonemize(text: str, language: str) -> str:
    """Convert text to phonemes, memoizing so repeated requests skip re-phonemization."""
    return tokenizer.phonemize(text, lang=language)
61
+
62
def get_cached_voice_style(voice_name: str) -> np.ndarray:
    """Return the style vector for `voice_name`, loading it from the model at most once."""
    try:
        return voice_style_cache[voice_name]
    except KeyError:
        style = kokoro.get_voice_style(voice_name)
        voice_style_cache[voice_name] = style
        return style
67
+
68
def process_voice_blend(voice: str, blend_voice_name: Optional[str]) -> np.ndarray:
    """Resolve the style vector for a voice, optionally blended 50/50 with a second one.

    Blended styles are cached under "voice+blend" so each pair is mixed only once.
    """
    if not blend_voice_name:
        # No blending requested: just the (cached) base style.
        return get_cached_voice_style(voice)

    blend_key = f"{voice}+{blend_voice_name}"
    blended = voice_style_cache.get(blend_key)
    if blended is None:
        base = get_cached_voice_style(voice)
        other = get_cached_voice_style(blend_voice_name)
        blended = base * 0.5 + other * 0.5
        voice_style_cache[blend_key] = blended
    return blended
81
+
82
def batch_process_tts(requests: list) -> list:
    """Synthesize a batch of TTS requests sequentially on the calling thread.

    Returns one (samples, sample_rate, phonemes, error) tuple per request.
    On failure the first three fields are None and `error` carries the message,
    so one bad request never aborts the rest of the batch.
    """
    # Phonemize everything up front (repeats hit the lru_cache).
    phonemes_batch = [cached_phonemize(req.text, req.language) for req in requests]

    results = []
    for req, phonemes in zip(requests, phonemes_batch):
        try:
            style = process_voice_blend(req.voice, req.blend_voice_name)
            samples, sample_rate = kokoro.create(
                phonemes, voice=style, speed=req.speed, lang=None, is_phonemes=True
            )
            results.append((samples, sample_rate, phonemes, None))
        except Exception as exc:
            results.append((None, None, None, str(exc)))

    return results
108
 
109
def numpy_to_wav_bytes(audio_data: np.ndarray, sample_rate: int) -> bytes:
    """Encode a mono audio signal as an in-memory 16-bit PCM WAV file.

    Args:
        audio_data: 1-D samples. int16 arrays are written as-is; any other
            dtype is treated as float audio in [-1.0, 1.0] and scaled to
            int16. Out-of-range values are clipped first — without the clip,
            samples beyond ±1.0 would wrap around in int16 and produce loud
            artifacts.
        sample_rate: Output sample rate in Hz.

    Returns:
        Complete WAV file contents (header plus PCM frames).
    """
    if audio_data.dtype != np.int16:
        audio_data = (np.clip(audio_data, -1.0, 1.0) * 32767).astype(np.int16)

    # Bug fix: the previous truncate()-based "pre-allocation" was removed.
    # BytesIO grows automatically; truncate(size) only zero-fills the buffer
    # and provides no speedup, while risking trailing garbage if the size
    # calculation ever drifts from what the wave module writes.
    buffer = io.BytesIO()
    with wave.open(buffer, "wb") as wav_file:
        wav_file.setnchannels(1)  # mono
        wav_file.setsampwidth(2)  # 16-bit samples
        wav_file.setframerate(sample_rate)
        wav_file.writeframes(audio_data.tobytes())

    return buffer.getvalue()
128
 
129
async def run_in_executor(func, *args, **kwargs):
    """Run a blocking, CPU-bound callable on the shared thread pool.

    Keeps the event loop responsive while synthesis / WAV encoding run.
    Returns whatever `func` returns.
    """
    # Bug fix: asyncio.get_event_loop() is deprecated inside a coroutine
    # since Python 3.10; get_running_loop() is the supported call here.
    loop = asyncio.get_running_loop()
    if kwargs:
        # loop.run_in_executor() does not forward keyword arguments,
        # so bind them with functools.partial first.
        from functools import partial
        return await loop.run_in_executor(executor, partial(func, *args, **kwargs))
    return await loop.run_in_executor(executor, func, *args)
139
 
140
@app.get("/", response_class=HTMLResponse)
async def get_home():
    """Serve the single-page test UI bundled in ui.py."""
    return HTMLResponse(content=html_content)
143
 
 
144
@app.get("/voices")
async def get_voices():
    """List available voice names (sorted); computed once and memoized on the function."""
    listing = getattr(get_voices, "_cached_voices", None)
    if listing is None:
        listing = {"voices": sorted(kokoro.get_voices())}
        get_voices._cached_voices = listing
    return listing
150
 
151
@app.get("/languages")
async def get_languages():
    """List the language codes this deployment supports."""
    return {"languages": SUPPORTED_LANGUAGES}
154
 
 
155
  @app.post("/tts/audio")
156
  async def generate_audio(request: TTSRequest):
157
+ """Optimized audio generation with caching"""
158
  try:
159
+ # Validate inputs
160
  if request.language not in SUPPORTED_LANGUAGES:
161
  raise HTTPException(
162
  status_code=400, detail=f"Unsupported language: {request.language}"
163
  )
164
 
 
165
  available_voices = kokoro.get_voices()
166
  if request.voice not in available_voices:
167
  raise HTTPException(
168
  status_code=400, detail=f"Unsupported voice: {request.voice}"
169
  )
170
 
171
+ if request.blend_voice_name and request.blend_voice_name not in available_voices:
 
 
 
 
172
  raise HTTPException(
173
+ status_code=400, detail=f"Unsupported blend voice: {request.blend_voice_name}"
 
174
  )
175
 
176
+ # Check cache first
177
+ cache_key = get_cache_key(
178
+ request.text, request.language, request.voice,
179
+ request.blend_voice_name, request.speed
 
 
 
 
 
 
 
 
 
180
  )
181
+
182
+ if cache_key in audio_cache:
183
+ samples, sample_rate = audio_cache[cache_key]
184
+ else:
185
+ # Generate phonemes (cached)
186
+ phonemes = cached_phonemize(request.text, request.language)
187
+
188
+ # Process voice (cached)
189
+ voice = process_voice_blend(request.voice, request.blend_voice_name)
190
+
191
+ # Generate audio in thread pool - Fixed parameter passing
192
+ samples, sample_rate = await run_in_executor(
193
+ kokoro.create,
194
+ phonemes,
195
+ voice=voice,
196
+ speed=request.speed,
197
+ lang=None,
198
+ is_phonemes=True
199
+ )
200
+
201
+ # Cache result (limit cache size)
202
+ if len(audio_cache) < 100:
203
+ audio_cache[cache_key] = (samples, sample_rate)
204
 
205
+ # Convert to WAV in thread pool
206
+ wav_bytes = await run_in_executor(numpy_to_wav_bytes, samples, sample_rate)
207
 
 
208
  return StreamingResponse(
209
  io.BytesIO(wav_bytes),
210
  media_type="audio/wav",
 
214
  except Exception as e:
215
  raise HTTPException(status_code=500, detail=str(e))
216
 
 
217
@app.post("/tts/info", response_model=TTSResponse)
async def generate_info(request: TTSRequest):
    """Return phonemes and sample rate for `request.text` without synthesizing audio.

    Raises:
        HTTPException 400: unsupported language.
        HTTPException 500: any unexpected processing failure.
    """
    try:
        if request.language not in SUPPORTED_LANGUAGES:
            raise HTTPException(
                status_code=400, detail=f"Unsupported language: {request.language}"
            )

        # Phonemization is memoized (see cached_phonemize).
        phonemes = cached_phonemize(request.text, request.language)
        sample_rate = 24000  # fixed output rate used throughout this service

        return TTSResponse(phonemes=phonemes, sample_rate=sample_rate)

    except HTTPException:
        # Bug fix: HTTPException subclasses Exception, so the generic handler
        # below was rewrapping the validation 400 above as a 500. Re-raise.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
234
 
235
@app.post("/tts/batch")
async def generate_batch(requests: list[TTSRequest]):
    """Batch processing endpoint for multiple TTS requests.

    Validates every request up front (the whole batch is rejected with 400 on
    the first invalid one), then synthesizes off the event loop. Each result
    entry is either {"error": ...} or {"phonemes", "sample_rate",
    "audio_base64", "audio_format"}.
    """
    import base64  # hoisted: previously re-imported on every loop iteration

    try:
        # Validate all requests first.
        available_voices = kokoro.get_voices()
        for req in requests:
            if req.language not in SUPPORTED_LANGUAGES:
                raise HTTPException(
                    status_code=400, detail=f"Unsupported language: {req.language}"
                )
            if req.voice not in available_voices:
                raise HTTPException(
                    status_code=400, detail=f"Unsupported voice: {req.voice}"
                )
            # NOTE(review): blend_voice_name is not validated here (unlike the
            # single-request endpoints); an unknown blend voice surfaces as a
            # per-item "error" entry instead of a 400. Confirm intended.

        # Run the whole batch in the thread pool to keep the loop responsive.
        results = await run_in_executor(batch_process_tts, requests)

        # Assemble the JSON-friendly response.
        response_data = []
        for samples, sample_rate, phonemes, error in results:
            if error:
                response_data.append({"error": error})
                continue
            wav_bytes = await run_in_executor(numpy_to_wav_bytes, samples, sample_rate)
            response_data.append({
                "phonemes": phonemes,
                "sample_rate": sample_rate,
                "audio_base64": base64.b64encode(wav_bytes).decode(),
                "audio_format": "wav",
            })

        return {"results": response_data}

    except HTTPException:
        # Bug fix: preserve the 400 validation errors raised above instead of
        # letting the generic handler rewrap them as 500s.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
275
 
276
  @app.post("/tts/both")
277
  async def generate_both(request: TTSRequest):
278
+ """Generate both audio and metadata with optimizations"""
279
  try:
280
+ # Validate inputs
281
  if request.language not in SUPPORTED_LANGUAGES:
282
  raise HTTPException(
283
  status_code=400, detail=f"Unsupported language: {request.language}"
 
289
  status_code=400, detail=f"Unsupported voice: {request.voice}"
290
  )
291
 
292
+ if request.blend_voice_name and request.blend_voice_name not in available_voices:
 
 
 
293
  raise HTTPException(
294
+ status_code=400, detail=f"Unsupported blend voice: {request.blend_voice_name}"
 
295
  )
296
 
297
+ # Check cache
298
+ cache_key = get_cache_key(
299
+ request.text, request.language, request.voice,
300
+ request.blend_voice_name, request.speed
 
 
 
 
 
 
 
 
 
301
  )
302
+
303
+ if cache_key in audio_cache:
304
+ samples, sample_rate = audio_cache[cache_key]
305
+ phonemes = cached_phonemize(request.text, request.language)
306
+ else:
307
+ # Generate phonemes
308
+ phonemes = cached_phonemize(request.text, request.language)
309
+
310
+ # Process voice
311
+ voice = process_voice_blend(request.voice, request.blend_voice_name)
312
+
313
+ # Generate audio - Fixed parameter passing
314
+ samples, sample_rate = await run_in_executor(
315
+ kokoro.create,
316
+ phonemes,
317
+ voice=voice,
318
+ speed=request.speed,
319
+ lang=None,
320
+ is_phonemes=True
321
+ )
322
+
323
+ # Cache result
324
+ if len(audio_cache) < 100:
325
+ audio_cache[cache_key] = (samples, sample_rate)
326
 
327
+ # Convert to base64
328
+ wav_bytes = await run_in_executor(numpy_to_wav_bytes, samples, sample_rate)
329
  import base64
 
330
  audio_base64 = base64.b64encode(wav_bytes).decode()
331
 
332
  return {
 
339
  except Exception as e:
340
  raise HTTPException(status_code=500, detail=str(e))
341
 
342
# Warm the voice-style cache at startup so first requests skip the load.
# (The previous "Cleanup function" comment was inaccurate — this preloads.)
@app.on_event("startup")
async def startup_event():
    """Preload a few commonly used voice styles into the cache."""
    for name in ("af_heart", "af_bella", "af_sarah"):
        if name in kokoro.get_voices():
            get_cached_voice_style(name)
350
 
351
if __name__ == "__main__":
    # Single worker on purpose: the module-level model and the in-process
    # caches defined above live in this one process.
    uvicorn.run(app, host="0.0.0.0", port=7860, workers=1)
ui.py ADDED
@@ -0,0 +1,609 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ html_content = """
2
+ <!DOCTYPE html>
3
+ <html lang="en">
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>Kokoro TTS Test Interface</title>
8
+ <style>
9
+ * {
10
+ margin: 0;
11
+ padding: 0;
12
+ box-sizing: border-box;
13
+ }
14
+
15
+ body {
16
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
17
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
18
+ min-height: 100vh;
19
+ padding: 20px;
20
+ color: #333;
21
+ }
22
+
23
+ .container {
24
+ max-width: 800px;
25
+ margin: 0 auto;
26
+ background: white;
27
+ border-radius: 15px;
28
+ box-shadow: 0 10px 30px rgba(0, 0, 0, 0.3);
29
+ overflow: hidden;
30
+ }
31
+
32
+ .header {
33
+ background: linear-gradient(45deg, #667eea, #764ba2);
34
+ color: white;
35
+ text-align: center;
36
+ padding: 30px;
37
+ }
38
+
39
+ .header h1 {
40
+ font-size: 2.5rem;
41
+ margin-bottom: 10px;
42
+ text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
43
+ }
44
+
45
+ .header p {
46
+ font-size: 1.1rem;
47
+ opacity: 0.9;
48
+ }
49
+
50
+ .content {
51
+ padding: 30px;
52
+ }
53
+
54
+ .form-group {
55
+ margin-bottom: 25px;
56
+ }
57
+
58
+ label {
59
+ display: block;
60
+ margin-bottom: 8px;
61
+ font-weight: 600;
62
+ color: #555;
63
+ }
64
+
65
+ input, select, textarea {
66
+ width: 100%;
67
+ padding: 12px 15px;
68
+ border: 2px solid #e1e5e9;
69
+ border-radius: 8px;
70
+ font-size: 16px;
71
+ transition: all 0.3s ease;
72
+ font-family: inherit;
73
+ }
74
+
75
+ input:focus, select:focus, textarea:focus {
76
+ outline: none;
77
+ border-color: #667eea;
78
+ box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
79
+ }
80
+
81
+ textarea {
82
+ resize: vertical;
83
+ min-height: 100px;
84
+ }
85
+
86
+ .range-container {
87
+ display: flex;
88
+ align-items: center;
89
+ gap: 15px;
90
+ }
91
+
92
+ .range-container input[type="range"] {
93
+ flex: 1;
94
+ }
95
+
96
+ .range-value {
97
+ background: #f8f9fa;
98
+ padding: 8px 12px;
99
+ border-radius: 6px;
100
+ font-weight: 600;
101
+ min-width: 60px;
102
+ text-align: center;
103
+ border: 2px solid #e1e5e9;
104
+ }
105
+
106
+ .button-group {
107
+ display: grid;
108
+ grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
109
+ gap: 15px;
110
+ margin-top: 30px;
111
+ }
112
+
113
+ .btn {
114
+ padding: 15px 25px;
115
+ border: none;
116
+ border-radius: 8px;
117
+ font-size: 16px;
118
+ font-weight: 600;
119
+ cursor: pointer;
120
+ transition: all 0.3s ease;
121
+ text-transform: uppercase;
122
+ letter-spacing: 0.5px;
123
+ }
124
+
125
+ .btn-primary {
126
+ background: linear-gradient(45deg, #667eea, #764ba2);
127
+ color: white;
128
+ }
129
+
130
+ .btn-secondary {
131
+ background: linear-gradient(45deg, #ffecd2, #fcb69f);
132
+ color: #8b4513;
133
+ }
134
+
135
+ .btn-info {
136
+ background: linear-gradient(45deg, #a8edea, #fed6e3);
137
+ color: #2c3e50;
138
+ }
139
+
140
+ .btn:hover {
141
+ transform: translateY(-2px);
142
+ box-shadow: 0 5px 15px rgba(0, 0, 0, 0.2);
143
+ }
144
+
145
+ .btn:disabled {
146
+ opacity: 0.6;
147
+ cursor: not-allowed;
148
+ transform: none;
149
+ }
150
+
151
+ .result-section {
152
+ margin-top: 30px;
153
+ padding: 25px;
154
+ background: #f8f9fa;
155
+ border-radius: 10px;
156
+ border-left: 5px solid #667eea;
157
+ }
158
+
159
+ .result-section h3 {
160
+ color: #667eea;
161
+ margin-bottom: 15px;
162
+ font-size: 1.3rem;
163
+ }
164
+
165
+ .info-display {
166
+ background: white;
167
+ padding: 15px;
168
+ border-radius: 8px;
169
+ margin: 10px 0;
170
+ border: 1px solid #e1e5e9;
171
+ }
172
+
173
+ .info-display strong {
174
+ color: #667eea;
175
+ }
176
+
177
+ .loading {
178
+ display: none;
179
+ text-align: center;
180
+ padding: 20px;
181
+ color: #667eea;
182
+ }
183
+
184
+ .loading.show {
185
+ display: block;
186
+ }
187
+
188
+ .spinner {
189
+ display: inline-block;
190
+ width: 30px;
191
+ height: 30px;
192
+ border: 3px solid #f3f3f3;
193
+ border-top: 3px solid #667eea;
194
+ border-radius: 50%;
195
+ animation: spin 1s linear infinite;
196
+ margin-right: 10px;
197
+ }
198
+
199
+ @keyframes spin {
200
+ 0% { transform: rotate(0deg); }
201
+ 100% { transform: rotate(360deg); }
202
+ }
203
+
204
+ .error {
205
+ background: #fee;
206
+ color: #c33;
207
+ padding: 15px;
208
+ border-radius: 8px;
209
+ border-left: 5px solid #c33;
210
+ margin: 15px 0;
211
+ }
212
+
213
+ .success {
214
+ background: #efe;
215
+ color: #363;
216
+ padding: 15px;
217
+ border-radius: 8px;
218
+ border-left: 5px solid #363;
219
+ margin: 15px 0;
220
+ }
221
+
222
+ audio {
223
+ width: 100%;
224
+ margin-top: 15px;
225
+ }
226
+
227
+ .checkbox-group {
228
+ display: flex;
229
+ align-items: center;
230
+ gap: 10px;
231
+ margin-top: 10px;
232
+ }
233
+
234
+ .checkbox-group input[type="checkbox"] {
235
+ width: auto;
236
+ }
237
+
238
+ .example-select {
239
+ background: #f8f9fa;
240
+ border: 2px dashed #667eea;
241
+ border-radius: 6px;
242
+ font-size: 14px;
243
+ color: #667eea;
244
+ margin-bottom: 10px;
245
+ }
246
+
247
+ .example-select:focus {
248
+ border-color: #764ba2;
249
+ background: white;
250
+ }
251
+
252
+ .example-label {
253
+ font-size: 0.9em;
254
+ color: #667eea;
255
+ font-weight: 500;
256
+ margin-bottom: 5px;
257
+ }
258
+ </style>
259
+ </head>
260
+ <body>
261
+ <div class="container">
262
+ <div class="header">
263
+ <h1>🎀 Kokoro TTS</h1>
264
+ <p>Text-to-Speech Testing Interface</p>
265
+ </div>
266
+
267
+ <div class="content">
268
+ <form id="ttsForm">
269
+ <div class="form-group">
270
+ <label for="text">Text to Convert:</label>
271
+ <div style="margin-bottom: 10px;">
272
+ <div class="example-label">πŸ“ Quick Examples:</div>
273
+ <select id="example-texts" class="example-select" onchange="loadExampleText()">
274
+ <option value="">Choose an example...</option>
275
+ <option value="Hello! This is a test of the Kokoro text-to-speech system.">Basic Greeting</option>
276
+ <option value="The quick brown fox jumps over the lazy dog. This sentence contains every letter of the alphabet.">Alphabet Test</option>
277
+ <option value="Welcome to our English tutoring platform! We're here to help you improve your pronunciation and speaking skills.">English Learning</option>
278
+ <option value="In a hole in the ground there lived a hobbit. Not a nasty, dirty, wet hole filled with the ends of worms and an oozy smell.">Story Reading</option>
279
+ <option value="To be or not to be, that is the question. Whether 'tis nobler in the mind to suffer the slings and arrows of outrageous fortune.">Shakespeare</option>
280
+ <option value="Ladies and gentlemen, welcome to today's presentation. We will be discussing the latest developments in artificial intelligence.">Presentation</option>
281
+ <option value="The weather today is sunny with a high of 75 degrees Fahrenheit. Perfect for outdoor activities and picnics in the park.">Weather Report</option>
282
+ <option value="One, two, three, four, five. Ten, twenty, thirty, forty, fifty. One hundred, one thousand, one million.">Numbers Practice</option>
283
+ <option value="How are you doing today? I hope you're having a wonderful time learning English with our voice synthesis technology.">Conversational</option>
284
+ <option value="Science and technology have revolutionized the way we communicate, learn, and interact with the world around us.">Technical</option>
285
+ <option value="Once upon a time, in a faraway kingdom, there lived a brave princess who could speak to animals and understand their language.">Fairy Tale</option>
286
+ <option value="Please remember to wash your hands frequently, wear a mask when necessary, and maintain social distance for everyone's safety.">Instructions</option>
287
+ <option value="The pronunciation of English can be challenging, but with practice and patience, you can master the sounds and rhythms of the language.">Educational</option>
288
+ <option value="Artificial intelligence and machine learning are transforming industries from healthcare to transportation, creating new possibilities.">AI Topic</option>
289
+ <option value="Thank you for using our text-to-speech service. We hope it helps you in your language learning journey. Have a great day!">Thank You Message</option>
290
+ </select>
291
+ </div>
292
+ <textarea id="text" name="text" placeholder="Enter your text here or choose an example above..." required>Hello! This is a test of the Kokoro text-to-speech system.</textarea>
293
+ </div>
294
+
295
+ <div class="form-group">
296
+ <label for="voice">Voice:</label>
297
+ <select id="voice" name="voice" required>
298
+ <option value="">Loading voices...</option>
299
+ </select>
300
+ </div>
301
+
302
+ <div class="form-group">
303
+ <label for="language">Language:</label>
304
+ <select id="language" name="language" required>
305
+ <option value="">Loading languages...</option>
306
+ </select>
307
+ </div>
308
+
309
+ <div class="form-group">
310
+ <label for="blend_voice">Blend Voice (Optional):</label>
311
+ <select id="blend_voice" name="blend_voice_name">
312
+ <option value="">No blending</option>
313
+ </select>
314
+ </div>
315
+
316
+ <div class="form-group">
317
+ <label for="speed">Speed:</label>
318
+ <div class="range-container">
319
+ <input type="range" id="speed" name="speed" min="0.5" max="2.0" step="0.1" value="1.0">
320
+ <div class="range-value" id="speedValue">1.0</div>
321
+ </div>
322
+ </div>
323
+
324
+ <div class="button-group">
325
+ <button type="button" class="btn btn-primary" onclick="generateAudio()">
326
+ 🎡 Generate Audio
327
+ </button>
328
+ <button type="button" class="btn btn-secondary" onclick="generateInfo()">
329
+ πŸ“ Get Info Only
330
+ </button>
331
+ <button type="button" class="btn btn-info" onclick="generateBoth()">
332
+ 🎯 Generate Both
333
+ </button>
334
+ </div>
335
+ </form>
336
+
337
+ <div class="loading" id="loading">
338
+ <div class="spinner"></div>
339
+ Processing your request...
340
+ </div>
341
+
342
+ <div id="results"></div>
343
+ </div>
344
+ </div>
345
+
346
+ <script>
347
+ // Load voices and languages on page load
348
+ window.addEventListener('load', async function() {
349
+ await loadVoices();
350
+ await loadLanguages();
351
+ setupEventListeners();
352
+ });
353
+
354
+ function setupEventListeners() {
355
+ const speedSlider = document.getElementById('speed');
356
+ const speedValue = document.getElementById('speedValue');
357
+
358
+ speedSlider.addEventListener('input', function() {
359
+ speedValue.textContent = this.value;
360
+ });
361
+ }
362
+
363
+ function loadExampleText() {
364
+ const exampleSelect = document.getElementById('example-texts');
365
+ const textArea = document.getElementById('text');
366
+
367
+ if (exampleSelect.value) {
368
+ textArea.value = exampleSelect.value;
369
+ // Reset the select to show "Choose an example..."
370
+ exampleSelect.value = '';
371
+ }
372
+ }
373
+
374
+ async function loadVoices() {
375
+ try {
376
+ const response = await fetch('/voices');
377
+ const data = await response.json();
378
+
379
+ const voiceSelect = document.getElementById('voice');
380
+ const blendSelect = document.getElementById('blend_voice');
381
+
382
+ voiceSelect.innerHTML = '';
383
+ blendSelect.innerHTML = '<option value="">No blending</option>';
384
+
385
+ data.voices.forEach(voice => {
386
+ const option1 = new Option(voice, voice);
387
+ const option2 = new Option(voice, voice);
388
+ voiceSelect.add(option1);
389
+ blendSelect.add(option2);
390
+ });
391
+
392
+ if (data.voices.length > 0) {
393
+ voiceSelect.value = data.voices[0];
394
+ }
395
+ } catch (error) {
396
+ showError('Failed to load voices: ' + error.message);
397
+ }
398
+ }
399
+
400
+ async function loadLanguages() {
401
+ try {
402
+ const response = await fetch('/languages');
403
+ const data = await response.json();
404
+
405
+ const languageSelect = document.getElementById('language');
406
+ languageSelect.innerHTML = '';
407
+
408
+ data.languages.forEach(lang => {
409
+ const option = new Option(lang, lang);
410
+ languageSelect.add(option);
411
+ });
412
+
413
+ if (data.languages.length > 0) {
414
+ languageSelect.value = data.languages[0];
415
+ }
416
+ } catch (error) {
417
+ showError('Failed to load languages: ' + error.message);
418
+ }
419
+ }
420
+
421
+ function getFormData() {
422
+ return {
423
+ text: document.getElementById('text').value,
424
+ voice: document.getElementById('voice').value,
425
+ language: document.getElementById('language').value,
426
+ blend_voice_name: document.getElementById('blend_voice').value || null,
427
+ speed: parseFloat(document.getElementById('speed').value)
428
+ };
429
+ }
430
+
431
+ function showLoading() {
432
+ document.getElementById('loading').classList.add('show');
433
+ document.getElementById('results').innerHTML = '';
434
+ }
435
+
436
+ function hideLoading() {
437
+ document.getElementById('loading').classList.remove('show');
438
+ }
439
+
440
+ function showError(message) {
441
+ hideLoading();
442
+ document.getElementById('results').innerHTML =
443
+ `<div class="error"><strong>Error:</strong> ${message}</div>`;
444
+ }
445
+
446
+ function showSuccess(content) {
447
+ hideLoading();
448
+ document.getElementById('results').innerHTML = content;
449
+ }
450
+
451
+ async function generateAudio() {
452
+ showLoading();
453
+
454
+ try {
455
+ const formData = getFormData();
456
+ const response = await fetch('/tts/audio', {
457
+ method: 'POST',
458
+ headers: {
459
+ 'Content-Type': 'application/json'
460
+ },
461
+ body: JSON.stringify(formData)
462
+ });
463
+
464
+ if (!response.ok) {
465
+ const error = await response.json();
466
+ throw new Error(error.detail || 'Failed to generate audio');
467
+ }
468
+
469
+ const audioBlob = await response.blob();
470
+ const audioUrl = URL.createObjectURL(audioBlob);
471
+
472
+ showSuccess(`
473
+ <div class="result-section">
474
+ <h3>🎡 Generated Audio</h3>
475
+ <div class="success">Audio generated successfully!</div>
476
+ <audio controls>
477
+ <source src="${audioUrl}" type="audio/wav">
478
+ Your browser does not support the audio element.
479
+ </audio>
480
+ <p style="margin-top: 10px; font-size: 0.9em; color: #666;">
481
+ Right-click on the audio player and select "Save audio as..." to download.
482
+ </p>
483
+ </div>
484
+ `);
485
+ } catch (error) {
486
+ showError(error.message);
487
+ }
488
+ }
489
+
490
+ async function generateInfo() {
491
+ showLoading();
492
+
493
+ try {
494
+ const formData = getFormData();
495
+ const response = await fetch('/tts/info', {
496
+ method: 'POST',
497
+ headers: {
498
+ 'Content-Type': 'application/json'
499
+ },
500
+ body: JSON.stringify(formData)
501
+ });
502
+
503
+ if (!response.ok) {
504
+ const error = await response.json();
505
+ throw new Error(error.detail || 'Failed to generate info');
506
+ }
507
+
508
+ const data = await response.json();
509
+
510
+ showSuccess(`
511
+ <div class="result-section">
512
+ <h3>πŸ“ Text Analysis</h3>
513
+ <div class="success">Analysis completed successfully!</div>
514
+ <div class="info-display">
515
+ <strong>Original Text:</strong><br>
516
+ ${formData.text}
517
+ </div>
518
+ <div class="info-display">
519
+ <strong>Phonemes:</strong><br>
520
+ ${data.phonemes}
521
+ </div>
522
+ <div class="info-display">
523
+ <strong>Sample Rate:</strong> ${data.sample_rate} Hz
524
+ </div>
525
+ <div class="info-display">
526
+ <strong>Voice:</strong> ${formData.voice}
527
+ </div>
528
+ <div class="info-display">
529
+ <strong>Speed:</strong> ${formData.speed}x
530
+ </div>
531
+ </div>
532
+ `);
533
+ } catch (error) {
534
+ showError(error.message);
535
+ }
536
+ }
537
+
538
+ async function generateBoth() {
539
+ showLoading();
540
+
541
+ try {
542
+ const formData = getFormData();
543
+ const response = await fetch('/tts/both', {
544
+ method: 'POST',
545
+ headers: {
546
+ 'Content-Type': 'application/json'
547
+ },
548
+ body: JSON.stringify(formData)
549
+ });
550
+
551
+ if (!response.ok) {
552
+ const error = await response.json();
553
+ throw new Error(error.detail || 'Failed to generate audio and info');
554
+ }
555
+
556
+ const data = await response.json();
557
+
558
+ // Convert base64 to blob for audio playback
559
+ const audioBytes = atob(data.audio_base64);
560
+ const audioArray = new Uint8Array(audioBytes.length);
561
+ for (let i = 0; i < audioBytes.length; i++) {
562
+ audioArray[i] = audioBytes.charCodeAt(i);
563
+ }
564
+ const audioBlob = new Blob([audioArray], { type: 'audio/wav' });
565
+ const audioUrl = URL.createObjectURL(audioBlob);
566
+
567
+ showSuccess(`
568
+ <div class="result-section">
569
+ <h3>🎯 Complete Analysis & Audio</h3>
570
+ <div class="success">Generation completed successfully!</div>
571
+
572
+ <h4 style="margin-top: 20px; color: #667eea;">πŸ“Š Analysis Information</h4>
573
+ <div class="info-display">
574
+ <strong>Original Text:</strong><br>
575
+ ${formData.text}
576
+ </div>
577
+ <div class="info-display">
578
+ <strong>Phonemes:</strong><br>
579
+ ${data.phonemes}
580
+ </div>
581
+ <div class="info-display">
582
+ <strong>Sample Rate:</strong> ${data.sample_rate} Hz
583
+ </div>
584
+ <div class="info-display">
585
+ <strong>Voice:</strong> ${formData.voice}
586
+ ${formData.blend_voice_name ? ` (blended with ${formData.blend_voice_name})` : ''}
587
+ </div>
588
+ <div class="info-display">
589
+ <strong>Speed:</strong> ${formData.speed}x
590
+ </div>
591
+
592
+ <h4 style="margin-top: 20px; color: #667eea;">🎡 Generated Audio</h4>
593
+ <audio controls>
594
+ <source src="${audioUrl}" type="audio/wav">
595
+ Your browser does not support the audio element.
596
+ </audio>
597
+ <p style="margin-top: 10px; font-size: 0.9em; color: #666;">
598
+ Right-click on the audio player and select "Save audio as..." to download.
599
+ </p>
600
+ </div>
601
+ `);
602
+ } catch (error) {
603
+ showError(error.message);
604
+ }
605
+ }
606
+ </script>
607
+ </body>
608
+ </html>
609
+ """