PyxiLabs committed on
Commit
312272f
·
verified ·
1 Parent(s): ae52f03

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +491 -0
app.py ADDED
@@ -0,0 +1,491 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Devil Studio — OpenAI-compatible Text-to-Speech API
3
+ Powered by KittenTTS · All models loaded permanently in memory.
4
+
5
+ Endpoints
6
+ ---------
7
+ POST /v1/audio/speech — OpenAI-compatible TTS
8
+ GET /v1/status — Server / model / system status
9
+ GET /health — Simple health-check
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import io
15
+ import logging
16
+ import os
17
+ import threading
18
+ import time
19
+ from typing import Literal
20
+
21
+ import numpy as np
22
+ import soundfile as sf
23
+ from fastapi import FastAPI, HTTPException
24
+ from fastapi.responses import StreamingResponse
25
+ from pydantic import BaseModel, Field
26
+
27
+ from kittentts import KittenTTS
28
+
29
+ # ---------------------------------------------------------------------------
30
+ # Logging
31
+ # ---------------------------------------------------------------------------
32
# Root logging is configured once at import time. Each record is rendered as
# "<timestamp> <LEVEL padded to 8 columns> <message>".
logging.basicConfig(
    format="%(asctime)s %(levelname)-8s %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.INFO,
)

# Module-wide logger used by every function in this file.
log = logging.getLogger("devil-studio")
38
+
39
+ # ---------------------------------------------------------------------------
40
+ # Constants
41
+ # ---------------------------------------------------------------------------
42
# Sample rate handed to soundfile when writing audio, and used to convert a
# sample count into seconds for the synthesis log line.
SAMPLE_RATE = 24_000

# Wall-clock timestamp taken at import; /v1/status reports uptime relative to it.
SERVER_START_TIME = time.time()

# Public model name -> metadata.
#   * Entries with an "id" are real models and are loaded at startup.
#   * Entries with an "alias" only redirect to another key of this dict.
# Insertion order is the order models are loaded and listed in /v1/status.
MODEL_REGISTRY: dict[str, dict] = {
    "tts-1": {
        "id": "KittenML/kitten-tts-nano-0.8-fp32",
        "label": "Nano (15 M — Fastest)",
        "size": "15M",
        "description": "Fastest, lowest latency",
    },
    "tts-1-hd": {
        "id": "KittenML/kitten-tts-micro-0.8",
        "label": "Micro (40 M — Balanced)",
        "size": "40M",
        "description": "Balanced speed and quality",
    },
    "tts-1-hd-mini": {
        "id": "KittenML/kitten-tts-mini-0.8",
        "label": "Mini (80 M — Best Quality)",
        "size": "80M",
        "description": "Best audio quality",
    },
    # Short-hand names that redirect to the entries above.
    "nano": {"alias": "tts-1"},
    "micro": {"alias": "tts-1-hd"},
    "mini": {"alias": "tts-1-hd-mini"},
}

# Voice names accepted by the speech endpoint (matched case-sensitively).
VOICES: set[str] = {
    "Bella",
    "Jasper",
    "Luna",
    "Bruno",
    "Rosie",
    "Hugo",
    "Kiki",
    "Leo",
}

# Lower-case OpenAI voice name -> KittenTTS voice name.
OPENAI_VOICE_MAP: dict[str, str] = {
    "alloy": "Jasper",
    "echo": "Hugo",
    "fable": "Rosie",
    "onyx": "Bruno",
    "nova": "Luna",
    "shimmer": "Bella",
    "ash": "Kiki",
    "coral": "Rosie",
    "sage": "Luna",
}

# response_format value -> Content-Type string.
FORMAT_MIME: dict[str, str] = {
    "mp3": "audio/mpeg",
    "wav": "audio/wav",
    "flac": "audio/flac",
    "pcm": "audio/pcm",
    "opus": "audio/ogg; codecs=opus",
    "aac": "audio/aac",
}
94
+
95
+ # ---------------------------------------------------------------------------
96
+ # In-memory model cache + per-model state tracking
97
+ # ---------------------------------------------------------------------------
98
+ _model_cache: dict[str, KittenTTS] = {} # keyed by model_id
99
+ _model_status: dict[str, str] = {} # "loading" | "idle" | "running" | "error"
100
+ _model_lock: dict[str, threading.Lock] = {} # one lock per model for thread-safe status writes
101
+
102
+
103
def _canonical_models() -> dict[str, dict]:
    """Return the MODEL_REGISTRY entries that describe real models.

    Entries carrying an ``"alias"`` key are skipped; registry order is kept.
    """
    canonical: dict[str, dict] = {}
    for name, meta in MODEL_REGISTRY.items():
        if "alias" in meta:
            continue
        canonical[name] = meta
    return canonical
106
+
107
+
108
def _resolve_alias(name: str) -> str:
    """Follow the alias chain starting at *name* and return the canonical key.

    A canonical key is a MODEL_REGISTRY entry without an ``"alias"`` field.
    Unlike a single-hop lookup, this keeps following aliases until it reaches
    one, so an alias that points at another alias resolves correctly.

    Args:
        name: Public model name or alias supplied by the caller.

    Returns:
        The MODEL_REGISTRY key of the non-alias entry *name* resolves to.

    Raises:
        KeyError: If *name* is unknown, if an alias along the chain points at
            a missing entry, or if the aliases form a cycle. The exception
            argument is always the originally requested *name*.
    """
    visited: set[str] = set()
    current = name
    while True:
        entry = MODEL_REGISTRY.get(current)
        # Unknown key, dangling alias target, or a loop back to a seen alias.
        if entry is None or current in visited:
            raise KeyError(name)
        if "alias" not in entry:
            return current
        visited.add(current)
        current = entry["alias"]
116
+
117
+
118
def load_all_models() -> None:
    """Load every canonical model into memory and record its status.

    For each non-alias registry entry this sets the status to ``"loading"``,
    creates the per-model lock, constructs ``KittenTTS(model_id)`` and stores
    it in ``_model_cache``. A failure marks that model ``"error"`` and is
    logged with its traceback; it does not stop the remaining models from
    loading. The closing log line reports which models failed instead of
    unconditionally claiming that all are ready.
    """
    failed: list[str] = []
    for key, meta in _canonical_models().items():
        model_id = meta["id"]
        _model_status[model_id] = "loading"
        _model_lock[model_id] = threading.Lock()
        log.info("Loading %-16s (%s) …", key, model_id)
        t0 = time.perf_counter()
        try:
            _model_cache[model_id] = KittenTTS(model_id)
        except Exception:
            # Best effort: keep serving the models that did load.
            _model_status[model_id] = "error"
            failed.append(key)
            log.exception(" ✗ failed to load %s", key)
        else:
            _model_status[model_id] = "idle"
            log.info(" ✓ %s ready in %.1f s", key, time.perf_counter() - t0)

    if failed:
        log.warning(
            "Devil Studio — startup finished; models that failed to load: %s",
            ", ".join(failed),
        )
    else:
        log.info("Devil Studio — all models ready.")
134
+
135
+
136
def get_model(name: str) -> tuple[KittenTTS, str]:
    """Look up a loaded model by public name or alias.

    Returns:
        ``(model_instance, model_id)`` for the resolved model.

    Raises:
        HTTPException: 400 when *name* is not in MODEL_REGISTRY; 503 when the
            model is known but has no instance in ``_model_cache``.
    """
    try:
        canonical = _resolve_alias(name)
    except KeyError:
        valid = sorted(MODEL_REGISTRY.keys())
        message = f"Unknown model '{name}'. " + f"Valid values: {valid}"
        raise HTTPException(status_code=400, detail=message)

    model_id = MODEL_REGISTRY[canonical]["id"]
    instance = _model_cache.get(model_id)
    if instance is not None:
        return instance, model_id

    # Known model, but nothing was cached for it.
    raise HTTPException(
        status_code=503,
        detail=f"Model '{name}' is unavailable (failed to load at startup).",
    )
156
+
157
+
158
+ # ---------------------------------------------------------------------------
159
+ # System / container resource helpers
160
+ # (cgroup v2 → cgroup v1 → /proc/meminfo fallback)
161
+ # ---------------------------------------------------------------------------
162
+ def _read_file(*paths: str) -> str | None:
163
+ for path in paths:
164
+ try:
165
+ with open(path) as fh:
166
+ return fh.read().strip()
167
+ except OSError:
168
+ pass
169
+ return None
170
+
171
+
172
def _proc_mem_total_bytes() -> int:
    """Return the ``MemTotal`` figure from /proc/meminfo, converted to bytes.

    The second whitespace-separated token of the first line starting with
    ``MemTotal`` is multiplied by 1024. Returns 0 when the file cannot be
    read, is empty, or has no such line.
    """
    meminfo = _read_file("/proc/meminfo")
    if not meminfo:
        return 0
    for row in meminfo.splitlines():
        if row.startswith("MemTotal"):
            kib = int(row.split()[1])
            return kib * 1024
    return 0
179
+
180
+
181
def _proc_mem_available_bytes() -> int:
    """Return the ``MemAvailable`` figure from /proc/meminfo, in bytes.

    The second whitespace-separated token of the first line starting with
    ``MemAvailable`` is multiplied by 1024. Returns 0 when the file cannot be
    read, is empty, or has no such line.
    """
    meminfo = _read_file("/proc/meminfo")
    if not meminfo:
        return 0
    for row in meminfo.splitlines():
        if row.startswith("MemAvailable"):
            kib = int(row.split()[1])
            return kib * 1024
    return 0
188
+
189
+
190
def _container_memory() -> tuple[int, int]:
    """Return ``(used_bytes, limit_bytes)`` for this process's environment.

    Sources are tried in order: cgroup v2 files, cgroup v1 files, then
    /proc/meminfo. A source is skipped when either of its files is missing,
    empty, or not parseable as an integer.
    """
    # cgroup v2: memory.max holds a byte count or the literal "max".
    v2_limit = _read_file("/sys/fs/cgroup/memory.max")
    v2_usage = _read_file("/sys/fs/cgroup/memory.current")
    if v2_limit and v2_usage:
        try:
            if v2_limit == "max":
                # Unlimited: report total memory from /proc/meminfo instead.
                ceiling = _proc_mem_total_bytes()
            else:
                ceiling = int(v2_limit)
            return int(v2_usage), ceiling
        except ValueError:
            pass

    # cgroup v1: a limit above 2**60 is treated as "no limit".
    v1_limit = _read_file("/sys/fs/cgroup/memory/memory.limit_in_bytes")
    v1_usage = _read_file("/sys/fs/cgroup/memory/memory.usage_in_bytes")
    if v1_limit and v1_usage:
        try:
            ceiling = int(v1_limit)
            in_use = int(v1_usage)
            if ceiling > 2 ** 60:
                ceiling = _proc_mem_total_bytes()
            return in_use, ceiling
        except ValueError:
            pass

    # Last resort: /proc/meminfo, with used = MemTotal - MemAvailable.
    mem_total = _proc_mem_total_bytes()
    mem_available = _proc_mem_available_bytes()
    return mem_total - mem_available, mem_total
219
+
220
+
221
def _container_cpu_cores() -> float:
    """Return the number of CPU cores allotted by the cgroup CPU quota.

    Tries cgroup v2 (``cpu.max``, formatted ``"<quota> <period>"`` or
    ``"max <period>"``), then cgroup v1 (``cpu.cfs_quota_us`` and
    ``cpu.cfs_period_us``). When no usable quota is found, falls back to
    ``os.cpu_count()``, or 1 if that is unavailable.

    A quota is used only when both quota and period are positive, so a zero
    period can no longer raise ``ZeroDivisionError``. Unparseable or
    non-positive values fall through to the next source.
    """
    # cgroup v2
    cpu_max = _read_file("/sys/fs/cgroup/cpu.max")
    if cpu_max:
        parts = cpu_max.split()
        # A quota of "max" means unlimited, whatever the period is.
        if len(parts) == 2 and parts[0] != "max":
            try:
                quota, period = float(parts[0]), float(parts[1])
            except ValueError:
                pass
            else:
                if quota > 0 and period > 0:
                    return quota / period

    # cgroup v1 (a negative quota means unlimited)
    quota_raw = _read_file("/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_quota_us")
    period_raw = _read_file("/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_period_us")
    if quota_raw and period_raw:
        try:
            q, p = int(quota_raw), int(period_raw)
        except ValueError:
            pass
        else:
            if q > 0 and p > 0:
                return q / p

    return float(os.cpu_count() or 1)
245
+
246
+
247
def _cpu_usage_percent() -> float:
    """Estimate CPU usage (percent) from two /proc/stat samples 200 ms apart.

    Usage is ``1 - idle_delta / total_delta`` over the first line of
    /proc/stat, where idle is the fourth counter and total is the sum of all
    counters on that line, rounded to one decimal place. Returns ``-1.0``
    when a sample is unavailable, the total did not change, or any error
    occurs. The call blocks for the 200 ms between samples.
    """
    def _snapshot():
        stat = _read_file("/proc/stat")
        if not stat:
            return None
        first_line = stat.splitlines()[0]
        # Drop the leading label; keep the numeric counters.
        return [int(token) for token in first_line.split()[1:]]

    try:
        before = _snapshot()
        time.sleep(0.2)
        after = _snapshot()
        if before and after:
            total_delta = sum(after) - sum(before)
            idle_delta = after[3] - before[3]
            if total_delta:
                busy_fraction = 1 - idle_delta / total_delta
                return round(busy_fraction * 100, 1)
    except Exception:
        # Deliberate best effort: any failure is reported as "unavailable".
        pass
    return -1.0
268
+
269
+
270
def system_stats() -> dict:
    """Collect CPU and memory metrics into a JSON-serialisable dict.

    Memory figures are in MiB rounded to one decimal. ``cpu_usage_percent``
    is the string ``"unavailable"`` when the measurement returned a negative
    value. Blocks for roughly 200 ms while CPU usage is sampled.
    """
    used_mem, total_mem = _container_memory()
    cpu_cores = _container_cpu_cores()
    cpu_percent = _cpu_usage_percent()

    to_mb = lambda b: round(b / 1024 / 1024, 1)  # bytes -> MiB

    if cpu_percent >= 0:
        cpu_usage = cpu_percent
    else:
        cpu_usage = "unavailable"

    if total_mem:
        used_percent = round(used_mem / total_mem * 100, 1)
    else:
        used_percent = 0

    memory = {
        "used_mb": to_mb(used_mem),
        "total_mb": to_mb(total_mem),
        "free_mb": to_mb(max(0, total_mem - used_mem)),
        "used_percent": used_percent,
    }
    return {
        "cpu_cores_allocated": round(cpu_cores, 2),
        "cpu_usage_percent": cpu_usage,
        "memory": memory,
    }
288
+
289
+
290
+ # ---------------------------------------------------------------------------
291
+ # Audio encoding
292
+ # ---------------------------------------------------------------------------
293
+ def _encode_audio(audio: np.ndarray, fmt: str) -> bytes:
294
+ buf = io.BytesIO()
295
+ if fmt == "pcm":
296
+ buf.write((audio * 32767).astype(np.int16).tobytes())
297
+ elif fmt == "flac":
298
+ sf.write(buf, audio, SAMPLE_RATE, format="FLAC")
299
+ else:
300
+ # wav / mp3 / opus / aac — serve as WAV
301
+ # (mp3/opus/aac require ffmpeg; WAV is lossless and universally playable)
302
+ sf.write(buf, audio, SAMPLE_RATE, format="WAV", subtype="PCM_16")
303
+ return buf.getvalue()
304
+
305
+
306
+ # ---------------------------------------------------------------------------
307
+ # FastAPI app
308
+ # ---------------------------------------------------------------------------
309
# The ASGI application object. Interactive docs are served at /docs
# (Swagger UI) and /redoc.
app = FastAPI(
    version="1.0.0",
    title="Devil Studio — TTS API",
    description=(
        "OpenAI-compatible Text-to-Speech API powered by KittenTTS.\n\n"
        "All models are permanently loaded in memory for stable, low-latency responses."
    ),
    docs_url="/docs",
    redoc_url="/redoc",
)
319
+
320
+
321
@app.on_event("startup")
async def _startup() -> None:
    """Startup hook: load every canonical model into memory."""
    # Synchronous on purpose: load_all_models() blocks until each model has
    # either loaded or been marked as failed.
    load_all_models()
324
+
325
+
326
+ # ---------------------------------------------------------------------------
327
+ # Request schema
328
+ # ---------------------------------------------------------------------------
329
class SpeechRequest(BaseModel):
    # Request body for POST /v1/audio/speech.
    # (Documented with comments rather than a class docstring so the
    # generated schema stays unchanged.)

    # Public model name or alias; resolved against MODEL_REGISTRY.
    model: str = Field(
        default="tts-1-hd",
        examples=["tts-1", "tts-1-hd", "tts-1-hd-mini"],
        description=(
            "Model alias. Supported: tts-1 (nano/fastest), tts-1-hd (micro/balanced), "
            "tts-1-hd-mini (mini/best). Short aliases: nano, micro, mini."
        ),
    )

    # Required: the text to speak.
    input: str = Field(
        ...,
        description="Text to synthesise. Max ~5 000 characters recommended.",
    )

    # KittenTTS voice name, or an OpenAI voice name mapped by the handler.
    voice: str = Field(
        default="Jasper",
        examples=["Jasper", "Luna", "alloy"],
        description=(
            "Voice name. KittenTTS voices: Bella, Jasper, Luna, Bruno, Rosie, Hugo, Kiki, Leo. "
            "OpenAI voices (alloy, echo, fable, onyx, nova, shimmer, ash, coral, sage) "
            "are mapped automatically."
        ),
    )

    # Requested container; only the six listed values pass validation.
    response_format: Literal["mp3", "wav", "flac", "pcm", "opus", "aac"] = Field(
        default="wav",
        description=(
            "Output format. wav / flac / pcm are lossless and fully supported. "
            "mp3 / opus / aac are served as WAV (ffmpeg not included)."
        ),
    )

    # Playback speed multiplier, validated to the inclusive range 0.25..4.0.
    speed: float = Field(
        default=1.0,
        le=4.0,
        ge=0.25,
        description="Speech speed multiplier (0.25 – 4.0).",
    )
364
+
365
+
366
+ # ---------------------------------------------------------------------------
367
+ # Routes
368
+ # ---------------------------------------------------------------------------
369
@app.get("/health", tags=["Utility"], summary="Liveness probe")
async def health():
    # Constant payload; does not touch the models or system metrics.
    # (Kept as a comment so the OpenAPI description stays unchanged.)
    payload = {"status": "ok", "server": "Devil Studio"}
    return payload
372
+
373
+
374
@app.get("/v1/status", tags=["Status"], summary="Full server status")
async def status():
    """
    Returns:
    - All loaded models with their current status (`idle` / `running` / `loading` / `error`)
    - Available voices and OpenAI voice mappings
    - Container CPU & memory metrics
    - Server uptime
    """
    # Function-scope import keeps this handler self-contained.
    import asyncio

    uptime_s = int(time.time() - SERVER_START_TIME)
    h, rem = divmod(uptime_s, 3600)
    m, s = divmod(rem, 60)

    # One record per canonical (non-alias) model, in registry order.
    models_info = [
        {
            "name": key,
            "label": meta["label"],
            "size": meta["size"],
            "description": meta["description"],
            "model_id": meta["id"],
            "status": _model_status.get(meta["id"], "unknown"),
            "loaded": meta["id"] in _model_cache,
        }
        for key, meta in _canonical_models().items()
    ]

    aliases = {k: v["alias"] for k, v in MODEL_REGISTRY.items() if "alias" in v}

    # system_stats() sleeps ~200 ms synchronously while sampling CPU usage.
    # Run it on the default executor so the event loop keeps serving other
    # requests in the meantime.
    loop = asyncio.get_running_loop()
    system = await loop.run_in_executor(None, system_stats)

    return {
        "server": "Devil Studio",
        "version": "1.0.0",
        "uptime": f"{h:02d}:{m:02d}:{s:02d}",
        "uptime_seconds": uptime_s,
        "models": models_info,
        "aliases": aliases,
        "voices": sorted(VOICES),
        "openai_voice_map": OPENAI_VOICE_MAP,
        "system": system,
    }
413
+
414
+
415
@app.post("/v1/audio/speech", tags=["TTS"], summary="Synthesise speech (OpenAI-compatible)")
async def create_speech(req: SpeechRequest):
    """
    Drop-in replacement for `POST https://api.openai.com/v1/audio/speech`.

    **Quick curl example:**
    ```bash
    curl http://localhost:8000/v1/audio/speech \\
    -H "Content-Type: application/json" \\
    -d '{"model":"tts-1-hd","input":"Hello from Devil Studio!","voice":"Jasper"}' \\
    --output speech.wav
    ```
    """
    # Reject empty and whitespace-only input before touching any model.
    text = req.input.strip() if req.input else ""
    if not text:
        raise HTTPException(status_code=400, detail="'input' must not be empty.")

    # Resolve voice — try OpenAI map first, then pass through as-is
    voice = OPENAI_VOICE_MAP.get(req.voice.lower(), req.voice)
    if voice not in VOICES:
        raise HTTPException(
            status_code=400,
            detail=(
                f"Unknown voice '{req.voice}'. "
                f"KittenTTS voices: {sorted(VOICES)}. "
                f"OpenAI aliases: {sorted(OPENAI_VOICE_MAP.keys())}."
            ),
        )

    # Raises HTTPException (400 / 503) for unknown or unloaded models.
    tts, model_id = get_model(req.model)

    # NOTE(review): generate() runs synchronously inside this coroutine, so
    # the event loop is blocked for the whole synthesis. Confirm that this
    # serialisation is intended before moving it to a worker thread.
    _model_status[model_id] = "running"
    t0 = time.perf_counter()
    try:
        try:
            audio = tts.generate(text, voice=voice, speed=req.speed)
        except TypeError:
            # speed param not supported by this build
            audio = tts.generate(text, voice=voice)

        audio = np.squeeze(audio).astype(np.float32)
        elapsed = time.perf_counter() - t0
        log.info(
            "Synthesised %.2f s audio in %.3f s [model=%s voice=%s]",
            len(audio) / SAMPLE_RATE, elapsed, req.model, voice,
        )
    finally:
        # Always return to "idle", even when synthesis raised.
        _model_status[model_id] = "idle"

    audio_bytes = _encode_audio(audio, req.response_format)

    # mp3 / opus / aac are actually encoded as WAV, so both the filename
    # extension and the Content-Type must describe WAV. Previously the MIME
    # type followed the requested format and mislabelled the WAV payload.
    ext = "wav" if req.response_format in ("mp3", "opus", "aac") else req.response_format
    mime = FORMAT_MIME.get(ext, "audio/wav")

    return StreamingResponse(
        io.BytesIO(audio_bytes),
        media_type=mime,
        headers={
            "Content-Disposition": f'attachment; filename="speech.{ext}"',
            "X-Devil-Studio-Model": req.model,
            "X-Devil-Studio-Voice": voice,
            "X-Devil-Studio-Latency-Sec": f"{elapsed:.3f}",
        },
    )
477
+
478
+
479
+ # ---------------------------------------------------------------------------
480
+ # Entry point
481
+ # ---------------------------------------------------------------------------
482
if __name__ == "__main__":
    import uvicorn

    # Pass the application object rather than the import string "main:app".
    # That string only resolves when this file is named main.py, and it is
    # app.py, so `python app.py` failed at import. Passing the object works
    # under any filename and is supported by uvicorn for a single worker
    # without reload.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=int(os.getenv("PORT", "8000")),
        workers=1,  # single worker — all models live in one process
        log_level="info",
    )