# (Hugging Face Spaces status banner — extraction artifact, not source code.)
| """ | |
| Devil Studio — OpenAI-compatible Text-to-Speech API | |
| Endpoints | |
| --------- | |
| POST /v1/audio/speech — OpenAI-compatible TTS | |
| GET /v1/status — Server / model / system status | |
| GET /health — Simple health-check | |
| """ | |
| from __future__ import annotations | |
| import io | |
| import logging | |
| import os | |
| import threading | |
| import time | |
| from typing import Literal | |
| import numpy as np | |
| import soundfile as sf | |
| from fastapi import FastAPI, HTTPException | |
| from fastapi.responses import HTMLResponse, StreamingResponse | |
| from pydantic import BaseModel, Field | |
| from kittentts import KittenTTS | |
# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------
# Configure the root logger once at import time; handlers/format apply
# process-wide.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)-8s %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
# Module-level logger used throughout this file.
log = logging.getLogger("devil-studio")
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# Output sample rate used when encoding audio (KittenTTS output is written
# at this rate by _encode_audio).
SAMPLE_RATE = 24_000
# Captured at import time; /v1/status reports uptime relative to this.
SERVER_START_TIME = time.time()
# Model registry — non-alias entries are loaded into memory at startup.
# Keys mirror OpenAI model names; entries carrying only an "alias" key
# redirect to a canonical entry (resolved by _resolve_alias).
MODEL_REGISTRY: dict[str, dict] = {
    "tts-1": {
        "id": "KittenML/kitten-tts-nano-0.8-fp32",
        "label": "Nano (15 M — Fastest)",
        "size": "15M",
        "description": "Fastest, lowest latency",
    },
    "tts-1-hd": {
        "id": "KittenML/kitten-tts-micro-0.8",
        "label": "Micro (40 M — Balanced)",
        "size": "40M",
        "description": "Balanced speed and quality",
    },
    "tts-1-hd-mini": {
        "id": "KittenML/kitten-tts-mini-0.8",
        "label": "Mini (80 M — Best Quality)",
        "size": "80M",
        "description": "Best audio quality",
    },
    # Shorthand aliases
    "nano": {"alias": "tts-1"},
    "micro": {"alias": "tts-1-hd"},
    "mini": {"alias": "tts-1-hd-mini"},
}
# Voice names accepted verbatim by the synthesis endpoint.
VOICES: set[str] = {"Bella", "Jasper", "Luna", "Bruno", "Rosie", "Hugo", "Kiki", "Leo"}
# OpenAI voice name → KittenTTS voice name
OPENAI_VOICE_MAP: dict[str, str] = {
    "alloy": "Jasper",
    "echo": "Hugo",
    "fable": "Rosie",
    "onyx": "Bruno",
    "nova": "Luna",
    "shimmer": "Bella",
    "ash": "Kiki",
    "coral": "Rosie",
    "sage": "Luna",
}
# response_format → Content-Type header. Note: mp3/opus/aac are advertised
# here but actually served as WAV bytes (see _encode_audio).
FORMAT_MIME: dict[str, str] = {
    "mp3": "audio/mpeg",
    "wav": "audio/wav",
    "flac": "audio/flac",
    "pcm": "audio/pcm",
    "opus": "audio/ogg; codecs=opus",
    "aac": "audio/aac",
}
# ---------------------------------------------------------------------------
# In-memory model cache + per-model state tracking
# ---------------------------------------------------------------------------
_model_cache: dict[str, KittenTTS] = {}  # keyed by model_id
_model_status: dict[str, str] = {}  # "loading" | "idle" | "running" | "error"
_model_lock: dict[str, threading.Lock] = {}  # one lock per model for thread-safe status writes
def _canonical_models() -> dict[str, dict]:
    """Return only non-alias entries from MODEL_REGISTRY."""
    canonical: dict[str, dict] = {}
    for name, entry in MODEL_REGISTRY.items():
        if "alias" in entry:
            continue
        canonical[name] = entry
    return canonical
def _resolve_alias(name: str) -> str:
    """Follow alias chain and return the canonical model key.

    Raises:
        KeyError: if *name* is not present in MODEL_REGISTRY.
    """
    # Direct indexing raises KeyError(name) for unknown keys, matching the
    # explicit raise in the long-hand form.
    entry = MODEL_REGISTRY[name]
    # Alias entries point at their canonical key; canonical entries resolve
    # to themselves.
    return entry.get("alias", name)
def load_all_models() -> None:
    """Load every canonical model into RAM at startup.

    A failed load is logged and recorded as status "error" rather than
    raised, so one broken model does not prevent the server from starting.
    Fix: the final log line previously claimed "all models ready" even when
    some loads failed; it now reports an accurate summary.
    """
    failed: list[str] = []
    for key, meta in _canonical_models().items():
        model_id = meta["id"]
        _model_status[model_id] = "loading"
        _model_lock[model_id] = threading.Lock()
        log.info("Loading %-16s (%s) …", key, model_id)
        t0 = time.perf_counter()
        try:
            _model_cache[model_id] = KittenTTS(model_id)
        except Exception as exc:
            _model_status[model_id] = "error"
            failed.append(key)
            log.error("  ✗ failed to load %s: %s", key, exc)
        else:
            _model_status[model_id] = "idle"
            log.info("  ✓ %s ready in %.1f s", key, time.perf_counter() - t0)
    if failed:
        log.warning(
            "Devil Studio — started with %d model(s) unavailable: %s",
            len(failed), ", ".join(failed),
        )
    else:
        log.info("Devil Studio — all models ready.")
def get_model(name: str) -> tuple[KittenTTS, str]:
    """Return (model_instance, model_id) or raise HTTPException.

    Raises:
        HTTPException(400): unknown model name/alias.
        HTTPException(503): known model that failed to load at startup.
    """
    try:
        canonical = _resolve_alias(name)
    except KeyError:
        valid = sorted(MODEL_REGISTRY.keys())
        raise HTTPException(
            status_code=400,
            detail=f"Unknown model '{name}'. Valid values: {valid}",
        )
    model_id = MODEL_REGISTRY[canonical]["id"]
    # EAFP: a missing cache entry means the model errored during startup.
    try:
        return _model_cache[model_id], model_id
    except KeyError:
        raise HTTPException(
            status_code=503,
            detail=f"Model '{name}' is unavailable (failed to load at startup).",
        )
| # --------------------------------------------------------------------------- | |
| # System / container resource helpers | |
| # (cgroup v2 → cgroup v1 → /proc/meminfo fallback) | |
| # --------------------------------------------------------------------------- | |
| def _read_file(*paths: str) -> str | None: | |
| for path in paths: | |
| try: | |
| with open(path) as fh: | |
| return fh.read().strip() | |
| except OSError: | |
| pass | |
| return None | |
def _meminfo_bytes(field: str) -> int:
    """Return the value of *field* from /proc/meminfo, converted to bytes.

    /proc/meminfo reports sizes in kB; returns 0 when the file or field is
    unavailable (e.g. non-Linux hosts). Shared by the two readers below,
    which previously duplicated this parsing loop line for line.
    """
    raw = _read_file("/proc/meminfo")
    if raw:
        for line in raw.splitlines():
            if line.startswith(field):
                return int(line.split()[1]) * 1024
    return 0


def _proc_mem_total_bytes() -> int:
    """Total physical memory visible to the host, in bytes (0 if unknown)."""
    return _meminfo_bytes("MemTotal")


def _proc_mem_available_bytes() -> int:
    """Memory available without swapping, in bytes (0 if unknown)."""
    return _meminfo_bytes("MemAvailable")
def _container_memory() -> tuple[int, int]:
    """Return (used_bytes, limit_bytes) from cgroup or /proc/meminfo.

    Probes cgroup v2 first, then cgroup v1, then falls back to host-wide
    figures derived from /proc/meminfo. All values are in bytes.
    """
    # --- cgroup v2 ---
    limit_raw = _read_file("/sys/fs/cgroup/memory.max")
    usage_raw = _read_file("/sys/fs/cgroup/memory.current")
    if limit_raw and usage_raw:
        try:
            # "max" means the cgroup has no memory limit; substitute the
            # host's total physical memory so callers get a usable number.
            limit = _proc_mem_total_bytes() if limit_raw == "max" else int(limit_raw)
            return int(usage_raw), limit
        except ValueError:
            pass  # malformed cgroup file — fall through to cgroup v1
    # --- cgroup v1 ---
    limit_raw = _read_file("/sys/fs/cgroup/memory/memory.limit_in_bytes")
    usage_raw = _read_file("/sys/fs/cgroup/memory/memory.usage_in_bytes")
    if limit_raw and usage_raw:
        try:
            limit = int(limit_raw)
            used = int(usage_raw)
            if limit > 2 ** 60:  # "no limit" sentinel
                limit = _proc_mem_total_bytes()
            return used, limit
        except ValueError:
            pass  # malformed cgroup file — fall through to /proc/meminfo
    # --- fallback: host /proc/meminfo ---
    # Best effort on hosts without cgroup files; both helpers return 0 when
    # /proc/meminfo is unavailable, so this may yield (0, 0).
    total = _proc_mem_total_bytes()
    available = _proc_mem_available_bytes()
    return total - available, total
def _container_cpu_cores() -> float:
    """Detect CPU quota from cgroup; falls back to os.cpu_count().

    Returns a (possibly fractional) core count, e.g. 0.5 for a half-core
    quota.
    """
    # cgroup v2: file format is "<quota> <period>" or "max <period>".
    cpu_max = _read_file("/sys/fs/cgroup/cpu.max")
    # NOTE(review): the "max 100000" comparison skips only the default
    # unlimited form; "max <other-period>" still enters the branch but is
    # then rejected by the parts[0] != "max" check below — confirm intent.
    if cpu_max and cpu_max != "max 100000":
        parts = cpu_max.split()
        if len(parts) == 2 and parts[0] != "max":
            try:
                return float(parts[0]) / float(parts[1])
            except ValueError:
                pass  # malformed file — fall through to cgroup v1
    # cgroup v1: quota of -1 (or any non-positive value) means unlimited.
    quota = _read_file("/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_quota_us")
    period = _read_file("/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_period_us")
    if quota and period:
        try:
            q, p = int(quota), int(period)
            if q > 0:
                return q / p
        except ValueError:
            pass  # malformed file — fall through to os.cpu_count()
    # Fallback: logical CPU count of the host (>= 1).
    return float(os.cpu_count() or 1)
def _cpu_usage_percent() -> float:
    """Measure CPU usage over a 200 ms window from /proc/stat.

    Returns -1.0 when /proc/stat is unavailable or unparseable (callers
    translate that into "unavailable").
    """
    def _snapshot() -> list[int] | None:
        # First line of /proc/stat is the aggregate "cpu" row; the fields
        # after the label are cumulative jiffy counters, index 3 being idle.
        raw = _read_file("/proc/stat")
        if not raw:
            return None
        return [int(field) for field in raw.splitlines()[0].split()[1:]]

    try:
        before = _snapshot()
        time.sleep(0.2)
        after = _snapshot()
        if before and after:
            delta_total = sum(after) - sum(before)
            delta_idle = after[3] - before[3]
            if delta_total:
                return round((1 - delta_idle / delta_total) * 100, 1)
    except Exception:
        pass
    return -1.0
def system_stats() -> dict:
    """Assemble the CPU / memory snapshot reported by /v1/status."""
    used_mem, total_mem = _container_memory()
    cores = _container_cpu_cores()
    usage = _cpu_usage_percent()

    def _to_mb(n_bytes: int) -> float:
        # Bytes → mebibytes, one decimal place.
        return round(n_bytes / 1024 / 1024, 1)

    # total_mem may be 0 on hosts where detection failed — avoid div-by-zero.
    mem_percent = round(used_mem / total_mem * 100, 1) if total_mem else 0
    return {
        "cpu_cores_allocated": round(cores, 2),
        "cpu_usage_percent": usage if usage >= 0 else "unavailable",
        "memory": {
            "used_mb": _to_mb(used_mem),
            "total_mb": _to_mb(total_mem),
            "free_mb": _to_mb(max(0, total_mem - used_mem)),
            "used_percent": mem_percent,
        },
    }
| # --------------------------------------------------------------------------- | |
| # Audio encoding | |
| # --------------------------------------------------------------------------- | |
| def _encode_audio(audio: np.ndarray, fmt: str) -> bytes: | |
| buf = io.BytesIO() | |
| if fmt == "pcm": | |
| buf.write((audio * 32767).astype(np.int16).tobytes()) | |
| elif fmt == "flac": | |
| sf.write(buf, audio, SAMPLE_RATE, format="FLAC") | |
| else: | |
| # wav / mp3 / opus / aac — serve as WAV | |
| # (mp3/opus/aac require ffmpeg; WAV is lossless and universally playable) | |
| sf.write(buf, audio, SAMPLE_RATE, format="WAV", subtype="PCM_16") | |
| return buf.getvalue() | |
| # --------------------------------------------------------------------------- | |
| # FastAPI app | |
| # --------------------------------------------------------------------------- | |
# Application instance; route handlers below attach to this object.
app = FastAPI(
    title="Devil Studio — TTS API",
    description=(
        "OpenAI-compatible Text-to-Speech API powered by KittenTTS.\n\n"
        "All models are permanently loaded in memory for stable, low-latency responses."
    ),
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
)
@app.on_event("startup")
async def _startup() -> None:
    """Load every TTS model into memory when the server boots.

    Fix: the function was defined but never registered with FastAPI, so
    load_all_models() never ran and _model_cache stayed empty — every
    synthesis request would have failed with 503.
    """
    load_all_models()
| # --------------------------------------------------------------------------- | |
| # Request schema | |
| # --------------------------------------------------------------------------- | |
class SpeechRequest(BaseModel):
    """Request body for the OpenAI-compatible speech-synthesis endpoint."""
    model: str = Field(
        default="tts-1-hd",
        description=(
            "Model alias. Supported: tts-1 (nano/fastest), tts-1-hd (micro/balanced), "
            "tts-1-hd-mini (mini/best). Short aliases: nano, micro, mini."
        ),
        examples=["tts-1", "tts-1-hd", "tts-1-hd-mini"],
    )
    input: str = Field(
        ...,
        description="Text to synthesise. Max ~5 000 characters recommended.",
    )
    voice: str = Field(
        default="Jasper",
        description=(
            "Voice name. KittenTTS voices: Bella, Jasper, Luna, Bruno, Rosie, Hugo, Kiki, Leo. "
            "OpenAI voices (alloy, echo, fable, onyx, nova, shimmer, ash, coral, sage) "
            "are mapped automatically."
        ),
        examples=["Jasper", "Luna", "alloy"],
    )
    response_format: Literal["mp3", "wav", "flac", "pcm", "opus", "aac"] = Field(
        default="wav",
        description=(
            "Output format. wav / flac / pcm are lossless and fully supported. "
            "mp3 / opus / aac are served as WAV (ffmpeg not included)."
        ),
    )
    speed: float = Field(
        default=1.0,
        ge=0.25,
        le=4.0,
        description="Speech speed multiplier (0.25 – 4.0).",
    )
| # --------------------------------------------------------------------------- | |
| # Routes | |
| # --------------------------------------------------------------------------- | |
@app.get("/health")
async def health():
    """Lightweight liveness probe returning a static payload.

    Fix: the handler was never registered as a route, so GET /health — as
    promised by the module docstring — returned 404.
    """
    return {"status": "ok", "server": "Devil Studio"}
@app.get("/v1/status")
async def status():
    """
    Server / model / system status endpoint.

    Fix: the handler was never registered as a route, so GET /v1/status —
    as promised by the module docstring — returned 404.

    Returns:
        - All loaded models with their current status (`idle` / `running` / `loading` / `error`)
        - Available voices and OpenAI voice mappings
        - Container CPU & memory metrics
        - Server uptime
    """
    uptime_s = int(time.time() - SERVER_START_TIME)
    h, rem = divmod(uptime_s, 3600)
    m, s = divmod(rem, 60)
    # One entry per canonical model: registry metadata plus live load state.
    models_info = []
    for key, meta in _canonical_models().items():
        model_id = meta["id"]
        models_info.append({
            "name": key,
            "label": meta["label"],
            "size": meta["size"],
            "description": meta["description"],
            "model_id": model_id,
            "status": _model_status.get(model_id, "unknown"),
            "loaded": model_id in _model_cache,
        })
    aliases = {k: v["alias"] for k, v in MODEL_REGISTRY.items() if "alias" in v}
    return {
        "server": "Devil Studio",
        "version": "1.0.0",
        "uptime": f"{h:02d}:{m:02d}:{s:02d}",
        "uptime_seconds": uptime_s,
        "models": models_info,
        "aliases": aliases,
        "voices": sorted(VOICES),
        "openai_voice_map": OPENAI_VOICE_MAP,
        "system": system_stats(),
    }
@app.post("/v1/audio/speech")
async def create_speech(req: SpeechRequest):
    """
    Drop-in replacement for `POST https://api.openai.com/v1/audio/speech`.

    Fix: the handler was never registered as a route, so the endpoint — as
    promised by the module docstring — returned 404.

    **Quick curl example:**
    ```bash
    curl http://localhost:8000/v1/audio/speech \\
      -H "Content-Type: application/json" \\
      -d '{"model":"tts-1-hd","input":"Hello from Devil Studio!","voice":"Jasper"}' \\
      --output speech.wav
    ```
    """
    if not req.input or not req.input.strip():
        raise HTTPException(status_code=400, detail="'input' must not be empty.")
    # Resolve voice — try OpenAI map first, then pass through as-is
    voice = OPENAI_VOICE_MAP.get(req.voice.lower(), req.voice)
    if voice not in VOICES:
        raise HTTPException(
            status_code=400,
            detail=(
                f"Unknown voice '{req.voice}'. "
                f"KittenTTS voices: {sorted(VOICES)}. "
                f"OpenAI aliases: {sorted(OPENAI_VOICE_MAP.keys())}."
            ),
        )
    tts, model_id = get_model(req.model)
    # NOTE(review): status writes are unsynchronised even though _model_lock
    # holds one lock per model — concurrent requests can interleave these
    # flags; harmless for display, but confirm if stronger guarantees needed.
    _model_status[model_id] = "running"
    t0 = time.perf_counter()
    try:
        try:
            audio = tts.generate(req.input.strip(), voice=voice, speed=req.speed)
        except TypeError:
            # speed param not supported by this build
            audio = tts.generate(req.input.strip(), voice=voice)
        # Flatten to a mono float32 vector for encoding.
        audio = np.squeeze(audio).astype(np.float32)
        elapsed = time.perf_counter() - t0
        log.info(
            "Synthesised %.2f s audio in %.3f s [model=%s voice=%s]",
            len(audio) / SAMPLE_RATE, elapsed, req.model, voice,
        )
    finally:
        # Always restore the display status, even if generation raised.
        _model_status[model_id] = "idle"
    audio_bytes = _encode_audio(audio, req.response_format)
    # mp3/opus/aac are actually served as WAV (no ffmpeg), so name the file
    # accordingly; the MIME header still reflects the requested format.
    ext = "wav" if req.response_format in ("mp3", "opus", "aac") else req.response_format
    mime = FORMAT_MIME.get(req.response_format, "audio/wav")
    return StreamingResponse(
        io.BytesIO(audio_bytes),
        media_type=mime,
        headers={
            "Content-Disposition": f'attachment; filename="speech.{ext}"',
            "X-Devil-Studio-Model": req.model,
            "X-Devil-Studio-Voice": voice,
            "X-Devil-Studio-Latency-Sec": f"{elapsed:.3f}",
        },
    )
| # --------------------------------------------------------------------------- | |
| # Entry point | |
| # --------------------------------------------------------------------------- | |
# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import uvicorn
    # NOTE(review): workers=2 forks two independent processes, each loading
    # every model into its own memory (doubling RAM use) — confirm intended.
    # The "main:app" import string also assumes this file is named main.py.
    uvicorn.run(
        "main:app",
        host="0.0.0.0",
        port=int(os.getenv("PORT", "7860")),  # default matches HF Spaces' port
        workers=2,
        log_level="info",
    )