Spaces:

PyxiLabs
/

srv_tts_01

Sleeping

App Files Files Community

PyxiLabs committed on Feb 25

Commit

312272f

verified ·

1 Parent(s): ae52f03

Create app.py

Browse files

Files changed (1) hide show

app.py +491 -0

app.py ADDED Viewed

	@@ -0,0 +1,491 @@

+"""
+Devil Studio — OpenAI-compatible Text-to-Speech API
+Powered by KittenTTS · All models loaded permanently in memory.
+Endpoints
+---------
+POST /v1/audio/speech   — OpenAI-compatible TTS
+GET  /v1/status         — Server / model / system status
+GET  /health            — Simple health-check
+"""
+from __future__ import annotations
+import io
+import logging
+import os
+import threading
+import time
+from typing import Literal
+import numpy as np
+import soundfile as sf
+from fastapi import FastAPI, HTTPException
+from fastapi.responses import StreamingResponse
+from pydantic import BaseModel, Field
+from kittentts import KittenTTS
+# ---------------------------------------------------------------------------
+# Logging
+# ---------------------------------------------------------------------------
+# Configure the root logger once at import time: INFO level, timestamped
+# lines with the level name left-padded to 8 characters.
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s  %(levelname)-8s  %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+)
+# Module-wide logger used by every function in this file.
+log = logging.getLogger("devil-studio")
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+# Sample rate (Hz) handed to soundfile when encoding and used to compute the
+# audio duration that create_speech logs.
+# NOTE(review): assumes KittenTTS emits 24 kHz audio — confirm against the library.
+SAMPLE_RATE = 24_000
+# Wall-clock time at import; /v1/status subtracts it from time.time() for uptime.
+SERVER_START_TIME = time.time()
+# Model registry — non-alias entries are loaded into memory at startup.
+# Canonical entries carry "id" (the value passed to KittenTTS(...)) plus
+# "label" / "size" / "description", which /v1/status echoes back.
+# Alias entries carry only "alias", naming the canonical key they stand for.
+MODEL_REGISTRY: dict[str, dict] = {
+    "tts-1": {
+        "id":          "KittenML/kitten-tts-nano-0.8-fp32",
+        "label":       "Nano (15 M — Fastest)",
+        "size":        "15M",
+        "description": "Fastest, lowest latency",
+    },
+    "tts-1-hd": {
+        "id":          "KittenML/kitten-tts-micro-0.8",
+        "label":       "Micro (40 M — Balanced)",
+        "size":        "40M",
+        "description": "Balanced speed and quality",
+    },
+    "tts-1-hd-mini": {
+        "id":          "KittenML/kitten-tts-mini-0.8",
+        "label":       "Mini (80 M — Best Quality)",
+        "size":        "80M",
+        "description": "Best audio quality",
+    },
+    # Shorthand aliases
+    "nano":  {"alias": "tts-1"},
+    "micro": {"alias": "tts-1-hd"},
+    "mini":  {"alias": "tts-1-hd-mini"},
+}
+# Voice names accepted by create_speech and forwarded to tts.generate(voice=...).
+VOICES: set[str] = {"Bella", "Jasper", "Luna", "Bruno", "Rosie", "Hugo", "Kiki", "Leo"}
+# OpenAI voice name → KittenTTS voice name
+# Keys are lower-case; create_speech lower-cases the requested voice before lookup.
+# Several OpenAI names share a target (fable/coral → Rosie, nova/sage → Luna),
+# and "Leo" is reachable only by its own name.
+OPENAI_VOICE_MAP: dict[str, str] = {
+    "alloy":   "Jasper",
+    "echo":    "Hugo",
+    "fable":   "Rosie",
+    "onyx":    "Bruno",
+    "nova":    "Luna",
+    "shimmer": "Bella",
+    "ash":     "Kiki",
+    "coral":   "Rosie",
+    "sage":    "Luna",
+}
+# response_format → Content-Type header value.
+# NOTE(review): _encode_audio writes WAV bytes for mp3 / opus / aac, so the
+# MIME types listed for those three do not describe the bytes actually produced.
+FORMAT_MIME: dict[str, str] = {
+    "mp3":  "audio/mpeg",
+    "wav":  "audio/wav",
+    "flac": "audio/flac",
+    "pcm":  "audio/pcm",
+    "opus": "audio/ogg; codecs=opus",
+    "aac":  "audio/aac",
+}
+# ---------------------------------------------------------------------------
+# In-memory model cache + per-model state tracking
+# ---------------------------------------------------------------------------
+# All three dicts are keyed by the registry "id" string (not the public model name)
+# and are populated by load_all_models().
+_model_cache:  dict[str, KittenTTS]    = {}   # keyed by model_id; only successfully loaded models appear here
+_model_status: dict[str, str]          = {}   # "loading" | "idle" | "running" | "error"
+_model_lock:   dict[str, threading.Lock] = {} # one lock per model, created at load time; NOTE(review): never acquired in the code visible here
+def _canonical_models() -> dict[str, dict]:
+    """Return only non-alias entries from MODEL_REGISTRY."""
+    return {k: v for k, v in MODEL_REGISTRY.items() if "alias" not in v}
+def _resolve_alias(name: str) -> str:
+    """Follow alias chain and return the canonical model key."""
+    entry = MODEL_REGISTRY.get(name)
+    if entry is None:
+        raise KeyError(name)
+    if "alias" in entry:
+        return entry["alias"]
+    return name
+def load_all_models() -> None:
+    """Load every canonical model into RAM at startup."""
+    for key, meta in _canonical_models().items():
+        model_id = meta["id"]
+        _model_status[model_id] = "loading"
+        _model_lock[model_id]   = threading.Lock()
+        log.info("Loading %-16s  (%s) …", key, model_id)
+        t0 = time.perf_counter()
+        try:
+            _model_cache[model_id] = KittenTTS(model_id)
+            _model_status[model_id] = "idle"
+            log.info("  ✓ %s ready in %.1f s", key, time.perf_counter() - t0)
+        except Exception as exc:
+            _model_status[model_id] = "error"
+            log.error("  ✗ failed to load %s: %s", key, exc)
+    log.info("Devil Studio — all models ready.")
+def get_model(name: str) -> tuple[KittenTTS, str]:
+    """Return (model_instance, model_id) or raise HTTPException."""
+    try:
+        canonical = _resolve_alias(name)
+    except KeyError:
+        raise HTTPException(
+            status_code=400,
+            detail=(
+                f"Unknown model '{name}'. "
+                f"Valid values: {sorted(MODEL_REGISTRY.keys())}"
+            ),
+        )
+    model_id = MODEL_REGISTRY[canonical]["id"]
+    instance = _model_cache.get(model_id)
+    if instance is None:
+        raise HTTPException(
+            status_code=503,
+            detail=f"Model '{name}' is unavailable (failed to load at startup).",
+        )
+    return instance, model_id
+# ---------------------------------------------------------------------------
+# System / container resource helpers
+# (cgroup v2 → cgroup v1 → /proc/meminfo fallback)
+# ---------------------------------------------------------------------------
+def _read_file(*paths: str) -> str | None:
+    for path in paths:
+        try:
+            with open(path) as fh:
+                return fh.read().strip()
+        except OSError:
+            pass
+    return None
+def _proc_mem_total_bytes() -> int:
+    raw = _read_file("/proc/meminfo")
+    if raw:
+        for line in raw.splitlines():
+            if line.startswith("MemTotal"):
+                return int(line.split()[1]) * 1024
+    return 0
+def _proc_mem_available_bytes() -> int:
+    raw = _read_file("/proc/meminfo")
+    if raw:
+        for line in raw.splitlines():
+            if line.startswith("MemAvailable"):
+                return int(line.split()[1]) * 1024
+    return 0
+def _container_memory() -> tuple[int, int]:
+    """Return (used_bytes, limit_bytes) from cgroup or /proc/meminfo."""
+    # --- cgroup v2 ---
+    limit_raw = _read_file("/sys/fs/cgroup/memory.max")
+    usage_raw = _read_file("/sys/fs/cgroup/memory.current")
+    if limit_raw and usage_raw:
+        try:
+            limit = _proc_mem_total_bytes() if limit_raw == "max" else int(limit_raw)
+            return int(usage_raw), limit
+        except ValueError:
+            pass
+    # --- cgroup v1 ---
+    limit_raw = _read_file("/sys/fs/cgroup/memory/memory.limit_in_bytes")
+    usage_raw = _read_file("/sys/fs/cgroup/memory/memory.usage_in_bytes")
+    if limit_raw and usage_raw:
+        try:
+            limit = int(limit_raw)
+            used  = int(usage_raw)
+            if limit > 2 ** 60:          # "no limit" sentinel
+                limit = _proc_mem_total_bytes()
+            return used, limit
+        except ValueError:
+            pass
+    # --- fallback: host /proc/meminfo ---
+    total     = _proc_mem_total_bytes()
+    available = _proc_mem_available_bytes()
+    return total - available, total
+def _container_cpu_cores() -> float:
+    """Detect CPU quota from cgroup; falls back to os.cpu_count()."""
+    # cgroup v2
+    cpu_max = _read_file("/sys/fs/cgroup/cpu.max")
+    if cpu_max and cpu_max != "max 100000":
+        parts = cpu_max.split()
+        if len(parts) == 2 and parts[0] != "max":
+            try:
+                return float(parts[0]) / float(parts[1])
+            except ValueError:
+                pass
+    # cgroup v1
+    quota  = _read_file("/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_quota_us")
+    period = _read_file("/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_period_us")
+    if quota and period:
+        try:
+            q, p = int(quota), int(period)
+            if q > 0:
+                return q / p
+        except ValueError:
+            pass
+    return float(os.cpu_count() or 1)
+def _cpu_usage_percent() -> float:
+    """Measure CPU usage over a 200 ms window from /proc/stat."""
+    def read_stat():
+        raw = _read_file("/proc/stat")
+        if raw:
+            line = raw.splitlines()[0]
+            return list(map(int, line.split()[1:]))
+        return None
+    try:
+        s1 = read_stat()
+        time.sleep(0.2)
+        s2 = read_stat()
+        if s1 and s2:
+            d_total = sum(s2) - sum(s1)
+            d_idle  = s2[3]  - s1[3]
+            if d_total:
+                return round((1 - d_idle / d_total) * 100, 1)
+    except Exception:
+        pass
+    return -1.0
+def system_stats() -> dict:
+    used_mem, total_mem = _container_memory()
+    cpu_cores   = _container_cpu_cores()
+    cpu_percent = _cpu_usage_percent()
+    def mb(b: int) -> float:
+        return round(b / 1024 / 1024, 1)
+    return {
+        "cpu_cores_allocated": round(cpu_cores, 2),
+        "cpu_usage_percent":   cpu_percent if cpu_percent >= 0 else "unavailable",
+        "memory": {
+            "used_mb":      mb(used_mem),
+            "total_mb":     mb(total_mem),
+            "free_mb":      mb(max(0, total_mem - used_mem)),
+            "used_percent": round(used_mem / total_mem * 100, 1) if total_mem else 0,
+        },
+    }
+# ---------------------------------------------------------------------------
+# Audio encoding
+# ---------------------------------------------------------------------------
+def _encode_audio(audio: np.ndarray, fmt: str) -> bytes:
+    buf = io.BytesIO()
+    if fmt == "pcm":
+        buf.write((audio * 32767).astype(np.int16).tobytes())
+    elif fmt == "flac":
+        sf.write(buf, audio, SAMPLE_RATE, format="FLAC")
+    else:
+        # wav / mp3 / opus / aac — serve as WAV
+        # (mp3/opus/aac require ffmpeg; WAV is lossless and universally playable)
+        sf.write(buf, audio, SAMPLE_RATE, format="WAV", subtype="PCM_16")
+    return buf.getvalue()
+# ---------------------------------------------------------------------------
+# FastAPI app
+# ---------------------------------------------------------------------------
+# The ASGI application object. Route decorators below register handlers on it,
+# and the title/description/version appear in the generated OpenAPI docs
+# served at /docs (Swagger UI) and /redoc.
+app = FastAPI(
+    title="Devil Studio — TTS API",
+    description=(
+        "OpenAI-compatible Text-to-Speech API powered by KittenTTS.\n\n"
+        "All models are permanently loaded in memory for stable, low-latency responses."
+    ),
+    version="1.0.0",
+    docs_url="/docs",
+    redoc_url="/redoc",
+)
+# NOTE(review): recent FastAPI releases deprecate on_event in favour of
+# lifespan handlers — confirm against the installed version before migrating.
+@app.on_event("startup")
+async def _startup() -> None:
+    """Load all models before the server starts handling requests."""
+    # Synchronous call: startup does not complete until every load attempt has finished.
+    load_all_models()
+# ---------------------------------------------------------------------------
+# Request schema
+# ---------------------------------------------------------------------------
+class SpeechRequest(BaseModel):
+    # Request body for POST /v1/audio/speech.
+    # Documented with `#` comments on purpose: a class docstring would be
+    # picked up as the schema description in the generated OpenAPI output.
+    # Public model name or alias; validated against MODEL_REGISTRY by get_model().
+    model: str = Field(
+        default="tts-1-hd",
+        description=(
+            "Model alias. Supported: tts-1 (nano/fastest), tts-1-hd (micro/balanced), "
+            "tts-1-hd-mini (mini/best). Short aliases: nano, micro, mini."
+        ),
+        examples=["tts-1", "tts-1-hd", "tts-1-hd-mini"],
+    )
+    # Required. No length limit is enforced here; the handler rejects blank input.
+    input: str = Field(
+        ...,
+        description="Text to synthesise. Max ~5 000 characters recommended.",
+    )
+    # Free-form string; the handler maps OpenAI names and validates against VOICES.
+    voice: str = Field(
+        default="Jasper",
+        description=(
+            "Voice name. KittenTTS voices: Bella, Jasper, Luna, Bruno, Rosie, Hugo, Kiki, Leo. "
+            "OpenAI voices (alloy, echo, fable, onyx, nova, shimmer, ash, coral, sage) "
+            "are mapped automatically."
+        ),
+        examples=["Jasper", "Luna", "alloy"],
+    )
+    # Restricted to these six values by the Literal type; defaults to "wav".
+    response_format: Literal["mp3", "wav", "flac", "pcm", "opus", "aac"] = Field(
+        default="wav",
+        description=(
+            "Output format. wav / flac / pcm are lossless and fully supported. "
+            "mp3 / opus / aac are served as WAV (ffmpeg not included)."
+        ),
+    )
+    # Bounds are enforced by the ge / le constraints below.
+    speed: float = Field(
+        default=1.0,
+        ge=0.25,
+        le=4.0,
+        description="Speech speed multiplier (0.25 – 4.0).",
+    )
+# ---------------------------------------------------------------------------
+# Routes
+# ---------------------------------------------------------------------------
+# Liveness probe: returns a constant payload and does not inspect model state.
+@app.get("/health", tags=["Utility"], summary="Liveness probe")
+async def health():
+    return {"status": "ok", "server": "Devil Studio"}
+@app.get("/v1/status", tags=["Status"], summary="Full server status")
+async def status():
+    """
+    Returns:
+    - All loaded models with their current status (`idle` / `running` / `loading` / `error`)
+    - Available voices and OpenAI voice mappings
+    - Container CPU & memory metrics
+    - Server uptime
+    """
+    uptime_s    = int(time.time() - SERVER_START_TIME)
+    h, rem      = divmod(uptime_s, 3600)
+    m, s        = divmod(rem, 60)
+    models_info = []
+    for key, meta in _canonical_models().items():
+        model_id = meta["id"]
+        models_info.append({
+            "name":        key,
+            "label":       meta["label"],
+            "size":        meta["size"],
+            "description": meta["description"],
+            "model_id":    model_id,
+            "status":      _model_status.get(model_id, "unknown"),
+            "loaded":      model_id in _model_cache,
+        })
+    aliases = {k: v["alias"] for k, v in MODEL_REGISTRY.items() if "alias" in v}
+    return {
+        "server":         "Devil Studio",
+        "version":        "1.0.0",
+        "uptime":         f"{h:02d}:{m:02d}:{s:02d}",
+        "uptime_seconds": uptime_s,
+        "models":         models_info,
+        "aliases":        aliases,
+        "voices":         sorted(VOICES),
+        "openai_voice_map": OPENAI_VOICE_MAP,
+        "system":         system_stats(),
+    }
+@app.post("/v1/audio/speech", tags=["TTS"], summary="Synthesise speech (OpenAI-compatible)")
+async def create_speech(req: SpeechRequest):
+    """
+    Drop-in replacement for `POST https://api.openai.com/v1/audio/speech`.
+    **Quick curl example:**
+    ```bash
+    curl http://localhost:8000/v1/audio/speech \\
+      -H "Content-Type: application/json" \\
+      -d '{"model":"tts-1-hd","input":"Hello from Devil Studio!","voice":"Jasper"}' \\
+      --output speech.wav
+    ```
+    """
+    if not req.input or not req.input.strip():
+        raise HTTPException(status_code=400, detail="'input' must not be empty.")
+    # Resolve voice — try OpenAI map first, then pass through as-is
+    voice = OPENAI_VOICE_MAP.get(req.voice.lower(), req.voice)
+    if voice not in VOICES:
+        raise HTTPException(
+            status_code=400,
+            detail=(
+                f"Unknown voice '{req.voice}'. "
+                f"KittenTTS voices: {sorted(VOICES)}. "
+                f"OpenAI aliases: {sorted(OPENAI_VOICE_MAP.keys())}."
+            ),
+        )
+    tts, model_id = get_model(req.model)
+    _model_status[model_id] = "running"
+    t0 = time.perf_counter()
+    try:
+        try:
+            audio = tts.generate(req.input.strip(), voice=voice, speed=req.speed)
+        except TypeError:
+            # speed param not supported by this build
+            audio = tts.generate(req.input.strip(), voice=voice)
+        audio   = np.squeeze(audio).astype(np.float32)
+        elapsed = time.perf_counter() - t0
+        log.info(
+            "Synthesised %.2f s audio in %.3f s  [model=%s  voice=%s]",
+            len(audio) / SAMPLE_RATE, elapsed, req.model, voice,
+        )
+    finally:
+        _model_status[model_id] = "idle"
+    audio_bytes = _encode_audio(audio, req.response_format)
+    ext  = "wav" if req.response_format in ("mp3", "opus", "aac") else req.response_format
+    mime = FORMAT_MIME.get(req.response_format, "audio/wav")
+    return StreamingResponse(
+        io.BytesIO(audio_bytes),
+        media_type=mime,
+        headers={
+            "Content-Disposition":        f'attachment; filename="speech.{ext}"',
+            "X-Devil-Studio-Model":       req.model,
+            "X-Devil-Studio-Voice":       voice,
+            "X-Devil-Studio-Latency-Sec": f"{elapsed:.3f}",
+        },
+    )
+# ---------------------------------------------------------------------------
+# Entry point
+# ---------------------------------------------------------------------------
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(
+        "main:app",
+        host="0.0.0.0",
+        port=int(os.getenv("PORT", "8000")),
+        workers=1,       # single worker — all models live in one process
+        log_level="info",
+    )