Spaces:

ataberkkilavuzcu
/

indextts2-api

Running

App Files Files Community

ataberkkilavuzcu committed 5 days ago

Commit

be85c0f

1 Parent(s): a722545

huggingface files.

Browse files

Files changed (5) hide show

Dockerfile +22 -0
README.md +47 -10
app.py +119 -0
requirements.txt +8 -0
spaces.yaml +5 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,22 @@

+FROM python:3.10-slim
+# COQUI_TOS_AGREED=1 pre-accepts the Coqui model license terms so the XTTS v2
+# download at startup does not stop at an interactive confirmation prompt,
+# which cannot be answered inside a container. Only keep it if you have
+# actually read and accepted the XTTS v2 license.
+ENV PYTHONUNBUFFERED=1 \
+    PIP_NO_CACHE_DIR=1 \
+    HF_HOME=/data/cache \
+    COQUI_TOS_AGREED=1
+# System deps (ffmpeg required by TTS)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ffmpeg git curl ca-certificates && \
+    rm -rf /var/lib/apt/lists/*
+WORKDIR /app
+# Copy requirements first so the dependency layer is reused when only app.py changes
+COPY requirements.txt ./
+RUN pip install --upgrade pip && pip install -r requirements.txt
+COPY app.py ./
+# Default port for Spaces (the CMD below hard-codes the same value)
+ENV PORT=7860
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -1,10 +1,47 @@
----
-title: Xtts V2 Api
-emoji: 📈
-colorFrom: red
-colorTo: green
-sdk: docker
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+# XTTS v2 Hugging Face Space (FastAPI)
+This folder holds ready-to-push files for a Spaces deployment that exposes XTTS v2 via FastAPI.
+## Files
+- `app.py` – FastAPI app with `/health` and `/generate` (URL or base64 speaker input).
+- `requirements.txt` – pinned dependencies.
+- `Dockerfile` – runs uvicorn; installs ffmpeg and deps.
+- `spaces.yaml` – tells Spaces to use the Dockerfile.
+## How to deploy on Hugging Face Spaces
+1) Create a new Space (recommend `Public`, SDK: **Docker**). Name it `xtts-v2-api`.
+2) Clone it locally:
+   ```bash
+   huggingface-cli repo clone spaces/<username>/xtts-v2-api
+   cd xtts-v2-api
+   ```
+3) Copy these files into the Space repo (overwrite if prompted).
+4) Install Git LFS (one time): `git lfs install`.
+5) Commit and push:
+   ```bash
+   git add .
+   git commit -m "Add XTTS v2 FastAPI Space"
+   git push
+   ```
+6) In Space Settings → Secrets, add `SPACE_API_KEY=<your-shared-secret>` (optional but recommended).
+7) In Space Settings → Hardware, choose `T4 GPU` if available. CPU works but is slower.
+8) Wait for the build; first startup may take 2–4 minutes. Check Logs if build fails.
+## Smoke tests (after Space is live)
+- Health:
+  ```bash
+  curl -X POST https://<space>.hf.space/health -H "x-api-key: $SPACE_API_KEY"
+  ```
+- Generate (URL speaker):
+  ```bash
+  curl -X POST https://<space>.hf.space/generate \
+    -H "Content-Type: application/json" \
+    -H "x-api-key: $SPACE_API_KEY" \
+    -d '{"text":"Hello from XTTS","speaker_wav":"https://.../sample.wav"}' \
+    --output out.wav
+  ```
+## Integration notes
+- Next.js should call `/generate` with `text`, `speaker_wav` (signed Supabase URL or base64), optional `language`.
+- Set envs in Vercel: `HF_SPACES_API_URL`, `HF_SPACES_API_KEY`.
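+- On errors: unexpected failures in `/generate` return `{ "error": "message" }` with 500; authentication (401), speaker-audio (400) and request-validation failures return FastAPI's `{ "detail": ... }` body with a 4xx status.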

app.py ADDED Viewed

	@@ -0,0 +1,119 @@

+import base64
+import hmac
+import os
+import tempfile
+import uuid
+from pathlib import Path
+from typing import Optional
+import requests
+import torch
+from fastapi import Body, FastAPI, Header, HTTPException
+from fastapi.responses import FileResponse, JSONResponse, Response
+from pydantic import BaseModel, Field, HttpUrl
+from TTS.api import TTS
+# Shared secret expected in the x-api-key header; when unset, the key check is skipped.
+SPACE_API_KEY = os.getenv("SPACE_API_KEY")
+# Upper bound on the length of the `text` field accepted by /generate.
+MAX_TEXT_LENGTH = 1000
+# Language used when a request does not specify one.
+DEFAULT_LANGUAGE = "en"
+# Pick CUDA if available, otherwise fall back to CPU
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+# Load the XTTS v2 model once at import time so every request reuses it.
+# NOTE(review): whether downloaded weights survive a restart depends on the
+# Space's storage configuration - confirm before relying on it.
+try:
+    tts_model = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=DEVICE == "cuda")
+except Exception as exc:  # pragma: no cover - startup failure path
+    # Fail fast on startup: re-raise with context so the cause appears in the logs
+    raise RuntimeError(f"Failed to load XTTS v2 model: {exc}") from exc
+# ASGI application object referenced by the uvicorn command ("app:app").
+app = FastAPI(title="xtts-v2-api", version="1.0.0")
+class GenerateRequest(BaseModel):
+    text: str = Field(..., min_length=1, max_length=MAX_TEXT_LENGTH)
+    speaker_wav: str = Field(..., description="HTTPS URL or base64-encoded WAV/MP3/M4A")
+    language: Optional[str] = Field(DEFAULT_LANGUAGE, description="ISO language code, default en")
+def _require_api_key(x_api_key: Optional[str]):
+    if not SPACE_API_KEY:
+        return
+    if x_api_key != SPACE_API_KEY:
+        raise HTTPException(status_code=401, detail="Unauthorized")
+def _write_temp_audio_from_url(url: HttpUrl) -> str:
+    response = requests.get(url, stream=True, timeout=30)
+    if response.status_code >= 400:
+        raise HTTPException(status_code=400, detail=f"Could not fetch speaker audio: {response.status_code}")
+    suffix = Path(url.path).suffix or ".wav"
+    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
+        for chunk in response.iter_content(chunk_size=8192):
+            if chunk:
+                tmp.write(chunk)
+        return tmp.name
+def _write_temp_audio_from_base64(payload: str) -> str:
+    try:
+        raw = base64.b64decode(payload)
+    except Exception as exc:  # pragma: no cover - malformed base64
+        raise HTTPException(status_code=400, detail="Invalid base64 speaker_wav") from exc
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
+        tmp.write(raw)
+        return tmp.name
+def _temp_speaker_file(speaker_wav: str) -> str:
+    if speaker_wav.startswith("http://") or speaker_wav.startswith("https://"):
+        return _write_temp_audio_from_url(HttpUrl(speaker_wav))
+    return _write_temp_audio_from_base64(speaker_wav)
+@app.post("/health")
+def health(x_api_key: Optional[str] = Header(default=None)):
+    _require_api_key(x_api_key)
+    return {"status": "ok", "model": "xtts_v2", "device": DEVICE}
+@app.post("/generate")
+def generate(
+    payload: GenerateRequest = Body(...),
+    x_api_key: Optional[str] = Header(default=None),
+):
+    _require_api_key(x_api_key)
+    speaker_file = None
+    output_file = None
+    try:
+        speaker_file = _temp_speaker_file(payload.speaker_wav)
+        output_file = os.path.join(tempfile.gettempdir(), f"xtts-{uuid.uuid4()}.wav")
+        tts_model.tts_to_file(
+            text=payload.text,
+            file_path=output_file,
+            speaker_wav=speaker_file,
+            language=payload.language or DEFAULT_LANGUAGE,
+            split_sentences=True,
+            use_cuda=DEVICE == "cuda",
+        )
+        return FileResponse(output_file, media_type="audio/wav", filename="output.wav")
+    except HTTPException:
+        raise
+    except Exception as exc:  # pragma: no cover - runtime failure path
+        # Surface readable errors to client
+        return JSONResponse(status_code=500, content={"error": str(exc)})
+    finally:
+        if speaker_file and Path(speaker_file).exists():
+            Path(speaker_file).unlink(missing_ok=True)
+        if output_file and Path(output_file).exists():
+            Path(output_file).unlink(missing_ok=True)
+@app.get("/")
+def root():
+    return {"name": "xtts-v2-api", "endpoints": ["/health", "/generate"]}

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+TTS==0.22.0
+fastapi==0.104.1
+uvicorn==0.24.0
+torch==2.1.0
+torchaudio==2.1.0
+python-multipart==0.0.6
+requests==2.31.0
+numpy==1.26.4

spaces.yaml ADDED Viewed

	@@ -0,0 +1,5 @@

+title: xtts-v2-api
+sdk: docker
+dockerfile: Dockerfile
+# Quoted on purpose: an unquoted 3.10 is parsed by YAML as the float 3.1.
+python_version: "3.10"
+app_file: app.py