ataberkkilavuzcu committed on
Commit
be85c0f
·
1 Parent(s): a722545

huggingface files.

Browse files
Files changed (5) hide show
  1. Dockerfile +22 -0
  2. README.md +47 -10
  3. app.py +119 -0
  4. requirements.txt +8 -0
  5. spaces.yaml +5 -0
Dockerfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ ENV PYTHONUNBUFFERED=1 \
4
+ PIP_NO_CACHE_DIR=1 \
5
+ HF_HOME=/data/cache
6
+
7
+ # System deps (ffmpeg required by TTS)
8
+ RUN apt-get update && apt-get install -y --no-install-recommends \
9
+ ffmpeg git curl ca-certificates && \
10
+ rm -rf /var/lib/apt/lists/*
11
+
12
+ WORKDIR /app
13
+
14
+ COPY requirements.txt ./
15
+ RUN pip install --upgrade pip && pip install -r requirements.txt
16
+
17
+ COPY app.py ./
18
+
19
+ # Default port for Spaces
20
+ ENV PORT=7860
21
+
22
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,47 @@
1
- ---
2
- title: Xtts V2 Api
3
- emoji: 📈
4
- colorFrom: red
5
- colorTo: green
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # XTTS v2 Hugging Face Space (FastAPI)
2
+
3
+ This folder holds ready-to-push files for a Spaces deployment that exposes XTTS v2 via FastAPI.
4
+
5
+ ## Files
6
+ - `app.py` – FastAPI app with `/health` and `/generate` (URL or base64 speaker input).
7
+ - `requirements.txt` – pinned dependencies.
8
+ - `Dockerfile` – runs uvicorn; installs ffmpeg and deps.
9
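+ - `spaces.yaml` – Space metadata (title, SDK, Dockerfile). Note: Hugging Face Spaces reads its configuration from the YAML front matter at the top of `README.md`, so keep `sdk: docker` declared there as well.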
10
+
11
+ ## How to deploy on Hugging Face Spaces
12
+ 1) Create a new Space (recommend `Public`, SDK: **Docker**). Name it `xtts-v2-api`.
13
+ 2) Clone it locally:
14
+ ```bash
15
+ git clone https://huggingface.co/spaces/<username>/xtts-v2-api
16
+ cd xtts-v2-api
17
+ ```
18
+ 3) Copy these files into the Space repo (overwrite if prompted).
19
+ 4) Install Git LFS (one time): `git lfs install`.
20
+ 5) Commit and push:
21
+ ```bash
22
+ git add .
23
+ git commit -m "Add XTTS v2 FastAPI Space"
24
+ git push
25
+ ```
26
+ 6) In Space Settings → Secrets, add `SPACE_API_KEY=<your-shared-secret>` (optional but recommended).
27
+ 7) In Space Settings → Hardware, choose `T4 GPU` if available. CPU works but is slower.
28
+ 8) Wait for the build; first startup may take 2–4 minutes. Check Logs if build fails.
29
+
30
+ ## Smoke tests (after Space is live)
31
+ - Health:
32
+ ```bash
33
+ curl -X POST https://<space>.hf.space/health -H "x-api-key: $SPACE_API_KEY"
34
+ ```
35
+ - Generate (URL speaker):
36
+ ```bash
37
+ curl -X POST https://<space>.hf.space/generate \
38
+ -H "Content-Type: application/json" \
39
+ -H "x-api-key: $SPACE_API_KEY" \
40
+ -d '{"text":"Hello from XTTS","speaker_wav":"https://.../sample.wav"}' \
41
+ --output out.wav
42
+ ```
43
+
44
+ ## Integration notes
45
+ - Next.js should call `/generate` with `text`, `speaker_wav` (signed Supabase URL or base64), optional `language`.
46
+ - Set envs in Vercel: `HF_SPACES_API_URL`, `HF_SPACES_API_KEY`.
47
+ - On errors, the API returns `{ "detail": "message" }` for 4xx responses (validation, auth, or speaker-audio problems) and `{ "error": "message" }` for 500 responses.
app.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import os
3
+ import tempfile
4
+ import uuid
5
+ from pathlib import Path
6
+ from typing import Optional
7
+
8
+ import requests
9
+ import torch
10
+ from fastapi import Body, FastAPI, Header, HTTPException
11
+ from fastapi.responses import FileResponse, JSONResponse
12
+ from pydantic import BaseModel, Field, HttpUrl
13
+ from TTS.api import TTS
14
+
15
# Optional shared secret; when it is unset or empty, requests are not authenticated.
SPACE_API_KEY = os.environ.get("SPACE_API_KEY")
# Upper bound on the length of the `text` field of a generate request.
MAX_TEXT_LENGTH = 1000
# Language code used when a request does not supply one.
DEFAULT_LANGUAGE = "en"

# Run inference on the GPU when torch can see one, otherwise on the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# The XTTS v2 model is loaded once, at import time, and shared by every request.
# NOTE(review): the weights are presumably cached between restarts by the
# hosting platform's persistent storage - confirm for the target deployment.
try:
    tts_model = TTS(
        "tts_models/multilingual/multi-dataset/xtts_v2",
        gpu=(DEVICE == "cuda"),
    )
except Exception as load_error:  # pragma: no cover - startup failure path
    # Abort startup immediately so the failure shows up in the service logs.
    raise RuntimeError(f"Failed to load XTTS v2 model: {load_error}") from load_error

app = FastAPI(title="xtts-v2-api", version="1.0.0")
31
+
32
+
33
class GenerateRequest(BaseModel):
    # Request body accepted by the /generate endpoint.
    # (Plain comments are used instead of a docstring so the generated
    # OpenAPI schema stays exactly as it was.)

    # Text to synthesize: non-empty, at most MAX_TEXT_LENGTH characters.
    text: str = Field(
        default=...,
        min_length=1,
        max_length=MAX_TEXT_LENGTH,
    )
    # Reference voice sample, either as a URL or as inline base64 data.
    speaker_wav: str = Field(
        default=...,
        description="HTTPS URL or base64-encoded WAV/MP3/M4A",
    )
    # Language code; falls back to DEFAULT_LANGUAGE when omitted.
    language: Optional[str] = Field(
        default=DEFAULT_LANGUAGE,
        description="ISO language code, default en",
    )
37
+
38
+
39
def _require_api_key(x_api_key: Optional[str]):
    """Reject the request unless it carries the configured API key.

    Authentication is disabled when ``SPACE_API_KEY`` is unset or empty; in
    that case every request is accepted.

    Args:
        x_api_key: Value of the ``x-api-key`` request header, or ``None``
            when the header is absent.

    Raises:
        HTTPException: 401 when a key is configured and the supplied value
            is missing or does not match.
    """
    import hmac

    if not SPACE_API_KEY:
        return
    # Compare as bytes in constant time: a plain ``!=`` can leak how many
    # leading characters matched, and compare_digest rejects non-ASCII str.
    supplied = (x_api_key or "").encode("utf-8")
    expected = SPACE_API_KEY.encode("utf-8")
    if not hmac.compare_digest(supplied, expected):
        raise HTTPException(status_code=401, detail="Unauthorized")
44
+
45
+
46
def _write_temp_audio_from_url(url: HttpUrl) -> str:
    """Download the speaker reference audio at *url* into a temporary file.

    Args:
        url: Location of the audio. Anything whose ``str()`` is the URL is
            accepted (a plain string or a URL object).

    Returns:
        Path of the temporary file. It is created with ``delete=False``, so
        the caller is responsible for removing it.

    Raises:
        HTTPException: 400 when the server answers with a status >= 400 or
            the request itself fails (connection error, timeout, ...).
    """
    from urllib.parse import urlparse

    target = str(url)
    # Keep the remote file's extension for the temp file; default to .wav.
    suffix = Path(urlparse(target).path).suffix or ".wav"
    tmp_path = None
    try:
        # The context manager releases the streamed connection on every exit path.
        with requests.get(target, stream=True, timeout=30) as response:
            if response.status_code >= 400:
                raise HTTPException(status_code=400, detail=f"Could not fetch speaker audio: {response.status_code}")
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
                tmp_path = tmp.name
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        tmp.write(chunk)
        return tmp_path
    except requests.RequestException as exc:
        # Network-level failure: drop any partial download and report it the
        # same way as a bad HTTP status.
        if tmp_path is not None:
            Path(tmp_path).unlink(missing_ok=True)
        raise HTTPException(status_code=400, detail=f"Could not fetch speaker audio: {exc}") from exc
    except Exception:
        # The caller never learns the temp path when we fail, so clean up here.
        if tmp_path is not None:
            Path(tmp_path).unlink(missing_ok=True)
        raise
56
+
57
+
58
def _write_temp_audio_from_base64(payload: str) -> str:
    """Decode a base64 speaker sample and store it in a temporary ``.wav`` file.

    Both bare base64 text and ``data:`` URIs (for example
    ``data:audio/wav;base64,<data>``) are accepted; for a data URI only the
    part after the first comma is decoded.

    Args:
        payload: Base64 text, optionally wrapped in a ``data:`` URI.

    Returns:
        Path of the temporary file. It is created with ``delete=False``, so
        the caller is responsible for removing it.

    Raises:
        HTTPException: 400 when the payload is not valid base64 or decodes
            to zero bytes.
    """
    encoded = payload.strip()
    if encoded.startswith("data:"):
        # Everything up to the first comma is the media-type header; decoding
        # it together with the data would corrupt the audio.
        _, _, encoded = encoded.partition(",")
    try:
        raw = base64.b64decode(encoded)
    except ValueError as exc:  # pragma: no cover - malformed base64 (binascii.Error is a ValueError)
        raise HTTPException(status_code=400, detail="Invalid base64 speaker_wav") from exc
    if not raw:
        # Nothing decoded, so there is no audio to write.
        raise HTTPException(status_code=400, detail="Invalid base64 speaker_wav")
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        tmp.write(raw)
    return tmp.name
66
+
67
+
68
def _temp_speaker_file(speaker_wav: str) -> str:
    """Materialize the speaker sample as a temporary file and return its path.

    Values beginning with ``http://`` or ``https://`` are downloaded; any
    other value is treated as base64-encoded audio.
    """
    looks_like_url = speaker_wav.startswith(("http://", "https://"))
    if not looks_like_url:
        return _write_temp_audio_from_base64(speaker_wav)
    return _write_temp_audio_from_url(HttpUrl(speaker_wav))
72
+
73
+
74
@app.post("/health")
def health(x_api_key: Optional[str] = Header(default=None)):
    # Liveness probe: checks the API key, then reports the model name and the
    # device inference runs on. (Comments rather than a docstring, so the
    # generated OpenAPI description is unchanged.)
    _require_api_key(x_api_key)
    status_report = {
        "status": "ok",
        "model": "xtts_v2",
        "device": DEVICE,
    }
    return status_report
78
+
79
+
80
@app.post("/generate")
def generate(
    payload: GenerateRequest = Body(...),
    x_api_key: Optional[str] = Header(default=None),
):
    """Synthesize the request text in the voice of the supplied speaker sample.

    On success the response is the generated audio (``audio/wav``) sent as an
    attachment named ``output.wav``. Authentication or speaker-audio problems
    are reported as HTTP errors with a ``detail`` message; any other failure
    is reported as a 500 response of the form ``{"error": "<message>"}``.
    """
    from fastapi import Response

    _require_api_key(x_api_key)

    speaker_file = None
    output_file = None

    try:
        speaker_file = _temp_speaker_file(payload.speaker_wav)
        output_file = os.path.join(tempfile.gettempdir(), f"xtts-{uuid.uuid4()}.wav")

        # NOTE(review): confirm that tts_to_file accepts `use_cuda` in the
        # pinned TTS version.
        tts_model.tts_to_file(
            text=payload.text,
            file_path=output_file,
            speaker_wav=speaker_file,
            language=payload.language or DEFAULT_LANGUAGE,
            split_sentences=True,
            use_cuda=DEVICE == "cuda",
        )

        # Load the audio into memory before the cleanup below runs. A
        # FileResponse only opens its file while the response is being sent,
        # i.e. after this handler (and its `finally`) has finished, by which
        # time the temp file would already be deleted.
        audio_bytes = Path(output_file).read_bytes()
        return Response(
            content=audio_bytes,
            media_type="audio/wav",
            headers={"Content-Disposition": 'attachment; filename="output.wav"'},
        )

    except HTTPException:
        raise
    except Exception as exc:  # pragma: no cover - runtime failure path
        # Surface readable errors to client
        return JSONResponse(status_code=500, content={"error": str(exc)})
    finally:
        # Both temp files are request-scoped; remove whichever were created.
        for leftover in (speaker_file, output_file):
            if leftover:
                Path(leftover).unlink(missing_ok=True)
115
+
116
+
117
@app.get("/")
def root():
    # Landing route: names the service and lists its endpoints. (Comments
    # rather than a docstring, so the OpenAPI description is unchanged.)
    available = ["/health", "/generate"]
    return {"name": "xtts-v2-api", "endpoints": available}
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ TTS==0.22.0
2
+ fastapi==0.104.1
3
+ uvicorn==0.24.0
4
+ torch==2.1.0
5
+ torchaudio==2.1.0
6
+ python-multipart==0.0.6
7
+ requests==2.31.0
8
+ numpy==1.26.4
spaces.yaml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ title: xtts-v2-api
2
+ sdk: docker
3
+ dockerfile: Dockerfile
4
+ python_version: 3.10
5
+ app_file: app.py