Spaces:

iamcodio
/

iamcodio-dia-tts

Paused

App Files Files Community

iamcodio committed on Mar 22

Commit

79ea526

verified ·

1 Parent(s): d128f10

Upload folder using huggingface_hub

Browse files

Files changed (3) hide show

Dockerfile +29 -0
README.md +11 -5
app.py +76 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,29 @@

+# Image for the Dia2 TTS FastAPI service defined in app.py.
+FROM python:3.12-slim
+# git is required for the "git+https" install of dia2 below; the apt package
+# lists are removed in the same layer so they do not end up in the image.
+# NOTE(review): ffmpeg is presumably needed by dia2 for audio handling -- confirm.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends git ffmpeg && \
+    rm -rf /var/lib/apt/lists/*
+# Copy the uv binary out of the upstream uv image.
+# NOTE(review): the ":latest" tag is unpinned, so rebuilds may pick up a different uv version.
+COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
+WORKDIR /app
+# Install torch with CUDA 12.8 wheels, then dia2 from source
+RUN uv pip install --system \
+    --extra-index-url https://download.pytorch.org/whl/cu128 \
+    "torch>=2.8.0" && \
+    uv pip install --system \
+    "dia2 @ git+https://github.com/nari-labs/dia2.git" \
+    fastapi \
+    uvicorn
+COPY app.py .
+# Create an unprivileged user (uid 1000) and run the server as that user, not root.
+RUN useradd -m -u 1000 user
+USER user
+# Point the Hugging Face and torch caches at /tmp.
+# NOTE(review): presumably chosen because /tmp is writable by the non-root user -- confirm.
+ENV HF_HOME=/tmp/hf_cache
+ENV TORCH_HOME=/tmp/torch_cache
+# 7860 must stay in sync with app_port in README.md and the --port given to uvicorn.
+EXPOSE 7860
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -1,10 +1,16 @@
 ---
-title: Iamcodio Dia Tts
-emoji: 🏢
-colorFrom: yellow
-colorTo: blue
 sdk: docker
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: iamcodio Dia2 TTS
+emoji: 🎙️
+colorFrom: green
+colorTo: gray
 sdk: docker
+app_port: 7860
 pinned: false
+license: apache-2.0
 ---
+# iamcodio Dia2 TTS
+Dia2 2B multi-speaker dialogue TTS running on a dedicated L4 GPU.
+POST to the FastAPI endpoint at `/generate`; mark speakers in the text with `[S1]`/`[S2]` tags.

app.py ADDED Viewed

	@@ -0,0 +1,76 @@

+import io
+import tempfile
+import threading
+import time
+from pathlib import Path
+from fastapi import FastAPI, HTTPException
+from fastapi.responses import Response
+from pydantic import BaseModel, Field
+app = FastAPI(title="iamcodio Dia2 TTS")
+model = None
+def get_model():
+    global model
+    if model is None:
+        from dia2 import Dia2
+        print("[dia2] Loading Dia2-2B model...")
+        start = time.time()
+        model = Dia2.from_repo("nari-labs/Dia2-2B", device="cuda", dtype="bfloat16")
+        print(f"[dia2] Model loaded in {time.time() - start:.1f}s")
+    return model
+class GenerateRequest(BaseModel):
+    text: str = Field(..., description="Text with [S1]/[S2] speaker tags")
+    cfg_scale: float = Field(default=6.0, ge=1.0, le=10.0)
+    temperature: float = Field(default=0.8, ge=0.1, le=2.0)
+    top_k: int = Field(default=50, ge=1, le=200)
+    use_cuda_graph: bool = Field(default=True)
+@app.get("/health")
+def health():
+    return {"status": "ok", "model_loaded": model is not None}
+@app.post("/generate")
+def generate(req: GenerateRequest):
+    if not req.text or req.text.isspace():
+        raise HTTPException(status_code=400, detail="Text input cannot be empty")
+    from dia2 import GenerationConfig, SamplingConfig
+    dia = get_model()
+    config = GenerationConfig(
+        cfg_scale=req.cfg_scale,
+        audio=SamplingConfig(temperature=req.temperature, top_k=req.top_k),
+        use_cuda_graph=req.use_cuda_graph,
+    )
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
+        tmp_path = f.name
+    try:
+        start = time.time()
+        result = dia.generate(
+            req.text,
+            config=config,
+            output_wav=tmp_path,
+            verbose=True,
+        )
+        elapsed = time.time() - start
+        print(f"[dia2] Generated in {elapsed:.2f}s")
+        wav_bytes = Path(tmp_path).read_bytes()
+        return Response(
+            content=wav_bytes,
+            media_type="audio/wav",
+            headers={
+                "X-Generation-Time": f"{elapsed:.2f}",
+            },
+        )
+    finally:
+        Path(tmp_path).unlink(missing_ok=True)