iamcodio committed on
Commit
79ea526
·
verified ·
1 Parent(s): d128f10

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. Dockerfile +29 -0
  2. README.md +11 -5
  3. app.py +76 -0
Dockerfile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the Dia2 TTS FastAPI service (app.py), served by uvicorn
# on port 7860.
FROM python:3.12-slim

# Flush print() output immediately; without this, stdout is block-buffered
# when not attached to a TTY and the app's "[dia2] ..." log lines show up late.
ENV PYTHONUNBUFFERED=1

# git is required for the "dia2 @ git+https://..." install below.
# NOTE(review): ffmpeg is presumably needed by the audio stack - confirm.
RUN apt-get update && \
    apt-get install -y --no-install-recommends git ffmpeg && \
    rm -rf /var/lib/apt/lists/*

# NOTE(review): `latest` is unpinned; consider pinning a uv release tag so
# rebuilds are reproducible.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv

WORKDIR /app

# Install torch with CUDA 12.8 wheels, then dia2 from source.
# --no-cache keeps uv's download cache out of the image layer.
RUN uv pip install --system --no-cache \
        --extra-index-url https://download.pytorch.org/whl/cu128 \
        "torch>=2.8.0" && \
    uv pip install --system --no-cache \
        "dia2 @ git+https://github.com/nari-labs/dia2.git" \
        fastapi \
        uvicorn

COPY app.py .

# Run as an unprivileged user (uid 1000).
RUN useradd -m -u 1000 user
USER user

# Point the Hugging Face and torch caches at /tmp.
ENV HF_HOME=/tmp/hf_cache
ENV TORCH_HOME=/tmp/torch_cache

EXPOSE 7860
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,16 @@
1
  ---
2
- title: Iamcodio Dia Tts
3
- emoji: 🏢
4
- colorFrom: yellow
5
- colorTo: blue
6
  sdk: docker
 
7
  pinned: false
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
1
  ---
2
+ title: iamcodio Dia2 TTS
3
+ emoji: 🎙️
4
+ colorFrom: green
5
+ colorTo: gray
6
  sdk: docker
7
+ app_port: 7860
8
  pinned: false
9
+ license: apache-2.0
10
  ---
11
 
12
+ # iamcodio Dia2 TTS
13
+
14
+ Dia2 2B multi-speaker dialogue TTS on a dedicated L4 GPU.
15
+
16
+ FastAPI endpoint at `/generate`. Use `[S1]`/`[S2]` speaker tags.
app.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import io
import tempfile
import threading
import time
from pathlib import Path

from fastapi import FastAPI, HTTPException
from fastapi.responses import Response
from pydantic import BaseModel, Field
9
+
10
# ASGI application object; the container starts it as "app:app" under uvicorn.
app = FastAPI(title="iamcodio Dia2 TTS")

# Lazily created Dia2 instance shared by all requests; None until first use.
model = None

# Guards the first load. Sync (`def`) endpoints run in a worker threadpool, so
# without this two concurrent first requests could each load the model.
_model_lock = threading.Lock()


def get_model():
    """Return the shared Dia2 model, loading it on the first call.

    The model is loaded from the "nari-labs/Dia2-2B" repo onto the CUDA device
    in bfloat16 and stored in the module-level ``model`` so later calls reuse
    it. Loading is serialized with a lock so it happens at most once.

    Returns:
        The loaded Dia2 model instance.
    """
    global model
    if model is None:
        with _model_lock:
            # Re-check under the lock: another thread may have finished the
            # load while this one was waiting.
            if model is None:
                # Imported here so dia2 is only pulled in when the model is
                # first needed.
                from dia2 import Dia2

                print("[dia2] Loading Dia2-2B model...")
                start = time.perf_counter()
                model = Dia2.from_repo(
                    "nari-labs/Dia2-2B", device="cuda", dtype="bfloat16"
                )
                elapsed = time.perf_counter() - start
                print(f"[dia2] Model loaded in {elapsed:.1f}s")
    return model
24
+
25
+
26
class GenerateRequest(BaseModel):
    # JSON body accepted by the /generate endpoint. Numeric fields are
    # range-checked by pydantic (ge/le bounds) before the handler runs.

    # Required dialogue text.
    text: str = Field(..., description="Text with [S1]/[S2] speaker tags")

    # Passed through to GenerationConfig(cfg_scale=...).
    cfg_scale: float = Field(6.0, ge=1.0, le=10.0)

    # Passed through to SamplingConfig(temperature=..., top_k=...).
    temperature: float = Field(0.8, ge=0.1, le=2.0)
    top_k: int = Field(50, ge=1, le=200)

    # Passed through to GenerationConfig(use_cuda_graph=...).
    use_cuda_graph: bool = True
32
+
33
+
34
@app.get("/health")
def health():
    # Liveness probe: always reports "ok", plus whether the lazily loaded
    # model has been created yet. Does not trigger a model load.
    is_loaded = model is not None
    return {"status": "ok", "model_loaded": is_loaded}
37
+
38
+
39
@app.post("/generate")
def generate(req: GenerateRequest):
    """Synthesize speech for ``req.text`` and return it as a WAV file.

    Args:
        req: Dialogue text plus sampling options (``cfg_scale``,
            ``temperature``, ``top_k``, ``use_cuda_graph``).

    Returns:
        A ``Response`` with media type ``audio/wav``. The
        ``X-Generation-Time`` header carries the generation time in seconds,
        formatted to two decimals.

    Raises:
        HTTPException: 400 when the text is empty or only whitespace.
    """
    if not req.text.strip():
        raise HTTPException(status_code=400, detail="Text input cannot be empty")

    # Imported lazily so dia2 is only pulled in once a request needs it.
    from dia2 import GenerationConfig, SamplingConfig

    dia = get_model()
    config = GenerationConfig(
        cfg_scale=req.cfg_scale,
        audio=SamplingConfig(temperature=req.temperature, top_k=req.top_k),
        use_cuda_graph=req.use_cuda_graph,
    )

    # NOTE(review): sync endpoints run in a threadpool, so overlapping requests
    # call dia.generate on the same model concurrently - confirm that is safe.

    # The model writes its output to a path, so give it a private scratch
    # directory that is removed (with the file) when the block exits.
    with tempfile.TemporaryDirectory() as tmp_dir:
        wav_path = Path(tmp_dir) / "output.wav"

        # perf_counter is monotonic, so the duration cannot be skewed by
        # system clock adjustments.
        start = time.perf_counter()
        dia.generate(
            req.text,
            config=config,
            output_wav=str(wav_path),
            verbose=True,
        )
        elapsed = time.perf_counter() - start
        print(f"[dia2] Generated in {elapsed:.2f}s")

        # Read everything into memory before the directory is cleaned up.
        wav_bytes = wav_path.read_bytes()

    return Response(
        content=wav_bytes,
        media_type="audio/wav",
        headers={
            "X-Generation-Time": f"{elapsed:.2f}",
        },
    )