SeaWolf-AI committed on
Commit
095807b
·
verified ·
1 Parent(s): 8da667b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -8
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py — v2 수정 (레시피 비공개)
2
  """
3
Darwin-TTS-1.7B-Cross v2 — HuggingFace Space
4
  """
@@ -7,13 +7,24 @@ from pathlib import Path
7
  from contextlib import asynccontextmanager
8
  from fastapi import FastAPI, HTTPException
9
  from fastapi.responses import HTMLResponse, Response
 
10
 
11
- state = {"model": None}
12
 
13
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook (v1): no startup work is performed.

    Control is handed to the application at ``yield``; on shutdown the
    module-level ``state`` cache is cleared.
    """
    yield
    # Shutdown: drop the cached model reference so it can be garbage-collected.
    state["model"] = None
17
 
18
  app = FastAPI(title="Darwin-TTS-1.7B-Cross", lifespan=lifespan)
19
 
@@ -31,9 +42,21 @@ async def synthesize(request: dict):
31
  try:
32
  from qwen_tts import Qwen3TTSModel
33
 
34
- # v2: ์‚ฌ์ „ ๋ธ”๋ Œ๋”ฉ๋œ ๊ฐ€์ค‘์น˜ ๋กœ๋“œ (๋ ˆ์‹œํ”ผ ๋น„๊ณต๊ฐœ)
35
- model_id = "FINAL-Bench/Darwin-TTS-1.7B-Cross" if use_darwin else "Qwen/Qwen3-TTS-12Hz-1.7B-Base"
36
- model = Qwen3TTSModel.from_pretrained(model_id, device_map="cuda:0", dtype=torch.bfloat16)
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  ref_path = "/tmp/darwin_ref.wav"
39
  sf.write(ref_path, (0.1 * np.sin(2 * np.pi * 200 * np.linspace(0, 3, 72000))).astype(np.float32), 24000)
@@ -52,7 +75,7 @@ async def synthesize(request: dict):
52
  return Response(
53
  content=buf.read(),
54
  media_type="audio/wav",
55
- headers={"X-Duration": f"{len(wav)/sr:.1f}", "X-Model": model_id},
56
  )
57
  except Exception as e:
58
  if model is not None: del model
@@ -61,7 +84,7 @@ async def synthesize(request: dict):
61
 
62
@app.get("/health")
async def health():
    """Liveness probe: reports service status and CUDA availability."""
    return {"status": "ok", "cuda": torch.cuda.is_available()}
65
 
66
  if __name__ == "__main__":
67
  import uvicorn
 
1
+ # app.py — v2 (레시피 비공개, speech_tokenizer 해결)
2
  """
3
Darwin-TTS-1.7B-Cross v2 — HuggingFace Space
4
  """
 
7
  from contextlib import asynccontextmanager
8
  from fastapi import FastAPI, HTTPException
9
  from fastapi.responses import HTMLResponse, Response
10
+ from safetensors import safe_open
11
 
12
+ state = {"darwin_weights": None}
13
 
14
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook: pre-download and cache the Darwin weight tensors.

    On startup, the pre-blended Darwin checkpoint (recipe kept private) is
    fetched from the Hub once and every tensor is loaded into the
    module-level ``state`` dict, so request handlers can graft the weights
    onto a freshly loaded base model without re-downloading.  On shutdown,
    the cache is dropped so the tensors can be garbage-collected.

    NOTE(review): mojibake in the log strings repaired (📦 / ✅ were
    UTF-8 bytes mis-decoded in the previous revision); comments translated
    to English.  Behavior is otherwise unchanged — a download failure here
    still fails startup fast, which appears intentional.
    """
    # Pre-load Darwin weights once, at startup.
    from huggingface_hub import hf_hub_download
    print("📦 Loading Darwin weights...")
    path = hf_hub_download("FINAL-Bench/Darwin-TTS-1.7B-Cross", "model.safetensors")
    weights = {}
    # safe_open reads tensors lazily; default framework target is CPU here,
    # which keeps startup GPU-free — handlers move tensors to device later.
    with safe_open(path, framework="pt") as s:
        for k in s.keys():
            weights[k] = s.get_tensor(k)
    state["darwin_weights"] = weights
    print(f"   ✅ {len(weights)} tensors cached")
    yield
    # Shutdown: release the cached tensors.
    state["darwin_weights"] = None
28
 
29
  app = FastAPI(title="Darwin-TTS-1.7B-Cross", lifespan=lifespan)
30
 
 
42
  try:
43
  from qwen_tts import Qwen3TTSModel
44
 
45
+ # ํ•ญ์ƒ ์›๋ณธ์—์„œ ๋กœ๋“œ (speech_tokenizer ํฌํ•จ)
46
+ model = Qwen3TTSModel.from_pretrained(
47
+ "Qwen/Qwen3-TTS-12Hz-1.7B-Base",
48
+ device_map="cuda:0", dtype=torch.bfloat16
49
+ )
50
+
51
+ # Darwin ๋ชจ๋“œ: ์‚ฌ์ „ ๋ธ”๋ Œ๋”ฉ๋œ ๊ฐ€์ค‘์น˜๋กœ ๊ต์ฒด
52
+ if use_darwin and state["darwin_weights"]:
53
+ cnt = 0
54
+ for n, p in model.model.named_parameters():
55
+ if n in state["darwin_weights"]:
56
+ with torch.no_grad():
57
+ p.copy_(state["darwin_weights"][n].to(p.device, p.dtype))
58
+ cnt += 1
59
+ print(f" Darwin weights applied: {cnt} tensors")
60
 
61
  ref_path = "/tmp/darwin_ref.wav"
62
  sf.write(ref_path, (0.1 * np.sin(2 * np.pi * 200 * np.linspace(0, 3, 72000))).astype(np.float32), 24000)
 
75
  return Response(
76
  content=buf.read(),
77
  media_type="audio/wav",
78
+ headers={"X-Duration": f"{len(wav)/sr:.1f}", "X-Model": "Darwin" if use_darwin else "Original"},
79
  )
80
  except Exception as e:
81
  if model is not None: del model
 
84
 
85
@app.get("/health")
async def health():
    """Liveness probe.

    Reports service status, CUDA availability, and whether the Darwin
    weight cache was populated during startup.
    """
    darwin_ready = state["darwin_weights"] is not None
    return {
        "status": "ok",
        "cuda": torch.cuda.is_available(),
        "darwin_loaded": darwin_ready,
    }
88
 
89
  if __name__ == "__main__":
90
  import uvicorn