Spaces:

softwarebusters
/

qiuhuaTTSv2-api

Sleeping

App Files Community

neboximate committed on Nov 14, 2025

Commit

ede25cd

verified ·

1 Parent(s): 82665ce

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -19

app.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import base64
 import io
 import os
-from typing import Optional
 import numpy as np
 import soundfile as sf
@@ -13,20 +12,26 @@ from safetensors.torch import load_file
 from TTS.tts.configs.xtts_config import XttsConfig
 from TTS.tts.models.xtts import Xtts, XttsArgs, XttsAudioConfig
-# Torch >= 2.6 safety (older versions just ignore this)
 try:
     from torch.serialization import add_safe_globals
     add_safe_globals([XttsConfig, XttsArgs, XttsAudioConfig])
 except Exception:
     pass
-# ---------- CONFIG ----------
-REPO_ID = "softwarebusters/qiuhuaTTSv2"  # HF model repo id
 CHECKPOINT_FILE = "checkpoint_7000_infer_fp16.safetensors"
 CONFIG_FILE = "config.json"
-SPEAKER_REFERENCE = "speaker_ref.wav"  # short wav you will upload
 SR_OUT = 24000
@@ -41,11 +46,39 @@ def pick_device() -> str:
 device = pick_device()
 print(f"🚀 Using device: {device}")
-# ---------- LOAD MODEL AT STARTUP ----------
-print("📥 Downloading model files from Hugging Face…")
-ckpt_path = hf_hub_download(REPO_ID, CHECKPOINT_FILE)
-cfg_path = hf_hub_download(REPO_ID, CONFIG_FILE)
 print("📄 Loading XTTS config…")
 config = XttsConfig()
@@ -54,14 +87,11 @@ config.load_json(cfg_path)
 print("🧠 Initializing XTTS model…")
 model = Xtts.init_from_config(config)
-# base XTTS files (model.pth, dvae.pth, mel_stats.json, vocab.json)
-base_dir = os.path.dirname(ckpt_path)
-print("📦 Loading base XTTS weights…")
 model.load_checkpoint(
     config=config,
     checkpoint_dir=base_dir,
-    vocab_path=os.path.join(base_dir, "vocab.json"),
     use_deepspeed=False,
 )
@@ -74,8 +104,9 @@ model.to(device)
 model.eval()
 print("✅ Model ready.")
-# ---------- SPEAKER LATENTS ----------
 if not os.path.exists(SPEAKER_REFERENCE):
     raise FileNotFoundError(
@@ -90,10 +121,11 @@ with torch.inference_mode():
     )
 print("✅ Speaker latents ready.")
-# ---------- FASTAPI APP ----------
-app = FastAPI(title="XTTS v2 TTS API (Space)")
 class TtsRequest(BaseModel):
@@ -131,10 +163,12 @@ def tts(req: TtsRequest):
     wav = np.asarray(out["wav"], dtype=np.float32)
     buf = io.BytesIO()
     sf.write(buf, wav, SR_OUT, format="WAV")
     audio_bytes = buf.getvalue()
     audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
     return TtsResponse(audio_base64=audio_b64, sample_rate=SR_OUT)

 import base64
 import io
 import os
 import numpy as np
 import soundfile as sf
 from TTS.tts.configs.xtts_config import XttsConfig
 from TTS.tts.models.xtts import Xtts, XttsArgs, XttsAudioConfig
+# --------------------------------------------------
+# Torch >= 2.6 için güvenlik (eski versiyonlarda sorun olmaz)
+# --------------------------------------------------
 try:
     from torch.serialization import add_safe_globals
     add_safe_globals([XttsConfig, XttsArgs, XttsAudioConfig])
 except Exception:
     pass
+# --------------------------------------------------
+# CONFIG
+# --------------------------------------------------
+REPO_ID = "softwarebusters/qiuhuaTTSv2"  # Hugging Face model repo id
+# Sadece fp16 checkpoint kullanıyoruz (safetensors)
 CHECKPOINT_FILE = "checkpoint_7000_infer_fp16.safetensors"
 CONFIG_FILE = "config.json"
+SPEAKER_REFERENCE = "speaker_ref.wav"   # Space'e yüklediğin kısa wav
 SR_OUT = 24000
 device = pick_device()
 print(f"🚀 Using device: {device}")
+# --------------------------------------------------
+# HUGGING FACE TOKEN (private repo için)
+# --------------------------------------------------
+HF_TOKEN = os.environ.get("HF_TOKEN")  # Space Settings → Variables & secrets
+# --------------------------------------------------
+# MODEL YÜKLEME
+# --------------------------------------------------
+print("📥 Downloading required files from Hugging Face…")
+# 1) Fine-tuned checkpoint (sadece fp16)
+ckpt_path = hf_hub_download(
+    REPO_ID,
+    CHECKPOINT_FILE,
+    token=HF_TOKEN,
+)
+# 2) Config
+cfg_path = hf_hub_download(
+    REPO_ID,
+    CONFIG_FILE,
+    token=HF_TOKEN,
+)
+# 3) Base XTTS files (minimum set)
+model_pth = hf_hub_download(REPO_ID, "model.pth", token=HF_TOKEN)
+dvae_pth = hf_hub_download(REPO_ID, "dvae.pth", token=HF_TOKEN)
+mel_path = hf_hub_download(REPO_ID, "mel_stats.json", token=HF_TOKEN)
+vocab_path = hf_hub_download(REPO_ID, "vocab.json", token=HF_TOKEN)
+base_dir = os.path.dirname(model_pth)  # hepsi aynı cache klasöründe
 print("📄 Loading XTTS config…")
 config = XttsConfig()
 print("🧠 Initializing XTTS model…")
 model = Xtts.init_from_config(config)
+print("📦 Loading base XTTS weights (model.pth, dvae.pth, mel_stats.json)…")
 model.load_checkpoint(
     config=config,
     checkpoint_dir=base_dir,
+    vocab_path=vocab_path,
     use_deepspeed=False,
 )
 model.eval()
 print("✅ Model ready.")
+# --------------------------------------------------
+# SPEAKER LATENTS
+# --------------------------------------------------
 if not os.path.exists(SPEAKER_REFERENCE):
     raise FileNotFoundError(
     )
 print("✅ Speaker latents ready.")
+# --------------------------------------------------
+# FASTAPI APP
+# --------------------------------------------------
+app = FastAPI(title="XTTS v2 TTS API (HuggingFace Space)")
 class TtsRequest(BaseModel):
     wav = np.asarray(out["wav"], dtype=np.float32)
+    # WAV'i memory buffer'a yaz
     buf = io.BytesIO()
     sf.write(buf, wav, SR_OUT, format="WAV")
     audio_bytes = buf.getvalue()
+    # JSON ile göndermek için base64'e çevir
     audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
     return TtsResponse(audio_base64=audio_b64, sample_rate=SR_OUT)