ataberkkilavuzcu committed on
Commit
b52eab0
·
verified ·
1 Parent(s): 9cf6a70

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -30
app.py CHANGED
@@ -1,9 +1,7 @@
1
  import base64
2
  import os
3
- import sys
4
  import tempfile
5
  import uuid
6
- from io import StringIO
7
  from pathlib import Path
8
  from typing import Optional
9
 
@@ -21,42 +19,62 @@ HF_TOKEN = (
21
  or os.getenv("HUGGINGFACEHUB_API_TOKEN")
22
  or os.getenv("HF_TOKEN")
23
  )
24
- MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
25
  MAX_TEXT_LENGTH = 1000
26
  DEFAULT_LANGUAGE = "en"
27
 
28
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
29
 
30
- # Set token in environment before importing TTS
31
  if HF_TOKEN:
32
  os.environ["HUGGING_FACE_HUB_TOKEN"] = HF_TOKEN
33
  os.environ["HF_TOKEN"] = HF_TOKEN
34
- # Also login explicitly via huggingface_hub
35
  try:
36
  from huggingface_hub import login
37
  login(token=HF_TOKEN, add_to_git_credential=False)
38
  except ImportError:
39
- pass # huggingface_hub might not be installed, that's okay
40
 
41
- # Mock stdin to automatically accept TTS Terms of Service
42
- # This prevents the interactive prompt that causes EOFError in containers
43
- _original_stdin = sys.stdin
44
- sys.stdin = StringIO("y\n") # Auto-accept TOS
45
-
46
- from TTS.api import TTS
47
 
48
  try:
49
- tts_model = TTS(MODEL_NAME, gpu=DEVICE == "cuda", progress_bar=False)
50
- except Exception as exc: # pragma: no cover
51
- hint = ""
52
- if "EOF when reading a line" in str(exc):
53
- hint = " Hint: set HUGGING_FACE_HUB_TOKEN to a Hugging Face token that has accepted the XTTS v2 license."
54
- raise RuntimeError(f"Failed to load XTTS v2 model: {exc}.{hint}") from exc
55
- finally:
56
- # Restore stdin after model loading (TOS check happens during model load)
57
- sys.stdin = _original_stdin
 
 
 
 
 
58
 
59
- app = FastAPI(title="xtts-v2-api", version="1.0.0")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
 
62
  class GenerateRequest(BaseModel):
@@ -133,7 +151,7 @@ def _preprocess_audio_wav(path: str, target_sr: int = 24000, target_peak: float
133
  @app.post("/health")
134
  def health(x_api_key: Optional[str] = Header(default=None)):
135
  _require_api_key(x_api_key)
136
- return {"status": "ok", "model": "xtts_v2", "device": DEVICE}
137
 
138
 
139
  def _cleanup_files(*files: str):
@@ -160,14 +178,17 @@ def generate(
160
  try:
161
  speaker_file = _temp_speaker_file(payload.speaker_wav)
162
  speaker_file = _preprocess_audio_wav(speaker_file)
163
- output_file = os.path.join(tempfile.gettempdir(), f"xtts-{uuid.uuid4()}.wav")
164
 
165
- tts_model.tts_to_file(
 
 
 
 
166
  text=payload.text,
167
- file_path=output_file,
168
- speaker_wav=speaker_file,
169
- language=payload.language or DEFAULT_LANGUAGE,
170
- split_sentences=True,
171
  )
172
 
173
  # Light post-process to avoid end-of-file artifacts
@@ -198,4 +219,4 @@ def generate(
198
 
199
  @app.get("/")
200
  def root():
201
- return {"name": "xtts-v2-api", "endpoints": ["/health", "/generate"]}
 
1
  import base64
2
  import os
 
3
  import tempfile
4
  import uuid
 
5
  from pathlib import Path
6
  from typing import Optional
7
 
 
19
  or os.getenv("HUGGINGFACEHUB_API_TOKEN")
20
  or os.getenv("HF_TOKEN")
21
  )
22
+ MODEL_REPO = "IndexTeam/IndexTTS-2"
23
  MAX_TEXT_LENGTH = 1000
24
  DEFAULT_LANGUAGE = "en"
25
 
26
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
27
 
28
+ # Set token in environment before importing
29
  if HF_TOKEN:
30
  os.environ["HUGGING_FACE_HUB_TOKEN"] = HF_TOKEN
31
  os.environ["HF_TOKEN"] = HF_TOKEN
 
32
  try:
33
  from huggingface_hub import login
34
  login(token=HF_TOKEN, add_to_git_credential=False)
35
  except ImportError:
36
+ pass
37
 
38
+ # Download model checkpoints from Hugging Face
39
+ MODEL_DIR = os.getenv("MODEL_DIR", "/data/indextts2")
40
+ os.makedirs(MODEL_DIR, exist_ok=True)
 
 
 
41
 
42
  try:
43
+ from huggingface_hub import snapshot_download
44
+
45
+ # Download model if not already present
46
+ if not Path(MODEL_DIR, "config.yaml").exists():
47
+ print(f"Downloading IndexTTS2 model from {MODEL_REPO}...")
48
+ snapshot_download(
49
+ repo_id=MODEL_REPO,
50
+ local_dir=MODEL_DIR,
51
+ token=HF_TOKEN,
52
+ )
53
+ print("Model download complete.")
54
+ except Exception as exc:
55
+ print(f"Warning: Could not download model: {exc}")
56
+ # Continue anyway - model might already be present
57
 
58
+ # Initialize IndexTTS2
59
+ try:
60
+ from indextts.infer_v2 import IndexTTS2
61
+
62
+ cfg_path = os.path.join(MODEL_DIR, "config.yaml")
63
+ if not Path(cfg_path).exists():
64
+ raise FileNotFoundError(f"Config file not found at {cfg_path}. Model may not be downloaded.")
65
+
66
+ tts_model = IndexTTS2(
67
+ cfg_path=cfg_path,
68
+ model_dir=MODEL_DIR,
69
+ use_fp16=False, # CPU doesn't support FP16
70
+ use_cuda_kernel=False, # CPU mode
71
+ use_deepspeed=False, # CPU mode
72
+ )
73
+ print("IndexTTS2 model loaded successfully.")
74
+ except Exception as exc:
75
+ raise RuntimeError(f"Failed to load IndexTTS2 model: {exc}") from exc
76
+
77
+ app = FastAPI(title="indextts2-api", version="1.0.0")
78
 
79
 
80
  class GenerateRequest(BaseModel):
 
151
  @app.post("/health")
152
  def health(x_api_key: Optional[str] = Header(default=None)):
153
  _require_api_key(x_api_key)
154
+ return {"status": "ok", "model": "indextts2", "device": DEVICE}
155
 
156
 
157
  def _cleanup_files(*files: str):
 
178
  try:
179
  speaker_file = _temp_speaker_file(payload.speaker_wav)
180
  speaker_file = _preprocess_audio_wav(speaker_file)
181
+ output_file = os.path.join(tempfile.gettempdir(), f"indextts2-{uuid.uuid4()}.wav")
182
 
183
+ # IndexTTS2 inference
184
+ # Note: language parameter is kept for API compatibility but IndexTTS2
185
+ # handles multilingual automatically (supports English, Turkish, Chinese, etc.)
186
+ tts_model.infer(
187
+ spk_audio_prompt=speaker_file,
188
  text=payload.text,
189
+ output_path=output_file,
190
+ use_random=False, # Deterministic output
191
+ verbose=False,
 
192
  )
193
 
194
  # Light post-process to avoid end-of-file artifacts
 
219
 
220
  @app.get("/")
221
  def root():
222
+ return {"name": "indextts2-api", "endpoints": ["/health", "/generate"]}