Update app.py

app.py CHANGED
@@ -83,49 +83,13 @@ try:
     tts_model = IndexTTS2(
         cfg_path=cfg_path,
         model_dir=MODEL_DIR,
-        use_fp16=
-        use_cuda_kernel=
+        use_fp16=False,  # Keep disabled - can cause issues
+        use_cuda_kernel=False,  # Keep disabled for stability
         use_deepspeed=False,  # Keep disabled for stability
     )
 
     load_time = time.time() - load_start
     print(f"IndexTTS2 model loaded successfully in {load_time:.2f}s on {DEVICE}")
-
-    # Warmup inference to initialize all model components
-    # This moves the initialization cost from first request to startup
-    print("Running warmup inference...")
-    warmup_start = time.time()
-    try:
-        # Create a minimal warmup audio file
-        warmup_audio_path = os.path.join(tempfile.gettempdir(), "warmup.wav")
-        warmup_output_path = os.path.join(tempfile.gettempdir(), "warmup_out.wav")
-
-        # Generate a short sine wave for warmup (1 second at 24kHz)
-        sample_rate = 24000
-        duration = 1.0
-        t = torch.linspace(0, duration, int(sample_rate * duration))
-        warmup_wav = (0.5 * torch.sin(2 * 3.14159 * 440 * t)).unsqueeze(0)
-        torchaudio.save(warmup_audio_path, warmup_wav, sample_rate)
-
-        # Run minimal inference with inference_mode for speed
-        with torch.inference_mode():
-            tts_model.infer(
-                spk_audio_prompt=warmup_audio_path,
-                text="Hello.",
-                output_path=warmup_output_path,
-                use_random=False,
-                verbose=False,
-            )
-
-        # Cleanup warmup files
-        Path(warmup_audio_path).unlink(missing_ok=True)
-        Path(warmup_output_path).unlink(missing_ok=True)
-
-        warmup_time = time.time() - warmup_start
-        print(f"Warmup complete in {warmup_time:.2f}s - model is ready!")
-    except Exception as warmup_exc:
-        print(f"Warmup failed (non-fatal): {warmup_exc}")
-        # Continue anyway - first request will just be slower
 
 except Exception as exc:
     raise RuntimeError(f"Failed to load IndexTTS2 model: {exc}") from exc
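The removed warmup traded startup time for first-request latency. If it is ever reintroduced, a standalone helper keeps it out of the load path. A minimal sketch built from the removed lines above; the _warmup name is hypothetical, and math.pi replaces the hard-coded 3.14159:

import math
import os
import tempfile
from pathlib import Path

import torch
import torchaudio


def _warmup(tts_model) -> None:
    """Run one throwaway inference so the first real request is fast."""
    prompt_path = os.path.join(tempfile.gettempdir(), "warmup.wav")
    output_path = os.path.join(tempfile.gettempdir(), "warmup_out.wav")
    # 1 second of a 440 Hz sine at 24 kHz stands in for a speaker prompt.
    t = torch.linspace(0, 1.0, 24000)
    wav = (0.5 * torch.sin(2 * math.pi * 440 * t)).unsqueeze(0)
    torchaudio.save(prompt_path, wav, 24000)
    with torch.inference_mode():
        tts_model.infer(
            spk_audio_prompt=prompt_path,
            text="Hello.",
            output_path=output_path,
            use_random=False,
            verbose=False,
        )
    # Best-effort cleanup of the scratch files.
    Path(prompt_path).unlink(missing_ok=True)
    Path(output_path).unlink(missing_ok=True)

Gating the call behind an environment check (for example os.environ.get("INDEXTTS_WARMUP") == "1", a hypothetical flag) would let the Space choose between fast startup and a fast first request.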
@@ -253,7 +217,7 @@ def _cleanup_files(*files: str):
 
 
 def _run_generate_job(job_id: str, payload: Dict[str, str]):
-    """Background job for TTS generation
+    """Background job for TTS generation."""
     speaker_file = None
     output_file = None
     _set_job(job_id, status="processing")
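This hunk sits just below _cleanup_files, which the diff only names in the hunk header. For orientation, a plausible body consistent with that signature; this is an assumption, not code from this commit:

from pathlib import Path


def _cleanup_files(*files: str) -> None:
    # Best-effort removal of temp artifacts; empty and missing paths are ignored.
    for f in files:
        if f:
            Path(f).unlink(missing_ok=True)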
@@ -271,16 +235,15 @@ def _run_generate_job(job_id: str, payload: Dict[str, str]):
         f"indextts2-{uuid.uuid4()}.wav"
     )
 
-    # Run inference
+    # Run inference (no wrapper - let the model handle its own optimizations)
     infer_start = time.time()
-
-
-
-
-
-
-
-    )
+    tts_model.infer(
+        spk_audio_prompt=speaker_file,
+        text=payload["text"],
+        output_path=output_file,
+        use_random=False,
+        verbose=True,  # Keep verbose for timing info
+    )
     infer_time = time.time() - infer_start
 
     # Post-process output
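_run_generate_job is driven through a job table via _set_job, neither of which is shown in this diff. A minimal sketch of that plumbing, assuming a dict-backed store and a background thread; apart from the two names taken from the diff, everything here is an assumption:

import threading
import uuid
from typing import Dict

JOBS: Dict[str, Dict[str, str]] = {}  # job_id -> job record (assumed store)
_JOBS_LOCK = threading.Lock()


def _set_job(job_id: str, **fields: str) -> None:
    # Merge status fields into the job record under a lock.
    with _JOBS_LOCK:
        JOBS.setdefault(job_id, {}).update(fields)


def submit_generate(payload: Dict[str, str]) -> str:
    # Start generation in the background and return an id the client can poll.
    job_id = str(uuid.uuid4())
    _set_job(job_id, status="queued")
    threading.Thread(
        target=_run_generate_job, args=(job_id, payload), daemon=True
    ).start()
    return job_id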