Upload app.py with huggingface_hub
app.py
CHANGED
@@ -13,11 +13,18 @@ import torchaudio
 import gc
 import sys
 import types
+import logging
 from threading import Thread, Lock
 from huggingface_hub import snapshot_download
 
-# …
-# …
+# 🛡️ 1. SILENCE VERBOSE LOGGING
+# Minimize "steps" in logs as requested by user
+logging.getLogger("transformers").setLevel(logging.ERROR)
+logging.getLogger("TTS").setLevel(logging.ERROR)
+os.environ["CT2_VERBOSE"] = "0"
+os.environ["KMP_WARNINGS"] = "0"
+
+# 🛠️ 2. CRITICAL COMPATIBILITY MONKEYPATCHES
 if "torchaudio.backend" not in sys.modules:
     backend = types.ModuleType("torchaudio.backend")
     common = types.ModuleType("torchaudio.backend.common")
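The logging hunk leans on Python's logger hierarchy: one `setLevel(logging.ERROR)` on the `transformers` (or `TTS`) namespace root silences every child logger beneath it, while the `CT2_VERBOSE` and `KMP_WARNINGS` variables quiet the non-Python layers (CTranslate2 and OpenMP). A minimal sketch of the mechanism; the child logger name is illustrative, not taken from the diff:

```python
import logging

logging.basicConfig(level=logging.INFO)

# Same call as the diff: one setLevel() on the namespace root...
logging.getLogger("transformers").setLevel(logging.ERROR)

# ...governs every logger created below it.
child = logging.getLogger("transformers.modeling_utils")  # hypothetical child logger
child.info("loading weights...")   # suppressed (INFO < ERROR)
child.error("checkpoint missing")  # still printed

assert child.getEffectiveLevel() == logging.ERROR
```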
@@ -55,7 +62,7 @@ try:
     torchaudio.load = patched_load
 except Exception: pass
 
-# 📦 …
+# 📦 3. AI LIBRARIES
 import chatterbox_utils
 from faster_whisper import WhisperModel
 from TTS.api import TTS
@@ -71,80 +78,89 @@ except ImportError:
     if f is None: return lambda x: x
     return f
 
-# FORCE BUILD TRIGGER: 11:…
-# …
+# FORCE BUILD TRIGGER: 11:45:00 Jan 21 2026
+# v93: Silent Local-Only Mode. Forces instant retrieval after warmup.
 
 os.environ["COQUI_TOS_AGREED"] = "1"
 MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
 
-# --- THREAD SAFETY & STATUS ---
 WARMUP_STATUS = {"complete": False, "in_progress": False, "error": None}
 WARMUP_LOCK = Lock()
 
 def activate_gpu_models(action):
-    """…
+    """v93: Silent Instant Activation"""
     global MODELS, WARMUP_STATUS
 
-    # …
-    wait_start = time.time()
-    while WARMUP_STATUS["in_progress"]:
-        if time.time() - wait_start > 120: # 2 min max wait
-            print("⚠️ Warmup taking too long, proceeding anyway...")
-            break
-        print(f"⏳ Waiting for background model download to finish for {action}...")
-        time.sleep(5)
-
+    # Force local-only if warmup is done
+    local_only = WARMUP_STATUS["complete"]
+
     # 1. Faster-Whisper GPU Activation
     if action in ["stt", "s2st"]:
-        …
-        …
-        …
+        stt_ready = False
+        try: stt_ready = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
+        except: pass
+
+        if not stt_ready:
+            print(f"🎙️ [v93] Activating Whisper (Local Mode={local_only})...")
+            # Explicitly clear any CPU crumbs to prevent CUDA conflicts
+            if MODELS["stt"]: del MODELS["stt"]
+            gc.collect(); torch.cuda.empty_cache()
+
+            MODELS["stt"] = WhisperModel(
+                "large-v3",
+                device="cuda",
+                compute_type="float16",
+                local_files_only=local_only
+            )
 
     # 2. XTTS-v2 GPU Activation
     if action in ["tts", "s2st"]:
-        …
-        print("🔊 Initializing XTTS directly to GPU...")
-        MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
+        tts_on_gpu = False
         try:
             current_dev = str(next(MODELS["tts"].synthesizer.tts_model.parameters()).device)
-            …
-            …
+            tts_on_gpu = "cuda" in current_dev
+        except: pass
+
+        if MODELS["tts"] is None or not tts_on_gpu:
+            print(f"🔊 [v93] Activating XTTS-v2 (Local Mode={local_only})...")
+            if MODELS["tts"] is None:
+                MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
+            else:
                 MODELS["tts"].to("cuda")
-        except: MODELS["tts"].to("cuda")
 
-    # 3. Helpers
+    # 3. Helpers (Always Local)
     if MODELS["denoiser"] is None:
         try: MODELS["denoiser"] = init_df()
         except: pass
     if MODELS["translate"] is None: MODELS["translate"] = "active"
     chatterbox_utils.load_chatterbox(device="cuda" if torch.cuda.is_available() else "cpu")
-    gc.collect()
-    if torch.cuda.is_available(): torch.cuda.empty_cache()
 
 def warmup_task():
-    """Background…
+    """Silent Background Warmup (v93)"""
     global WARMUP_STATUS
     with WARMUP_LOCK:
         if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
         WARMUP_STATUS["in_progress"] = True
 
-    …
+    # We load them to CPU RAM first to ensure weights are in OS page cache
+    print("\n🔥 --- SILENT WARMUP STARTED (v93) ---")
     start = time.time()
     try:
-        # …
-        …
-        …
-        snapshot_download(repo_id="Systran/faster-whisper-large-v3", local_files_only=False)
+        # 1. Faster-Whisper
+        print("📥 Pre-loading Whisper to System RAM...")
+        MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
 
-        …
-        …
+        # 2. XTTS-v2
+        print("📥 Pre-loading XTTS-v2 to System RAM...")
+        MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
 
+        # 3. Chatterbox
         chatterbox_utils.warmup_chatterbox()
 
         WARMUP_STATUS["complete"] = True
-        print(f"✅ --- …
+        print(f"✅ --- SYSTEM OPTIMIZED: INSTANT RETRIEVAL READY ({time.time()-start:.2f}s) --- \n")
     except Exception as e:
-        print(f"❌ Warmup …
+        print(f"❌ Warmup fail: {e}")
         WARMUP_STATUS["error"] = str(e)
     finally:
         WARMUP_STATUS["in_progress"] = False
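This hunk swaps the old poll-and-wait loop for a two-phase scheme: `warmup_task` pre-loads weights on CPU in the background (filling the download cache and the OS page cache), and `activate_gpu_models` later rebuilds the models on CUDA, passing `local_files_only=local_only` so the post-warmup reload never touches the network. Below is a dependency-free sketch of that control flow, with `load_model` as a hypothetical stand-in for the `WhisperModel`/`TTS` constructors:

```python
import time
from threading import Lock, Thread

MODELS = {"stt": None}
STATUS = {"complete": False, "in_progress": False, "error": None}
LOCK = Lock()

def load_model(device, local_only):
    """Hypothetical stand-in for WhisperModel(...): pretend to fetch/load weights."""
    time.sleep(0.1)
    return {"device": device, "local_only": local_only}

def warmup_task():
    with LOCK:  # mirrors WARMUP_LOCK: at most one warmup runs
        if STATUS["complete"] or STATUS["in_progress"]:
            return
        STATUS["in_progress"] = True
    try:
        # Phase 1: CPU pre-load, network allowed (downloads + page cache).
        MODELS["stt"] = load_model(device="cpu", local_only=False)
        STATUS["complete"] = True
    except Exception as e:
        STATUS["error"] = str(e)
    finally:
        STATUS["in_progress"] = False

def activate(action):
    # Phase 2: once warmup finished, reload strictly from the local cache.
    local_only = STATUS["complete"]
    if action == "stt" and (MODELS["stt"] is None or MODELS["stt"]["device"] != "cuda"):
        MODELS["stt"] = load_model(device="cuda", local_only=local_only)

Thread(target=warmup_task, daemon=True).start()
time.sleep(0.3)  # give the background warmup time to finish
activate("stt")
print(MODELS["stt"])  # {'device': 'cuda', 'local_only': True}
```

Note that the diff recreates the Whisper instance rather than moving it: faster-whisper binds a model to its device at construction, so promotion to CUDA means delete, collect, and rebuild.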
@@ -203,7 +219,8 @@ def _tts_logic(text, lang, speaker_wav_b64):
 def core_process(request_dict):
     action = request_dict.get("action")
     t0 = time.time()
-    …
+    # v93: Optimized logs (less "steps")
+    print(f"--- [v93] 🚀 GPU SESSION START: {action} ---")
     activate_gpu_models(action)
     try:
         if action == "stt": res = _stt_logic(request_dict)
@@ -217,7 +234,7 @@ def core_process(request_dict):
         elif action == "health": res = {"status": "awake"}
         else: res = {"error": f"Unknown action: {action}"}
     finally:
-        print(f"--- […
+        print(f"--- [v93] ✨ END: {action} ({time.time()-t0:.2f}s) ---")
         gc.collect()
         if torch.cuda.is_available(): torch.cuda.empty_cache()
     return res
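The two `core_process` hunks above give every request the same bracket: a START line, the dispatch, then an END line plus `gc.collect()`/`empty_cache()` in the `finally`. One possible refactor, not part of this commit, packages that bracket as a context manager so the timing and the cleanup cannot drift apart:

```python
import gc
import time
from contextlib import contextmanager

import torch

@contextmanager
def gpu_session(action):
    """Log the session bracket and always reclaim memory, even if dispatch raises."""
    t0 = time.time()
    print(f"--- [v93] 🚀 GPU SESSION START: {action} ---")
    try:
        yield
    finally:
        print(f"--- [v93] ✨ END: {action} ({time.time() - t0:.2f}s) ---")
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
```

`core_process` would then reduce to `with gpu_session(action): res = _dispatch(request_dict)`, where `_dispatch` stands in for the existing if/elif chain.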
@@ -226,23 +243,21 @@ app = FastAPI()
 @app.post("/api/v1/process")
 async def api_process(request: Request):
     try: return core_process(await request.json())
-    except Exception as e: …
+    except Exception as e: return {"error": str(e)}
 
 @app.get("/health")
 def health():
     return {
         "status": "ok",
-        "…
-        "…
-        "warmup_in_progress": WARMUP_STATUS["in_progress"],
+        "optimized": WARMUP_STATUS["complete"],
+        "gpu_available": torch.cuda.is_available(),
         "time": time.ctime()
     }
 
 @app.post("/api/v1/clear_cache")
 async def clear_cache():
     try:
-        gc.collect()
-        if torch.cuda.is_available(): torch.cuda.empty_cache()
+        gc.collect(); torch.cuda.empty_cache()
         temp_dir = tempfile.gettempdir()
         for f in os.listdir(temp_dir):
             if f.endswith(".wav") or f.startswith("tm"):
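With `optimized` and `gpu_available` now surfaced by `/health`, a client can hold traffic until the silent warmup finishes instead of paying for a cold first request. A stdlib-only polling sketch; the base URL is an assumption for a locally running instance:

```python
import json
import time
import urllib.request

BASE_URL = "http://localhost:7860"  # assumption: the app is served locally on its default port

def wait_until_optimized(timeout=300, interval=5):
    """Poll /health until the warmup flag flips, or raise after `timeout` seconds."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        with urllib.request.urlopen(f"{BASE_URL}/health", timeout=10) as resp:
            health = json.load(resp)
        if health.get("optimized"):
            return health
        time.sleep(interval)
    raise TimeoutError("warmup did not complete before the deadline")

if __name__ == "__main__":
    print(wait_until_optimized())  # e.g. {'status': 'ok', 'optimized': True, ...}
```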
@@ -260,5 +275,4 @@ app = gr.mount_gradio_app(app, demo, path="/")
 
 if __name__ == "__main__":
     start_background_warmup()
-    …
-    uvicorn.run(app, host="0.0.0.0", port=7860)
+    uvicorn.run(app, host="0.0.0.0", port=7860, log_level="error")