Upload app.py with huggingface_hub

app.py CHANGED
```diff
@@ -13,10 +13,11 @@ import torchaudio
 import gc
 import sys
 import types
+from threading import Thread, Lock
 from huggingface_hub import snapshot_download
 
-# …
-
+# 🛠️ 1. CRITICAL COMPATIBILITY MONKEYPATCHES
+# These MUST happen before any AI imports
 if "torchaudio.backend" not in sys.modules:
     backend = types.ModuleType("torchaudio.backend")
     common = types.ModuleType("torchaudio.backend.common")
```
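The two added comment lines describe a module-stubbing monkeypatch: legacy TTS code imports `torchaudio.backend.common`, which newer torchaudio builds no longer ship, so placeholder modules are registered before any AI imports run. A minimal sketch of the technique; the `AudioMetaData` attribute is an assumption here, and the real patch in app.py may expose different names:

```python
import sys
import types

# Create empty placeholder modules so `import torchaudio.backend.common`
# succeeds even on torchaudio builds that no longer ship that module.
if "torchaudio.backend" not in sys.modules:
    backend = types.ModuleType("torchaudio.backend")
    common = types.ModuleType("torchaudio.backend.common")

    # Hypothetical stand-in: legacy callers may look this class up on the
    # stub module; it only needs to exist to keep imports from failing.
    class AudioMetaData:
        pass

    common.AudioMetaData = AudioMetaData
    backend.common = common

    # Registering in sys.modules makes the stubs visible to all later imports.
    sys.modules["torchaudio.backend"] = backend
    sys.modules["torchaudio.backend.common"] = common
```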
```diff
@@ -52,21 +53,17 @@ try:
             return t, samplerate
         raise e
     torchaudio.load = patched_load
-
-except Exception as e: print(f"⚠️ Patch failed: {e}")
+except Exception: pass
 
-# 📦 2. …
-print("📦 Importing AI Libraries...")
+# 📦 2. AI LIBRARIES (No engines yet)
 import chatterbox_utils
-# Note: We import the classes, but DO NOT instantiate them on the CPU
 from faster_whisper import WhisperModel
 from TTS.api import TTS
 from df.enhance import init_df
-
+import deep_translator
 
 try:
     import spaces
-    print("✅ ZeroGPU/Spaces detected")
 except ImportError:
     class spaces:
         @staticmethod
```
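The `spaces` fallback appears here only in fragments (the next hunk's context lines show its body: `if f is None: return lambda x: x` / `return f`). It is a stub so the same code runs off ZeroGPU hardware. A sketch of the complete shape, where the `GPU` method name and `**kwargs` signature are assumptions inferred from those fragments:

```python
try:
    import spaces  # real package on HF ZeroGPU Spaces hardware
except ImportError:
    class spaces:  # stand-in exposing the same decorator surface
        @staticmethod
        def GPU(f=None, **kwargs):
            # Bare use (@spaces.GPU): f is the function; return it unchanged.
            # Parameterized use (@spaces.GPU(duration=60)): f is None,
            # so return a no-op decorator instead.
            if f is None: return lambda x: x
            return f
```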
```diff
@@ -74,49 +71,46 @@ except ImportError:
         if f is None: return lambda x: x
         return f
 
-# FORCE BUILD TRIGGER: 11:…
-# …
+# FORCE BUILD TRIGGER: 11:35:00 Jan 21 2026
+# v92: Background Warmup (Fixes infinite reload loop and redundant downloads)
 
 os.environ["COQUI_TOS_AGREED"] = "1"
-# MODELS starts empty to ensure a clean CUDA handoff
 MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
 
+# --- THREAD SAFETY & STATUS ---
+WARMUP_STATUS = {"complete": False, "in_progress": False, "error": None}
+WARMUP_LOCK = Lock()
+
 def activate_gpu_models(action):
-    """…"""
-    global MODELS
+    """v92: Safety wait for background download"""
+    global MODELS, WARMUP_STATUS
 
-    # …
+    # If warmup is still running, wait for it (simple polling to avoid complex locks)
+    wait_start = time.time()
+    while WARMUP_STATUS["in_progress"] and not WARMUP_STATUS["complete"]:
+        if time.time() - wait_start > 120:  # 2 min max wait
+            print("⚠️ Warmup taking too long, proceeding anyway...")
+            break
+        print(f"⏳ Waiting for background model download to finish for {action}...")
+        time.sleep(5)
+
+    # 1. Faster-Whisper GPU Activation
     if action in ["stt", "s2st"]:
-        if MODELS["stt"] is None:
-            print(f"🎙️ […
-            # No CPU instance should exist at this point
-            MODELS["stt"] = WhisperModel(
-                "large-v3",
-                device="cuda",
-                compute_type="float16"
-            )
-            print("✨ Whisper GPU Engine Ready")
-        elif MODELS["stt"].model.device != "cuda":
-            # This case shouldn't happen with No-Instance Startup, but for safety:
-            print("⚠️ Switching Whisper to GPU...")
-            del MODELS["stt"]
-            gc.collect()
-            torch.cuda.empty_cache()
+        if MODELS["stt"] is None or MODELS["stt"].model.device != "cuda":
+            print(f"🎙️ [v92] Activating Whisper on GPU for {action}...")
             MODELS["stt"] = WhisperModel("large-v3", device="cuda", compute_type="float16")
 
-    # 2. XTTS-v2
+    # 2. XTTS-v2 GPU Activation
     if action in ["tts", "s2st"]:
         if MODELS["tts"] is None:
-            print(…
+            print("🚀 Initializing XTTS directly to GPU...")
             MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
-        …
-            MODELS["tts"].to("cuda")
-        except: MODELS["tts"].to("cuda")
+        try:
+            current_dev = str(next(MODELS["tts"].synthesizer.tts_model.parameters()).device)
+            if "cuda" not in current_dev:
+                print("🔄 Moving XTTS-v2 to GPU...")
+                MODELS["tts"].to("cuda")
+        except: MODELS["tts"].to("cuda")
 
     # 3. Helpers
     if MODELS["denoiser"] is None:
```
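`activate_gpu_models` now polls `WARMUP_STATUS` with `time.sleep(5)` and a 120 s cap instead of blocking on the lock. For comparison, the same "wait until warmup finishes, with a timeout" behaviour can be had without a busy loop via `threading.Event`; a sketch under the assumption that the warmup thread signals completion (these names are illustrative, not from app.py):

```python
import threading

WARMUP_DONE = threading.Event()  # the warmup thread would call WARMUP_DONE.set() when finished

def wait_for_warmup(action: str, timeout: float = 120.0) -> None:
    # Blocks until set() is called or the timeout elapses;
    # Event.wait() returns False on timeout.
    if not WARMUP_DONE.wait(timeout=timeout):
        print(f"⚠️ Warmup still running after {timeout:.0f}s, proceeding with {action} anyway...")
```

The commit's polling loop is the simpler choice here, at the cost of waking every five seconds to print progress.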
```diff
@@ -124,32 +118,39 @@ def activate_gpu_models(action):
         except: pass
     if MODELS["translate"] is None: MODELS["translate"] = "active"
     chatterbox_utils.load_chatterbox(device="cuda" if torch.cuda.is_available() else "cpu")
+    gc.collect()
+    if torch.cuda.is_available(): torch.cuda.empty_cache()
 
-def …
-    """…"""
-
+def warmup_task():
+    """Background thread to handle heavy downloads (v92)"""
+    global WARMUP_STATUS
+    with WARMUP_LOCK:
+        if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
+        WARMUP_STATUS["in_progress"] = True
+
+    print("\n🔥 --- BACKGROUND WARMUP STARTED (v92) ---")
     start = time.time()
     try:
-        # …
-        …
+        # Check if local files exist to skip slow verification if possible
+        # snapshot_download is quite smart, but we'll log it clearly
+        print("📥 Caching Whisper large-v3 weights...")
+        snapshot_download(repo_id="Systran/faster-whisper-large-v3", local_files_only=False)
 
-        …
-        snapshot_download(repo_id="coqui/XTTS-v2")
+        print("📥 Caching XTTS-v2 weights...")
+        snapshot_download(repo_id="coqui/XTTS-v2", local_files_only=False)
 
-        # 3. Download DeepFilterNet
-        print("📥 Pre-downloading DeepFilterNet...")
-        # DeepFilterNet downloads usually happen via init_df, but we can try to force it
-        # snapshot_download(repo_id="RVoice/DeepFilterNet3")
-
-        # 4. Chatterbox Warmup
         chatterbox_utils.warmup_chatterbox()
 
-
-        print("…
+        WARMUP_STATUS["complete"] = True
+        print(f"✅ --- BACKGROUND WARMUP COMPLETE ({time.time()-start:.2f}s) --- \n")
     except Exception as e:
-        print(f"…
+        print(f"❌ Warmup error: {e}")
+        WARMUP_STATUS["error"] = str(e)
+    finally:
+        WARMUP_STATUS["in_progress"] = False
+
+def start_background_warmup():
+    Thread(target=warmup_task, daemon=True).start()
 
 def _stt_logic(request_dict):
     audio_bytes = base64.b64decode(request_dict.get("file"))
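The `with WARMUP_LOCK:` guard at the top of `warmup_task` makes the warmup idempotent: only the first caller flips `in_progress`, so repeated calls to `start_background_warmup()` cannot trigger duplicate downloads. A stripped-down, runnable sketch of that one-shot guard pattern:

```python
from threading import Lock, Thread
import time

STATUS = {"complete": False, "in_progress": False}
LOCK = Lock()

def one_shot_task():
    with LOCK:
        # Only the first caller proceeds; concurrent or repeat calls bail out.
        if STATUS["complete"] or STATUS["in_progress"]:
            return
        STATUS["in_progress"] = True
    try:
        time.sleep(1)  # stand-in for the heavy model downloads
        STATUS["complete"] = True
    finally:
        STATUS["in_progress"] = False

# Spawning it several times still runs the body exactly once.
threads = [Thread(target=one_shot_task) for _ in range(3)]
for t in threads: t.start()
for t in threads: t.join()
print(STATUS)  # {'complete': True, 'in_progress': False}
```

Note the lock only protects the check-and-set of the flags; the slow work itself runs outside the lock so other threads are never blocked on it.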
```diff
@@ -163,8 +164,7 @@ def _stt_logic(request_dict):
     if os.path.exists(temp_path): os.unlink(temp_path)
 
 def _translate_logic(text, target_lang):
-    …
-    return GoogleTranslator(source='auto', target=target_lang).translate(text)
+    return deep_translator.GoogleTranslator(source='auto', target=target_lang).translate(text)
 
 def _tts_logic(text, lang, speaker_wav_b64):
     if not text or not text.strip(): return {"error": "Input empty"}
```
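`_translate_logic` now reaches `GoogleTranslator` through the module-level `import deep_translator` added above. A quick usage sketch of that same call shape (the translated output is illustrative):

```python
import deep_translator

# source="auto" lets the service detect the input language.
translator = deep_translator.GoogleTranslator(source="auto", target="de")
print(translator.translate("Hello, world!"))  # e.g. "Hallo, Welt!"
```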
```diff
@@ -203,7 +203,7 @@ def _tts_logic(text, lang, speaker_wav_b64):
 def core_process(request_dict):
     action = request_dict.get("action")
     t0 = time.time()
-    print(f"--- […
+    print(f"--- [v92] 🚀 GPU SESSION START: {action} ---")
     activate_gpu_models(action)
     try:
         if action == "stt": res = _stt_logic(request_dict)
```
```diff
@@ -217,7 +217,7 @@ def core_process(request_dict):
         elif action == "health": res = {"status": "awake"}
         else: res = {"error": f"Unknown action: {action}"}
     finally:
-        print(f"--- […
+        print(f"--- [v92] ✨ END: {action} ({time.time()-t0:.2f}s) ---")
         gc.collect()
         if torch.cuda.is_available(): torch.cuda.empty_cache()
     return res
```
```diff
@@ -229,7 +229,14 @@ async def api_process(request: Request):
     except Exception as e: traceback.print_exc(); return {"error": str(e)}
 
 @app.get("/health")
-def health(): …
+def health():
+    return {
+        "status": "ok",
+        "gpu": torch.cuda.is_available(),
+        "warmup_complete": WARMUP_STATUS["complete"],
+        "warmup_in_progress": WARMUP_STATUS["in_progress"],
+        "time": time.ctime()
+    }
 
 @app.post("/api/v1/clear_cache")
 async def clear_cache():
```
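With warmup state exposed on `/health`, a client can hold off requests until `warmup_complete` is true. A minimal polling sketch using `requests`; the Space URL is a placeholder:

```python
import time
import requests

BASE_URL = "https://example-space.hf.space"  # placeholder, not the real Space URL

def wait_until_ready(timeout: float = 300.0) -> bool:
    # Poll /health until the background warmup reports completion.
    deadline = time.time() + timeout
    while time.time() < deadline:
        info = requests.get(f"{BASE_URL}/health", timeout=10).json()
        if info.get("warmup_complete"):
            return True
        time.sleep(5)
    return False
```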
```diff
@@ -252,5 +259,6 @@ demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="🚀 AI …
 app = gr.mount_gradio_app(app, demo, path="/")
 
 if __name__ == "__main__":
-    …
+    start_background_warmup()
+    print("🚀 Starting FastAPI Server...")
     uvicorn.run(app, host="0.0.0.0", port=7860)
```