Upload app.py with huggingface_hub
app.py
CHANGED
@@ -1,4 +1,4 @@
-# 🚀
+# 🚀 V111: ZEROGPU HARDENED ENGINE
 # Must be first to patch environment correctly
 try:
     import spaces
@@ -30,7 +30,7 @@ import traceback
 from threading import Thread, Lock
 from huggingface_hub import snapshot_download, hf_hub_download
 
-# 🛡️ 1. SILENCE & ENV (
+# 🛡️ 1. SILENCE & ENV (v111)
 logging.getLogger("transformers").setLevel(logging.ERROR)
 logging.getLogger("TTS").setLevel(logging.ERROR)
 os.environ["CT2_VERBOSE"] = "0"
@@ -38,7 +38,7 @@ os.environ["ORT_LOGGING_LEVEL"] = "3"
 os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
 os.environ["GRADIO_SERVER_PORT"] = "7860"
 
-# 🛠️ 2. COMPATIBILITY PATCHES (
+# 🛠️ 2. COMPATIBILITY PATCHES (v111)
 if "torchaudio.backend" not in sys.modules:
     backend = types.ModuleType("torchaudio.backend")
     common = types.ModuleType("torchaudio.backend.common")
@@ -83,8 +83,8 @@ from TTS.api import TTS
 from df.enhance import init_df
 import deep_translator
 
-# FORCE BUILD TRIGGER: 17:
-#
+# FORCE BUILD TRIGGER: 17:45:00 Jan 21 2026
+# v111: Hardened Engine. Chatterbox CPU-only. Whisper Clean Init.
 
 os.environ["COQUI_TOS_AGREED"] = "1"
 MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
@@ -93,8 +93,12 @@ WARMUP_STATUS = {"complete": False, "in_progress": False}
 WARMUP_LOCK = Lock()
 
 def activate_gpu_models(action):
-    """
+    """v111: Clean Isolation Activation"""
     global MODELS, WARMUP_STATUS
+    # Always clean before loading big models to prevent MIG out-of-memory
+    gc.collect()
+    if torch.cuda.is_available(): torch.cuda.empty_cache()
+
     local_only = WARMUP_STATUS["complete"]
 
     if action in ["stt", "s2st"]:
@@ -102,7 +106,7 @@ def activate_gpu_models(action):
         try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
         except: pass
         if not stt_on_gpu:
-            print(f"🎙️ [
+            print(f"🎙️ [v111] Activating Whisper (High-Speed int8_float16)...")
             try:
                 if MODELS["stt"]: del MODELS["stt"]; gc.collect(); torch.cuda.empty_cache()
                 MODELS["stt"] = WhisperModel(
@@ -113,7 +117,7 @@
                     local_files_only=local_only
                 )
             except Exception as e:
-                print(f"⚠️ GPU
+                print(f"⚠️ GPU Init failed: {e}")
                 MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8", local_files_only=True)
 
     if action in ["tts", "s2st"]:
@@ -123,14 +127,15 @@
             tts_on_gpu = "cuda" in curr
         except: pass
         if MODELS["tts"] is None or not tts_on_gpu:
-            print(f"🔊 [
+            print(f"🔊 [v111] Activating XTTS-v2 (GPU)...")
             try:
                 if MODELS["tts"] is None:
                     MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
                 else: MODELS["tts"].to("cuda")
             except: pass
 
-
+    # v111: Chatterbox is now strictly CPU to avoid CUDA conflicts
+    chatterbox_utils.load_chatterbox(device="cpu")
 
     if MODELS["denoiser"] is None:
         try: MODELS["denoiser"] = init_df()
@@ -138,9 +143,9 @@
     if MODELS["translate"] is None: MODELS["translate"] = "active"
 
 def release_gpu_models():
-    """
+    """v111: Force Release"""
     global MODELS
-    print("🧹 [
+    print("🧹 [v111] Releasing GPU.")
     try:
         if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
             del MODELS["stt"]
@@ -148,44 +153,37 @@
         if MODELS["tts"]:
             try: MODELS["tts"].to("cpu")
             except: pass
-
+        # Chatterbox is on CPU, no release needed
     except: pass
     gc.collect()
     if torch.cuda.is_available(): torch.cuda.empty_cache()
     time.sleep(0.5)
 
 def warmup_task():
-    """
+    """v111: Asset Prefetch"""
     global WARMUP_STATUS
     if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
     WARMUP_STATUS["in_progress"] = True
-    print("\n🔥 ---
-
-
-
-
-
-
-
-
-    # 3. Chatterbox
-    try: chatterbox_utils.warmup_chatterbox()
-    except Exception as e: print(f"⚠️ Chatterbox download: {e}")
-
-    WARMUP_STATUS["complete"] = True
-    print(f"✅ --- SYSTEM READY: v110 --- \n")
-    WARMUP_STATUS["in_progress"] = False
+    print("\n🔥 --- V111: ZEROGPU ASSET SYNC ---")
+    try:
+        snapshot_download("Systran/faster-whisper-large-v3")
+        snapshot_download("coqui/XTTS-v2")
+        chatterbox_utils.warmup_chatterbox()
+        WARMUP_STATUS["complete"] = True
+        print(f"✅ --- SYSTEM READY: v111 --- \n")
+    except Exception as e:
+        print(f"❌ Warmup Warning: {e}")
+    finally: WARMUP_STATUS["in_progress"] = False
 
 @spaces.GPU(duration=150)
 def core_process(request_dict):
     action = request_dict.get("action")
-    print(f"--- [
+    print(f"--- [v111] 🚀 PROCESS: {action} ---")
 
-    # Wait for
+    # Wait for assets
     waited = 0
     while not WARMUP_STATUS["complete"] and waited < 60:
-        if waited % 10 == 0: print(f"⏳ Syncing
+        if waited % 10 == 0: print(f"⏳ Syncing... ({waited}s)")
         time.sleep(1)
         waited += 1
 
@@ -232,8 +230,7 @@
             res = {"audio": base64.b64encode(audio_bytes).decode()}
 
         elif action == "s2st":
-            req_copy = request_dict.copy()
-            req_copy["action"] = "stt"
+            req_copy = request_dict.copy(); req_copy["action"] = "stt"
             stt_res = core_process.__wrapped__(req_copy)
             translated = deep_translator.GoogleTranslator(source='auto', target=request_dict.get("target_lang")).translate(stt_res.get("text", ""))
             req_tts = {"action": "tts", "text": translated, "lang": request_dict.get("target_lang"), "speaker_wav": request_dict.get("speaker_wav")}
@@ -241,10 +238,10 @@
             res = {"text": stt_res.get("text"), "translated": translated, "audio": tts_res.get("audio")}
         else: res = {"error": f"Unknown action: {action}"}
     except Exception as e:
-        print(f"❌
+        print(f"❌ Fault: {traceback.format_exc()}")
        res = {"error": str(e)}
    finally:
-        print(f"--- [
+        print(f"--- [v111] ✨ DONE ---")
        release_gpu_models()
    return res
 
@@ -262,21 +259,21 @@ async def api_process(request: Request):
     try:
         req_data = await request.json()
         if req_data.get("action") == "health":
-            return {"status": "awake", "warm": WARMUP_STATUS["complete"], "v": "
+            return {"status": "awake", "warm": WARMUP_STATUS["complete"], "v": "111"}
         return core_process(req_data)
     except Exception as e: return {"error": str(e)}
 
 @app.get("/health")
-def health(): return {"status": "ok", "warm": WARMUP_STATUS["complete"], "v": "
+def health(): return {"status": "ok", "warm": WARMUP_STATUS["complete"], "v": "111"}
 
 def gradio_fn(req_json):
     try: return json.dumps(core_process(json.loads(req_json)))
     except Exception as e: return json.dumps({"error": str(e)})
 
-demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="🚀 AI Engine
+demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="🚀 AI Engine v111")
 demo.queue()
 app = gr.mount_gradio_app(app, demo, path="/")
 
 if __name__ == "__main__":
-    print("🚀 [
+    print("🚀 [v111] Starting Hardened Server...")
     uvicorn.run(app, host="0.0.0.0", port=7860, log_level="error")
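
Note on the s2st branch above: it calls core_process.__wrapped__(req_copy) rather than core_process(req_copy), so the nested stt and tts passes reuse the GPU slot already held by the outer @spaces.GPU call instead of queuing a second invocation. A minimal sketch of the pattern, with a hypothetical gpu decorator standing in for spaces.GPU (assumed, as in the commit, to expose the undecorated function via __wrapped__ the way functools.wraps does):

    import functools

    def gpu(fn):
        # Stand-in for spaces.GPU (hypothetical, for illustration only).
        @functools.wraps(fn)  # wraps() sets wrapper.__wrapped__ = fn
        def wrapper(*args, **kwargs):
            # A real decorator would reserve a ZeroGPU slot here.
            return fn(*args, **kwargs)
        return wrapper

    @gpu
    def core(action):
        if action == "s2st":
            # Call the plain function body, not the decorated wrapper,
            # so the nested pass does not try to reserve a second slot.
            return core.__wrapped__("stt")
        return f"handled {action}"

    print(core("s2st"))  # -> handled stt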
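
Since the commit bumps both health payloads to "v": "111", a client can gate work on the warm flag before posting a job. A hedged sketch: SPACE_URL and the /api/process route are placeholders (the route decorator for api_process sits outside these hunks), and only the GET /health shape, the tts payload keys from the s2st branch, and the base64 audio envelope are confirmed by the diff above.

    import base64
    import time

    import requests  # third-party HTTP client, assumed available

    SPACE_URL = "https://your-space.hf.space"  # placeholder; not part of the commit
    API_ROUTE = SPACE_URL + "/api/process"     # hypothetical path for api_process

    def wait_until_warm(timeout_s=120):
        # Poll GET /health until warmup_task flips WARMUP_STATUS["complete"].
        deadline = time.time() + timeout_s
        while time.time() < deadline:
            health = requests.get(SPACE_URL + "/health", timeout=10).json()
            # Shape per the diff: {"status": "ok", "warm": <bool>, "v": "111"}
            if health.get("warm"):
                return True
            time.sleep(5)
        return False

    if wait_until_warm():
        # Payload keys mirror the req_tts dict built in the s2st branch.
        payload = {"action": "tts", "text": "bonjour", "lang": "fr", "speaker_wav": None}
        res = requests.post(API_ROUTE, json=payload, timeout=150).json()
        if "audio" in res:
            # core_process returns audio as base64-encoded bytes.
            with open("out.wav", "wb") as f:
                f.write(base64.b64decode(res["audio"]))
        else:
            print(res.get("error"))

Polling first also avoids burning part of the 150-second @spaces.GPU budget inside core_process's own 60-second warmup wait loop.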