Spaces:

TGPro1
/

S2ST

Running on Zero

App Files Files Community

TGPro1 committed 28 days ago

Commit

855133f

verified ·

1 Parent(s): 8d4fa1d

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +46 -47

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# 🚀 V108: ZEROGPU REQUEST-WAIT PROTOCOL
 # Must be first to patch environment correctly
 try:
     import spaces
@@ -27,9 +27,9 @@ import sys
 import types
 import logging
 from threading import Thread, Lock
-from huggingface_hub import snapshot_download
-# 🛡️ 1. SILENCE & ENV (v108)
 logging.getLogger("transformers").setLevel(logging.ERROR)
 logging.getLogger("TTS").setLevel(logging.ERROR)
 os.environ["CT2_VERBOSE"] = "0"
@@ -37,7 +37,7 @@ os.environ["ORT_LOGGING_LEVEL"] = "3"
 os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
 os.environ["GRADIO_SERVER_PORT"] = "7860"
-# 🛠️ 2. COMPATIBILITY PATCHES (v108)
 if "torchaudio.backend" not in sys.modules:
     backend = types.ModuleType("torchaudio.backend")
     common = types.ModuleType("torchaudio.backend.common")
@@ -77,13 +77,14 @@ except Exception: pass
 # 📦 3. AI LIBRARIES
 import chatterbox_utils
 from faster_whisper import WhisperModel
 from TTS.api import TTS
 from df.enhance import init_df
 import deep_translator
-# FORCE REBUILD: 17:18:00 Jan 21 2026
-# v108: Rebuilt with full requirements. Wait protocol active.
 os.environ["COQUI_TOS_AGREED"] = "1"
 MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
@@ -92,7 +93,7 @@ WARMUP_STATUS = {"complete": False, "in_progress": False}
 WARMUP_LOCK = Lock()
 def activate_gpu_models(action):
-    """v108: Optimized Activation"""
     global MODELS, WARMUP_STATUS
     local_only = WARMUP_STATUS["complete"]
@@ -101,11 +102,9 @@ def activate_gpu_models(action):
         try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
         except: pass
         if not stt_on_gpu:
-            print(f"🎙️ [v108] Activating Whisper (GPU: int8_float16)...")
             try:
-                if MODELS["stt"]: del MODELS["stt"]
-                gc.collect(); torch.cuda.empty_cache()
-                time.sleep(0.5)
                 MODELS["stt"] = WhisperModel(
                     "large-v3",
                     device="cuda",
@@ -114,7 +113,7 @@ def activate_gpu_models(action):
                     local_files_only=local_only
                 )
             except Exception as e:
-                print(f"⚠️ GPU Init failed: {e}")
                 MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8", local_files_only=True)
     if action in ["tts", "s2st"]:
@@ -124,14 +123,15 @@ def activate_gpu_models(action):
             tts_on_gpu = "cuda" in curr
         except: pass
         if MODELS["tts"] is None or not tts_on_gpu:
-            print(f"🔊 [v108] Activating XTTS-v2 (GPU)...")
             try:
                 if MODELS["tts"] is None:
                     MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
                 else: MODELS["tts"].to("cuda")
             except: pass
-    chatterbox_utils.load_chatterbox(device="cuda" if torch.cuda.is_available() else "cpu")
     if MODELS["denoiser"] is None:
         try: MODELS["denoiser"] = init_df()
@@ -139,9 +139,9 @@ def activate_gpu_models(action):
     if MODELS["translate"] is None: MODELS["translate"] = "active"
 def release_gpu_models():
-    """v108: Resilient Release"""
     global MODELS
-    print("🧹 [v108] Releasing GPU resources.")
     try:
         if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
             del MODELS["stt"]
@@ -156,39 +156,43 @@ def release_gpu_models():
     time.sleep(0.5)
 def warmup_task():
-    """v108: System Preparation"""
     global WARMUP_STATUS
-    with WARMUP_LOCK:
-        if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
-        WARMUP_STATUS["in_progress"] = True
-    print("\n🔥 --- V108: ZEROGPU WARMUP STARTED ---")
     try:
-        # Pre-download everything to CPU first
-        MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
-        MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
-        MODELS["denoiser"] = init_df()
         chatterbox_utils.warmup_chatterbox()
         WARMUP_STATUS["complete"] = True
-        print(f"✅ --- SYSTEM READY: v108 --- \n")
     except Exception as e:
-        print(f"❌ Warmup Error: {e}")
     finally: WARMUP_STATUS["in_progress"] = False
 @spaces.GPU(duration=150)
 def core_process(request_dict):
     action = request_dict.get("action")
-    print(f"--- [v108] 🚀 REQUEST: {action} ---")
-    # 🔥 v108: WAIT PROTOCOL
-    max_wait = 180 # 3 minutes for absolute first-time build/download
     waited = 0
-    while not WARMUP_STATUS["complete"] and waited < max_wait:
-        if waited % 5 == 0: print(f"⏳ System warming up... (waited {waited}s)")
         time.sleep(1)
         waited += 1
-    if not WARMUP_STATUS["complete"]:
-        return {"error": "System still warming up. Please try again in 30 seconds."}
     t1 = time.time()
     activate_gpu_models(action)
@@ -234,23 +238,19 @@ def core_process(request_dict):
                 res = {"audio": base64.b64encode(audio_bytes).decode()}
         elif action == "s2st":
-            # Combined logic
             req_copy = request_dict.copy()
             req_copy["action"] = "stt"
-            stt_res = core_process.__wrapped__(req_copy) # Recursive but without double GPU wrapper
             translated = deep_translator.GoogleTranslator(source='auto', target=request_dict.get("target_lang")).translate(stt_res.get("text", ""))
             req_tts = {"action": "tts", "text": translated, "lang": request_dict.get("target_lang"), "speaker_wav": request_dict.get("speaker_wav")}
             tts_res = core_process.__wrapped__(req_tts)
             res = {"text": stt_res.get("text"), "translated": translated, "audio": tts_res.get("audio")}
         else: res = {"error": f"Unknown action: {action}"}
     except Exception as e:
-        print(f"❌ Error in core_process: {traceback.format_exc()}")
         res = {"error": str(e)}
     finally:
-        print(f"--- [v108] ✨ DONE ({time.time()-t1:.2f}s) ---")
         release_gpu_models()
     return res
@@ -268,22 +268,21 @@ async def api_process(request: Request):
     try:
         req_data = await request.json()
         if req_data.get("action") == "health":
-            return {"status": "awake", "warm": WARMUP_STATUS["complete"], "v": "108"}
         return core_process(req_data)
     except Exception as e: return {"error": str(e)}
 @app.get("/health")
-def health(): return {"status": "ok", "warm": WARMUP_STATUS["complete"], "v": "108"}
 def gradio_fn(req_json):
     try: return json.dumps(core_process(json.loads(req_json)))
     except Exception as e: return json.dumps({"error": str(e)})
-demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="🚀 AI Engine v108")
 demo.queue()
 app = gr.mount_gradio_app(app, demo, path="/")
 if __name__ == "__main__":
-    print("🚀 [v108] Starting System...")
     uvicorn.run(app, host="0.0.0.0", port=7860, log_level="error")

+# 🚀 V109: ZEROGPU STERILE STARTUP
 # Must be first to patch environment correctly
 try:
     import spaces
 import types
 import logging
 from threading import Thread, Lock
+from huggingface_hub import snapshot_download, hf_hub_download
+# 🛡️ 1. SILENCE & ENV (v109)
 logging.getLogger("transformers").setLevel(logging.ERROR)
 logging.getLogger("TTS").setLevel(logging.ERROR)
 os.environ["CT2_VERBOSE"] = "0"
 os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
 os.environ["GRADIO_SERVER_PORT"] = "7860"
+# 🛠️ 2. COMPATIBILITY PATCHES (v109)
 if "torchaudio.backend" not in sys.modules:
     backend = types.ModuleType("torchaudio.backend")
     common = types.ModuleType("torchaudio.backend.common")
 # 📦 3. AI LIBRARIES
 import chatterbox_utils
+# We import the model classes here but do NOT instantiate them at the top level
 from faster_whisper import WhisperModel
 from TTS.api import TTS
 from df.enhance import init_df
 import deep_translator
+# FORCE BUILD TRIGGER: 17:35:00 Jan 21 2026
+# v109: Sterile Startup. Defer all AI initialization to the GPU session.
 os.environ["COQUI_TOS_AGREED"] = "1"
 MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
 WARMUP_LOCK = Lock()
 def activate_gpu_models(action):
+    """v109: Deferred Activation (Inside GPU)"""
     global MODELS, WARMUP_STATUS
     local_only = WARMUP_STATUS["complete"]
         try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
         except: pass
         if not stt_on_gpu:
+            print(f"🎙️ [v109] Initializing Whisper (GPU: int8_float16)...")
             try:
+                if MODELS["stt"]: del MODELS["stt"]; gc.collect(); torch.cuda.empty_cache()
                 MODELS["stt"] = WhisperModel(
                     "large-v3",
                     device="cuda",
                     local_files_only=local_only
                 )
             except Exception as e:
+                print(f"⚠️ GPU STT Init failed: {e}")
                 MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8", local_files_only=True)
     if action in ["tts", "s2st"]:
             tts_on_gpu = "cuda" in curr
         except: pass
         if MODELS["tts"] is None or not tts_on_gpu:
+            print(f"🔊 [v109] Initializing XTTS-v2 (GPU)...")
             try:
                 if MODELS["tts"] is None:
                     MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
                 else: MODELS["tts"].to("cuda")
             except: pass
+    # In v109, we only load chatterbox into CUDA when needed
+    chatterbox_utils.load_chatterbox(device="cuda")
     if MODELS["denoiser"] is None:
         try: MODELS["denoiser"] = init_df()
     if MODELS["translate"] is None: MODELS["translate"] = "active"
 def release_gpu_models():
+    """v109: GPU Release"""
     global MODELS
+    print("🧹 [v109] Releasing GPU.")
     try:
         if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
             del MODELS["stt"]
     time.sleep(0.5)
 def warmup_task():
+    """v109: Sterile Warmup (Downloads only)"""
     global WARMUP_STATUS
+    if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
+    WARMUP_STATUS["in_progress"] = True
+    print("\n🔥 --- V109: ZEROGPU STERILE WARMUP ---")
     try:
+        # Pre-fetch files WITHOUT loading them into memory/CUDA
+        print("⏬ Pre-fetching Whisper Large-v3...")
+        snapshot_download("Systran/faster-whisper-large-v3")
+        print("⏬ Pre-fetching XTTS-v2...")
+        # TTS internal download trigger (folder structure varies)
+        snapshot_download("coqui/XTTS-v2")
+        print("⏬ Pre-fetching Chatterbox...")
         chatterbox_utils.warmup_chatterbox()
+        # DeepFilterNet
+        hf_hub_download(repo_id="R_S/DeepFilterNet3", filename="config.json")
         WARMUP_STATUS["complete"] = True
+        print(f"✅ --- CACHE SYNCED: v109 --- \n")
     except Exception as e:
+        print(f"❌ Warmup Warning (might be fine): {e}")
     finally: WARMUP_STATUS["in_progress"] = False
 @spaces.GPU(duration=150)
 def core_process(request_dict):
     action = request_dict.get("action")
+    print(f"--- [v109] 🚀 REQUEST: {action} ---")
+    # Wait for downloads if necessary
     waited = 0
+    while not WARMUP_STATUS["complete"] and waited < 300: # 5 min limit
+        if waited % 10 == 0: print(f"⏳ Downloading models... ({waited}s)")
         time.sleep(1)
         waited += 1
     t1 = time.time()
     activate_gpu_models(action)
                 res = {"audio": base64.b64encode(audio_bytes).decode()}
         elif action == "s2st":
             req_copy = request_dict.copy()
             req_copy["action"] = "stt"
+            stt_res = core_process.__wrapped__(req_copy)
             translated = deep_translator.GoogleTranslator(source='auto', target=request_dict.get("target_lang")).translate(stt_res.get("text", ""))
             req_tts = {"action": "tts", "text": translated, "lang": request_dict.get("target_lang"), "speaker_wav": request_dict.get("speaker_wav")}
             tts_res = core_process.__wrapped__(req_tts)
             res = {"text": stt_res.get("text"), "translated": translated, "audio": tts_res.get("audio")}
         else: res = {"error": f"Unknown action: {action}"}
     except Exception as e:
+        print(f"❌ Core Error: {traceback.format_exc()}")
         res = {"error": str(e)}
     finally:
+        print(f"--- [v109] ✨ DONE ({time.time()-t1:.2f}s) ---")
         release_gpu_models()
     return res
     try:
         req_data = await request.json()
         if req_data.get("action") == "health":
+            return {"status": "awake", "warm": WARMUP_STATUS["complete"], "v": "109"}
         return core_process(req_data)
     except Exception as e: return {"error": str(e)}
 @app.get("/health")
+def health(): return {"status": "ok", "warm": WARMUP_STATUS["complete"], "v": "109"}
 def gradio_fn(req_json):
     try: return json.dumps(core_process(json.loads(req_json)))
     except Exception as e: return json.dumps({"error": str(e)})
+demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="🚀 AI Engine v109")
 demo.queue()
 app = gr.mount_gradio_app(app, demo, path="/")
 if __name__ == "__main__":
+    print("🚀 [v109] Starting Sterile Server...")
     uvicorn.run(app, host="0.0.0.0", port=7860, log_level="error")