TGPro1 committed on
Commit
0b4811b
·
verified ·
1 Parent(s): 1822120

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +37 -43
app.py CHANGED
@@ -1,3 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
  from fastapi import FastAPI, Request, Response
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from contextlib import asynccontextmanager
@@ -18,7 +30,7 @@ import logging
18
  from threading import Thread, Lock
19
  from huggingface_hub import snapshot_download
20
 
21
- # πŸ›‘οΈ 1. SILENCE LOGS & WARNINGS (v105: CUDA Stability Protocol)
22
  logging.getLogger("transformers").setLevel(logging.ERROR)
23
  logging.getLogger("TTS").setLevel(logging.ERROR)
24
  logging.getLogger("onnxruntime").setLevel(logging.ERROR)
@@ -72,17 +84,8 @@ from TTS.api import TTS
72
  from df.enhance import init_df
73
  import deep_translator
74
 
75
- try:
76
- import spaces
77
- except ImportError:
78
- class spaces:
79
- @staticmethod
80
- def GPU(duration=60, f=None):
81
- if f is None: return lambda x: x
82
- return f
83
-
84
- # FORCE BUILD TRIGGER: 14:00:00 Jan 21 2026
85
- # v105: CUDA Stability Fix. int8_float16. Device Indexing.
86
 
87
  os.environ["COQUI_TOS_AGREED"] = "1"
88
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
@@ -91,7 +94,7 @@ WARMUP_STATUS = {"complete": False, "in_progress": False}
91
  WARMUP_LOCK = Lock()
92
 
93
  def activate_gpu_models(action):
94
- """v105: MISSION-CRITICAL GPU ACTIVATION"""
95
  global MODELS, WARMUP_STATUS
96
  local_only = WARMUP_STATUS["complete"]
97
 
@@ -100,12 +103,11 @@ def activate_gpu_models(action):
100
  try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
101
  except: pass
102
  if not stt_on_gpu:
103
- print(f"πŸŽ™οΈ [v105] Activating Whisper (GPU: int8_float16 Stability)...")
104
  try:
105
  if MODELS["stt"]: del MODELS["stt"]
106
  gc.collect(); torch.cuda.empty_cache()
107
- time.sleep(0.5) # Let CUDA driver settle
108
- # v105: int8_float16 is the most stable for CTranslate2 on H200 MIGs
109
  MODELS["stt"] = WhisperModel(
110
  "large-v3",
111
  device="cuda",
@@ -115,7 +117,7 @@ def activate_gpu_models(action):
115
  local_files_only=local_only
116
  )
117
  except Exception as e:
118
- print(f"⚠️ CUDA v105 Stability Init failed: {e}")
119
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8", local_files_only=True)
120
 
121
  if action in ["tts", "s2st"]:
@@ -125,7 +127,7 @@ def activate_gpu_models(action):
125
  tts_on_gpu = "cuda" in curr
126
  except: pass
127
  if MODELS["tts"] is None or not tts_on_gpu:
128
- print(f"πŸ”Š [v105] Activating XTTS-v2 (GPU)...")
129
  try:
130
  if MODELS["tts"] is None:
131
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
@@ -140,9 +142,9 @@ def activate_gpu_models(action):
140
  if MODELS["translate"] is None: MODELS["translate"] = "active"
141
 
142
  def release_gpu_models():
143
- """v105: Safe Resident Cleanup"""
144
  global MODELS
145
- print("🧹 [v105] Releasing GPU resources.")
146
  try:
147
  if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
148
  del MODELS["stt"]
@@ -154,21 +156,21 @@ def release_gpu_models():
154
  except: pass
155
  gc.collect()
156
  if torch.cuda.is_available(): torch.cuda.empty_cache()
157
- time.sleep(0.5) # Driver buffer
158
 
159
  def warmup_task():
160
- """Silent Pre-loading (v105)"""
161
  global WARMUP_STATUS
162
  with WARMUP_LOCK:
163
  if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
164
  WARMUP_STATUS["in_progress"] = True
165
- print("\nπŸ”₯ --- V105: STABILITY WARMUP STARTED ---")
166
  try:
167
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
168
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
169
  chatterbox_utils.warmup_chatterbox()
170
  WARMUP_STATUS["complete"] = True
171
- print(f"βœ… --- SYSTEM READY: v105 --- \n")
172
  except: pass
173
  finally: WARMUP_STATUS["in_progress"] = False
174
 
@@ -220,7 +222,7 @@ def _tts_logic(text, lang, speaker_wav_b64):
220
  def core_process(request_dict):
221
  action = request_dict.get("action")
222
  t1 = time.time()
223
- print(f"--- [v105] πŸš€ GPU SESSION: {action} ---")
224
  activate_gpu_models(action)
225
  try:
226
  if action == "stt": res = _stt_logic(request_dict)
@@ -233,7 +235,7 @@ def core_process(request_dict):
233
  res = {"text": stt_res.get("text"), "translated": translated, "audio": tts_res.get("audio")}
234
  else: res = {"error": f"Unknown action: {action}"}
235
  finally:
236
- print(f"--- [v105] ✨ SUCCESS: {action} ({time.time()-t1:.2f}s) ---")
237
  release_gpu_models()
238
  return res
239
 
@@ -242,14 +244,9 @@ async def lifespan(app: FastAPI):
242
  Thread(target=warmup_task, daemon=True).start()
243
  yield
244
 
245
- # πŸš€ STEP 1: DEFINE THE PRIMARY FASTAPI APP
246
  app = FastAPI(lifespan=lifespan)
247
- app.add_middleware(
248
- CORSMiddleware,
249
- allow_origins=["*"],
250
- allow_methods=["*"],
251
- allow_headers=["*"],
252
- )
253
 
254
  @app.post("/api/v1/process")
255
  async def api_process(request: Request):
@@ -261,7 +258,7 @@ async def api_process(request: Request):
261
  except Exception as e: return {"error": str(e)}
262
 
263
  @app.get("/health")
264
- def health(): return {"status": "ok", "warm": WARMUP_STATUS["complete"], "v": "105"}
265
 
266
  @app.post("/api/v1/clear_cache")
267
  async def clear_cache_api():
@@ -270,21 +267,18 @@ async def clear_cache_api():
270
  return {"status": "success"}
271
  except: return {"status": "error"}
272
 
273
- # πŸš€ STEP 2: DEFINE GRADIO INTERFACE
274
  def gradio_fn(req_json):
275
  try: return json.dumps(core_process(json.loads(req_json)))
276
  except Exception as e: return json.dumps({"error": str(e)})
277
 
278
- demo = gr.Interface(
279
- fn=gradio_fn,
280
- inputs="text",
281
- outputs="text",
282
- title="πŸš€ AI Engine v105"
283
- )
284
 
285
- # πŸš€ STEP 3: MOUNT GRADIO ONTO FASTAPI
286
  app = gr.mount_gradio_app(app, demo, path="/")
287
 
288
  if __name__ == "__main__":
289
- print("πŸš€ [v105] Starting Unified Server on Port 7860...")
290
  uvicorn.run(app, host="0.0.0.0", port=7860, log_level="error")
 
1
+ # πŸš€ V106: ZEROGPU PRIORITY IMPORT
2
+ # Must be first to patch environment correctly
3
+ try:
4
+ import spaces
5
+ except ImportError:
6
+ print("⚠️ 'spaces' not installed. ZeroGPU features disabled.")
7
+ class spaces:
8
+ @staticmethod
9
+ def GPU(duration=60, f=None):
10
+ if f is None: return lambda x: x
11
+ return f
12
+
13
  from fastapi import FastAPI, Request, Response
14
  from fastapi.middleware.cors import CORSMiddleware
15
  from contextlib import asynccontextmanager
 
30
  from threading import Thread, Lock
31
  from huggingface_hub import snapshot_download
32
 
33
+ # πŸ›‘οΈ 1. SILENCE LOGS & WARNINGS (v106: Verified Silence)
34
  logging.getLogger("transformers").setLevel(logging.ERROR)
35
  logging.getLogger("TTS").setLevel(logging.ERROR)
36
  logging.getLogger("onnxruntime").setLevel(logging.ERROR)
 
84
  from df.enhance import init_df
85
  import deep_translator
86
 
87
+ # FORCE BUILD TRIGGER: 14:45:00 Jan 21 2026
88
+ # v106: Import Priority + Explicit Queue.
 
 
 
 
 
 
 
 
 
89
 
90
  os.environ["COQUI_TOS_AGREED"] = "1"
91
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
 
94
  WARMUP_LOCK = Lock()
95
 
96
  def activate_gpu_models(action):
97
+ """v106: Stable Activation"""
98
  global MODELS, WARMUP_STATUS
99
  local_only = WARMUP_STATUS["complete"]
100
 
 
103
  try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
104
  except: pass
105
  if not stt_on_gpu:
106
+ print(f"πŸŽ™οΈ [v106] Activating Whisper (GPU: int8_float16)...")
107
  try:
108
  if MODELS["stt"]: del MODELS["stt"]
109
  gc.collect(); torch.cuda.empty_cache()
110
+ time.sleep(0.5)
 
111
  MODELS["stt"] = WhisperModel(
112
  "large-v3",
113
  device="cuda",
 
117
  local_files_only=local_only
118
  )
119
  except Exception as e:
120
+ print(f"⚠️ GPU Init failed: {e}")
121
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8", local_files_only=True)
122
 
123
  if action in ["tts", "s2st"]:
 
127
  tts_on_gpu = "cuda" in curr
128
  except: pass
129
  if MODELS["tts"] is None or not tts_on_gpu:
130
+ print(f"πŸ”Š [v106] Activating XTTS-v2 (GPU)...")
131
  try:
132
  if MODELS["tts"] is None:
133
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
 
142
  if MODELS["translate"] is None: MODELS["translate"] = "active"
143
 
144
  def release_gpu_models():
145
+ """v106: Resident Cleanup"""
146
  global MODELS
147
+ print("🧹 [v106] Releasing GPU resources.")
148
  try:
149
  if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
150
  del MODELS["stt"]
 
156
  except: pass
157
  gc.collect()
158
  if torch.cuda.is_available(): torch.cuda.empty_cache()
159
+ time.sleep(0.5)
160
 
161
  def warmup_task():
162
+ """Silent Warmup (v106)"""
163
  global WARMUP_STATUS
164
  with WARMUP_LOCK:
165
  if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
166
  WARMUP_STATUS["in_progress"] = True
167
+ print("\nπŸ”₯ --- V106: ZEROGPU RECOVERY WARMUP ---")
168
  try:
169
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
170
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
171
  chatterbox_utils.warmup_chatterbox()
172
  WARMUP_STATUS["complete"] = True
173
+ print(f"βœ… --- SYSTEM READY: v106 --- \n")
174
  except: pass
175
  finally: WARMUP_STATUS["in_progress"] = False
176
 
 
222
  def core_process(request_dict):
223
  action = request_dict.get("action")
224
  t1 = time.time()
225
+ print(f"--- [v106] πŸš€ GPU SESSION: {action} ---")
226
  activate_gpu_models(action)
227
  try:
228
  if action == "stt": res = _stt_logic(request_dict)
 
235
  res = {"text": stt_res.get("text"), "translated": translated, "audio": tts_res.get("audio")}
236
  else: res = {"error": f"Unknown action: {action}"}
237
  finally:
238
+ print(f"--- [v106] ✨ SUCCESS: {action} ({time.time()-t1:.2f}s) ---")
239
  release_gpu_models()
240
  return res
241
 
 
244
  Thread(target=warmup_task, daemon=True).start()
245
  yield
246
 
247
+ # πŸš€ PRIMARY FASTAPI
248
  app = FastAPI(lifespan=lifespan)
249
+ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
 
 
 
 
 
250
 
251
  @app.post("/api/v1/process")
252
  async def api_process(request: Request):
 
258
  except Exception as e: return {"error": str(e)}
259
 
260
  @app.get("/health")
261
+ def health(): return {"status": "ok", "warm": WARMUP_STATUS["complete"], "v": "106"}
262
 
263
  @app.post("/api/v1/clear_cache")
264
  async def clear_cache_api():
 
267
  return {"status": "success"}
268
  except: return {"status": "error"}
269
 
 
270
  def gradio_fn(req_json):
271
  try: return json.dumps(core_process(json.loads(req_json)))
272
  except Exception as e: return json.dumps({"error": str(e)})
273
 
274
+ # πŸš€ GRADIO WITH QUEUE
275
+ demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="πŸš€ AI Engine v106")
276
+ # πŸ”₯ V106: EXPLICIT QUEUE. ZeroGPU needs this.
277
+ demo.queue()
 
 
278
 
279
+ # MOUNT
280
  app = gr.mount_gradio_app(app, demo, path="/")
281
 
282
  if __name__ == "__main__":
283
+ print("πŸš€ [v106] Starting Unified Server (ZeroGPU Fixed)...")
284
  uvicorn.run(app, host="0.0.0.0", port=7860, log_level="error")