Upload app.py with huggingface_hub
app.py
CHANGED
@@ -1,4 +1,4 @@
-# 🚀
+# 🚀 V117: ZEROGPU HOPPER DIRECT (CLEAN ACTIVATE)
 try:
     import spaces
 except ImportError:
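The `try: import spaces` guard keeps the module importable off-Spaces; the `except ImportError:` body falls outside this hunk, so the following is only a hypothetical sketch of what such a fallback usually looks like. It has to return a decorator from `GPU(...)` because the file later applies `@spaces.GPU(duration=150)`, and it uses `functools.wraps` so that `core_process.__wrapped__` (used further down) still resolves:

import functools

try:
    import spaces
except ImportError:
    class spaces:  # hypothetical no-op stand-in for local runs
        @staticmethod
        def GPU(*d_args, **d_kwargs):
            def decorate(fn):
                @functools.wraps(fn)  # exposes fn as wrapper.__wrapped__
                def wrapper(*args, **kwargs):
                    return fn(*args, **kwargs)
                return wrapper
            return decorate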
@@ -28,7 +28,7 @@ import traceback
 from threading import Thread
 from huggingface_hub import snapshot_download, hf_hub_download
 
-# 🛡️ 1. SILENCE & ENV (
+# 🛡️ 1. SILENCE & ENV (v117)
 logging.getLogger("transformers").setLevel(logging.ERROR)
 logging.getLogger("TTS").setLevel(logging.ERROR)
 os.environ["CT2_VERBOSE"] = "0"
@@ -81,19 +81,23 @@ from TTS.api import TTS
 from df.enhance import init_df
 import deep_translator
 
-# FORCE BUILD TRIGGER: 19:
-#
+# FORCE BUILD TRIGGER: 19:15:00 Jan 21 2026
+# v117: Hopper Direct. float16 native. 2s Settle. Absolute Paths.
 
 os.environ["COQUI_TOS_AGREED"] = "1"
 MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
 READY_FLAG = os.path.expanduser("~/.engine_ready")
+MODEL_PATHS = {"stt": None, "tts": None}
 
 def is_system_ready():
     return os.path.exists(READY_FLAG)
 
 def activate_gpu_models(action):
-    """
-    global MODELS
+    """v117: Stable Native Activation"""
+    global MODELS, MODEL_PATHS
+
+    # 🏎️ v117: 2s Driver Settle. Crucial for MIG partitions.
+    time.sleep(2)
 
     if action in ["stt", "s2st"]:
         stt_on_gpu = False
@@ -101,21 +105,20 @@ def activate_gpu_models(action):
         except: pass
 
         if not stt_on_gpu:
-            print(f"🎙️ [
+            print(f"🎙️ [v117] ACTIVATE: Whisper (Native float16, Auto-Device)...")
             try:
                 gc.collect(); torch.cuda.empty_cache()
-
-
+                # Use absolute local path to bypass hub/integrity hangs
+                path = MODEL_PATHS["stt"] or "large-v3"
                 MODELS["stt"] = WhisperModel(
-
+                    path,
                     device="cuda",
-
-                    compute_type="int8_float16",
+                    compute_type="float16",  # v117: native format for the H200
                     num_workers=1
                 )
-                print(f"🎙️ [
+                print(f"🎙️ [v117] WHISPER: Ready.")
             except Exception as e:
-                print(f"⚠️ [
+                print(f"⚠️ [v117] GPU STT Error: {e}")
                 MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
 
     if action in ["tts", "s2st"]:
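CTranslate2 models (which back faster-whisper) cannot be moved between devices in place, which is why this hunk rebuilds `MODELS["stt"]` on `device="cuda"` rather than calling `.to()`. A minimal sketch of the promote/release round trip, assuming the v117 globals and hypothetical helper names:

import gc
import torch
from faster_whisper import WhisperModel

def promote_stt(local_path):
    # Rebuild the CTranslate2 graph directly on the GPU from a local snapshot.
    gc.collect(); torch.cuda.empty_cache()
    return WhisperModel(local_path or "large-v3", device="cuda",
                        compute_type="float16", num_workers=1)

def demote_stt(models):
    # Freeing the CUDA copy means dropping the reference, then flushing the allocator.
    models["stt"] = None
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()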
@@ -126,12 +129,9 @@
         except: pass
 
         if MODELS["tts"] is not None and not tts_on_gpu:
-            print(f"🚀 [
-            try:
-
-                print(f"🚀 [v116] XTTS CHECKPOINT: Ready.")
-            except Exception as e:
-                print(f"⚠️ [v116] XTTS GPU Fail: {e}")
+            print(f"🚀 [v117] ACTIVATE: Promoting XTTS to GPU...")
+            try: MODELS["tts"].to("cuda")
+            except: pass
 
     chatterbox_utils.load_chatterbox(device="cpu")
     if MODELS["denoiser"] is None:
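Unlike faster-whisper, the Coqui `TTS` wrapper behaves as a torch module, so the hunk can promote the warm CPU instance with a plain `.to("cuda")`. A sketch of the per-request hop, assuming the same XTTS checkpoint and a recent coqui-tts release where `TTS.to(device)` is available:

from TTS.api import TTS

tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)

def with_gpu_tts(synthesize):
    tts.to("cuda")            # promote the resident CPU weights
    try:
        return synthesize(tts)
    finally:
        tts.to("cpu")         # demote so the ZeroGPU slice can be handed back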
@@ -140,7 +140,7 @@ def activate_gpu_models(action):
     if MODELS["translate"] is None: MODELS["translate"] = "active"
 
 def release_gpu_models():
-    """
+    """v117: Clean Offload"""
     global MODELS
     try:
         if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
@@ -153,26 +153,34 @@ def release_gpu_models():
     if torch.cuda.is_available(): torch.cuda.empty_cache()
 
 def warmup_task():
-    """
+    """v117: Absolute Cache Warming"""
     if os.path.exists(READY_FLAG): os.remove(READY_FLAG)
-    print("\n🔥 ---
+    print("\n🔥 --- V117: DIRECT WARMUP ---")
     try:
-
+        # Pre-fetch and store paths
+        MODEL_PATHS["stt"] = snapshot_download("Systran/faster-whisper-large-v3")
+
+        print("⬇️ Warming Whisper to RAM...")
+        MODELS["stt"] = WhisperModel(MODEL_PATHS["stt"], device="cpu", compute_type="int8")
+
+        print("⬇️ Warming XTTS to RAM...")
         MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
+
         chatterbox_utils.warmup_chatterbox()
         chatterbox_utils.load_chatterbox(device="cpu")
+
         with open(READY_FLAG, "w") as f: f.write("READY")
-        print(f"✅ ---
+        print(f"✅ --- SYSTEM ARMED: v117 --- \n")
     except Exception as e: print(f"❌ Warmup Error: {e}")
 
 @spaces.GPU(duration=150)
 def core_process(request_dict):
     action = request_dict.get("action")
-    print(f"--- [
+    print(f"--- [v117] 🚀 REQ: {action} ---")
 
     waited = 0
     while not is_system_ready() and waited < 300:
-        if waited % 10 == 0: print(f"⏳
+        if waited % 10 == 0: print(f"⏳ Sync stage... ({waited}s)")
         time.sleep(1)
         waited += 1
 
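The warmup thread resolves the Hub snapshot once and keeps its absolute path, so later GPU activations are pure local loads; the flag file then tells `core_process` (and the health routes below) that the caches are populated. A condensed sketch of that contract, assuming the same repo id and flag path:

import os
import time
from huggingface_hub import snapshot_download

READY_FLAG = os.path.expanduser("~/.engine_ready")

def prefetch_whisper():
    # Network I/O happens here, once; returns the absolute snapshot path.
    return snapshot_download("Systran/faster-whisper-large-v3")

def wait_until_ready(timeout_s=300):
    # Mirrors the polling loop inside core_process.
    waited = 0
    while not os.path.exists(READY_FLAG) and waited < timeout_s:
        time.sleep(1)
        waited += 1
    return os.path.exists(READY_FLAG)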
@@ -219,7 +227,7 @@ def core_process(request_dict):
             res = {"audio": base64.b64encode(audio_bytes).decode()}
 
         elif action == "s2st":
-            print("🎙️ Phase 1: Whisper GPU...")
+            print("🎙️ Phase 1: Whisper GPU (H200 Native)...")
             stt_res = core_process.__wrapped__( {**request_dict, "action": "stt"} )
             stt_text = stt_res.get("text", "")
 
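`core_process.__wrapped__` calls the undecorated body, so the s2st pipeline reuses the stt and tts branches inside the one GPU lease it already holds instead of requesting nested `@spaces.GPU` allocations. This works for any decorator built with `functools.wraps`; a hypothetical minimal demo:

import functools

def lease(fn):
    @functools.wraps(fn)          # stores fn as wrapper.__wrapped__
    def wrapper(*args, **kwargs):
        print("acquire GPU slice")
        try:
            return fn(*args, **kwargs)
        finally:
            print("release GPU slice")
    return wrapper

@lease
def job(n):
    return n * 2

job(3)               # acquire, run, release
job.__wrapped__(3)   # body only: no second acquire/release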
@@ -232,10 +240,10 @@ def core_process(request_dict):
             res = {"text": stt_text, "translated": translated, "audio": tts_res.get("audio")}
         else: res = {"error": f"Unknown action: {action}"}
     except Exception as e:
-        print(f"❌
+        print(f"❌ Error: {traceback.format_exc()}")
         res = {"error": str(e)}
     finally:
-        print(f"--- [
+        print(f"--- [v117] ✨ FINISH ({time.time()-t1:.2f}s) ---")
         release_gpu_models()
     return res
 
@@ -244,7 +252,7 @@ async def lifespan(app: FastAPI):
     Thread(target=warmup_task, daemon=True).start()
     yield
 
-# 🚀
+# 🚀 Server Lifecycle
 app = FastAPI(lifespan=lifespan)
 app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
 
@@ -253,18 +261,18 @@ async def api_process(request: Request):
     try:
         req_data = await request.json()
         if req_data.get("action") == "health":
-            return {"status": "awake", "warm": is_system_ready(), "v": "
+            return {"status": "awake", "warm": is_system_ready(), "v": "117"}
         return core_process(req_data)
     except Exception as e: return {"error": str(e)}
 
 @app.get("/health")
-def health(): return {"status": "ok", "warm": is_system_ready(), "v": "
+def health(): return {"status": "ok", "warm": is_system_ready(), "v": "117"}
 
 def gradio_fn(req_json):
     try: return json.dumps(core_process(json.loads(req_json)))
     except Exception as e: return json.dumps({"error": str(e)})
 
-demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="🚀 AI Engine
+demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="🚀 AI Engine v117")
 demo.queue()
 app = gr.mount_gradio_app(app, demo, path="/")
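With Gradio mounted at `/`, the FastAPI routes stay reachable alongside the UI, and `/health` exposes the warmup state. A sketch of how a client might gate its first real request, with the Space URL as a hypothetical placeholder:

import time
import requests

SPACE_URL = "https://example-engine.hf.space"  # hypothetical Space URL

def wait_warm(poll_s=5):
    while True:
        status = requests.get(f"{SPACE_URL}/health", timeout=10).json()
        if status.get("warm"):
            return status  # e.g. {"status": "ok", "warm": True, "v": "117"}
        time.sleep(poll_s)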