Upload app.py with huggingface_hub
app.py CHANGED
@@ -1,4 +1,4 @@
-# 🚀
+# 🚀 V116: ZEROGPU HOPPER ATOMIC
 try:
     import spaces
 except ImportError:
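Note on the `try: import spaces` guard: the body of the `except ImportError:` branch falls outside the captured hunks, so it is not shown here. The usual pattern for ZeroGPU apps that must also run locally is a no-op stub so the `@spaces.GPU` decorator used later still resolves. A hypothetical sketch (the stub class is illustrative, not app.py's actual fallback):

    # Hypothetical local fallback for the spaces module (the real body is not in this diff).
    try:
        import spaces
    except ImportError:
        class _SpacesStub:
            @staticmethod
            def GPU(duration=60):
                def deco(fn):
                    return fn  # no-op: run on whatever device is available
                return deco
        spaces = _SpacesStub()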
@@ -25,10 +25,10 @@ import sys
 import types
 import logging
 import traceback
-from threading import Thread, Lock
+from threading import Thread
 from huggingface_hub import snapshot_download, hf_hub_download
 
-# 🛡️ 1. SILENCE & ENV (
+# 🛡️ 1. SILENCE & ENV (v116)
 logging.getLogger("transformers").setLevel(logging.ERROR)
 logging.getLogger("TTS").setLevel(logging.ERROR)
 os.environ["CT2_VERBOSE"] = "0"
@@ -81,61 +81,66 @@ from TTS.api import TTS
 from df.enhance import init_df
 import deep_translator
 
-# FORCE BUILD TRIGGER:
-#
+# FORCE BUILD TRIGGER: 19:10:00 Jan 21 2026
+# v116: Hopper Atomic. Device Index Fix. No Lock.
 
 os.environ["COQUI_TOS_AGREED"] = "1"
 MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
 READY_FLAG = os.path.expanduser("~/.engine_ready")
-ACTIVATE_LOCK = Lock()
 
 def is_system_ready():
     return os.path.exists(READY_FLAG)
 
 def activate_gpu_models(action):
-    """
+    """v116: Granular Activation"""
     global MODELS
-… (20 deleted lines not captured in this view)
+
+    if action in ["stt", "s2st"]:
+        stt_on_gpu = False
+        try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
+        except: pass
+
+        if not stt_on_gpu:
+            print(f"🎙️ [v116] WHISPER CHECKPOINT: Allocation...")
+            try:
+                gc.collect(); torch.cuda.empty_cache()
+                print(f"🎙️ [v116] WHISPER CHECKPOINT: Loading to CUDA:0 (int8_float16)...")
+                # Removed local_files_only=True as it can hang integrity checks
+                MODELS["stt"] = WhisperModel(
+                    "large-v3",
+                    device="cuda",
+                    device_index=0,
+                    compute_type="int8_float16",
+                    num_workers=1
+                )
+                print(f"🎙️ [v116] WHISPER CHECKPOINT: Ready.")
+            except Exception as e:
+                print(f"⚠️ [v116] GPU STT Fail: {e}")
+                MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
 
-… (2 deleted lines not captured in this view)
+    if action in ["tts", "s2st"]:
+        tts_on_gpu = False
+        try:
+            params = next(MODELS["tts"].synthesizer.tts_model.parameters())
+            tts_on_gpu = "cuda" in str(params.device)
+        except: pass
+
+        if MODELS["tts"] is not None and not tts_on_gpu:
+            print(f"🚀 [v116] XTTS CHECKPOINT: Promotion to GPU...")
             try:
-… (2 deleted lines not captured in this view)
-            except:
-… (1 deleted line not captured in this view)
-        if MODELS["tts"] is not None and not tts_on_gpu:
-            print(f"🚀 [v115] Activating XTTS-v2 (GPU)...")
-            try: MODELS["tts"].to("cuda")
-            except: pass
+                MODELS["tts"].to("cuda")
+                print(f"🚀 [v116] XTTS CHECKPOINT: Ready.")
+            except Exception as e:
+                print(f"⚠️ [v116] XTTS GPU Fail: {e}")
 
-… (5 deleted lines not captured in this view)
+    chatterbox_utils.load_chatterbox(device="cpu")
+    if MODELS["denoiser"] is None:
+        try: MODELS["denoiser"] = init_df()
+        except: pass
+    if MODELS["translate"] is None: MODELS["translate"] = "active"
 
 def release_gpu_models():
-    """
+    """v116: Clean Release"""
     global MODELS
     try:
         if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
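This hunk is the core of v116: the old lock-guarded activation is replaced by per-action promotion, so a `tts` request never touches Whisper and vice versa, and a failed GPU load degrades to CPU instead of crashing the request. The promote-or-fall-back shape can be distilled as below. This is an illustrative sketch, not app.py's exact code; it uses only the real `faster_whisper.WhisperModel` signature, and `get_stt` is a made-up name:

    # Sketch of v116-style granular activation (illustrative, assumes faster-whisper + torch installed).
    import gc
    import torch
    from faster_whisper import WhisperModel

    _STT = None  # module-level cache, like MODELS["stt"] in app.py

    def get_stt():
        """Return a Whisper model, preferring GPU but degrading to CPU int8."""
        global _STT
        if _STT is not None:
            return _STT
        if torch.cuda.is_available():
            try:
                gc.collect(); torch.cuda.empty_cache()  # reclaim VRAM before the big allocation
                _STT = WhisperModel("large-v3", device="cuda", device_index=0,
                                    compute_type="int8_float16", num_workers=1)
                return _STT
            except Exception as e:
                print(f"GPU load failed, falling back to CPU: {e}")
        _STT = WhisperModel("large-v3", device="cpu", compute_type="int8")
        return _STT

`int8_float16` is a CTranslate2 quantization mode (int8 weights with float16 activations) that only makes sense on CUDA, which is why the fallback path switches to plain `int8` for CPU.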
@@ -148,26 +153,26 @@ def release_gpu_models():
     if torch.cuda.is_available(): torch.cuda.empty_cache()
 
 def warmup_task():
-    """
+    """v116: Asset Prep"""
     if os.path.exists(READY_FLAG): os.remove(READY_FLAG)
-    print("\n🔥 ---
+    print("\n🔥 --- V116: ATOMIC WARMUP ---")
     try:
         MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
         MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
         chatterbox_utils.warmup_chatterbox()
         chatterbox_utils.load_chatterbox(device="cpu")
         with open(READY_FLAG, "w") as f: f.write("READY")
-        print(f"✅ --- ENGINE
-    except Exception as e: print(f"❌ Warmup
+        print(f"✅ --- ENGINE ARMED: v116 --- \n")
+    except Exception as e: print(f"❌ Warmup Error: {e}")
 
 @spaces.GPU(duration=150)
 def core_process(request_dict):
     action = request_dict.get("action")
-    print(f"--- [
+    print(f"--- [v116] 🚀 REQUEST: {action} ---")
 
     waited = 0
     while not is_system_ready() and waited < 300:
-        if waited % 10 == 0: print(f"⏳
+        if waited % 10 == 0: print(f"⏳ Prep stage... ({waited}s)")
         time.sleep(1)
         waited += 1
 
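The `READY_FLAG` file written by `warmup_task()` and polled for up to 300 seconds at the top of `core_process()` is a filesystem handshake: a flag on disk survives whatever thread or process boundary sits between the CPU-side warmup and the GPU worker. A minimal self-contained sketch of the same handshake (names are illustrative):

    # Minimal ready-flag handshake, same shape as warmup_task()/core_process() above.
    import os
    import time
    from threading import Thread

    READY_FLAG = os.path.expanduser("~/.engine_ready")

    def warmup():
        if os.path.exists(READY_FLAG):
            os.remove(READY_FLAG)   # force a fresh warmup on restart
        time.sleep(5)               # stand-in for slow model downloads
        with open(READY_FLAG, "w") as f:
            f.write("READY")

    def wait_until_ready(timeout=300):
        waited = 0
        while not os.path.exists(READY_FLAG) and waited < timeout:
            time.sleep(1)
            waited += 1
        return os.path.exists(READY_FLAG)

    Thread(target=warmup, daemon=True).start()
    print("warm:", wait_until_ready())  # blocks until the flag file appears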
@@ -214,22 +219,23 @@ def core_process(request_dict):
             res = {"audio": base64.b64encode(audio_bytes).decode()}
 
         elif action == "s2st":
-            print("🎙️
+            print("🎙️ Phase 1: Whisper GPU...")
             stt_res = core_process.__wrapped__( {**request_dict, "action": "stt"} )
             stt_text = stt_res.get("text", "")
 
-            print(f"🌐
-… (1 deleted line not captured in this view)
+            print(f"🌐 Phase 2: Translation...")
+            target = request_dict.get("target_lang")
+            translated = deep_translator.GoogleTranslator(source='auto', target=target).translate(stt_text)
 
-            print("🔊
-            tts_res = core_process.__wrapped__( {"action": "tts", "text": translated, "lang":
+            print("🔊 Phase 3: XTTS GPU...")
+            tts_res = core_process.__wrapped__( {"action": "tts", "text": translated, "lang": target, "speaker_wav": request_dict.get("speaker_wav")} )
             res = {"text": stt_text, "translated": translated, "audio": tts_res.get("audio")}
         else: res = {"error": f"Unknown action: {action}"}
     except Exception as e:
-        print(f"❌
+        print(f"❌ Fault: {traceback.format_exc()}")
         res = {"error": str(e)}
     finally:
-        print(f"--- [
+        print(f"--- [v116] ✨ ATOMIC FINISH ({time.time()-t1:.2f}s) ---")
         release_gpu_models()
     return res
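Two details in this hunk are worth spelling out. First, calling `core_process.__wrapped__` is what keeps s2st "atomic": a decorator built in the standard `functools.wraps` style stores the undecorated function on `__wrapped__`, and the code here relies on `@spaces.GPU` behaving that way, so the STT and TTS phases run inside the one 150-second allocation instead of requesting nested ones. A demonstration of the mechanism with a stand-in decorator (not the real `spaces` module):

    # Why core_process.__wrapped__ works: functools.wraps exposes the raw function.
    import functools

    def gpu(duration=60):                    # stand-in for @spaces.GPU
        def deco(fn):
            @functools.wraps(fn)             # sets wrapper.__wrapped__ = fn
            def wrapper(*args, **kwargs):
                print(f"[allocating GPU for {duration}s]")
                return fn(*args, **kwargs)
            return wrapper
        return deco

    @gpu(duration=150)
    def core_process(request):
        if request["action"] == "s2st":
            # Re-entering core_process() would allocate again;
            # __wrapped__ runs the body inside the current allocation.
            stt = core_process.__wrapped__({"action": "stt"})
            return {"chained": stt}
        return {"action": request["action"]}

    print(core_process({"action": "s2st"}))  # one allocation line, not two

Second, Phase 2 uses deep_translator's standard call shape, `GoogleTranslator(source='auto', target=...).translate(text)`, so the translation step involves no model weights at all.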
@@ -238,7 +244,7 @@ async def lifespan(app: FastAPI):
     Thread(target=warmup_task, daemon=True).start()
     yield
 
-# 🚀
+# 🚀 FastAPI & Gradio Unified
 app = FastAPI(lifespan=lifespan)
 app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
 
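For readers unfamiliar with the `lifespan` hook used here: it is FastAPI's startup/shutdown context manager, and launching `warmup_task` in a daemon thread means the HTTP port answers immediately while models download in the background. A minimal sketch of the same wiring:

    # Minimal lifespan + background warmup, the same shape app.py uses.
    from contextlib import asynccontextmanager
    from threading import Thread
    from fastapi import FastAPI

    def warmup_task():
        ...  # load models, then write the ready flag

    @asynccontextmanager
    async def lifespan(app: FastAPI):
        Thread(target=warmup_task, daemon=True).start()  # don't block startup
        yield                                            # app serves while warming
        # optional teardown would go after the yield

    app = FastAPI(lifespan=lifespan)

    @app.get("/health")
    def health():
        return {"status": "ok"}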
@@ -247,18 +253,18 @@ async def api_process(request: Request):
     try:
         req_data = await request.json()
         if req_data.get("action") == "health":
-            return {"status": "awake", "warm": is_system_ready(), "v": "
+            return {"status": "awake", "warm": is_system_ready(), "v": "116"}
         return core_process(req_data)
     except Exception as e: return {"error": str(e)}
 
 @app.get("/health")
-def health(): return {"status": "ok", "warm": is_system_ready(), "v": "
+def health(): return {"status": "ok", "warm": is_system_ready(), "v": "116"}
 
 def gradio_fn(req_json):
     try: return json.dumps(core_process(json.loads(req_json)))
     except Exception as e: return json.dumps({"error": str(e)})
 
-demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="🚀 AI Engine
+demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="🚀 AI Engine v116")
 demo.queue()
 app = gr.mount_gradio_app(app, demo, path="/")
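With `gr.mount_gradio_app`, one process serves the Gradio UI at `/`, the JSON handler, and `/health`. A hypothetical client session follows; the base URL is a placeholder, and the POST path and request fields are inferred from the handlers above, since the `@app.post` route line itself is outside the captured hunks:

    # Hypothetical client for the JSON API and /health (URL and POST path are placeholders).
    import requests

    BASE = "https://your-space.hf.space"  # placeholder, not the real Space URL

    print(requests.get(f"{BASE}/health").json())  # e.g. {"status": "ok", "warm": true, "v": "116"}

    resp = requests.post(f"{BASE}/api/process", json={  # path inferred, not shown in this diff
        "action": "s2st",
        "audio": "<base64 wav>",      # field names inferred from request_dict usage above
        "target_lang": "fr",
    })
    print(resp.json().get("translated"))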