Upload app.py with huggingface_hub
app.py CHANGED

@@ -1,4 +1,4 @@
-# …
+# 🚀 V118: ZEROGPU HOPPER STEADY (PRODUCTION GRADE)
 try:
     import spaces
 except ImportError:
@@ -25,18 +25,26 @@ import sys
 import types
 import logging
 import traceback
-from threading import Thread
 from huggingface_hub import snapshot_download, hf_hub_download
 
-# 🛡️ 1. SILENCE & ENV (…
+# 🛡️ 1. SILENCE & ENV (v118)
 logging.getLogger("transformers").setLevel(logging.ERROR)
 logging.getLogger("TTS").setLevel(logging.ERROR)
 os.environ["CT2_VERBOSE"] = "0"
 os.environ["ORT_LOGGING_LEVEL"] = "3"
-os.environ["…
-os.environ["GRADIO_SERVER_PORT"] = "7860"
+os.environ["COQUI_TOS_AGREED"] = "1"
 
-# 🛠️ 2. …
+# 🛠️ 2. TOP-LEVEL ASSET PREPARATION (Ensures HF Readiness)
+print("\n📦 [v118] TOP-LEVEL: Preparing AI Assets...")
+try:
+    WHISPER_PATH = snapshot_download("Systran/faster-whisper-large-v3")
+    XTTS_PATH = snapshot_download("coqui/XTTS-v2")
+    print("✅ Assets cached on disk.")
+except Exception as e:
+    print(f"⚠️ Pre-download warning: {e}")
+    WHISPER_PATH = "large-v3"
+
+# 🛠️ 3. COMPATIBILITY PATCHES
 if "torchaudio.backend" not in sys.modules:
     backend = types.ModuleType("torchaudio.backend")
     common = types.ModuleType("torchaudio.backend.common")
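The pre-download block above leans on `snapshot_download` returning the local cache directory, so later loads are pure disk reads. A minimal sketch of that pattern (model ID taken from the diff; everything else illustrative):

```python
# snapshot_download is idempotent: it populates ~/.cache/huggingface on the
# first call and returns the same local directory path on every later call.
from huggingface_hub import snapshot_download
from faster_whisper import WhisperModel

local_dir = snapshot_download("Systran/faster-whisper-large-v3")
model = WhisperModel(local_dir, device="cpu", compute_type="int8")  # disk-only load
```

If the download fails, the code falls back to the bare name `"large-v3"`, which makes faster-whisper fetch the weights itself on first use.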
@@ -58,46 +66,19 @@ if not hasattr(torchaudio, "info"):
         except: return SimpleNamespace(sample_rate=48000, num_frames=0, num_channels=1)
     torchaudio.info = mock_info
 
-
-    _orig_load = torchaudio.load
-    def patched_load(filepath, *args, **kwargs):
-        try: return _orig_load(filepath, *args, **kwargs)
-        except ImportError as e:
-            if "torchcodec" in str(e).lower():
-                import soundfile as sf
-                data, samplerate = sf.read(filepath)
-                t = torch.from_numpy(data).float()
-                if len(t.shape) == 1: t = t.unsqueeze(0)
-                else: t = t.T
-                return t, samplerate
-            raise e
-    torchaudio.load = patched_load
-except Exception: pass
-
-# 📦 3. AI LIBRARIES
+# 📦 4. AI LIBRARIES
 import chatterbox_utils
 from faster_whisper import WhisperModel
 from TTS.api import TTS
 from df.enhance import init_df
 import deep_translator
 
-#
-# v117: Hopper Direct. float16 native. 2s Settle. Absolute Paths.
-
-os.environ["COQUI_TOS_AGREED"] = "1"
+# v118: Hopper Steady. Persistent RAM Init. int8 GPU.
 MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
-READY_FLAG = os.path.expanduser("~/.engine_ready")
-MODEL_PATHS = {"stt": None, "tts": None}
-
-def is_system_ready():
-    return os.path.exists(READY_FLAG)
 
 def activate_gpu_models(action):
-    """…
-    global MODELS
-
-    # 🎙️ v117: 2s Driver Settle. Crucial for MIG partitions.
-    time.sleep(2)
+    """v118: Robust GPU Promotion"""
+    global MODELS
 
     if action in ["stt", "s2st"]:
         stt_on_gpu = False
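The compatibility-patch section works by planting stub modules in `sys.modules` before the TTS stack imports them. A minimal sketch of that shim pattern; the `AudioMetaData` attribute is hypothetical, since the diff does not show which names the real patch attaches:

```python
import sys
import types

# Register empty module objects so that an import like
# `import torchaudio.backend.common` resolves without the real backend.
backend = types.ModuleType("torchaudio.backend")
common = types.ModuleType("torchaudio.backend.common")

class AudioMetaData:  # hypothetical stand-in for whatever old callers expect
    def __init__(self, sample_rate=16000, num_frames=0, num_channels=1):
        self.sample_rate = sample_rate
        self.num_frames = num_frames
        self.num_channels = num_channels

common.AudioMetaData = AudioMetaData
backend.common = common
sys.modules["torchaudio.backend"] = backend
sys.modules["torchaudio.backend.common"] = common
```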
@@ -105,21 +86,13 @@ def activate_gpu_models(action):
         except: pass
 
         if not stt_on_gpu:
-            print(f"🎙️ […
+            print(f"🎙️ [v118] PROMOTE: Whisper (GPU, int8)...")
             try:
                 gc.collect(); torch.cuda.empty_cache()
-
-                path = MODEL_PATHS["stt"] or "large-v3"
-                MODELS["stt"] = WhisperModel(
-                    path,
-                    device="cuda",
-                    compute_type="float16",  # v117: native format for the H200
-                    num_workers=1
-                )
-                print(f"🎙️ [v117] WHISPER: Ready.")
+                MODELS["stt"] = WhisperModel(WHISPER_PATH, device="cuda", compute_type="int8", num_workers=1)
             except Exception as e:
-                print(f"⚠️ …
-                MODELS["stt"] = WhisperModel(…
+                print(f"⚠️ GPU STT Fail: {e}")
+                MODELS["stt"] = WhisperModel(WHISPER_PATH, device="cpu", compute_type="int8")
 
     if action in ["tts", "s2st"]:
         tts_on_gpu = False
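v118 switches Whisper from v117's `float16` to `int8` on GPU and keeps `int8` as the CPU fallback. A sketch of the same promote-then-degrade chain in isolation (function name and loop are illustrative, not from the diff):

```python
from faster_whisper import WhisperModel

def load_whisper(path="large-v3"):
    # int8 weights (CTranslate2 quantization) need far less VRAM than
    # float16 and load unchanged on CPU, so one compute_type serves both.
    for device in ("cuda", "cpu"):
        try:
            return WhisperModel(path, device=device, compute_type="int8", num_workers=1)
        except Exception as e:
            print(f"Whisper load failed on {device}: {e}")
    raise RuntimeError("Whisper could not be loaded on any device")
```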
@@ -129,7 +102,7 @@ def activate_gpu_models(action):
         except: pass
 
     if MODELS["tts"] is not None and not tts_on_gpu:
-        print(f"🔊 […
+        print(f"🔊 [v118] PROMOTE: XTTS to GPU...")
         try: MODELS["tts"].to("cuda")
         except: pass
 
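XTTS promotion relies on the Coqui `TTS` wrapper being movable like a torch module, which the diff itself exercises via `.to("cuda")` and `.to("cpu")`. A sketch of the round-trip (model name from the diff):

```python
from TTS.api import TTS

tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)  # CPU-resident
tts.to("cuda")   # promote for synthesis while the GPU lease is active
tts.to("cpu")    # demote so the lease can end with VRAM reclaimed
```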
@@ -140,11 +113,11 @@ def activate_gpu_models(action):
     if MODELS["translate"] is None: MODELS["translate"] = "active"
 
 def release_gpu_models():
-    """…
+    """v118: Graceful Offload"""
     global MODELS
     try:
         if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
-            MODELS["stt"] = WhisperModel(…
+            MODELS["stt"] = WhisperModel(WHISPER_PATH, device="cpu", compute_type="int8", local_files_only=True)
         if MODELS["tts"]:
             try: MODELS["tts"].to("cpu")
             except: pass
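Unlike the torch-based XTTS wrapper, faster-whisper's CTranslate2 model has no torch-style `.to()`, so "offloading" Whisper means re-creating it on CPU; `local_files_only=True` guarantees the rebuild never touches the network. A sketch of that demotion step:

```python
from faster_whisper import WhisperModel

def demote_whisper(path):
    # Rebuild on CPU from the already-cached snapshot; the old CUDA
    # instance is garbage-collected, which frees its VRAM.
    return WhisperModel(path, device="cpu", compute_type="int8", local_files_only=True)
```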
@@ -152,38 +125,11 @@ def release_gpu_models():
     gc.collect()
     if torch.cuda.is_available(): torch.cuda.empty_cache()
 
-def warmup_task():
-    """v117: Absolute Cache Warming"""
-    if os.path.exists(READY_FLAG): os.remove(READY_FLAG)
-    print("\n🔥 --- V117: DIRECT WARMUP ---")
-    try:
-        # Pre-fetch and store paths
-        MODEL_PATHS["stt"] = snapshot_download("Systran/faster-whisper-large-v3")
-
-        print("⬇ Warming Whisper to RAM...")
-        MODELS["stt"] = WhisperModel(MODEL_PATHS["stt"], device="cpu", compute_type="int8")
-
-        print("⬇ Warming XTTS to RAM...")
-        MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
-
-        chatterbox_utils.warmup_chatterbox()
-        chatterbox_utils.load_chatterbox(device="cpu")
-
-        with open(READY_FLAG, "w") as f: f.write("READY")
-        print(f"✅ --- SYSTEM ARMED: v117 ---\n")
-    except Exception as e: print(f"❌ Warmup Error: {e}")
-
 @spaces.GPU(duration=150)
 def core_process(request_dict):
     action = request_dict.get("action")
-    print(f"--- […
+    print(f"--- [v118] REQUEST: {action} ---")
 
-    waited = 0
-    while not is_system_ready() and waited < 300:
-        if waited % 10 == 0: print(f"⏳ Sync stage... ({waited}s)")
-        time.sleep(1)
-        waited += 1
-
     t1 = time.time()
     activate_gpu_models(action)
     try:
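`@spaces.GPU(duration=150)` is the ZeroGPU entry point: on a ZeroGPU Space the hardware is attached only while a decorated function runs, which is why all CUDA promotion happens inside `core_process` rather than at import time. A minimal sketch of that contract, assuming the standard `spaces` package (the try/except at the top of the file stubs it out off-platform):

```python
import spaces

@spaces.GPU(duration=150)  # GPU is leased for this call, up to 150 s
def gpu_task(payload):
    # torch.cuda is only guaranteed to work inside this body;
    # module-level code runs on a CPU-only host.
    ...
```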
@@ -227,32 +173,41 @@ def core_process(request_dict):
             res = {"audio": base64.b64encode(audio_bytes).decode()}
 
         elif action == "s2st":
-            … (11 lines of the v117 s2st handler; content not captured in this view)
+            # Direct logic sequence in v118 (No recursion)
+            audio_bytes = base64.b64decode(request_dict.get("file"))
+            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
+                f.write(audio_bytes); temp_path = f.name
+            try:
+                # 1. STT
+                segs, _ = MODELS["stt"].transcribe(temp_path, language=request_dict.get("lang"), beam_size=1)
+                stt_text = " ".join([s.text for s in segs]).strip()
+                # 2. Translate
+                target = request_dict.get("target_lang")
+                translated = deep_translator.GoogleTranslator(source='auto', target=target).translate(stt_text)
+                # 3. TTS
+                final_res = core_process.__wrapped__({"action": "tts", "text": translated, "lang": target, "speaker_wav": request_dict.get("speaker_wav")})
+                res = {"text": stt_text, "translated": translated, "audio": final_res.get("audio")}
+            finally:
+                if os.path.exists(temp_path): os.unlink(temp_path)
         else: res = {"error": f"Unknown action: {action}"}
     except Exception as e:
-        print(f"❌ …
+        print(f"❌ Fault: {traceback.format_exc()}")
         res = {"error": str(e)}
     finally:
-        print(f"--- […
+        print(f"--- [v118] ✨ FINISH ({time.time()-t1:.2f}s) ---")
         release_gpu_models()
     return res
 
 @asynccontextmanager
 async def lifespan(app: FastAPI):
-    …
+    print("🔥 [v118] RAM Warming...")
+    MODELS["stt"] = WhisperModel(WHISPER_PATH, device="cpu", compute_type="int8")
+    MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
+    chatterbox_utils.warmup_chatterbox()
+    print("✅ [v118] ENGINE READY.")
     yield
 
-# …
+# FastAPI
 app = FastAPI(lifespan=lifespan)
 app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
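The s2st branch calls `core_process.__wrapped__(...)` instead of `core_process(...)`. Decorators built with `functools.wraps` expose the undecorated function as `__wrapped__`, so this reuses the TTS branch without re-entering the GPU decorator; a sketch of the mechanism, assuming `spaces.GPU` wraps with `functools.wraps` (the diff's "No recursion" comment points the same way):

```python
import functools

def gpu(fn):
    @functools.wraps(fn)           # also sets wrapper.__wrapped__ = fn
    def wrapper(*args, **kwargs):
        print("allocate GPU")      # stand-in for ZeroGPU bookkeeping
        return fn(*args, **kwargs)
    return wrapper

@gpu
def core(x):
    return x * 2

core(3)              # goes through the wrapper
core.__wrapped__(3)  # bypasses it: no second allocation
```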
@@ -260,21 +215,21 @@ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], all
 async def api_process(request: Request):
     try:
         req_data = await request.json()
-        if req_data.get("action") == "health":
-            return {"status": "awake", "warm": is_system_ready(), "v": "117"}
+        if req_data.get("action") == "health": return {"status": "awake", "v": "118"}
         return core_process(req_data)
     except Exception as e: return {"error": str(e)}
 
 @app.get("/health")
-def health(): return {"status": "ok", "…
+def health(): return {"status": "ok", "v": "118"}
 
 def gradio_fn(req_json):
     try: return json.dumps(core_process(json.loads(req_json)))
     except Exception as e: return json.dumps({"error": str(e)})
 
-…
+# Unified UI mount
+demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="AI Engine v118")
 demo.queue()
 app = gr.mount_gradio_app(app, demo, path="/")
 
 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=7860, log_level="…
+    uvicorn.run(app, host="0.0.0.0", port=7860, log_level="warning")