Spaces:

TGPro1
/

S2ST

Sleeping

App Files Files Community

TGPro1 committed on Jan 21

Commit

b9d3269

verified ·

1 Parent(s): c4fd68e

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +65 -68

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# 🚀 V114: ZEROGPU BULLETPROOF CACHE
 try:
     import spaces
 except ImportError:
@@ -28,7 +28,7 @@ import traceback
 from threading import Thread, Lock
 from huggingface_hub import snapshot_download, hf_hub_download
-# 🛡️ 1. SILENCE & ENV (v114)
 logging.getLogger("transformers").setLevel(logging.ERROR)
 logging.getLogger("TTS").setLevel(logging.ERROR)
 os.environ["CT2_VERBOSE"] = "0"
@@ -36,7 +36,7 @@ os.environ["ORT_LOGGING_LEVEL"] = "3"
 os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
 os.environ["GRADIO_SERVER_PORT"] = "7860"
-# 🛠️ 2. COMPATIBILITY PATCHES (v114)
 if "torchaudio.backend" not in sys.modules:
     backend = types.ModuleType("torchaudio.backend")
     common = types.ModuleType("torchaudio.backend.common")
@@ -81,62 +81,61 @@ from TTS.api import TTS
 from df.enhance import init_df
 import deep_translator
-# FORCE BUILD TRIGGER: 18:35:00 Jan 21 2026
-# v114: Bulletproof Flag-File Sync.
 os.environ["COQUI_TOS_AGREED"] = "1"
 MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
 READY_FLAG = os.path.expanduser("~/.engine_ready")
-# Cleanup flag on boot
-if os.path.exists(READY_FLAG): os.remove(READY_FLAG)
 def is_system_ready():
-    """v114: Process-Safe Readiness Check"""
     return os.path.exists(READY_FLAG)
 def activate_gpu_models(action):
-    """v114: Instant Promotion (RAM -> GPU)"""
     global MODELS
-    if action in ["stt", "s2st"]:
-        stt_on_gpu = False
-        try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
-        except: pass
-        if not stt_on_gpu:
-            print(f"🎙️ [v114] Promoting Whisper (2 Workers, int8_float16)...")
-            try:
-                # Fast re-init (local_files_only=True ensures no network lag)
-                MODELS["stt"] = WhisperModel(
-                    "large-v3",
-                    device="cuda",
-                    compute_type="int8_float16",
-                    num_workers=2,
-                    local_files_only=True
-                )
-            except: MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8", local_files_only=True)
-    if action in ["tts", "s2st"]:
-        tts_on_gpu = False
-        try:
-            params = next(MODELS["tts"].synthesizer.tts_model.parameters())
-            tts_on_gpu = "cuda" in str(params.device)
-        except: pass
-        if MODELS["tts"] is not None and not tts_on_gpu:
-            print(f"🔊 [v114] Promoting XTTS-v2 to GPU...")
-            try: MODELS["tts"].to("cuda")
             except: pass
-    chatterbox_utils.load_chatterbox(device="cpu")
-    if MODELS["denoiser"] is None:
-        try: MODELS["denoiser"] = init_df()
-        except: pass
-    if MODELS["translate"] is None: MODELS["translate"] = "active"
 def release_gpu_models():
-    """v114: Smooth Offloading"""
     global MODELS
     try:
         if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
@@ -145,36 +144,30 @@ def release_gpu_models():
             try: MODELS["tts"].to("cpu")
             except: pass
     except: pass
     if torch.cuda.is_available(): torch.cuda.empty_cache()
 def warmup_task():
-    """v114: Heavy RAM Loading (Worker Thread)"""
-    print("\n🔥 --- V114: BULLETPROOF WARMUP (RAM) ---")
     try:
-        # 1. Load Whisper to RAM
         MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
-        # 2. Load TTS to RAM
         MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
-        # 3. Cache ONNX models
         chatterbox_utils.warmup_chatterbox()
         chatterbox_utils.load_chatterbox(device="cpu")
-        # Write the Flag-File!
         with open(READY_FLAG, "w") as f: f.write("READY")
-        print(f"✅ --- ENGINE READY: v114 --- \n")
-    except Exception as e:
-        print(f"❌ Warmup Fault: {e}")
 @spaces.GPU(duration=150)
 def core_process(request_dict):
     action = request_dict.get("action")
-    print(f"--- [v114] 🚀 REQUEST: {action} ---")
-    # 🔥 v114: ZERO-WAIT (Flag Check)
     waited = 0
     while not is_system_ready() and waited < 300:
-        if waited % 10 == 0: print(f"⏳ Syncing assets... ({waited}s)")
         time.sleep(1)
         waited += 1
@@ -186,7 +179,6 @@ def core_process(request_dict):
             with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
                 f.write(audio_bytes); temp_path = f.name
             try:
-                # v114: Explicit beam_size=1 for max speed
                 segments, _ = MODELS["stt"].transcribe(temp_path, language=request_dict.get("lang"), beam_size=1)
                 res = {"text": " ".join([s.text for s in segments]).strip()}
             finally:
@@ -222,17 +214,22 @@ def core_process(request_dict):
                 res = {"audio": base64.b64encode(audio_bytes).decode()}
         elif action == "s2st":
-            # Combined logic (process-safe)
             stt_res = core_process.__wrapped__( {**request_dict, "action": "stt"} )
-            translated = deep_translator.GoogleTranslator(source='auto', target=request_dict.get("target_lang")).translate(stt_res.get("text", ""))
             tts_res = core_process.__wrapped__( {"action": "tts", "text": translated, "lang": request_dict.get("target_lang"), "speaker_wav": request_dict.get("speaker_wav")} )
-            res = {"text": stt_res.get("text"), "translated": translated, "audio": tts_res.get("audio")}
         else: res = {"error": f"Unknown action: {action}"}
     except Exception as e:
-        print(f"❌ Fault: {traceback.format_exc()}")
         res = {"error": str(e)}
     finally:
-        print(f"--- [v114] ✨ DONE ({time.time()-t1:.2f}s) ---")
         release_gpu_models()
     return res
@@ -241,7 +238,7 @@ async def lifespan(app: FastAPI):
     Thread(target=warmup_task, daemon=True).start()
     yield
-# 🚀 Server Lifecycle
 app = FastAPI(lifespan=lifespan)
 app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
@@ -250,18 +247,18 @@ async def api_process(request: Request):
     try:
         req_data = await request.json()
         if req_data.get("action") == "health":
-            return {"status": "awake", "warm": is_system_ready(), "v": "114"}
         return core_process(req_data)
     except Exception as e: return {"error": str(e)}
 @app.get("/health")
-def health(): return {"status": "ok", "warm": is_system_ready(), "v": "114"}
 def gradio_fn(req_json):
     try: return json.dumps(core_process(json.loads(req_json)))
     except Exception as e: return json.dumps({"error": str(e)})
-demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="🚀 AI Engine v114")
 demo.queue()
 app = gr.mount_gradio_app(app, demo, path="/")

+# 🚀 V115: ZEROGPU HOPPER STABILITY (STABLE-FAST)
 try:
     import spaces
 except ImportError:
 from threading import Thread, Lock
 from huggingface_hub import snapshot_download, hf_hub_download
+# 🛡️ 1. SILENCE & ENV (v115)
 logging.getLogger("transformers").setLevel(logging.ERROR)
 logging.getLogger("TTS").setLevel(logging.ERROR)
 os.environ["CT2_VERBOSE"] = "0"
 os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
 os.environ["GRADIO_SERVER_PORT"] = "7860"
+# 🛠️ 2. COMPATIBILITY PATCHES
 if "torchaudio.backend" not in sys.modules:
     backend = types.ModuleType("torchaudio.backend")
     common = types.ModuleType("torchaudio.backend.common")
 from df.enhance import init_df
 import deep_translator
+# FORCE BUILD TRIGGER: 18:55:00 Jan 21 2026
+# v115: Hopper Stability. num_workers=1. Serial Activation.
 os.environ["COQUI_TOS_AGREED"] = "1"
 MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
 READY_FLAG = os.path.expanduser("~/.engine_ready")
+ACTIVATE_LOCK = Lock()
 def is_system_ready():
     return os.path.exists(READY_FLAG)
 def activate_gpu_models(action):
+    """v115: Serial Safe Activation"""
     global MODELS
+    with ACTIVATE_LOCK:
+        if action in ["stt", "s2st"]:
+            stt_on_gpu = False
+            try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
+            except: pass
+            if not stt_on_gpu:
+                print(f"🎙️ [v115] Activating Whisper (Stable: 1 Worker)...")
+                try:
+                    gc.collect(); torch.cuda.empty_cache()
+                    MODELS["stt"] = WhisperModel(
+                        "large-v3",
+                        device="cuda",
+                        compute_type="int8_float16",
+                        num_workers=1,
+                        local_files_only=True
+                    )
+                except Exception as e:
+                    print(f"⚠️ GPU STT Init crash: {e}")
+                    MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8", local_files_only=True)
+        if action in ["tts", "s2st"]:
+            tts_on_gpu = False
+            try:
+                params = next(MODELS["tts"].synthesizer.tts_model.parameters())
+                tts_on_gpu = "cuda" in str(params.device)
             except: pass
+            if MODELS["tts"] is not None and not tts_on_gpu:
+                print(f"🔊 [v115] Activating XTTS-v2 (GPU)...")
+                try: MODELS["tts"].to("cuda")
+                except: pass
+        chatterbox_utils.load_chatterbox(device="cpu")
+        if MODELS["denoiser"] is None:
+            try: MODELS["denoiser"] = init_df()
+            except: pass
+        if MODELS["translate"] is None: MODELS["translate"] = "active"
 def release_gpu_models():
+    """v115: Safe Offload"""
     global MODELS
     try:
         if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
             try: MODELS["tts"].to("cpu")
             except: pass
     except: pass
+    gc.collect()
     if torch.cuda.is_available(): torch.cuda.empty_cache()
 def warmup_task():
+    """v115: RAM Warming"""
+    if os.path.exists(READY_FLAG): os.remove(READY_FLAG)
+    print("\n🔥 --- V115: HOPPER WARMUP ---")
     try:
         MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
         MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
         chatterbox_utils.warmup_chatterbox()
         chatterbox_utils.load_chatterbox(device="cpu")
         with open(READY_FLAG, "w") as f: f.write("READY")
+        print(f"✅ --- ENGINE ON: v115 --- \n")
+    except Exception as e: print(f"❌ Warmup Fault: {e}")
 @spaces.GPU(duration=150)
 def core_process(request_dict):
     action = request_dict.get("action")
+    print(f"--- [v115] 🚀 START: {action} ---")
     waited = 0
     while not is_system_ready() and waited < 300:
+        if waited % 10 == 0: print(f"⏳ Waiting for Engine... ({waited}s)")
         time.sleep(1)
         waited += 1
             with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
                 f.write(audio_bytes); temp_path = f.name
             try:
                 segments, _ = MODELS["stt"].transcribe(temp_path, language=request_dict.get("lang"), beam_size=1)
                 res = {"text": " ".join([s.text for s in segments]).strip()}
             finally:
                 res = {"audio": base64.b64encode(audio_bytes).decode()}
         elif action == "s2st":
+            print("🎙️ Step 1: Transcription...")
             stt_res = core_process.__wrapped__( {**request_dict, "action": "stt"} )
+            stt_text = stt_res.get("text", "")
+            print(f"🌍 Step 2: Translation ({request_dict.get('target_lang')})...")
+            translated = deep_translator.GoogleTranslator(source='auto', target=request_dict.get("target_lang")).translate(stt_text)
+            print("🔊 Step 3: Synthesis...")
             tts_res = core_process.__wrapped__( {"action": "tts", "text": translated, "lang": request_dict.get("target_lang"), "speaker_wav": request_dict.get("speaker_wav")} )
+            res = {"text": stt_text, "translated": translated, "audio": tts_res.get("audio")}
         else: res = {"error": f"Unknown action: {action}"}
     except Exception as e:
+        print(f"❌ Error: {traceback.format_exc()}")
         res = {"error": str(e)}
     finally:
+        print(f"--- [v115] ✨ FINISH ({time.time()-t1:.2f}s) ---")
         release_gpu_models()
     return res
     Thread(target=warmup_task, daemon=True).start()
     yield
+# 🚀 Server
 app = FastAPI(lifespan=lifespan)
 app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
     try:
         req_data = await request.json()
         if req_data.get("action") == "health":
+            return {"status": "awake", "warm": is_system_ready(), "v": "115"}
         return core_process(req_data)
     except Exception as e: return {"error": str(e)}
 @app.get("/health")
+def health(): return {"status": "ok", "warm": is_system_ready(), "v": "115"}
 def gradio_fn(req_json):
     try: return json.dumps(core_process(json.loads(req_json)))
     except Exception as e: return json.dumps({"error": str(e)})
+demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="🚀 AI Engine v115")
 demo.queue()
 app = gr.mount_gradio_app(app, demo, path="/")