Spaces:

TGPro1
/

S2ST

Running on Zero

App Files Files Community

TGPro1 committed 27 days ago

Commit

32297a1

verified ·

1 Parent(s): 1f540c3

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +36 -38

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# 🚀 V126: ZEROGPU HOPPER ROBUST (HYBRID ENGINE)
 try:
     import spaces
 except ImportError:
@@ -9,6 +9,9 @@ except ImportError:
             return f
 import gradio as gr
 import base64
 import torch
 import os
@@ -20,8 +23,8 @@ import traceback
 import soundfile as sf
 from transformers import pipeline
-# 🛡️ 0. ENV & MONKEYPATCH (v126)
-os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" # Stability for MIG
 os.environ["COQUI_TOS_AGREED"] = "1"
 os.environ["PYTHONWARNINGS"] = "ignore"
@@ -36,36 +39,25 @@ torchaudio.load = torchaudio_load_safe
 # 📦 1. GLOBAL MODELS (LAZY LOAD)
 MODELS = {"stt": None, "tts": None}
-# 🛠️ 2. CORE PROCESSING (v126: GPU-STT + CPU-TTS)
-# Since XTTS keeps crashing the CUDA context on H200, we move it to CPU.
-# Whisper remains on GPU as it is fully stable and incredibly fast.
 @spaces.GPU(duration=120)
 def core_process(request_dict):
     global MODELS
     action = request_dict.get("action")
-    print(f"--- [v126] 🛠️ HYBRID ENGINE: {action} ---")
     t1 = time.time()
     try:
-        # GPU PATH: Whisper Large-v3-Turbo
         if action in ["stt", "s2st"] and MODELS["stt"] is None:
             print("🎙️ Loading Whisper Turbo (v3) [GPU: float32]...")
-            MODELS["stt"] = pipeline(
-                "automatic-speech-recognition",
-                model="openai/whisper-large-v3-turbo",
-                torch_dtype=torch.float32,
-                device="cuda"
-            )
-        # CPU PATH: XTTS-v2 (Zero-Crash Stability)
         if action in ["tts", "s2st"] and MODELS["tts"] is None:
             print("🔊 Loading XTTS-v2 [CPU Path]...")
             from TTS.api import TTS
-            # Running on CPU avoids the persistent cublasSgemm crashes on H200
             MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
-        # 🛠️ Execution Logic
         if action == "stt":
             audio_bytes = base64.b64decode(request_dict.get("file"))
             with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
@@ -124,41 +116,47 @@ def core_process(request_dict):
             from deep_translator import GoogleTranslator
             target = request_dict.get("target_lang") or "en"
             trans_t = GoogleTranslator(source='auto', target=target).translate(stt_t)
-            # TTS is already on CPU, so we call it directly
             t_res = core_process.__wrapped__({"action": "tts", "text": trans_t, "lang": target, "speaker_wav": request_dict.get("speaker_wav")})
             res = {"text": stt_t, "translated": trans_t, "audio": t_res.get("audio")}
-        elif action == "health":
-            res = {"status": "awake", "v": "126", "gpu": torch.cuda.get_device_name(0) if torch.cuda.is_available() else "None"}
         else: res = {"error": "Invalid action"}
     except Exception as e:
-        print(f"❌ [v126] ERROR: {traceback.format_exc()}")
         res = {"error": str(e)}
     finally:
-        print(f"--- [v126] ✨ DONE ({time.time()-t1:.1f}s) ---")
         gc.collect()
         if torch.cuda.is_available(): torch.cuda.empty_cache()
     return res
-# 🚀 3. GRADIO INTERFACE (v126)
-def handle_api(req_json):
     try:
-        data = json.loads(req_json)
-        # Direct return for health to avoid GPU trigger if not needed
-        if data.get("action") == "health": return json.dumps({"status": "awake", "v": "126"})
-        return json.dumps(core_process(data))
-    except Exception as e:
-        return json.dumps({"error": str(e)})
 demo = gr.Interface(
-    fn=handle_api,
     inputs="text",
     outputs="text",
-    title="🚀 AI Engine v126 (Hopper Robust)",
-    description="STT (GPU) | Translation | TTS (CPU-Fallthrough)"
 )
 if __name__ == "__main__":
-    demo.queue()
-    # demo.launch handles the server and port binding automatically/robustly on HF
-    demo.launch(server_name="0.0.0.0", server_port=7860)

+# 🚀 V127: ZEROGPU HOPPER PRO (API RESTORE)
 try:
     import spaces
 except ImportError:
             return f
 import gradio as gr
+from fastapi import FastAPI, Request
+from fastapi.middleware.cors import CORSMiddleware
+import uvicorn
 import base64
 import torch
 import os
 import soundfile as sf
 from transformers import pipeline
+# 🛡️ 0. ENV & MONKEYPATCH (v127)
+os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
 os.environ["COQUI_TOS_AGREED"] = "1"
 os.environ["PYTHONWARNINGS"] = "ignore"
 # 📦 1. GLOBAL MODELS (LAZY LOAD)
 MODELS = {"stt": None, "tts": None}
+# 🛠️ 2. CORE PROCESSING (v127: GPU-STT + CPU-TTS)
 @spaces.GPU(duration=120)
 def core_process(request_dict):
     global MODELS
     action = request_dict.get("action")
+    print(f"--- [v127] 🛠️ PRO ENGINE: {action} ---")
     t1 = time.time()
     try:
         if action in ["stt", "s2st"] and MODELS["stt"] is None:
             print("🎙️ Loading Whisper Turbo (v3) [GPU: float32]...")
+            MODELS["stt"] = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3-turbo", torch_dtype=torch.float32, device="cuda")
         if action in ["tts", "s2st"] and MODELS["tts"] is None:
             print("🔊 Loading XTTS-v2 [CPU Path]...")
             from TTS.api import TTS
             MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
+        # 🛠️ Logic
         if action == "stt":
             audio_bytes = base64.b64decode(request_dict.get("file"))
             with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
             from deep_translator import GoogleTranslator
             target = request_dict.get("target_lang") or "en"
             trans_t = GoogleTranslator(source='auto', target=target).translate(stt_t)
             t_res = core_process.__wrapped__({"action": "tts", "text": trans_t, "lang": target, "speaker_wav": request_dict.get("speaker_wav")})
             res = {"text": stt_t, "translated": trans_t, "audio": t_res.get("audio")}
         else: res = {"error": "Invalid action"}
     except Exception as e:
+        print(f"❌ [v127] ERROR: {traceback.format_exc()}")
         res = {"error": str(e)}
     finally:
+        print(f"--- [v127] ✨ DONE ({time.time()-t1:.1f}s) ---")
         gc.collect()
         if torch.cuda.is_available(): torch.cuda.empty_cache()
     return res
+# 🚀 3. SERVER SETUP (REST + UI)
+app = FastAPI()
+app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
+@app.post("/api/v1/process")
+async def api_process(request: Request):
+    """REST entry point: decode the JSON body and dispatch it to core_process.
+
+    A body whose "action" is "health" is answered directly without calling
+    core_process. Any failure (malformed JSON, processing error) is reported
+    as {"error": "<message>"} instead of being raised.
+    """
+    # Local import: fastapi is already a dependency of this module.
+    from fastapi.concurrency import run_in_threadpool
     try:
+        data = await request.json()
+        # Valid JSON may still be a list/string/number; reject it explicitly
+        # rather than surfacing an AttributeError message from .get().
+        if not isinstance(data, dict):
+            return {"error": "Request body must be a JSON object"}
+        if data.get("action") == "health":
+            return {"status": "awake", "v": "127"}
+        # core_process is synchronous (model loading + inference). Calling it
+        # directly inside this coroutine would stall the event loop, and with
+        # it /health and the mounted Gradio UI, until it returns.
+        return await run_in_threadpool(core_process, data)
+    except Exception as e:
+        return {"error": str(e)}
+@app.get("/health")
+def health():
+    """Liveness probe: return a fixed status/version payload."""
+    payload = {"status": "ok", "v": "127"}
+    return payload
+def gradio_fn(req_json):
+    """Gradio text handler: take a JSON request string, return a JSON reply string.
+
+    The input is parsed as JSON and passed to core_process; the result is
+    serialized back to a JSON string. Any exception is reported as
+    '{"error": "<message>"}' instead of being raised.
+    """
+    try:
+        data = json.loads(req_json)
+        # Answer health probes here, exactly as the REST endpoint does, so
+        # they do not go through the GPU-decorated core_process.
+        if isinstance(data, dict) and data.get("action") == "health":
+            return json.dumps({"status": "awake", "v": "127"})
+        return json.dumps(core_process(data))
+    except Exception as e:
+        return json.dumps({"error": str(e)})
 demo = gr.Interface(
+    fn=gradio_fn,
     inputs="text",
     outputs="text",
+    title="🚀 AI Engine v127 (Hopper Pro)",
+    description="API & UI Active | Hybrid STT-GPU / TTS-CPU"
 )
+demo.queue()
+app = gr.mount_gradio_app(app, demo, path="/", ssr=False)
 if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=7860, log_level="warning")