Spaces:

TGPro1
/

S2ST

Sleeping

App Files Community

TGPro1 committed on Jan 21

Commit

cb71958

verified ·

1 Parent(s): 91f0d67

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +20 -35

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# 🚀 v134: ZEROGPU HOPPER PRO+ (MEMORY FENCE)
 try:
     import spaces
 except ImportError:
@@ -23,10 +23,10 @@ import traceback
 import soundfile as sf
 from faster_whisper import WhisperModel
-# 🛡️ 0. INFRASTRUCTURE OPTIMIZATION (v134)
 os.environ["COQUI_TOS_AGREED"] = "1"
 os.environ["PYTHONWARNINGS"] = "ignore"
-os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
 torch.set_float32_matmul_precision('high')
 import torchaudio
@@ -40,12 +40,6 @@ torchaudio.load = torchaudio_load_safe
 # 📦 1. GLOBAL MODELS (LAZY CPU LOAD)
 MODELS = {"stt": None, "tts": None}
-def get_stt():
-    if MODELS["stt"] is None:
-        print("🎙️ Pre-loading Faster-Whisper (CPU RAM)...")
-        MODELS["stt"] = WhisperModel("large-v3-turbo", device="cpu", compute_type="float16")
-    return MODELS["stt"]
 def get_tts():
     if MODELS["tts"] is None:
         print("🔊 Pre-loading XTTS-v2 (CPU RAM)...")
@@ -53,20 +47,20 @@ def get_tts():
         MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
     return MODELS["tts"]
-# 🛠️ 2. CORE PROCESSING (v134: MEMORY FENCE STRATEGY)
 @spaces.GPU(duration=120)
 def core_process(request_dict):
     global MODELS
     action = request_dict.get("action")
-    print(f"--- [v134] 🛠️ PRO ENGINE: {action} ---")
     t1 = time.time()
     try:
-        # 🎙️ STT PATH (Fast-Whisper GPU)
         if action in ["stt", "s2st"]:
-            print("⚡ Activating STT GPU Fence...")
-            # Re-init on GPU to bypass PyTorch/Cublas alignment issues
-            gpu_stt = WhisperModel("large-v3-turbo", device="cuda", compute_type="float16")
             audio_bytes = base64.b64decode(request_dict.get("file"))
             with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
@@ -78,8 +72,8 @@ def core_process(request_dict):
             finally:
                 if os.path.exists(temp_path): os.unlink(temp_path)
                 del gpu_stt
-                torch.cuda.empty_cache()
                 gc.collect()
             if action == "stt": return {"text": stt_text}
@@ -95,7 +89,7 @@ def core_process(request_dict):
             if len(text) < 2 or not any(c.isalnum() for c in text):
                 return {"audio": ""} if action == "tts" else {"text": stt_text, "translated": "", "audio": ""}
-            print("⚡ Activating TTS GPU Fence...")
             from TTS.api import TTS
             gpu_tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
@@ -123,8 +117,8 @@ def core_process(request_dict):
                     if speaker_wav_path and "default" not in speaker_wav_path and os.path.exists(speaker_wav_path): os.unlink(speaker_wav_path)
                     if 'out_p' in locals() and os.path.exists(out_p): os.unlink(out_p)
                     del gpu_tts
-                    torch.cuda.empty_cache()
                     gc.collect()
             else:
                 import chatterbox_utils
                 audio_bytes = chatterbox_utils.run_chatterbox_inference(text, clean_lang)
@@ -134,10 +128,10 @@ def core_process(request_dict):
             return {"text": stt_text, "translated": trans_text, "audio": audio_b64}
     except Exception as e:
-        print(f"❌ [v134] ERROR: {traceback.format_exc()}")
         return {"error": str(e)}
     finally:
-        print(f"--- [v134] ✨ DONE ({time.time()-t1:.1f}s) ---")
         gc.collect()
 # 🚀 3. SERVER SETUP
@@ -148,29 +142,20 @@ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], all
 async def api_process(request: Request):
     try:
         data = await request.json()
-        if data.get("action") == "health": return {"status": "awake", "v": "134"}
         return core_process(data)
     except Exception as e: return {"error": str(e)}
 @app.get("/health")
-def health(): return {"status": "ok", "v": "134"}
 demo = gr.Interface(
     fn=lambda x: json.dumps(core_process(json.loads(x))),
-    inputs="text", outputs="text", title="🚀 AI Engine v134 (Memory Fence)",
-    description="H200 Optimized | Full GPU | Zero-Crash Design"
 ).queue()
 app = gr.mount_gradio_app(app, demo, path="/")
-def start_server():
-    ports = [7860, 7861, 7862]
-    for p in ports:
-        try:
-            print(f"🌐 Attempting to start server on port {p}...")
-            uvicorn.run(app, host="0.0.0.0", port=p, log_level="warning")
-            break
-        except Exception as e:
-            print(f"⚠️ Port {p} busy, trying next...")
 if __name__ == "__main__":
-    start_server()

+# 🚀 v135: ZEROGPU HOPPER ELITE (FP32 STABILITY)
 try:
     import spaces
 except ImportError:
 import soundfile as sf
 from faster_whisper import WhisperModel
+# 🛡️ 0. INFRASTRUCTURE OPTIMIZATION (v135)
 os.environ["COQUI_TOS_AGREED"] = "1"
 os.environ["PYTHONWARNINGS"] = "ignore"
+os.environ["CT2_CUDA_ALLOW_TF32"] = "1" # Leverage H200 TF32 cores
 torch.set_float32_matmul_precision('high')
 import torchaudio
 # 📦 1. GLOBAL MODELS (LAZY CPU LOAD)
 MODELS = {"stt": None, "tts": None}
 def get_tts():
     if MODELS["tts"] is None:
         print("🔊 Pre-loading XTTS-v2 (CPU RAM)...")
         MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
     return MODELS["tts"]
+# 🛠️ 2. CORE PROCESSING (v135: FP32 FOR STABILITY)
 @spaces.GPU(duration=120)
 def core_process(request_dict):
     global MODELS
     action = request_dict.get("action")
+    print(f"--- [v135] 🛠️ ELITE ENGINE: {action} ---")
     t1 = time.time()
     try:
+        # 🎙️ STT PATH (Fast-Whisper GPU FP32)
         if action in ["stt", "s2st"]:
+            print("⚡ Promoting STT to GPU (FP32 path)...")
+            # Force float32 to avoid cublasSgemm alignment errors on H200 drivers
+            gpu_stt = WhisperModel("large-v3-turbo", device="cuda", compute_type="float32")
             audio_bytes = base64.b64decode(request_dict.get("file"))
             with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
             finally:
                 if os.path.exists(temp_path): os.unlink(temp_path)
                 del gpu_stt
                 gc.collect()
+                torch.cuda.empty_cache()
             if action == "stt": return {"text": stt_text}
             if len(text) < 2 or not any(c.isalnum() for c in text):
                 return {"audio": ""} if action == "tts" else {"text": stt_text, "translated": "", "audio": ""}
+            print("⚡ Promoting TTS to GPU...")
             from TTS.api import TTS
             gpu_tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
                     if speaker_wav_path and "default" not in speaker_wav_path and os.path.exists(speaker_wav_path): os.unlink(speaker_wav_path)
                     if 'out_p' in locals() and os.path.exists(out_p): os.unlink(out_p)
                     del gpu_tts
                     gc.collect()
+                    torch.cuda.empty_cache()
             else:
                 import chatterbox_utils
                 audio_bytes = chatterbox_utils.run_chatterbox_inference(text, clean_lang)
             return {"text": stt_text, "translated": trans_text, "audio": audio_b64}
     except Exception as e:
+        print(f"❌ [v135] ERROR: {traceback.format_exc()}")
         return {"error": str(e)}
     finally:
+        print(f"--- [v135] ✨ DONE ({time.time()-t1:.1f}s) ---")
         gc.collect()
 # 🚀 3. SERVER SETUP
 async def api_process(request: Request):
     try:
         data = await request.json()
+        if data.get("action") == "health": return {"status": "awake", "v": "135"}
         return core_process(data)
     except Exception as e: return {"error": str(e)}
 @app.get("/health")
+def health(): return {"status": "ok", "v": "135"}
 demo = gr.Interface(
     fn=lambda x: json.dumps(core_process(json.loads(x))),
+    inputs="text", outputs="text", title="🚀 AI Engine v135 (H200 FP32)",
+    description="Optimized for H200 | GPU STT (FP32) | GPU TTS | Zero-Crash"
 ).queue()
 app = gr.mount_gradio_app(app, demo, path="/")
 if __name__ == "__main__":
+    # Simplified entry point for Hugging Face compatibility
+    uvicorn.run(app, host="0.0.0.0", port=7860, log_level="warning")