Upload app.py with huggingface_hub
app.py
CHANGED

@@ -1,4 +1,4 @@
-# 🚀
+# 🚀 V121: ZEROGPU HOPPER STABLE (BUILD FIX)
 try:
     import spaces
 except ImportError:
@@ -24,7 +24,7 @@ import traceback
 from huggingface_hub import snapshot_download
 from transformers import pipeline

-# 🛡️ 1. SILENCE & ENV (v120)
+# 🛡️ 1. SILENCE & ENV (v121)
 import logging
 logging.getLogger("transformers").setLevel(logging.ERROR)
 os.environ["COQUI_TOS_AGREED"] = "1"
@@ -33,31 +33,30 @@ os.environ["PYTHONWARNINGS"] = "ignore"
 # 📦 2. GLOBAL MODELS (LAZY LOAD)
 MODELS = {"stt": None, "tts": None}

-# 🛠️ 3. CORE PROCESSING (
+# 🛠️ 3. CORE PROCESSING (v121: STABLE SPEED)
 @spaces.GPU(duration=120)
 def core_process(request_dict):
     global MODELS
     action = request_dict.get("action")
-    print(f"--- [
+    print(f"--- [v121] ⚡ HOPPER ACTIVATED: {action} ---")
     t1 = time.time()

     try:
-        #
+        # v121: Whisper Large-v3-Turbo (Hopper BF16 Optimized)
         if action in ["stt", "s2st"] and MODELS["stt"] is None:
-            print("🎙️ Loading Whisper Turbo (v3)
+            print("🎙️ Loading Whisper Turbo (v3) [BF16]...")
             model_id = "openai/whisper-large-v3-turbo"
+            # We let transformers auto-select the best attention (SDPA/Flash)
             MODELS["stt"] = pipeline(
                 "automatic-speech-recognition",
                 model=model_id,
                 torch_dtype=torch.bfloat16,
-                device="cuda",
-                model_kwargs={"attn_implementation": "flash_attention_2"}
+                device="cuda"
             )

         if action in ["tts", "s2st"] and MODELS["tts"] is None:
-            print("🔊 Loading XTTS-v2 (
+            print("🔊 Loading XTTS-v2 (GPU Optimized)...")
             from TTS.api import TTS
-            # Note: XTTS-v2 doesn't natively support bfloat16 in its loader yet, but we'll use gpu=True
             MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)

         # 🛠️ Execute Logic
@@ -66,7 +65,7 @@ def core_process(request_dict):
             with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
                 f.write(audio_bytes); temp_path = f.name
             try:
-                #
+                # v121: Flexible Transcription
                 lang = request_dict.get("lang")
                 gen_kwargs = {"language": lang} if lang and len(lang) <= 3 else {}
                 result = MODELS["stt"](
@@ -97,11 +96,8 @@ def core_process(request_dict):
                 with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
                     f.write(sb); speaker_wav_path = f.name
             else:
-                # Use a default speaker if available, or just use the first available
                 speaker_wav_path = "default_speaker.wav"
-                if not os.path.exists(speaker_wav_path):
-                    # Fallback to internal speaker if default not found
-                    speaker_wav_path = None
+                if not os.path.exists(speaker_wav_path): speaker_wav_path = None

             try:
                 with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
@@ -117,23 +113,20 @@ def core_process(request_dict):
             res = {"audio": base64.b64encode(audio_bytes).decode()}

         elif action == "s2st":
-            print("🔁 Step 1: STT...")
             s_res = core_process.__wrapped__({**request_dict, "action": "stt"})
             text = s_res.get("text", "")
-            print(f"🔁 Step 2: Translation ({request_dict.get('target_lang')})...")
             import deep_translator
             target = request_dict.get("target_lang") or "en"
             translated = deep_translator.GoogleTranslator(source='auto', target=target).translate(text)
-            print("🔁 Step 3: TTS...")
             t_res = core_process.__wrapped__({"action": "tts", "text": translated, "lang": target, "speaker_wav": request_dict.get("speaker_wav")})
             res = {"text": text, "translated": translated, "audio": t_res.get("audio")}
         else: res = {"error": "Invalid action"}

     except Exception as e:
-        print(f"❌ [
+        print(f"❌ [v121] ERROR: {traceback.format_exc()}")
         res = {"error": str(e)}
     finally:
-        print(f"--- [
+        print(f"--- [v121] ✨ FINISHED IN {time.time()-t1:.2f}s ---")
         gc.collect()
         if torch.cuda.is_available(): torch.cuda.empty_cache()
         return res
@@ -146,21 +139,20 @@ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], all
 async def api_process(request: Request):
     try:
         data = await request.json()
-        if data.get("action") == "health": return {"status": "awake", "v": "120"}
+        if data.get("action") == "health": return {"status": "awake", "v": "121"}
         return core_process(data)
     except Exception as e: return {"error": str(e)}

 @app.get("/health")
-def health(): return {"status": "ok", "v": "120"}
+def health(): return {"status": "ok", "v": "121"}

 def gradio_fn(req_json):
     try: return json.dumps(core_process(json.loads(req_json)))
     except Exception as e: return json.dumps({"error": str(e)})

-demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="🚀 AI Engine v120")
+demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="🚀 AI Engine v121")
 demo.queue()
 app = gr.mount_gradio_app(app, demo, path="/")

 if __name__ == "__main__":
-    print("🚀 [v120] Starting Hopper Turbo Engine on Port 7860...")
     uvicorn.run(app, host="0.0.0.0", port=7860, log_level="warning")
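A note on the BUILD FIX in the title: the removed model_kwargs={"attn_implementation": "flash_attention_2"} pins Whisper to the FlashAttention-2 backend, which only works when the separately compiled flash-attn package is installed, and that wheel often fails to build on a fresh Space image. With the kwarg gone, transformers falls back to its default attention (SDPA on PyTorch >= 2.0), as the new comment says. If you wanted that choice explicit rather than implicit, a minimal sketch; the "sdpa" pin is a suggestion, not something this commit does:

import torch
from transformers import pipeline

# "sdpa" ships with PyTorch >= 2.0 and needs no extra wheels, unlike
# "flash_attention_2", which requires the flash-attn package to build.
asr = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-large-v3-turbo",
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_kwargs={"attn_implementation": "sdpa"},
)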
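The TTS synthesis call itself sits outside these hunks. For orientation only, this is the documented Coqui TTS call shape that MODELS["tts"] exposes; the argument values below are illustrative, not taken from the diff:

from TTS.api import TTS

tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
# XTTS-v2 clones the voice in speaker_wav and expects a short language
# code, presumably why core_process only forwards lang when len(lang) <= 3.
tts.tts_to_file(
    text="Hello from XTTS-v2.",
    language="en",
    speaker_wav="default_speaker.wav",  # any reference clip; placeholder here
    file_path="out.wav",
)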
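Putting the service surface together, the diff shows a JSON POST handler (api_process), a GET /health, and a Gradio text interface mounted at /. A hedged client sketch follows: the POST route path /api/process is a guess, since the decorator line is outside the hunk, and the base64 "audio" request field is inferred from the base64-encoded "audio" the service returns.

import base64
import requests

BASE = "https://your-space.hf.space"  # placeholder Space URL

# GET /health is visible in the diff.
print(requests.get(f"{BASE}/health").json())  # e.g. {"status": "ok", "v": "121"}

# Speech-to-speech translation: "action" and "target_lang" match
# core_process; the route path and the "audio" field are assumptions.
with open("input.wav", "rb") as f:
    payload = {
        "action": "s2st",
        "audio": base64.b64encode(f.read()).decode(),
        "target_lang": "en",
    }
r = requests.post(f"{BASE}/api/process", json=payload).json()
print(r.get("text"), "->", r.get("translated"))
if r.get("audio"):
    with open("translated.wav", "wb") as out:
        out.write(base64.b64decode(r["audio"]))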