TGPro1 committed on
Commit
32297a1
·
verified ·
1 Parent(s): 1f540c3

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +36 -38
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # πŸš€ V126: ZEROGPU HOPPER ROBUST (HYBRID ENGINE)
2
  try:
3
  import spaces
4
  except ImportError:
@@ -9,6 +9,9 @@ except ImportError:
9
  return f
10
 
11
  import gradio as gr
 
 
 
12
  import base64
13
  import torch
14
  import os
@@ -20,8 +23,8 @@ import traceback
20
  import soundfile as sf
21
  from transformers import pipeline
22
 
23
- # πŸ›‘οΈ 0. ENV & MONKEYPATCH (v126)
24
- os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" # Stability for MIG
25
  os.environ["COQUI_TOS_AGREED"] = "1"
26
  os.environ["PYTHONWARNINGS"] = "ignore"
27
 
@@ -36,36 +39,25 @@ torchaudio.load = torchaudio_load_safe
36
  # πŸ“¦ 1. GLOBAL MODELS (LAZY LOAD)
37
  MODELS = {"stt": None, "tts": None}
38
 
39
- # πŸ› οΈ 2. CORE PROCESSING (v126: GPU-STT + CPU-TTS)
40
- # Since XTTS keeps crashing the CUDA context on H200, we move it to CPU.
41
- # Whisper remains on GPU as it is fully stable and incredibly fast.
42
-
43
  @spaces.GPU(duration=120)
44
  def core_process(request_dict):
45
  global MODELS
46
  action = request_dict.get("action")
47
- print(f"--- [v126] πŸ› οΈ HYBRID ENGINE: {action} ---")
48
  t1 = time.time()
49
 
50
  try:
51
- # GPU PATH: Whisper Large-v3-Turbo
52
  if action in ["stt", "s2st"] and MODELS["stt"] is None:
53
  print("πŸŽ™οΈ Loading Whisper Turbo (v3) [GPU: float32]...")
54
- MODELS["stt"] = pipeline(
55
- "automatic-speech-recognition",
56
- model="openai/whisper-large-v3-turbo",
57
- torch_dtype=torch.float32,
58
- device="cuda"
59
- )
60
-
61
- # CPU PATH: XTTS-v2 (Zero-Crash Stability)
62
  if action in ["tts", "s2st"] and MODELS["tts"] is None:
63
  print("πŸ”Š Loading XTTS-v2 [CPU Path]...")
64
  from TTS.api import TTS
65
- # Running on CPU avoids the persistent cublasSgemm crashes on H200
66
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
67
 
68
- # πŸ› οΈ Execution Logic
69
  if action == "stt":
70
  audio_bytes = base64.b64decode(request_dict.get("file"))
71
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
@@ -124,41 +116,47 @@ def core_process(request_dict):
124
  from deep_translator import GoogleTranslator
125
  target = request_dict.get("target_lang") or "en"
126
  trans_t = GoogleTranslator(source='auto', target=target).translate(stt_t)
127
- # TTS is already on CPU, so we call it directly
128
  t_res = core_process.__wrapped__({"action": "tts", "text": trans_t, "lang": target, "speaker_wav": request_dict.get("speaker_wav")})
129
  res = {"text": stt_t, "translated": trans_t, "audio": t_res.get("audio")}
130
- elif action == "health":
131
- res = {"status": "awake", "v": "126", "gpu": torch.cuda.get_device_name(0) if torch.cuda.is_available() else "None"}
132
  else: res = {"error": "Invalid action"}
133
 
134
  except Exception as e:
135
- print(f"❌ [v126] ERROR: {traceback.format_exc()}")
136
  res = {"error": str(e)}
137
  finally:
138
- print(f"--- [v126] ✨ DONE ({time.time()-t1:.1f}s) ---")
139
  gc.collect()
140
  if torch.cuda.is_available(): torch.cuda.empty_cache()
141
  return res
142
 
143
- # πŸš€ 3. GRADIO INTERFACE (v126)
144
- def handle_api(req_json):
 
 
 
 
145
  try:
146
- data = json.loads(req_json)
147
- # Direct return for health to avoid GPU trigger if not needed
148
- if data.get("action") == "health": return json.dumps({"status": "awake", "v": "126"})
149
- return json.dumps(core_process(data))
150
- except Exception as e:
151
- return json.dumps({"error": str(e)})
 
 
 
 
 
152
 
153
  demo = gr.Interface(
154
- fn=handle_api,
155
  inputs="text",
156
  outputs="text",
157
- title="πŸš€ AI Engine v126 (Hopper Robust)",
158
- description="STT (GPU) | Translation | TTS (CPU-Fallthrough)"
159
  )
 
 
160
 
161
  if __name__ == "__main__":
162
- demo.queue()
163
- # demo.launch handles the server and port binding automatically/robustly on HF
164
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
+ # πŸš€ V127: ZEROGPU HOPPER PRO (API RESTORE)
2
  try:
3
  import spaces
4
  except ImportError:
 
9
  return f
10
 
11
  import gradio as gr
12
+ from fastapi import FastAPI, Request
13
+ from fastapi.middleware.cors import CORSMiddleware
14
+ import uvicorn
15
  import base64
16
  import torch
17
  import os
 
23
  import soundfile as sf
24
  from transformers import pipeline
25
 
26
+ # πŸ›‘οΈ 0. ENV & MONKEYPATCH (v127)
27
+ os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
28
  os.environ["COQUI_TOS_AGREED"] = "1"
29
  os.environ["PYTHONWARNINGS"] = "ignore"
30
 
 
39
  # πŸ“¦ 1. GLOBAL MODELS (LAZY LOAD)
40
  MODELS = {"stt": None, "tts": None}
41
 
42
+ # πŸ› οΈ 2. CORE PROCESSING (v127: GPU-STT + CPU-TTS)
 
 
 
43
  @spaces.GPU(duration=120)
44
  def core_process(request_dict):
45
  global MODELS
46
  action = request_dict.get("action")
47
+ print(f"--- [v127] πŸ› οΈ PRO ENGINE: {action} ---")
48
  t1 = time.time()
49
 
50
  try:
 
51
  if action in ["stt", "s2st"] and MODELS["stt"] is None:
52
  print("πŸŽ™οΈ Loading Whisper Turbo (v3) [GPU: float32]...")
53
+ MODELS["stt"] = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3-turbo", torch_dtype=torch.float32, device="cuda")
54
+
 
 
 
 
 
 
55
  if action in ["tts", "s2st"] and MODELS["tts"] is None:
56
  print("πŸ”Š Loading XTTS-v2 [CPU Path]...")
57
  from TTS.api import TTS
 
58
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
59
 
60
+ # πŸ› οΈ Logic
61
  if action == "stt":
62
  audio_bytes = base64.b64decode(request_dict.get("file"))
63
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
 
116
  from deep_translator import GoogleTranslator
117
  target = request_dict.get("target_lang") or "en"
118
  trans_t = GoogleTranslator(source='auto', target=target).translate(stt_t)
 
119
  t_res = core_process.__wrapped__({"action": "tts", "text": trans_t, "lang": target, "speaker_wav": request_dict.get("speaker_wav")})
120
  res = {"text": stt_t, "translated": trans_t, "audio": t_res.get("audio")}
 
 
121
  else: res = {"error": "Invalid action"}
122
 
123
  except Exception as e:
124
+ print(f"❌ [v127] ERROR: {traceback.format_exc()}")
125
  res = {"error": str(e)}
126
  finally:
127
+ print(f"--- [v127] ✨ DONE ({time.time()-t1:.1f}s) ---")
128
  gc.collect()
129
  if torch.cuda.is_available(): torch.cuda.empty_cache()
130
  return res
131
 
132
# 🚀 3. SERVER SETUP (REST + UI)
app = FastAPI()
# NOTE(review): allow_origins=["*"] opens the API to any origin — confirm this is intended
# for a public inference endpoint before shipping.
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])

@app.post("/api/v1/process")
async def api_process(request: Request):
    """REST entry point: parse the JSON body and dispatch it to core_process.

    A ``health`` action is answered inline so probes never trigger model
    loading or a GPU allocation. Any failure (malformed JSON, processing
    error) is reported as ``{"error": "<message>"}`` rather than a 500.
    """
    try:
        data = await request.json()
        if data.get("action") == "health":
            return {"status": "awake", "v": "127"}
        # core_process is a blocking call (lazy model loading + inference,
        # up to the 120 s GPU window). Running it directly in an async def
        # would freeze the event loop for every other client, so push it
        # onto a worker thread and await the result.
        import asyncio
        return await asyncio.to_thread(core_process, data)
    except Exception as e:
        return {"error": str(e)}
143
+
144
@app.get("/health")
def health():
    """Lightweight liveness probe; never touches the ML models."""
    return {"status": "ok", "v": "127"}
146
+
147
def gradio_fn(req_json):
    """Gradio adapter: JSON request string in, JSON response string out.

    Any failure — unparseable input or a processing error — is serialized
    as ``{"error": "<message>"}`` instead of propagating.
    """
    try:
        payload = json.loads(req_json)
        return json.dumps(core_process(payload))
    except Exception as e:
        return json.dumps({"error": str(e)})
150
 
151
# Plain text-in/text-out UI wrapping the same core engine as the REST route.
demo = gr.Interface(
    fn=gradio_fn,
    inputs="text",
    outputs="text",
    title="πŸš€ AI Engine v127 (Hopper Pro)",
    description="API & UI Active | Hybrid STT-GPU / TTS-CPU",
)
demo.queue()
# Mount the Gradio UI at "/" so the FastAPI routes and the front end share one server.
app = gr.mount_gradio_app(app, demo, path="/", ssr=False)
160
 
161
if __name__ == "__main__":
    # Hand the combined FastAPI + Gradio app to uvicorn on the HF Spaces port.
    uvicorn.run(app, host="0.0.0.0", port=7860, log_level="warning")