TGPro1 committed on
Commit
2ebc6b4
·
verified ·
1 Parent(s): abb9165

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +43 -51
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # πŸš€ V125: ZEROGPU HOPPER ULTIMATE (FULL FP32 MODE)
2
  try:
3
  import spaces
4
  except ImportError:
@@ -9,9 +9,6 @@ except ImportError:
9
  return f
10
 
11
  import gradio as gr
12
- from fastapi import FastAPI, Request
13
- from fastapi.middleware.cors import CORSMiddleware
14
- import uvicorn
15
  import base64
16
  import torch
17
  import os
@@ -21,10 +18,13 @@ import time
21
  import gc
22
  import traceback
23
  import soundfile as sf
24
- from huggingface_hub import snapshot_download
25
  from transformers import pipeline
26
 
27
- # πŸ›‘οΈ 0. MONKEYPATCH: TORCHAUDIO CODEC BYPASS (v125)
 
 
 
 
28
  import torchaudio
29
  def torchaudio_load_safe(filepath, **kwargs):
30
  data, sr = sf.read(filepath)
@@ -33,46 +33,39 @@ def torchaudio_load_safe(filepath, **kwargs):
33
  return tensor, sr
34
  torchaudio.load = torchaudio_load_safe
35
 
36
- # πŸ›‘οΈ 1. SILENCE & ENV (v125)
37
- import logging
38
- logging.getLogger("transformers").setLevel(logging.ERROR)
39
- os.environ["COQUI_TOS_AGREED"] = "1"
40
- os.environ["PYTHONWARNINGS"] = "ignore"
41
-
42
- # πŸ“¦ 2. GLOBAL MODELS (LAZY LOAD)
43
  MODELS = {"stt": None, "tts": None}
44
 
45
- # πŸ› οΈ 3. CORE PROCESSING (v125: FULL FP32 STABILITY)
 
 
 
46
  @spaces.GPU(duration=120)
47
  def core_process(request_dict):
48
  global MODELS
49
  action = request_dict.get("action")
50
- print(f"--- [v125] πŸ›‘οΈ TOTAL FP32 MODE: {action} ---")
51
  t1 = time.time()
52
 
53
  try:
54
- # v125: Whisper Turbo (Forced FP32 for H200 Driver Stability)
55
  if action in ["stt", "s2st"] and MODELS["stt"] is None:
56
- print("πŸŽ™οΈ Loading Whisper Turbo (v3) [float32]...")
57
- model_id = "openai/whisper-large-v3-turbo"
58
  MODELS["stt"] = pipeline(
59
  "automatic-speech-recognition",
60
- model=model_id,
61
  torch_dtype=torch.float32,
62
  device="cuda"
63
  )
64
 
65
- # v125: XTTS-v2 (Forced FP32 to avoid cublasSgemm Batched crash)
66
  if action in ["tts", "s2st"] and MODELS["tts"] is None:
67
- print("πŸ”Š Loading XTTS-v2 (FP32 Guarded)...")
68
  from TTS.api import TTS
69
- # We don't set gpu=True in constructor to stay in float32 initially
70
- tt = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
71
- print("βš™οΈ Moving XTTS to CUDA [float32]...")
72
- tt.to("cuda") # Manually move. Default is float32.
73
- MODELS["tts"] = tt
74
 
75
- # πŸ› οΈ Execute Logic
76
  if action == "stt":
77
  audio_bytes = base64.b64decode(request_dict.get("file"))
78
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
@@ -114,7 +107,6 @@ def core_process(request_dict):
114
  try:
115
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as out_f:
116
  out_p = out_f.name
117
- # v125: Force context to avoid any automatic half-precision casting
118
  MODELS["tts"].tts_to_file(text=text, language=mapped_lang, file_path=out_p, speaker_wav=speaker_wav_path)
119
  with open(out_p, "rb") as f: res = {"audio": base64.b64encode(f.read()).decode()}
120
  finally:
@@ -132,41 +124,41 @@ def core_process(request_dict):
132
  from deep_translator import GoogleTranslator
133
  target = request_dict.get("target_lang") or "en"
134
  trans_t = GoogleTranslator(source='auto', target=target).translate(stt_t)
 
135
  t_res = core_process.__wrapped__({"action": "tts", "text": trans_t, "lang": target, "speaker_wav": request_dict.get("speaker_wav")})
136
  res = {"text": stt_t, "translated": trans_t, "audio": t_res.get("audio")}
 
 
137
  else: res = {"error": "Invalid action"}
138
 
139
  except Exception as e:
140
- print(f"❌ [v125] ERROR: {traceback.format_exc()}")
141
  res = {"error": str(e)}
142
  finally:
143
- print(f"--- [v125] ✨ DONE ({time.time()-t1:.1f}s) ---")
144
  gc.collect()
145
  if torch.cuda.is_available(): torch.cuda.empty_cache()
146
  return res
147
 
148
- # πŸš€ 4. SERVER SETUP
149
- app = FastAPI()
150
- app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
151
-
152
- @app.post("/api/v1/process")
153
- async def api_process(request: Request):
154
  try:
155
- data = await request.json()
156
- if data.get("action") == "health": return {"status": "awake", "v": "125"}
157
- return core_process(data)
158
- except Exception as e: return {"error": str(e)}
159
-
160
- @app.get("/health")
161
- def health(): return {"status": "ok", "v": "125"}
162
-
163
- def gradio_fn(req_json):
164
- try: return json.dumps(core_process(json.loads(req_json)))
165
- except Exception as e: return json.dumps({"error": str(e)})
166
 
167
- demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="πŸš€ AI Engine v125")
168
- demo.queue()
169
- app = gr.mount_gradio_app(app, demo, path="/")
 
 
 
 
170
 
171
  if __name__ == "__main__":
172
- uvicorn.run(app, host="0.0.0.0", port=7860, log_level="warning")
 
 
 
1
+ # πŸš€ V126: ZEROGPU HOPPER ROBUST (HYBRID ENGINE)
2
  try:
3
  import spaces
4
  except ImportError:
 
9
  return f
10
 
11
  import gradio as gr
 
 
 
12
  import base64
13
  import torch
14
  import os
 
18
  import gc
19
  import traceback
20
  import soundfile as sf
 
21
  from transformers import pipeline
22
 
23
+ # πŸ›‘οΈ 0. ENV & MONKEYPATCH (v126)
24
+ os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" # Stability for MIG
25
+ os.environ["COQUI_TOS_AGREED"] = "1"
26
+ os.environ["PYTHONWARNINGS"] = "ignore"
27
+
28
  import torchaudio
29
  def torchaudio_load_safe(filepath, **kwargs):
30
  data, sr = sf.read(filepath)
 
33
  return tensor, sr
34
  torchaudio.load = torchaudio_load_safe
35
 
36
+ # πŸ“¦ 1. GLOBAL MODELS (LAZY LOAD)
 
 
 
 
 
 
37
  MODELS = {"stt": None, "tts": None}
38
 
39
+ # πŸ› οΈ 2. CORE PROCESSING (v126: GPU-STT + CPU-TTS)
40
+ # Since XTTS keeps crashing the CUDA context on H200, we move it to CPU.
41
+ # Whisper remains on GPU as it is fully stable and incredibly fast.
42
+
43
  @spaces.GPU(duration=120)
44
  def core_process(request_dict):
45
  global MODELS
46
  action = request_dict.get("action")
47
+ print(f"--- [v126] πŸ› οΈ HYBRID ENGINE: {action} ---")
48
  t1 = time.time()
49
 
50
  try:
51
+ # GPU PATH: Whisper Large-v3-Turbo
52
  if action in ["stt", "s2st"] and MODELS["stt"] is None:
53
+ print("πŸŽ™οΈ Loading Whisper Turbo (v3) [GPU: float32]...")
 
54
  MODELS["stt"] = pipeline(
55
  "automatic-speech-recognition",
56
+ model="openai/whisper-large-v3-turbo",
57
  torch_dtype=torch.float32,
58
  device="cuda"
59
  )
60
 
61
+ # CPU PATH: XTTS-v2 (Zero-Crash Stability)
62
  if action in ["tts", "s2st"] and MODELS["tts"] is None:
63
+ print("πŸ”Š Loading XTTS-v2 [CPU Path]...")
64
  from TTS.api import TTS
65
+ # Running on CPU avoids the persistent cublasSgemm crashes on H200
66
+ MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
 
 
 
67
 
68
+ # πŸ› οΈ Execution Logic
69
  if action == "stt":
70
  audio_bytes = base64.b64decode(request_dict.get("file"))
71
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
 
107
  try:
108
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as out_f:
109
  out_p = out_f.name
 
110
  MODELS["tts"].tts_to_file(text=text, language=mapped_lang, file_path=out_p, speaker_wav=speaker_wav_path)
111
  with open(out_p, "rb") as f: res = {"audio": base64.b64encode(f.read()).decode()}
112
  finally:
 
124
  from deep_translator import GoogleTranslator
125
  target = request_dict.get("target_lang") or "en"
126
  trans_t = GoogleTranslator(source='auto', target=target).translate(stt_t)
127
+ # TTS is already on CPU, so we call it directly
128
  t_res = core_process.__wrapped__({"action": "tts", "text": trans_t, "lang": target, "speaker_wav": request_dict.get("speaker_wav")})
129
  res = {"text": stt_t, "translated": trans_t, "audio": t_res.get("audio")}
130
+ elif action == "health":
131
+ res = {"status": "awake", "v": "126", "gpu": torch.cuda.get_device_name(0) if torch.cuda.is_available() else "None"}
132
  else: res = {"error": "Invalid action"}
133
 
134
  except Exception as e:
135
+ print(f"❌ [v126] ERROR: {traceback.format_exc()}")
136
  res = {"error": str(e)}
137
  finally:
138
+ print(f"--- [v126] ✨ DONE ({time.time()-t1:.1f}s) ---")
139
  gc.collect()
140
  if torch.cuda.is_available(): torch.cuda.empty_cache()
141
  return res
142
 
143
+ # πŸš€ 3. GRADIO INTERFACE (v126)
144
def handle_api(req_json):
    """Parse a JSON request string, dispatch it, and return a JSON response string.

    Health probes are answered inline so the GPU-decorated core_process
    (and its @spaces.GPU allocation) is never triggered for them.
    Any failure — bad JSON, unknown payload shape, processing error —
    is reported as a JSON object with an "error" key instead of raising.
    """
    try:
        payload = json.loads(req_json)
        # Short-circuit health checks before touching the GPU path.
        if payload.get("action") == "health":
            return json.dumps({"status": "awake", "v": "126"})
        result = core_process(payload)
        return json.dumps(result)
    except Exception as exc:
        return json.dumps({"error": str(exc)})
 
 
 
 
 
152
 
153
# Single text-in / text-out endpoint: clients send a JSON request string
# and receive a JSON response string (see handle_api for the contract).
demo = gr.Interface(
    fn=handle_api,
    inputs="text",
    outputs="text",
    title="πŸš€ AI Engine v126 (Hopper Robust)",
    description="STT (GPU) | Translation | TTS (CPU-Fallthrough)"
)

if __name__ == "__main__":
    # Enable request queueing before launching the server.
    demo.queue()
    # demo.launch handles the server and port binding automatically/robustly on HF
    demo.launch(server_name="0.0.0.0", server_port=7860)