TGPro1 committed on
Commit
cb71958
·
verified ·
1 Parent(s): 91f0d67

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +20 -35
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # πŸš€ v134: ZEROGPU HOPPER PRO+ (MEMORY FENCE)
2
  try:
3
  import spaces
4
  except ImportError:
@@ -23,10 +23,10 @@ import traceback
23
  import soundfile as sf
24
  from faster_whisper import WhisperModel
25
 
26
- # πŸ›‘οΈ 0. INFRASTRUCTURE OPTIMIZATION (v134)
27
  os.environ["COQUI_TOS_AGREED"] = "1"
28
  os.environ["PYTHONWARNINGS"] = "ignore"
29
- os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
30
  torch.set_float32_matmul_precision('high')
31
 
32
  import torchaudio
@@ -40,12 +40,6 @@ torchaudio.load = torchaudio_load_safe
40
  # πŸ“¦ 1. GLOBAL MODELS (LAZY CPU LOAD)
41
  MODELS = {"stt": None, "tts": None}
42
 
43
- def get_stt():
44
- if MODELS["stt"] is None:
45
- print("πŸŽ™οΈ Pre-loading Faster-Whisper (CPU RAM)...")
46
- MODELS["stt"] = WhisperModel("large-v3-turbo", device="cpu", compute_type="float16")
47
- return MODELS["stt"]
48
-
49
  def get_tts():
50
  if MODELS["tts"] is None:
51
  print("πŸ”Š Pre-loading XTTS-v2 (CPU RAM)...")
@@ -53,20 +47,20 @@ def get_tts():
53
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
54
  return MODELS["tts"]
55
 
56
- # πŸ› οΈ 2. CORE PROCESSING (v134: MEMORY FENCE STRATEGY)
57
  @spaces.GPU(duration=120)
58
  def core_process(request_dict):
59
  global MODELS
60
  action = request_dict.get("action")
61
- print(f"--- [v134] πŸ› οΈ PRO ENGINE: {action} ---")
62
  t1 = time.time()
63
 
64
  try:
65
- # πŸŽ™οΈ STT PATH (Fast-Whisper GPU)
66
  if action in ["stt", "s2st"]:
67
- print("⚑ Activating STT GPU Fence...")
68
- # Re-init on GPU to bypass PyTorch/Cublas alignment issues
69
- gpu_stt = WhisperModel("large-v3-turbo", device="cuda", compute_type="float16")
70
 
71
  audio_bytes = base64.b64decode(request_dict.get("file"))
72
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
@@ -78,8 +72,8 @@ def core_process(request_dict):
78
  finally:
79
  if os.path.exists(temp_path): os.unlink(temp_path)
80
  del gpu_stt
81
- torch.cuda.empty_cache()
82
  gc.collect()
 
83
 
84
  if action == "stt": return {"text": stt_text}
85
 
@@ -95,7 +89,7 @@ def core_process(request_dict):
95
  if len(text) < 2 or not any(c.isalnum() for c in text):
96
  return {"audio": ""} if action == "tts" else {"text": stt_text, "translated": "", "audio": ""}
97
 
98
- print("⚑ Activating TTS GPU Fence...")
99
  from TTS.api import TTS
100
  gpu_tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
101
 
@@ -123,8 +117,8 @@ def core_process(request_dict):
123
  if speaker_wav_path and "default" not in speaker_wav_path and os.path.exists(speaker_wav_path): os.unlink(speaker_wav_path)
124
  if 'out_p' in locals() and os.path.exists(out_p): os.unlink(out_p)
125
  del gpu_tts
126
- torch.cuda.empty_cache()
127
  gc.collect()
 
128
  else:
129
  import chatterbox_utils
130
  audio_bytes = chatterbox_utils.run_chatterbox_inference(text, clean_lang)
@@ -134,10 +128,10 @@ def core_process(request_dict):
134
  return {"text": stt_text, "translated": trans_text, "audio": audio_b64}
135
 
136
  except Exception as e:
137
- print(f"❌ [v134] ERROR: {traceback.format_exc()}")
138
  return {"error": str(e)}
139
  finally:
140
- print(f"--- [v134] ✨ DONE ({time.time()-t1:.1f}s) ---")
141
  gc.collect()
142
 
143
  # πŸš€ 3. SERVER SETUP
@@ -148,29 +142,20 @@ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], all
148
  async def api_process(request: Request):
149
  try:
150
  data = await request.json()
151
- if data.get("action") == "health": return {"status": "awake", "v": "134"}
152
  return core_process(data)
153
  except Exception as e: return {"error": str(e)}
154
 
155
  @app.get("/health")
156
- def health(): return {"status": "ok", "v": "134"}
157
 
158
  demo = gr.Interface(
159
  fn=lambda x: json.dumps(core_process(json.loads(x))),
160
- inputs="text", outputs="text", title="πŸš€ AI Engine v134 (Memory Fence)",
161
- description="H200 Optimized | Full GPU | Zero-Crash Design"
162
  ).queue()
163
  app = gr.mount_gradio_app(app, demo, path="/")
164
 
165
- def start_server():
166
- ports = [7860, 7861, 7862]
167
- for p in ports:
168
- try:
169
- print(f"🌐 Attempting to start server on port {p}...")
170
- uvicorn.run(app, host="0.0.0.0", port=p, log_level="warning")
171
- break
172
- except Exception as e:
173
- print(f"⚠️ Port {p} busy, trying next...")
174
-
175
  if __name__ == "__main__":
176
- start_server()
 
 
1
+ # πŸš€ v135: ZEROGPU HOPPER ELITE (FP32 STABILITY)
2
  try:
3
  import spaces
4
  except ImportError:
 
23
  import soundfile as sf
24
  from faster_whisper import WhisperModel
25
 
26
+ # πŸ›‘οΈ 0. INFRASTRUCTURE OPTIMIZATION (v135)
27
  os.environ["COQUI_TOS_AGREED"] = "1"
28
  os.environ["PYTHONWARNINGS"] = "ignore"
29
+ os.environ["CT2_CUDA_ALLOW_TF32"] = "1" # Leverage H200 TF32 cores
30
  torch.set_float32_matmul_precision('high')
31
 
32
  import torchaudio
 
40
  # πŸ“¦ 1. GLOBAL MODELS (LAZY CPU LOAD)
41
  MODELS = {"stt": None, "tts": None}
42
 
 
 
 
 
 
 
43
  def get_tts():
44
  if MODELS["tts"] is None:
45
  print("πŸ”Š Pre-loading XTTS-v2 (CPU RAM)...")
 
47
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
48
  return MODELS["tts"]
49
 
50
+ # πŸ› οΈ 2. CORE PROCESSING (v135: FP32 FOR STABILITY)
51
  @spaces.GPU(duration=120)
52
  def core_process(request_dict):
53
  global MODELS
54
  action = request_dict.get("action")
55
+ print(f"--- [v135] πŸ› οΈ ELITE ENGINE: {action} ---")
56
  t1 = time.time()
57
 
58
  try:
59
+ # πŸŽ™οΈ STT PATH (Fast-Whisper GPU FP32)
60
  if action in ["stt", "s2st"]:
61
+ print("⚑ Promoting STT to GPU (FP32 path)...")
62
+ # Force float32 to avoid cublasSgemm alignment errors on H200 drivers
63
+ gpu_stt = WhisperModel("large-v3-turbo", device="cuda", compute_type="float32")
64
 
65
  audio_bytes = base64.b64decode(request_dict.get("file"))
66
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
 
72
  finally:
73
  if os.path.exists(temp_path): os.unlink(temp_path)
74
  del gpu_stt
 
75
  gc.collect()
76
+ torch.cuda.empty_cache()
77
 
78
  if action == "stt": return {"text": stt_text}
79
 
 
89
  if len(text) < 2 or not any(c.isalnum() for c in text):
90
  return {"audio": ""} if action == "tts" else {"text": stt_text, "translated": "", "audio": ""}
91
 
92
+ print("⚑ Promoting TTS to GPU...")
93
  from TTS.api import TTS
94
  gpu_tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
95
 
 
117
  if speaker_wav_path and "default" not in speaker_wav_path and os.path.exists(speaker_wav_path): os.unlink(speaker_wav_path)
118
  if 'out_p' in locals() and os.path.exists(out_p): os.unlink(out_p)
119
  del gpu_tts
 
120
  gc.collect()
121
+ torch.cuda.empty_cache()
122
  else:
123
  import chatterbox_utils
124
  audio_bytes = chatterbox_utils.run_chatterbox_inference(text, clean_lang)
 
128
  return {"text": stt_text, "translated": trans_text, "audio": audio_b64}
129
 
130
  except Exception as e:
131
+ print(f"❌ [v135] ERROR: {traceback.format_exc()}")
132
  return {"error": str(e)}
133
  finally:
134
+ print(f"--- [v135] ✨ DONE ({time.time()-t1:.1f}s) ---")
135
  gc.collect()
136
 
137
  # πŸš€ 3. SERVER SETUP
 
142
  async def api_process(request: Request):
143
  try:
144
  data = await request.json()
145
+ if data.get("action") == "health": return {"status": "awake", "v": "135"}
146
  return core_process(data)
147
  except Exception as e: return {"error": str(e)}
148
 
149
  @app.get("/health")
150
+ def health(): return {"status": "ok", "v": "135"}
151
 
152
  demo = gr.Interface(
153
  fn=lambda x: json.dumps(core_process(json.loads(x))),
154
+ inputs="text", outputs="text", title="πŸš€ AI Engine v135 (H200 FP32)",
155
+ description="Optimized for H200 | GPU STT (FP32) | GPU TTS | Zero-Crash"
156
  ).queue()
157
  app = gr.mount_gradio_app(app, demo, path="/")
158
 
 
 
 
 
 
 
 
 
 
 
159
  if __name__ == "__main__":
160
+ # Simplified entry point for Hugging Face compatibility
161
+ uvicorn.run(app, host="0.0.0.0", port=7860, log_level="warning")