TGPro1 committed on
Commit
1822120
·
verified ·
1 Parent(s): 94261d6

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +24 -24
app.py CHANGED
@@ -18,7 +18,7 @@ import logging
18
  from threading import Thread, Lock
19
  from huggingface_hub import snapshot_download
20
 
21
- # πŸ›‘οΈ 1. SILENCE LOGS & WARNINGS (v104: Extreme Stability & Precedence)
22
  logging.getLogger("transformers").setLevel(logging.ERROR)
23
  logging.getLogger("TTS").setLevel(logging.ERROR)
24
  logging.getLogger("onnxruntime").setLevel(logging.ERROR)
@@ -81,8 +81,8 @@ except ImportError:
81
  if f is None: return lambda x: x
82
  return f
83
 
84
- # FORCE BUILD TRIGGER: 13:35:00 Jan 21 2026
85
- # v104: FastAPI-First Precedence. Fix 405 error. ZeroGPU Unified.
86
 
87
  os.environ["COQUI_TOS_AGREED"] = "1"
88
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
@@ -91,7 +91,7 @@ WARMUP_STATUS = {"complete": False, "in_progress": False}
91
  WARMUP_LOCK = Lock()
92
 
93
  def activate_gpu_models(action):
94
- """v104: Stability Milestone Activation"""
95
  global MODELS, WARMUP_STATUS
96
  local_only = WARMUP_STATUS["complete"]
97
 
@@ -100,19 +100,22 @@ def activate_gpu_models(action):
100
  try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
101
  except: pass
102
  if not stt_on_gpu:
103
- print(f"πŸŽ™οΈ [v104] Activating Whisper (GPU)...")
104
  try:
105
  if MODELS["stt"]: del MODELS["stt"]
106
  gc.collect(); torch.cuda.empty_cache()
 
 
107
  MODELS["stt"] = WhisperModel(
108
  "large-v3",
109
  device="cuda",
110
- compute_type="float16",
 
111
  num_workers=1,
112
  local_files_only=local_only
113
  )
114
  except Exception as e:
115
- print(f"⚠️ GPU Init failed: {e}")
116
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8", local_files_only=True)
117
 
118
  if action in ["tts", "s2st"]:
@@ -122,7 +125,7 @@ def activate_gpu_models(action):
122
  tts_on_gpu = "cuda" in curr
123
  except: pass
124
  if MODELS["tts"] is None or not tts_on_gpu:
125
- print(f"πŸ”Š [v104] Activating XTTS-v2 (GPU)...")
126
  try:
127
  if MODELS["tts"] is None:
128
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
@@ -137,9 +140,9 @@ def activate_gpu_models(action):
137
  if MODELS["translate"] is None: MODELS["translate"] = "active"
138
 
139
  def release_gpu_models():
140
- """v104: Resident RAM Protocol"""
141
  global MODELS
142
- print("🧹 [v104] Releasing GPU resources.")
143
  try:
144
  if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
145
  del MODELS["stt"]
@@ -151,20 +154,21 @@ def release_gpu_models():
151
  except: pass
152
  gc.collect()
153
  if torch.cuda.is_available(): torch.cuda.empty_cache()
 
154
 
155
  def warmup_task():
156
- """Silent Warmup (v104)"""
157
  global WARMUP_STATUS
158
  with WARMUP_LOCK:
159
  if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
160
  WARMUP_STATUS["in_progress"] = True
161
- print("\nπŸ”₯ --- V104: PRECEDENCE WARMUP STARTED ---")
162
  try:
163
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
164
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
165
  chatterbox_utils.warmup_chatterbox()
166
  WARMUP_STATUS["complete"] = True
167
- print(f"βœ… --- SYSTEM READY: v104 --- \n")
168
  except: pass
169
  finally: WARMUP_STATUS["in_progress"] = False
170
 
@@ -216,7 +220,7 @@ def _tts_logic(text, lang, speaker_wav_b64):
216
  def core_process(request_dict):
217
  action = request_dict.get("action")
218
  t1 = time.time()
219
- print(f"--- [v104] πŸš€ GPU SESSION: {action} ---")
220
  activate_gpu_models(action)
221
  try:
222
  if action == "stt": res = _stt_logic(request_dict)
@@ -229,7 +233,7 @@ def core_process(request_dict):
229
  res = {"text": stt_res.get("text"), "translated": translated, "audio": tts_res.get("audio")}
230
  else: res = {"error": f"Unknown action: {action}"}
231
  finally:
232
- print(f"--- [v104] ✨ SUCCESS: {action} ({time.time()-t1:.2f}s) ---")
233
  release_gpu_models()
234
  return res
235
 
@@ -247,7 +251,6 @@ app.add_middleware(
247
  allow_headers=["*"],
248
  )
249
 
250
- # πŸš€ STEP 2: REGISTER ROUTES ON PRIMARY APP (High Precedence)
251
  @app.post("/api/v1/process")
252
  async def api_process(request: Request):
253
  try:
@@ -258,7 +261,7 @@ async def api_process(request: Request):
258
  except Exception as e: return {"error": str(e)}
259
 
260
  @app.get("/health")
261
- def health(): return {"status": "ok", "warm": WARMUP_STATUS["complete"], "v": "104"}
262
 
263
  @app.post("/api/v1/clear_cache")
264
  async def clear_cache_api():
@@ -267,7 +270,7 @@ async def clear_cache_api():
267
  return {"status": "success"}
268
  except: return {"status": "error"}
269
 
270
- # πŸš€ STEP 3: DEFINE GRADIO INTERFACE
271
  def gradio_fn(req_json):
272
  try: return json.dumps(core_process(json.loads(req_json)))
273
  except Exception as e: return json.dumps({"error": str(e)})
@@ -276,15 +279,12 @@ demo = gr.Interface(
276
  fn=gradio_fn,
277
  inputs="text",
278
  outputs="text",
279
- title="πŸš€ AI Engine v104"
280
  )
281
 
282
- # πŸš€ STEP 4: MOUNT GRADIO ONTO FASTAPI (Low Precedence)
283
- # This way, our /api/v1/process route is checked BEFORE Gradio handles it.
284
  app = gr.mount_gradio_app(app, demo, path="/")
285
 
286
- # πŸš€ STEP 5: FINAL LAUNCH
287
  if __name__ == "__main__":
288
- print("πŸš€ [v104] Starting Unified Server on Port 7860...")
289
- # Using uvicorn on the unified app instance.
290
  uvicorn.run(app, host="0.0.0.0", port=7860, log_level="error")
 
18
  from threading import Thread, Lock
19
  from huggingface_hub import snapshot_download
20
 
21
+ # πŸ›‘οΈ 1. SILENCE LOGS & WARNINGS (v105: CUDA Stability Protocol)
22
  logging.getLogger("transformers").setLevel(logging.ERROR)
23
  logging.getLogger("TTS").setLevel(logging.ERROR)
24
  logging.getLogger("onnxruntime").setLevel(logging.ERROR)
 
81
  if f is None: return lambda x: x
82
  return f
83
 
84
+ # FORCE BUILD TRIGGER: 14:00:00 Jan 21 2026
85
+ # v105: CUDA Stability Fix. int8_float16. Device Indexing.
86
 
87
  os.environ["COQUI_TOS_AGREED"] = "1"
88
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
 
91
  WARMUP_LOCK = Lock()
92
 
93
  def activate_gpu_models(action):
94
+ """v105: MISSION-CRITICAL GPU ACTIVATION"""
95
  global MODELS, WARMUP_STATUS
96
  local_only = WARMUP_STATUS["complete"]
97
 
 
100
  try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
101
  except: pass
102
  if not stt_on_gpu:
103
+ print(f"πŸŽ™οΈ [v105] Activating Whisper (GPU: int8_float16 Stability)...")
104
  try:
105
  if MODELS["stt"]: del MODELS["stt"]
106
  gc.collect(); torch.cuda.empty_cache()
107
+ time.sleep(0.5) # Let CUDA driver settle
108
+ # v105: int8_float16 is the most stable for CTranslate2 on H200 MIGs
109
  MODELS["stt"] = WhisperModel(
110
  "large-v3",
111
  device="cuda",
112
+ device_index=0,
113
+ compute_type="int8_float16",
114
  num_workers=1,
115
  local_files_only=local_only
116
  )
117
  except Exception as e:
118
+ print(f"⚠️ CUDA v105 Stability Init failed: {e}")
119
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8", local_files_only=True)
120
 
121
  if action in ["tts", "s2st"]:
 
125
  tts_on_gpu = "cuda" in curr
126
  except: pass
127
  if MODELS["tts"] is None or not tts_on_gpu:
128
+ print(f"πŸ”Š [v105] Activating XTTS-v2 (GPU)...")
129
  try:
130
  if MODELS["tts"] is None:
131
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
 
140
  if MODELS["translate"] is None: MODELS["translate"] = "active"
141
 
142
  def release_gpu_models():
143
+ """v105: Safe Resident Cleanup"""
144
  global MODELS
145
+ print("🧹 [v105] Releasing GPU resources.")
146
  try:
147
  if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
148
  del MODELS["stt"]
 
154
  except: pass
155
  gc.collect()
156
  if torch.cuda.is_available(): torch.cuda.empty_cache()
157
+ time.sleep(0.5) # Driver buffer
158
 
159
  def warmup_task():
160
+ """Silent Pre-loading (v105)"""
161
  global WARMUP_STATUS
162
  with WARMUP_LOCK:
163
  if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
164
  WARMUP_STATUS["in_progress"] = True
165
+ print("\nπŸ”₯ --- V105: STABILITY WARMUP STARTED ---")
166
  try:
167
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
168
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
169
  chatterbox_utils.warmup_chatterbox()
170
  WARMUP_STATUS["complete"] = True
171
+ print(f"βœ… --- SYSTEM READY: v105 --- \n")
172
  except: pass
173
  finally: WARMUP_STATUS["in_progress"] = False
174
 
 
220
  def core_process(request_dict):
221
  action = request_dict.get("action")
222
  t1 = time.time()
223
+ print(f"--- [v105] πŸš€ GPU SESSION: {action} ---")
224
  activate_gpu_models(action)
225
  try:
226
  if action == "stt": res = _stt_logic(request_dict)
 
233
  res = {"text": stt_res.get("text"), "translated": translated, "audio": tts_res.get("audio")}
234
  else: res = {"error": f"Unknown action: {action}"}
235
  finally:
236
+ print(f"--- [v105] ✨ SUCCESS: {action} ({time.time()-t1:.2f}s) ---")
237
  release_gpu_models()
238
  return res
239
 
 
251
  allow_headers=["*"],
252
  )
253
 
 
254
  @app.post("/api/v1/process")
255
  async def api_process(request: Request):
256
  try:
 
261
  except Exception as e: return {"error": str(e)}
262
 
263
  @app.get("/health")
264
+ def health(): return {"status": "ok", "warm": WARMUP_STATUS["complete"], "v": "105"}
265
 
266
  @app.post("/api/v1/clear_cache")
267
  async def clear_cache_api():
 
270
  return {"status": "success"}
271
  except: return {"status": "error"}
272
 
273
+ # πŸš€ STEP 2: DEFINE GRADIO INTERFACE
274
  def gradio_fn(req_json):
275
  try: return json.dumps(core_process(json.loads(req_json)))
276
  except Exception as e: return json.dumps({"error": str(e)})
 
279
  fn=gradio_fn,
280
  inputs="text",
281
  outputs="text",
282
+ title="πŸš€ AI Engine v105"
283
  )
284
 
285
+ # πŸš€ STEP 3: MOUNT GRADIO ONTO FASTAPI
 
286
  app = gr.mount_gradio_app(app, demo, path="/")
287
 
 
288
  if __name__ == "__main__":
289
+ print("πŸš€ [v105] Starting Unified Server on Port 7860...")
 
290
  uvicorn.run(app, host="0.0.0.0", port=7860, log_level="error")