TGPro1 committed on
Commit
0b4811b
·
verified ·
1 Parent(s): 1822120

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +37 -43
app.py CHANGED
@@ -1,3 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
  from fastapi import FastAPI, Request, Response
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from contextlib import asynccontextmanager
@@ -18,7 +30,7 @@ import logging
18
  from threading import Thread, Lock
19
  from huggingface_hub import snapshot_download
20
 
21
- # πŸ›‘οΈ 1. SILENCE LOGS & WARNINGS (v105: CUDA Stability Protocol)
22
  logging.getLogger("transformers").setLevel(logging.ERROR)
23
  logging.getLogger("TTS").setLevel(logging.ERROR)
24
  logging.getLogger("onnxruntime").setLevel(logging.ERROR)
@@ -72,17 +84,8 @@ from TTS.api import TTS
72
  from df.enhance import init_df
73
  import deep_translator
74
 
75
- try:
76
- import spaces
77
- except ImportError:
78
- class spaces:
79
- @staticmethod
80
- def GPU(duration=60, f=None):
81
- if f is None: return lambda x: x
82
- return f
83
-
84
- # FORCE BUILD TRIGGER: 14:00:00 Jan 21 2026
85
- # v105: CUDA Stability Fix. int8_float16. Device Indexing.
86
 
87
  os.environ["COQUI_TOS_AGREED"] = "1"
88
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
@@ -91,7 +94,7 @@ WARMUP_STATUS = {"complete": False, "in_progress": False}
91
  WARMUP_LOCK = Lock()
92
 
93
  def activate_gpu_models(action):
94
- """v105: MISSION-CRITICAL GPU ACTIVATION"""
95
  global MODELS, WARMUP_STATUS
96
  local_only = WARMUP_STATUS["complete"]
97
 
@@ -100,12 +103,11 @@ def activate_gpu_models(action):
100
  try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
101
  except: pass
102
  if not stt_on_gpu:
103
- print(f"πŸŽ™οΈ [v105] Activating Whisper (GPU: int8_float16 Stability)...")
104
  try:
105
  if MODELS["stt"]: del MODELS["stt"]
106
  gc.collect(); torch.cuda.empty_cache()
107
- time.sleep(0.5) # Let CUDA driver settle
108
- # v105: int8_float16 is the most stable for CTranslate2 on H200 MIGs
109
  MODELS["stt"] = WhisperModel(
110
  "large-v3",
111
  device="cuda",
@@ -115,7 +117,7 @@ def activate_gpu_models(action):
115
  local_files_only=local_only
116
  )
117
  except Exception as e:
118
- print(f"⚠️ CUDA v105 Stability Init failed: {e}")
119
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8", local_files_only=True)
120
 
121
  if action in ["tts", "s2st"]:
@@ -125,7 +127,7 @@ def activate_gpu_models(action):
125
  tts_on_gpu = "cuda" in curr
126
  except: pass
127
  if MODELS["tts"] is None or not tts_on_gpu:
128
- print(f"πŸ”Š [v105] Activating XTTS-v2 (GPU)...")
129
  try:
130
  if MODELS["tts"] is None:
131
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
@@ -140,9 +142,9 @@ def activate_gpu_models(action):
140
  if MODELS["translate"] is None: MODELS["translate"] = "active"
141
 
142
  def release_gpu_models():
143
- """v105: Safe Resident Cleanup"""
144
  global MODELS
145
- print("🧹 [v105] Releasing GPU resources.")
146
  try:
147
  if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
148
  del MODELS["stt"]
@@ -154,21 +156,21 @@ def release_gpu_models():
154
  except: pass
155
  gc.collect()
156
  if torch.cuda.is_available(): torch.cuda.empty_cache()
157
- time.sleep(0.5) # Driver buffer
158
 
159
  def warmup_task():
160
- """Silent Pre-loading (v105)"""
161
  global WARMUP_STATUS
162
  with WARMUP_LOCK:
163
  if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
164
  WARMUP_STATUS["in_progress"] = True
165
- print("\nπŸ”₯ --- V105: STABILITY WARMUP STARTED ---")
166
  try:
167
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
168
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
169
  chatterbox_utils.warmup_chatterbox()
170
  WARMUP_STATUS["complete"] = True
171
- print(f"βœ… --- SYSTEM READY: v105 --- \n")
172
  except: pass
173
  finally: WARMUP_STATUS["in_progress"] = False
174
 
@@ -220,7 +222,7 @@ def _tts_logic(text, lang, speaker_wav_b64):
220
  def core_process(request_dict):
221
  action = request_dict.get("action")
222
  t1 = time.time()
223
- print(f"--- [v105] πŸš€ GPU SESSION: {action} ---")
224
  activate_gpu_models(action)
225
  try:
226
  if action == "stt": res = _stt_logic(request_dict)
@@ -233,7 +235,7 @@ def core_process(request_dict):
233
  res = {"text": stt_res.get("text"), "translated": translated, "audio": tts_res.get("audio")}
234
  else: res = {"error": f"Unknown action: {action}"}
235
  finally:
236
- print(f"--- [v105] ✨ SUCCESS: {action} ({time.time()-t1:.2f}s) ---")
237
  release_gpu_models()
238
  return res
239
 
@@ -242,14 +244,9 @@ async def lifespan(app: FastAPI):
242
  Thread(target=warmup_task, daemon=True).start()
243
  yield
244
 
245
- # πŸš€ STEP 1: DEFINE THE PRIMARY FASTAPI APP
246
  app = FastAPI(lifespan=lifespan)
247
- app.add_middleware(
248
- CORSMiddleware,
249
- allow_origins=["*"],
250
- allow_methods=["*"],
251
- allow_headers=["*"],
252
- )
253
 
254
  @app.post("/api/v1/process")
255
  async def api_process(request: Request):
@@ -261,7 +258,7 @@ async def api_process(request: Request):
261
  except Exception as e: return {"error": str(e)}
262
 
263
  @app.get("/health")
264
- def health(): return {"status": "ok", "warm": WARMUP_STATUS["complete"], "v": "105"}
265
 
266
  @app.post("/api/v1/clear_cache")
267
  async def clear_cache_api():
@@ -270,21 +267,18 @@ async def clear_cache_api():
270
  return {"status": "success"}
271
  except: return {"status": "error"}
272
 
273
- # πŸš€ STEP 2: DEFINE GRADIO INTERFACE
274
  def gradio_fn(req_json):
275
  try: return json.dumps(core_process(json.loads(req_json)))
276
  except Exception as e: return json.dumps({"error": str(e)})
277
 
278
- demo = gr.Interface(
279
- fn=gradio_fn,
280
- inputs="text",
281
- outputs="text",
282
- title="πŸš€ AI Engine v105"
283
- )
284
 
285
- # πŸš€ STEP 3: MOUNT GRADIO ONTO FASTAPI
286
  app = gr.mount_gradio_app(app, demo, path="/")
287
 
288
  if __name__ == "__main__":
289
- print("πŸš€ [v105] Starting Unified Server on Port 7860...")
290
  uvicorn.run(app, host="0.0.0.0", port=7860, log_level="error")
 
1
+ # πŸš€ V106: ZEROGPU PRIORITY IMPORT
2
+ # Must be first to patch environment correctly
3
+ try:
4
+ import spaces
5
+ except ImportError:
6
+ print("⚠️ 'spaces' not installed. ZeroGPU features disabled.")
7
+ class spaces:
8
+ @staticmethod
9
+ def GPU(duration=60, f=None):
10
+ if f is None: return lambda x: x
11
+ return f
12
+
13
  from fastapi import FastAPI, Request, Response
14
  from fastapi.middleware.cors import CORSMiddleware
15
  from contextlib import asynccontextmanager
 
30
  from threading import Thread, Lock
31
  from huggingface_hub import snapshot_download
32
 
33
+ # πŸ›‘οΈ 1. SILENCE LOGS & WARNINGS (v106: Verified Silence)
34
  logging.getLogger("transformers").setLevel(logging.ERROR)
35
  logging.getLogger("TTS").setLevel(logging.ERROR)
36
  logging.getLogger("onnxruntime").setLevel(logging.ERROR)
 
84
  from df.enhance import init_df
85
  import deep_translator
86
 
87
+ # FORCE BUILD TRIGGER: 14:45:00 Jan 21 2026
88
+ # v106: Import Priority + Explicit Queue.
 
 
 
 
 
 
 
 
 
89
 
90
  os.environ["COQUI_TOS_AGREED"] = "1"
91
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
 
94
  WARMUP_LOCK = Lock()
95
 
96
  def activate_gpu_models(action):
97
+ """v106: Stable Activation"""
98
  global MODELS, WARMUP_STATUS
99
  local_only = WARMUP_STATUS["complete"]
100
 
 
103
  try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
104
  except: pass
105
  if not stt_on_gpu:
106
+ print(f"πŸŽ™οΈ [v106] Activating Whisper (GPU: int8_float16)...")
107
  try:
108
  if MODELS["stt"]: del MODELS["stt"]
109
  gc.collect(); torch.cuda.empty_cache()
110
+ time.sleep(0.5)
 
111
  MODELS["stt"] = WhisperModel(
112
  "large-v3",
113
  device="cuda",
 
117
  local_files_only=local_only
118
  )
119
  except Exception as e:
120
+ print(f"⚠️ GPU Init failed: {e}")
121
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8", local_files_only=True)
122
 
123
  if action in ["tts", "s2st"]:
 
127
  tts_on_gpu = "cuda" in curr
128
  except: pass
129
  if MODELS["tts"] is None or not tts_on_gpu:
130
+ print(f"πŸ”Š [v106] Activating XTTS-v2 (GPU)...")
131
  try:
132
  if MODELS["tts"] is None:
133
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
 
142
  if MODELS["translate"] is None: MODELS["translate"] = "active"
143
 
144
  def release_gpu_models():
145
+ """v106: Resident Cleanup"""
146
  global MODELS
147
+ print("🧹 [v106] Releasing GPU resources.")
148
  try:
149
  if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
150
  del MODELS["stt"]
 
156
  except: pass
157
  gc.collect()
158
  if torch.cuda.is_available(): torch.cuda.empty_cache()
159
+ time.sleep(0.5)
160
 
161
  def warmup_task():
162
+ """Silent Warmup (v106)"""
163
  global WARMUP_STATUS
164
  with WARMUP_LOCK:
165
  if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
166
  WARMUP_STATUS["in_progress"] = True
167
+ print("\nπŸ”₯ --- V106: ZEROGPU RECOVERY WARMUP ---")
168
  try:
169
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
170
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
171
  chatterbox_utils.warmup_chatterbox()
172
  WARMUP_STATUS["complete"] = True
173
+ print(f"βœ… --- SYSTEM READY: v106 --- \n")
174
  except: pass
175
  finally: WARMUP_STATUS["in_progress"] = False
176
 
 
222
  def core_process(request_dict):
223
  action = request_dict.get("action")
224
  t1 = time.time()
225
+ print(f"--- [v106] πŸš€ GPU SESSION: {action} ---")
226
  activate_gpu_models(action)
227
  try:
228
  if action == "stt": res = _stt_logic(request_dict)
 
235
  res = {"text": stt_res.get("text"), "translated": translated, "audio": tts_res.get("audio")}
236
  else: res = {"error": f"Unknown action: {action}"}
237
  finally:
238
+ print(f"--- [v106] ✨ SUCCESS: {action} ({time.time()-t1:.2f}s) ---")
239
  release_gpu_models()
240
  return res
241
 
 
244
  Thread(target=warmup_task, daemon=True).start()
245
  yield
246
 
247
+ # πŸš€ PRIMARY FASTAPI
248
  app = FastAPI(lifespan=lifespan)
249
+ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
 
 
 
 
 
250
 
251
  @app.post("/api/v1/process")
252
  async def api_process(request: Request):
 
258
  except Exception as e: return {"error": str(e)}
259
 
260
  @app.get("/health")
261
+ def health(): return {"status": "ok", "warm": WARMUP_STATUS["complete"], "v": "106"}
262
 
263
  @app.post("/api/v1/clear_cache")
264
  async def clear_cache_api():
 
267
  return {"status": "success"}
268
  except: return {"status": "error"}
269
 
 
270
  def gradio_fn(req_json):
271
  try: return json.dumps(core_process(json.loads(req_json)))
272
  except Exception as e: return json.dumps({"error": str(e)})
273
 
274
+ # πŸš€ GRADIO WITH QUEUE
275
+ demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="πŸš€ AI Engine v106")
276
+ # πŸ”₯ V106: EXPLICIT QUEUE. ZeroGPU needs this.
277
+ demo.queue()
 
 
278
 
279
+ # MOUNT
280
  app = gr.mount_gradio_app(app, demo, path="/")
281
 
282
  if __name__ == "__main__":
283
+ print("πŸš€ [v106] Starting Unified Server (ZeroGPU Fixed)...")
284
  uvicorn.run(app, host="0.0.0.0", port=7860, log_level="error")