TGPro1 committed on
Commit
f27d8b6
Β·
verified Β·
1 Parent(s): 0b4811b

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +24 -23
app.py CHANGED
@@ -1,9 +1,9 @@
1
- # πŸš€ V106: ZEROGPU PRIORITY IMPORT
2
  # Must be first to patch environment correctly
3
  try:
4
  import spaces
5
  except ImportError:
6
- print("⚠️ 'spaces' not installed. ZeroGPU features disabled.")
7
  class spaces:
8
  @staticmethod
9
  def GPU(duration=60, f=None):
@@ -30,7 +30,7 @@ import logging
30
  from threading import Thread, Lock
31
  from huggingface_hub import snapshot_download
32
 
33
- # πŸ›‘οΈ 1. SILENCE LOGS & WARNINGS (v106: Verified Silence)
34
  logging.getLogger("transformers").setLevel(logging.ERROR)
35
  logging.getLogger("TTS").setLevel(logging.ERROR)
36
  logging.getLogger("onnxruntime").setLevel(logging.ERROR)
@@ -84,8 +84,8 @@ from TTS.api import TTS
84
  from df.enhance import init_df
85
  import deep_translator
86
 
87
- # FORCE BUILD TRIGGER: 14:45:00 Jan 21 2026
88
- # v106: Import Priority + Explicit Queue.
89
 
90
  os.environ["COQUI_TOS_AGREED"] = "1"
91
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
@@ -94,7 +94,7 @@ WARMUP_STATUS = {"complete": False, "in_progress": False}
94
  WARMUP_LOCK = Lock()
95
 
96
  def activate_gpu_models(action):
97
- """v106: Stable Activation"""
98
  global MODELS, WARMUP_STATUS
99
  local_only = WARMUP_STATUS["complete"]
100
 
@@ -103,17 +103,19 @@ def activate_gpu_models(action):
103
  try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
104
  except: pass
105
  if not stt_on_gpu:
106
- print(f"πŸŽ™οΈ [v106] Activating Whisper (GPU: int8_float16)...")
107
  try:
108
  if MODELS["stt"]: del MODELS["stt"]
109
  gc.collect(); torch.cuda.empty_cache()
110
  time.sleep(0.5)
 
 
111
  MODELS["stt"] = WhisperModel(
112
  "large-v3",
113
  device="cuda",
114
- device_index=0,
115
- compute_type="int8_float16",
116
  num_workers=1,
 
117
  local_files_only=local_only
118
  )
119
  except Exception as e:
@@ -127,7 +129,7 @@ def activate_gpu_models(action):
127
  tts_on_gpu = "cuda" in curr
128
  except: pass
129
  if MODELS["tts"] is None or not tts_on_gpu:
130
- print(f"πŸ”Š [v106] Activating XTTS-v2 (GPU)...")
131
  try:
132
  if MODELS["tts"] is None:
133
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
@@ -142,9 +144,9 @@ def activate_gpu_models(action):
142
  if MODELS["translate"] is None: MODELS["translate"] = "active"
143
 
144
  def release_gpu_models():
145
- """v106: Resident Cleanup"""
146
  global MODELS
147
- print("🧹 [v106] Releasing GPU resources.")
148
  try:
149
  if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
150
  del MODELS["stt"]
@@ -159,18 +161,18 @@ def release_gpu_models():
159
  time.sleep(0.5)
160
 
161
  def warmup_task():
162
- """Silent Warmup (v106)"""
163
  global WARMUP_STATUS
164
  with WARMUP_LOCK:
165
  if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
166
  WARMUP_STATUS["in_progress"] = True
167
- print("\nπŸ”₯ --- V106: ZEROGPU RECOVERY WARMUP ---")
168
  try:
169
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
170
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
171
  chatterbox_utils.warmup_chatterbox()
172
  WARMUP_STATUS["complete"] = True
173
- print(f"βœ… --- SYSTEM READY: v106 --- \n")
174
  except: pass
175
  finally: WARMUP_STATUS["in_progress"] = False
176
 
@@ -222,7 +224,7 @@ def _tts_logic(text, lang, speaker_wav_b64):
222
  def core_process(request_dict):
223
  action = request_dict.get("action")
224
  t1 = time.time()
225
- print(f"--- [v106] πŸš€ GPU SESSION: {action} ---")
226
  activate_gpu_models(action)
227
  try:
228
  if action == "stt": res = _stt_logic(request_dict)
@@ -235,7 +237,7 @@ def core_process(request_dict):
235
  res = {"text": stt_res.get("text"), "translated": translated, "audio": tts_res.get("audio")}
236
  else: res = {"error": f"Unknown action: {action}"}
237
  finally:
238
- print(f"--- [v106] ✨ SUCCESS: {action} ({time.time()-t1:.2f}s) ---")
239
  release_gpu_models()
240
  return res
241
 
@@ -244,7 +246,7 @@ async def lifespan(app: FastAPI):
244
  Thread(target=warmup_task, daemon=True).start()
245
  yield
246
 
247
- # πŸš€ PRIMARY FASTAPI
248
  app = FastAPI(lifespan=lifespan)
249
  app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
250
 
@@ -258,7 +260,7 @@ async def api_process(request: Request):
258
  except Exception as e: return {"error": str(e)}
259
 
260
  @app.get("/health")
261
- def health(): return {"status": "ok", "warm": WARMUP_STATUS["complete"], "v": "106"}
262
 
263
  @app.post("/api/v1/clear_cache")
264
  async def clear_cache_api():
@@ -267,18 +269,17 @@ async def clear_cache_api():
267
  return {"status": "success"}
268
  except: return {"status": "error"}
269
 
 
270
  def gradio_fn(req_json):
271
  try: return json.dumps(core_process(json.loads(req_json)))
272
  except Exception as e: return json.dumps({"error": str(e)})
273
 
274
- # πŸš€ GRADIO WITH QUEUE
275
- demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="πŸš€ AI Engine v106")
276
- # πŸ”₯ V106: EXPLICIT QUEUE. ZeroGPU needs this.
277
  demo.queue()
278
 
279
  # MOUNT
280
  app = gr.mount_gradio_app(app, demo, path="/")
281
 
282
  if __name__ == "__main__":
283
- print("πŸš€ [v106] Starting Unified Server (ZeroGPU Fixed)...")
284
  uvicorn.run(app, host="0.0.0.0", port=7860, log_level="error")
 
1
+ # πŸš€ V107: ZEROGPU HARDENING
2
  # Must be first to patch environment correctly
3
  try:
4
  import spaces
5
  except ImportError:
6
+ print("⚠️ 'spaces' not installed.")
7
  class spaces:
8
  @staticmethod
9
  def GPU(duration=60, f=None):
 
30
  from threading import Thread, Lock
31
  from huggingface_hub import snapshot_download
32
 
33
+ # πŸ›‘οΈ 1. SILENCE LOGS & WARNINGS (v107: Stability Milestone)
34
  logging.getLogger("transformers").setLevel(logging.ERROR)
35
  logging.getLogger("TTS").setLevel(logging.ERROR)
36
  logging.getLogger("onnxruntime").setLevel(logging.ERROR)
 
84
  from df.enhance import init_df
85
  import deep_translator
86
 
87
+ # FORCE BUILD TRIGGER: 17:10:00 Jan 21 2026
88
+ # v107: Whisper int8 stability. Gradio 5.9.1.
89
 
90
  os.environ["COQUI_TOS_AGREED"] = "1"
91
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
 
94
  WARMUP_LOCK = Lock()
95
 
96
  def activate_gpu_models(action):
97
+ """v107: Safe Hardware Activation"""
98
  global MODELS, WARMUP_STATUS
99
  local_only = WARMUP_STATUS["complete"]
100
 
 
103
  try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
104
  except: pass
105
  if not stt_on_gpu:
106
+ print(f"πŸŽ™οΈ [v107] Activating Whisper (GPU: int8 Protocol)...")
107
  try:
108
  if MODELS["stt"]: del MODELS["stt"]
109
  gc.collect(); torch.cuda.empty_cache()
110
  time.sleep(0.5)
111
+ # v107: Using 'int8' for guaranteed stability on H200 MIG.
112
+ # Removed device_index to allow driver-level discovery.
113
  MODELS["stt"] = WhisperModel(
114
  "large-v3",
115
  device="cuda",
116
+ compute_type="int8",
 
117
  num_workers=1,
118
+ cpu_threads=1,
119
  local_files_only=local_only
120
  )
121
  except Exception as e:
 
129
  tts_on_gpu = "cuda" in curr
130
  except: pass
131
  if MODELS["tts"] is None or not tts_on_gpu:
132
+ print(f"πŸ”Š [v107] Activating XTTS-v2 (GPU)...")
133
  try:
134
  if MODELS["tts"] is None:
135
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
 
144
  if MODELS["translate"] is None: MODELS["translate"] = "active"
145
 
146
  def release_gpu_models():
147
+ """v107: GPU Cleanup"""
148
  global MODELS
149
+ print("🧹 [v107] Releasing GPU resources.")
150
  try:
151
  if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
152
  del MODELS["stt"]
 
161
  time.sleep(0.5)
162
 
163
  def warmup_task():
164
+ """Silent Warmup (v107)"""
165
  global WARMUP_STATUS
166
  with WARMUP_LOCK:
167
  if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
168
  WARMUP_STATUS["in_progress"] = True
169
+ print("\nπŸ”₯ --- V107: ZEROGPU RECOVERY STARTED ---")
170
  try:
171
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
172
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
173
  chatterbox_utils.warmup_chatterbox()
174
  WARMUP_STATUS["complete"] = True
175
+ print(f"βœ… --- SYSTEM READY: v107 --- \n")
176
  except: pass
177
  finally: WARMUP_STATUS["in_progress"] = False
178
 
 
224
  def core_process(request_dict):
225
  action = request_dict.get("action")
226
  t1 = time.time()
227
+ print(f"--- [v107] πŸš€ GPU SESSION: {action} ---")
228
  activate_gpu_models(action)
229
  try:
230
  if action == "stt": res = _stt_logic(request_dict)
 
237
  res = {"text": stt_res.get("text"), "translated": translated, "audio": tts_res.get("audio")}
238
  else: res = {"error": f"Unknown action: {action}"}
239
  finally:
240
+ print(f"--- [v107] ✨ SUCCESS: {action} ({time.time()-t1:.2f}s) ---")
241
  release_gpu_models()
242
  return res
243
 
 
246
  Thread(target=warmup_task, daemon=True).start()
247
  yield
248
 
249
+ # πŸš€ STEP 1: DEFINE FASTAPI
250
  app = FastAPI(lifespan=lifespan)
251
  app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
252
 
 
260
  except Exception as e: return {"error": str(e)}
261
 
262
  @app.get("/health")
263
+ def health(): return {"status": "ok", "warm": WARMUP_STATUS["complete"], "v": "107"}
264
 
265
  @app.post("/api/v1/clear_cache")
266
  async def clear_cache_api():
 
269
  return {"status": "success"}
270
  except: return {"status": "error"}
271
 
272
+ # πŸš€ STEP 2: DEFINE GRADIO
273
  def gradio_fn(req_json):
274
  try: return json.dumps(core_process(json.loads(req_json)))
275
  except Exception as e: return json.dumps({"error": str(e)})
276
 
277
+ demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="πŸš€ AI Engine v107")
 
 
278
  demo.queue()
279
 
280
  # MOUNT
281
  app = gr.mount_gradio_app(app, demo, path="/")
282
 
283
  if __name__ == "__main__":
284
+ print("πŸš€ [v107] Starting Unified Server (ZeroGPU Recovery)...")
285
  uvicorn.run(app, host="0.0.0.0", port=7860, log_level="error")