TGPro1 committed on
Commit
94261d6
Β·
verified Β·
1 Parent(s): 92366fd

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +50 -45
app.py CHANGED
@@ -1,4 +1,6 @@
1
  from fastapi import FastAPI, Request, Response
 
 
2
  import gradio as gr
3
  import uvicorn
4
  import base64
@@ -16,7 +18,7 @@ import logging
16
  from threading import Thread, Lock
17
  from huggingface_hub import snapshot_download
18
 
19
- # πŸ›‘οΈ 1. SILENCE LOGS & WARNINGS (v103: Mission-Critical Stability)
20
  logging.getLogger("transformers").setLevel(logging.ERROR)
21
  logging.getLogger("TTS").setLevel(logging.ERROR)
22
  logging.getLogger("onnxruntime").setLevel(logging.ERROR)
@@ -79,8 +81,8 @@ except ImportError:
79
  if f is None: return lambda x: x
80
  return f
81
 
82
- # FORCE BUILD TRIGGER: 13:25:00 Jan 21 2026
83
- # v103: Gradio Blocking Launch. ZeroGPU Ready. CPU Health Fix.
84
 
85
  os.environ["COQUI_TOS_AGREED"] = "1"
86
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
@@ -89,7 +91,7 @@ WARMUP_STATUS = {"complete": False, "in_progress": False}
89
  WARMUP_LOCK = Lock()
90
 
91
  def activate_gpu_models(action):
92
- """v103: Stability-First Activation"""
93
  global MODELS, WARMUP_STATUS
94
  local_only = WARMUP_STATUS["complete"]
95
 
@@ -98,7 +100,7 @@ def activate_gpu_models(action):
98
  try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
99
  except: pass
100
  if not stt_on_gpu:
101
- print(f"πŸŽ™οΈ [v103] Activating Whisper (GPU: 1-Worker Stability)...")
102
  try:
103
  if MODELS["stt"]: del MODELS["stt"]
104
  gc.collect(); torch.cuda.empty_cache()
@@ -120,7 +122,7 @@ def activate_gpu_models(action):
120
  tts_on_gpu = "cuda" in curr
121
  except: pass
122
  if MODELS["tts"] is None or not tts_on_gpu:
123
- print(f"πŸ”Š [v103] Activating XTTS-v2 (GPU)...")
124
  try:
125
  if MODELS["tts"] is None:
126
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
@@ -135,9 +137,9 @@ def activate_gpu_models(action):
135
  if MODELS["translate"] is None: MODELS["translate"] = "active"
136
 
137
  def release_gpu_models():
138
- """v103: RAM Cleanup"""
139
  global MODELS
140
- print("🧹 [v103] Releasing GPU resources.")
141
  try:
142
  if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
143
  del MODELS["stt"]
@@ -151,18 +153,18 @@ def release_gpu_models():
151
  if torch.cuda.is_available(): torch.cuda.empty_cache()
152
 
153
  def warmup_task():
154
- """Silent Warmup (v103)"""
155
  global WARMUP_STATUS
156
  with WARMUP_LOCK:
157
  if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
158
  WARMUP_STATUS["in_progress"] = True
159
- print("\nπŸ”₯ --- V103: UNIFIED WARMUP STARTED ---")
160
  try:
161
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
162
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
163
  chatterbox_utils.warmup_chatterbox()
164
  WARMUP_STATUS["complete"] = True
165
- print(f"βœ… --- SYSTEM READY: v103 --- \n")
166
  except: pass
167
  finally: WARMUP_STATUS["in_progress"] = False
168
 
@@ -214,7 +216,7 @@ def _tts_logic(text, lang, speaker_wav_b64):
214
  def core_process(request_dict):
215
  action = request_dict.get("action")
216
  t1 = time.time()
217
- print(f"--- [v103] πŸš€ GPU SESSION: {action} ---")
218
  activate_gpu_models(action)
219
  try:
220
  if action == "stt": res = _stt_logic(request_dict)
@@ -227,27 +229,26 @@ def core_process(request_dict):
227
  res = {"text": stt_res.get("text"), "translated": translated, "audio": tts_res.get("audio")}
228
  else: res = {"error": f"Unknown action: {action}"}
229
  finally:
230
- print(f"--- [v103] ✨ SUCCESS: {action} ({time.time()-t1:.2f}s) ---")
231
  release_gpu_models()
232
  return res
233
 
234
- # πŸš€ GRADIO UNIFIED PORT (v103)
235
- # We mount everything on the Gradio app to avoid "Address already in use"
236
- def gradio_fn(req_json):
237
- try: return json.dumps(core_process(json.loads(req_json)))
238
- except Exception as e: return json.dumps({"error": str(e)})
239
 
240
- # Create Gradio interface
241
- demo = gr.Interface(
242
- fn=gradio_fn,
243
- inputs="text",
244
- outputs="text",
245
- title="πŸš€ AI Engine v103 (ZeroGPU Ready)",
246
- description="Backend API with integrated Gradio UI"
247
  )
248
 
249
- # πŸ₯ ADD FASTAPI ROUTES TO GRADIO'S INTERNAL APP
250
- @demo.app.post("/api/v1/process")
251
  async def api_process(request: Request):
252
  try:
253
  req_data = await request.json()
@@ -256,30 +257,34 @@ async def api_process(request: Request):
256
  return core_process(req_data)
257
  except Exception as e: return {"error": str(e)}
258
 
259
- @demo.app.get("/health")
260
- def health(): return {"status": "ok", "warm": WARMUP_STATUS["complete"], "v": "103"}
261
 
262
- @demo.app.post("/api/v1/clear_cache")
263
  async def clear_cache_api():
264
  try:
265
  release_gpu_models()
266
  return {"status": "success"}
267
  except: return {"status": "error"}
268
 
269
- # START WARMUP
270
- Thread(target=warmup_task, daemon=True).start()
 
 
 
 
 
 
 
 
 
 
 
 
 
271
 
272
- # πŸš€ FINAL LAUNCH: This keeps the process alive on HF Spaces
273
  if __name__ == "__main__":
274
- print("πŸš€ [v103] Starting Engine...")
275
- try:
276
- demo.launch(server_name="0.0.0.0", server_port=7860, show_api=False)
277
- except Exception as e:
278
- print(f"⚠️ Launch failed (possibly handled by SDK): {e}")
279
- # Final keep-alive if launch() returned instantly
280
- while True: time.sleep(100)
281
- else:
282
- # If imported by HF SDK wrapper
283
- try:
284
- demo.launch(server_name="0.0.0.0", server_port=7860, show_api=False, prevent_thread_lock=True)
285
- except: pass
 
1
  from fastapi import FastAPI, Request, Response
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from contextlib import asynccontextmanager
4
  import gradio as gr
5
  import uvicorn
6
  import base64
 
18
  from threading import Thread, Lock
19
  from huggingface_hub import snapshot_download
20
 
21
+ # πŸ›‘οΈ 1. SILENCE LOGS & WARNINGS (v104: Extreme Stability & Precedence)
22
  logging.getLogger("transformers").setLevel(logging.ERROR)
23
  logging.getLogger("TTS").setLevel(logging.ERROR)
24
  logging.getLogger("onnxruntime").setLevel(logging.ERROR)
 
81
  if f is None: return lambda x: x
82
  return f
83
 
84
+ # FORCE BUILD TRIGGER: 13:35:00 Jan 21 2026
85
+ # v104: FastAPI-First Precedence. Fix 405 error. ZeroGPU Unified.
86
 
87
  os.environ["COQUI_TOS_AGREED"] = "1"
88
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
 
91
  WARMUP_LOCK = Lock()
92
 
93
  def activate_gpu_models(action):
94
+ """v104: Stability Milestone Activation"""
95
  global MODELS, WARMUP_STATUS
96
  local_only = WARMUP_STATUS["complete"]
97
 
 
100
  try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
101
  except: pass
102
  if not stt_on_gpu:
103
+ print(f"πŸŽ™οΈ [v104] Activating Whisper (GPU)...")
104
  try:
105
  if MODELS["stt"]: del MODELS["stt"]
106
  gc.collect(); torch.cuda.empty_cache()
 
122
  tts_on_gpu = "cuda" in curr
123
  except: pass
124
  if MODELS["tts"] is None or not tts_on_gpu:
125
+ print(f"πŸ”Š [v104] Activating XTTS-v2 (GPU)...")
126
  try:
127
  if MODELS["tts"] is None:
128
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
 
137
  if MODELS["translate"] is None: MODELS["translate"] = "active"
138
 
139
  def release_gpu_models():
140
+ """v104: Resident RAM Protocol"""
141
  global MODELS
142
+ print("🧹 [v104] Releasing GPU resources.")
143
  try:
144
  if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
145
  del MODELS["stt"]
 
153
  if torch.cuda.is_available(): torch.cuda.empty_cache()
154
 
155
  def warmup_task():
156
+ """Silent Warmup (v104)"""
157
  global WARMUP_STATUS
158
  with WARMUP_LOCK:
159
  if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
160
  WARMUP_STATUS["in_progress"] = True
161
+ print("\nπŸ”₯ --- V104: PRECEDENCE WARMUP STARTED ---")
162
  try:
163
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
164
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
165
  chatterbox_utils.warmup_chatterbox()
166
  WARMUP_STATUS["complete"] = True
167
+ print(f"βœ… --- SYSTEM READY: v104 --- \n")
168
  except: pass
169
  finally: WARMUP_STATUS["in_progress"] = False
170
 
 
216
  def core_process(request_dict):
217
  action = request_dict.get("action")
218
  t1 = time.time()
219
+ print(f"--- [v104] πŸš€ GPU SESSION: {action} ---")
220
  activate_gpu_models(action)
221
  try:
222
  if action == "stt": res = _stt_logic(request_dict)
 
229
  res = {"text": stt_res.get("text"), "translated": translated, "audio": tts_res.get("audio")}
230
  else: res = {"error": f"Unknown action: {action}"}
231
  finally:
232
+ print(f"--- [v104] ✨ SUCCESS: {action} ({time.time()-t1:.2f}s) ---")
233
  release_gpu_models()
234
  return res
235
 
236
+ @asynccontextmanager
237
+ async def lifespan(app: FastAPI):
238
+ Thread(target=warmup_task, daemon=True).start()
239
+ yield
 
240
 
241
+ # πŸš€ STEP 1: DEFINE THE PRIMARY FASTAPI APP
242
+ app = FastAPI(lifespan=lifespan)
243
+ app.add_middleware(
244
+ CORSMiddleware,
245
+ allow_origins=["*"],
246
+ allow_methods=["*"],
247
+ allow_headers=["*"],
248
  )
249
 
250
+ # πŸš€ STEP 2: REGISTER ROUTES ON PRIMARY APP (High Precedence)
251
+ @app.post("/api/v1/process")
252
  async def api_process(request: Request):
253
  try:
254
  req_data = await request.json()
 
257
  return core_process(req_data)
258
  except Exception as e: return {"error": str(e)}
259
 
260
+ @app.get("/health")
261
+ def health(): return {"status": "ok", "warm": WARMUP_STATUS["complete"], "v": "104"}
262
 
263
+ @app.post("/api/v1/clear_cache")
264
  async def clear_cache_api():
265
  try:
266
  release_gpu_models()
267
  return {"status": "success"}
268
  except: return {"status": "error"}
269
 
270
+ # πŸš€ STEP 3: DEFINE GRADIO INTERFACE
271
+ def gradio_fn(req_json):
272
+ try: return json.dumps(core_process(json.loads(req_json)))
273
+ except Exception as e: return json.dumps({"error": str(e)})
274
+
275
+ demo = gr.Interface(
276
+ fn=gradio_fn,
277
+ inputs="text",
278
+ outputs="text",
279
+ title="πŸš€ AI Engine v104"
280
+ )
281
+
282
+ # πŸš€ STEP 4: MOUNT GRADIO ONTO FASTAPI (Low Precedence)
283
+ # This way, our /api/v1/process route is checked BEFORE Gradio handles it.
284
+ app = gr.mount_gradio_app(app, demo, path="/")
285
 
286
+ # πŸš€ STEP 5: FINAL LAUNCH
287
  if __name__ == "__main__":
288
+ print("πŸš€ [v104] Starting Unified Server on Port 7860...")
289
+ # Using uvicorn on the unified app instance.
290
+ uvicorn.run(app, host="0.0.0.0", port=7860, log_level="error")