TGPro1 committed on
Commit
a145329
·
verified ·
1 Parent(s): 827c66d

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +28 -20
app.py CHANGED
@@ -18,7 +18,7 @@ import logging
18
  from threading import Thread, Lock
19
  from huggingface_hub import snapshot_download
20
 
21
- # πŸ›‘οΈ 1. SILENCE LOGS & WARNINGS (v101: Pure Power & Stability)
22
  logging.getLogger("transformers").setLevel(logging.ERROR)
23
  logging.getLogger("TTS").setLevel(logging.ERROR)
24
  logging.getLogger("onnxruntime").setLevel(logging.ERROR)
@@ -81,8 +81,8 @@ except ImportError:
81
  if f is None: return lambda x: x
82
  return f
83
 
84
- # FORCE BUILD TRIGGER: 13:10:00 Jan 21 2026
85
- # v101: Docker SDK Transition. Absolute Port Isolation.
86
 
87
  os.environ["COQUI_TOS_AGREED"] = "1"
88
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
@@ -91,7 +91,7 @@ WARMUP_STATUS = {"complete": False, "in_progress": False}
91
  WARMUP_LOCK = Lock()
92
 
93
  def activate_gpu_models(action):
94
- """v101: Mission-Critical GPU Mode"""
95
  global MODELS, WARMUP_STATUS
96
  local_only = WARMUP_STATUS["complete"]
97
 
@@ -100,7 +100,7 @@ def activate_gpu_models(action):
100
  try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
101
  except: pass
102
  if not stt_on_gpu:
103
- print(f"πŸŽ™οΈ [v101] Activating Whisper (GPU)...")
104
  try:
105
  if MODELS["stt"]: del MODELS["stt"]
106
  gc.collect(); torch.cuda.empty_cache()
@@ -122,7 +122,7 @@ def activate_gpu_models(action):
122
  tts_on_gpu = "cuda" in curr
123
  except: pass
124
  if MODELS["tts"] is None or not tts_on_gpu:
125
- print(f"πŸ”Š [v101] Activating XTTS-v2 (GPU)...")
126
  try:
127
  if MODELS["tts"] is None:
128
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
@@ -137,9 +137,9 @@ def activate_gpu_models(action):
137
  if MODELS["translate"] is None: MODELS["translate"] = "active"
138
 
139
  def release_gpu_models():
140
- """v101: Clean Resident State"""
141
  global MODELS
142
- print("🧹 [v101] Releasing GPU resources.")
143
  try:
144
  if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
145
  del MODELS["stt"]
@@ -153,18 +153,18 @@ def release_gpu_models():
153
  if torch.cuda.is_available(): torch.cuda.empty_cache()
154
 
155
  def warmup_task():
156
- """Silent Pre-loading (v101)"""
157
  global WARMUP_STATUS
158
  with WARMUP_LOCK:
159
  if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
160
  WARMUP_STATUS["in_progress"] = True
161
- print("\nπŸ”₯ --- V101: DOCKER DEPLOY WARMUP STARTED ---")
162
  try:
163
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
164
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
165
  chatterbox_utils.warmup_chatterbox()
166
  WARMUP_STATUS["complete"] = True
167
- print(f"βœ… --- SYSTEM WARM: v101 --- \n")
168
  except Exception as e:
169
  print(f"❌ Warmup fail: {e}")
170
  finally: WARMUP_STATUS["in_progress"] = False
@@ -217,7 +217,7 @@ def _tts_logic(text, lang, speaker_wav_b64):
217
  def core_process(request_dict):
218
  action = request_dict.get("action")
219
  t1 = time.time()
220
- print(f"--- [v101] πŸš€ GPU SESSION: {action} ---")
221
  activate_gpu_models(action)
222
  try:
223
  if action == "stt": res = _stt_logic(request_dict)
@@ -230,13 +230,13 @@ def core_process(request_dict):
230
  res = {"text": stt_res.get("text"), "translated": translated, "audio": tts_res.get("audio")}
231
  else: res = {"error": f"Unknown action: {action}"}
232
  finally:
233
- print(f"--- [v101] ✨ SUCCESS: {action} ({time.time()-t1:.2f}s) ---")
234
  release_gpu_models()
235
  return res
236
 
237
  @asynccontextmanager
238
  async def lifespan(app: FastAPI):
239
- # DOCKER ENTRYPOINT TRIGGER
240
  Thread(target=warmup_task, daemon=True).start()
241
  yield
242
 
@@ -252,7 +252,7 @@ async def api_process(request: Request):
252
  except Exception as e: return {"error": str(e)}
253
 
254
  @app.get("/health")
255
- def health(): return {"status": "ok", "warm": WARMUP_STATUS["complete"], "v": "101"}
256
 
257
  @app.post("/api/v1/clear_cache")
258
  async def clear_cache():
@@ -266,10 +266,18 @@ async def clear_cache():
266
  return {"status": "success"}
267
  except: return {"status": "error"}
268
 
269
- # GRADIO INTERFACE (v101)
270
- demo = gr.Interface(fn=lambda x: json.dumps(core_process(json.loads(x))), inputs="text", outputs="text")
 
 
 
 
 
 
 
 
 
271
  app = gr.mount_gradio_app(app, demo, path="/")
272
 
273
- if __name__ == "__main__":
274
- print("πŸš€ [v101] DOCKER SERVER STARTING ON 7860...")
275
- uvicorn.run(app, host="0.0.0.0", port=7860, log_level="error", loop="asyncio")
 
18
  from threading import Thread, Lock
19
  from huggingface_hub import snapshot_download
20
 
21
+ # πŸ›‘οΈ 1. SILENCE LOGS & WARNINGS (v102: Mission-Critical Silence & Stability)
22
  logging.getLogger("transformers").setLevel(logging.ERROR)
23
  logging.getLogger("TTS").setLevel(logging.ERROR)
24
  logging.getLogger("onnxruntime").setLevel(logging.ERROR)
 
81
  if f is None: return lambda x: x
82
  return f
83
 
84
+ # FORCE BUILD TRIGGER: 13:16:00 Jan 21 2026
85
+ # v102: Gradio SDK Restoration. Synchronized Port Binding.
86
 
87
  os.environ["COQUI_TOS_AGREED"] = "1"
88
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
 
91
  WARMUP_LOCK = Lock()
92
 
93
  def activate_gpu_models(action):
94
+ """v102: Mission-Critical GPU Mode"""
95
  global MODELS, WARMUP_STATUS
96
  local_only = WARMUP_STATUS["complete"]
97
 
 
100
  try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
101
  except: pass
102
  if not stt_on_gpu:
103
+ print(f"πŸŽ™οΈ [v102] Activating Whisper (GPU)...")
104
  try:
105
  if MODELS["stt"]: del MODELS["stt"]
106
  gc.collect(); torch.cuda.empty_cache()
 
122
  tts_on_gpu = "cuda" in curr
123
  except: pass
124
  if MODELS["tts"] is None or not tts_on_gpu:
125
+ print(f"πŸ”Š [v102] Activating XTTS-v2 (GPU)...")
126
  try:
127
  if MODELS["tts"] is None:
128
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
 
137
  if MODELS["translate"] is None: MODELS["translate"] = "active"
138
 
139
  def release_gpu_models():
140
+ """v102: Clean Resident State"""
141
  global MODELS
142
+ print("🧹 [v102] Releasing GPU resources.")
143
  try:
144
  if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
145
  del MODELS["stt"]
 
153
  if torch.cuda.is_available(): torch.cuda.empty_cache()
154
 
155
  def warmup_task():
156
+ """Silent Pre-loading (v102)"""
157
  global WARMUP_STATUS
158
  with WARMUP_LOCK:
159
  if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
160
  WARMUP_STATUS["in_progress"] = True
161
+ print("\nπŸ”₯ --- V102: GRADIO SYNC WARMUP STARTED ---")
162
  try:
163
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
164
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
165
  chatterbox_utils.warmup_chatterbox()
166
  WARMUP_STATUS["complete"] = True
167
+ print(f"βœ… --- SYSTEM WARM: v102 --- \n")
168
  except Exception as e:
169
  print(f"❌ Warmup fail: {e}")
170
  finally: WARMUP_STATUS["in_progress"] = False
 
217
  def core_process(request_dict):
218
  action = request_dict.get("action")
219
  t1 = time.time()
220
+ print(f"--- [v102] πŸš€ GPU SESSION: {action} ---")
221
  activate_gpu_models(action)
222
  try:
223
  if action == "stt": res = _stt_logic(request_dict)
 
230
  res = {"text": stt_res.get("text"), "translated": translated, "audio": tts_res.get("audio")}
231
  else: res = {"error": f"Unknown action: {action}"}
232
  finally:
233
+ print(f"--- [v102] ✨ SUCCESS: {action} ({time.time()-t1:.2f}s) ---")
234
  release_gpu_models()
235
  return res
236
 
237
  @asynccontextmanager
238
  async def lifespan(app: FastAPI):
239
+ # GRADIO MANAGED LIFECYCLE
240
  Thread(target=warmup_task, daemon=True).start()
241
  yield
242
 
 
252
  except Exception as e: return {"error": str(e)}
253
 
254
  @app.get("/health")
255
+ def health(): return {"status": "ok", "warm": WARMUP_STATUS["complete"], "v": "102"}
256
 
257
  @app.post("/api/v1/clear_cache")
258
  async def clear_cache():
 
266
  return {"status": "success"}
267
  except: return {"status": "error"}
268
 
269
+ # πŸš€ V102: SYNCHRONIZED ENTRY POINT
270
+ # We mount everything into Gradio and let the SDK handle the binding.
271
+ demo = gr.Interface(
272
+ fn=lambda x: json.dumps(core_process(json.loads(x))),
273
+ inputs="text",
274
+ outputs="text",
275
+ title="πŸš€ AI Engine v102"
276
+ )
277
+
278
+ # Use gr.mount_gradio_app to merge FastAPI and Gradio.
279
+ # The Hugging Face Gradio SDK will automatically detect and launch the mounted app.
280
  app = gr.mount_gradio_app(app, demo, path="/")
281
 
282
+ # No uvicorn.run here! The Hugging Face Gradio launcher handles it.
283
+ # This prevents the "Address already in use" error.