TGPro1 committed on
Commit
f98c76e
·
verified ·
1 Parent(s): 4aeda0b

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +21 -25
app.py CHANGED
@@ -18,7 +18,7 @@ import logging
18
  from threading import Thread, Lock
19
  from huggingface_hub import snapshot_download
20
 
21
- # 🛑 1. SILENCE LOGS & WARNINGS (v100: Absolute Silence)
22
  logging.getLogger("transformers").setLevel(logging.ERROR)
23
  logging.getLogger("TTS").setLevel(logging.ERROR)
24
  logging.getLogger("onnxruntime").setLevel(logging.ERROR)
@@ -81,8 +81,8 @@ except ImportError:
81
  if f is None: return lambda x: x
82
  return f
83
 
84
- # FORCE BUILD TRIGGER: 13:00:00 Jan 21 2026
85
- # v100: Centennial Stability Update. 1-Worker Lockdown. Lifespan Events.
86
 
87
  os.environ["COQUI_TOS_AGREED"] = "1"
88
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
@@ -91,21 +91,19 @@ WARMUP_STATUS = {"complete": False, "in_progress": False}
91
  WARMUP_LOCK = Lock()
92
 
93
  def activate_gpu_models(action):
94
- """v100: Stability-First GPU Activation"""
95
  global MODELS, WARMUP_STATUS
96
  local_only = WARMUP_STATUS["complete"]
97
 
98
- # 1. Faster-Whisper: Lockdown to 1 worker for stability on the H200 MIG
99
  if action in ["stt", "s2st"]:
100
  stt_on_gpu = False
101
  try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
102
  except: pass
103
  if not stt_on_gpu:
104
- print(f"πŸŽ™οΈ [v100] Activating Whisper (GPU: Stability Protocol)...")
105
  try:
106
  if MODELS["stt"]: del MODELS["stt"]
107
  gc.collect(); torch.cuda.empty_cache()
108
- # 🛑 v100: 1-Worker to prevent CUDA deadlocks observed in v99
109
  MODELS["stt"] = WhisperModel(
110
  "large-v3",
111
  device="cuda",
@@ -114,10 +112,9 @@ def activate_gpu_models(action):
114
  local_files_only=local_only
115
  )
116
  except Exception as e:
117
- print(f"⚠️ Stability Init failed: {e}. Falling back to CPU.")
118
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8", local_files_only=True)
119
 
120
- # 2. XTTS-v2
121
  if action in ["tts", "s2st"]:
122
  tts_on_gpu = False
123
  try:
@@ -125,26 +122,24 @@ def activate_gpu_models(action):
125
  tts_on_gpu = "cuda" in curr
126
  except: pass
127
  if MODELS["tts"] is None or not tts_on_gpu:
128
- print(f"πŸ”Š [v100] Activating XTTS-v2 (GPU)...")
129
  try:
130
  if MODELS["tts"] is None:
131
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
132
  else: MODELS["tts"].to("cuda")
133
  except: pass
134
 
135
- # 3. Chatterbox GPU-Mode (Zero-Latency)
136
  chatterbox_utils.load_chatterbox(device="cuda" if torch.cuda.is_available() else "cpu")
137
 
138
- # 4. Helpers
139
  if MODELS["denoiser"] is None:
140
  try: MODELS["denoiser"] = init_df()
141
  except: pass
142
  if MODELS["translate"] is None: MODELS["translate"] = "active"
143
 
144
  def release_gpu_models():
145
- """v100: RAM-Resident Cleanup"""
146
  global MODELS
147
- print("🧹 [v100] Releasing GPU. Engines resident in RAM.")
148
  try:
149
  if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
150
  del MODELS["stt"]
@@ -158,19 +153,20 @@ def release_gpu_models():
158
  if torch.cuda.is_available(): torch.cuda.empty_cache()
159
 
160
  def warmup_task():
161
- """Silent Warmup (v100)"""
162
  global WARMUP_STATUS
163
  with WARMUP_LOCK:
164
  if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
165
  WARMUP_STATUS["in_progress"] = True
166
- print("\nπŸ”₯ --- V100: STABILITY WARMUP STARTED ---")
167
  try:
168
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
169
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
170
  chatterbox_utils.warmup_chatterbox()
171
  WARMUP_STATUS["complete"] = True
172
- print(f"βœ… --- SYSTEM READY: v100 --- \n")
173
- except: pass
 
174
  finally: WARMUP_STATUS["in_progress"] = False
175
 
176
  def _stt_logic(request_dict):
@@ -221,7 +217,7 @@ def _tts_logic(text, lang, speaker_wav_b64):
221
  def core_process(request_dict):
222
  action = request_dict.get("action")
223
  t1 = time.time()
224
- print(f"--- [v100] πŸš€ GPU SESSION: {action} ---")
225
  activate_gpu_models(action)
226
  try:
227
  if action == "stt": res = _stt_logic(request_dict)
@@ -234,16 +230,15 @@ def core_process(request_dict):
234
  res = {"text": stt_res.get("text"), "translated": translated, "audio": tts_res.get("audio")}
235
  else: res = {"error": f"Unknown action: {action}"}
236
  finally:
237
- print(f"--- [v100] ✨ END: {action} ({time.time()-t1:.2f}s) ---")
238
  release_gpu_models()
239
  return res
240
 
241
  @asynccontextmanager
242
  async def lifespan(app: FastAPI):
243
- # Startup
244
  Thread(target=warmup_task, daemon=True).start()
245
  yield
246
- # Shutdown
247
 
248
  app = FastAPI(lifespan=lifespan)
249
 
@@ -251,14 +246,13 @@ app = FastAPI(lifespan=lifespan)
251
  async def api_process(request: Request):
252
  try:
253
  req_data = await request.json()
254
- # πŸ₯ v100: LIGHTWEIGHT CPU HEALTH (Prevent Queue Bloat)
255
  if req_data.get("action") == "health":
256
  return {"status": "awake", "warm": WARMUP_STATUS["complete"]}
257
  return core_process(req_data)
258
  except Exception as e: return {"error": str(e)}
259
 
260
  @app.get("/health")
261
- def health(): return {"status": "ok", "warm": WARMUP_STATUS["complete"], "version": "v100"}
262
 
263
  @app.post("/api/v1/clear_cache")
264
  async def clear_cache():
@@ -272,8 +266,10 @@ async def clear_cache():
272
  return {"status": "success"}
273
  except: return {"status": "error"}
274
 
 
275
  demo = gr.Interface(fn=lambda x: json.dumps(core_process(json.loads(x))), inputs="text", outputs="text")
276
  app = gr.mount_gradio_app(app, demo, path="/")
277
 
278
  if __name__ == "__main__":
279
- uvicorn.run(app, host="0.0.0.0", port=7860, log_level="error")
 
 
18
  from threading import Thread, Lock
19
  from huggingface_hub import snapshot_download
20
 
21
+ # 🛑 1. SILENCE LOGS & WARNINGS (v101: Pure Power & Stability)
22
  logging.getLogger("transformers").setLevel(logging.ERROR)
23
  logging.getLogger("TTS").setLevel(logging.ERROR)
24
  logging.getLogger("onnxruntime").setLevel(logging.ERROR)
 
81
  if f is None: return lambda x: x
82
  return f
83
 
84
+ # FORCE BUILD TRIGGER: 13:10:00 Jan 21 2026
85
+ # v101: Docker SDK Transition. Absolute Port Isolation.
86
 
87
  os.environ["COQUI_TOS_AGREED"] = "1"
88
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
 
91
  WARMUP_LOCK = Lock()
92
 
93
  def activate_gpu_models(action):
94
+ """v101: Mission-Critical GPU Mode"""
95
  global MODELS, WARMUP_STATUS
96
  local_only = WARMUP_STATUS["complete"]
97
 
 
98
  if action in ["stt", "s2st"]:
99
  stt_on_gpu = False
100
  try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
101
  except: pass
102
  if not stt_on_gpu:
103
+ print(f"πŸŽ™οΈ [v101] Activating Whisper (GPU)...")
104
  try:
105
  if MODELS["stt"]: del MODELS["stt"]
106
  gc.collect(); torch.cuda.empty_cache()
 
107
  MODELS["stt"] = WhisperModel(
108
  "large-v3",
109
  device="cuda",
 
112
  local_files_only=local_only
113
  )
114
  except Exception as e:
115
+ print(f"⚠️ GPU Init failed: {e}. Falling back to CPU in-RAM.")
116
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8", local_files_only=True)
117
 
 
118
  if action in ["tts", "s2st"]:
119
  tts_on_gpu = False
120
  try:
 
122
  tts_on_gpu = "cuda" in curr
123
  except: pass
124
  if MODELS["tts"] is None or not tts_on_gpu:
125
+ print(f"πŸ”Š [v101] Activating XTTS-v2 (GPU)...")
126
  try:
127
  if MODELS["tts"] is None:
128
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
129
  else: MODELS["tts"].to("cuda")
130
  except: pass
131
 
 
132
  chatterbox_utils.load_chatterbox(device="cuda" if torch.cuda.is_available() else "cpu")
133
 
 
134
  if MODELS["denoiser"] is None:
135
  try: MODELS["denoiser"] = init_df()
136
  except: pass
137
  if MODELS["translate"] is None: MODELS["translate"] = "active"
138
 
139
  def release_gpu_models():
140
+ """v101: Clean Resident State"""
141
  global MODELS
142
+ print("🧹 [v101] Releasing GPU resources.")
143
  try:
144
  if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
145
  del MODELS["stt"]
 
153
  if torch.cuda.is_available(): torch.cuda.empty_cache()
154
 
155
  def warmup_task():
156
+ """Silent Pre-loading (v101)"""
157
  global WARMUP_STATUS
158
  with WARMUP_LOCK:
159
  if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
160
  WARMUP_STATUS["in_progress"] = True
161
+ print("\nπŸ”₯ --- V101: DOCKER DEPLOY WARMUP STARTED ---")
162
  try:
163
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
164
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
165
  chatterbox_utils.warmup_chatterbox()
166
  WARMUP_STATUS["complete"] = True
167
+ print(f"βœ… --- SYSTEM WARM: v101 --- \n")
168
+ except Exception as e:
169
+ print(f"❌ Warmup fail: {e}")
170
  finally: WARMUP_STATUS["in_progress"] = False
171
 
172
  def _stt_logic(request_dict):
 
217
  def core_process(request_dict):
218
  action = request_dict.get("action")
219
  t1 = time.time()
220
+ print(f"--- [v101] πŸš€ GPU SESSION: {action} ---")
221
  activate_gpu_models(action)
222
  try:
223
  if action == "stt": res = _stt_logic(request_dict)
 
230
  res = {"text": stt_res.get("text"), "translated": translated, "audio": tts_res.get("audio")}
231
  else: res = {"error": f"Unknown action: {action}"}
232
  finally:
233
+ print(f"--- [v101] ✨ SUCCESS: {action} ({time.time()-t1:.2f}s) ---")
234
  release_gpu_models()
235
  return res
236
 
237
  @asynccontextmanager
238
  async def lifespan(app: FastAPI):
239
+ # DOCKER ENTRYPOINT TRIGGER
240
  Thread(target=warmup_task, daemon=True).start()
241
  yield
 
242
 
243
  app = FastAPI(lifespan=lifespan)
244
 
 
246
  async def api_process(request: Request):
247
  try:
248
  req_data = await request.json()
 
249
  if req_data.get("action") == "health":
250
  return {"status": "awake", "warm": WARMUP_STATUS["complete"]}
251
  return core_process(req_data)
252
  except Exception as e: return {"error": str(e)}
253
 
254
  @app.get("/health")
255
+ def health(): return {"status": "ok", "warm": WARMUP_STATUS["complete"], "v": "101"}
256
 
257
  @app.post("/api/v1/clear_cache")
258
  async def clear_cache():
 
266
  return {"status": "success"}
267
  except: return {"status": "error"}
268
 
269
+ # GRADIO INTERFACE (v101)
270
  demo = gr.Interface(fn=lambda x: json.dumps(core_process(json.loads(x))), inputs="text", outputs="text")
271
  app = gr.mount_gradio_app(app, demo, path="/")
272
 
273
  if __name__ == "__main__":
274
+ print("πŸš€ [v101] DOCKER SERVER STARTING ON 7860...")
275
+ uvicorn.run(app, host="0.0.0.0", port=7860, log_level="error", loop="asyncio")