TGPro1 committed on
Commit
ba2715c
·
verified ·
1 Parent(s): 81c9481

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +39 -42
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # πŸš€ V110: ZEROGPU RESILIENT STARTUP
2
  # Must be first to patch environment correctly
3
  try:
4
  import spaces
@@ -30,7 +30,7 @@ import traceback
30
  from threading import Thread, Lock
31
  from huggingface_hub import snapshot_download, hf_hub_download
32
 
33
- # πŸ›‘οΈ 1. SILENCE & ENV (v110)
34
  logging.getLogger("transformers").setLevel(logging.ERROR)
35
  logging.getLogger("TTS").setLevel(logging.ERROR)
36
  os.environ["CT2_VERBOSE"] = "0"
@@ -38,7 +38,7 @@ os.environ["ORT_LOGGING_LEVEL"] = "3"
38
  os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
39
  os.environ["GRADIO_SERVER_PORT"] = "7860"
40
 
41
- # πŸ› οΈ 2. COMPATIBILITY PATCHES (v110)
42
  if "torchaudio.backend" not in sys.modules:
43
  backend = types.ModuleType("torchaudio.backend")
44
  common = types.ModuleType("torchaudio.backend.common")
@@ -83,8 +83,8 @@ from TTS.api import TTS
83
  from df.enhance import init_df
84
  import deep_translator
85
 
86
- # FORCE BUILD TRIGGER: 17:40:00 Jan 21 2026
87
- # v110: Resilient Warmup. No blocks.
88
 
89
  os.environ["COQUI_TOS_AGREED"] = "1"
90
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
@@ -93,8 +93,12 @@ WARMUP_STATUS = {"complete": False, "in_progress": False}
93
  WARMUP_LOCK = Lock()
94
 
95
  def activate_gpu_models(action):
96
- """v110: Safe Deferred Activation"""
97
  global MODELS, WARMUP_STATUS
 
 
 
 
98
  local_only = WARMUP_STATUS["complete"]
99
 
100
  if action in ["stt", "s2st"]:
@@ -102,7 +106,7 @@ def activate_gpu_models(action):
102
  try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
103
  except: pass
104
  if not stt_on_gpu:
105
- print(f"πŸŽ™οΈ [v110] Init Whisper (MIG-Ready: int8_float16)...")
106
  try:
107
  if MODELS["stt"]: del MODELS["stt"]; gc.collect(); torch.cuda.empty_cache()
108
  MODELS["stt"] = WhisperModel(
@@ -113,7 +117,7 @@ def activate_gpu_models(action):
113
  local_files_only=local_only
114
  )
115
  except Exception as e:
116
- print(f"⚠️ GPU STT Init failed: {e}")
117
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8", local_files_only=True)
118
 
119
  if action in ["tts", "s2st"]:
@@ -123,14 +127,15 @@ def activate_gpu_models(action):
123
  tts_on_gpu = "cuda" in curr
124
  except: pass
125
  if MODELS["tts"] is None or not tts_on_gpu:
126
- print(f"πŸ”Š [v110] Init XTTS-v2 (GPU)...")
127
  try:
128
  if MODELS["tts"] is None:
129
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
130
  else: MODELS["tts"].to("cuda")
131
  except: pass
132
 
133
- chatterbox_utils.load_chatterbox(device="cuda")
 
134
 
135
  if MODELS["denoiser"] is None:
136
  try: MODELS["denoiser"] = init_df()
@@ -138,9 +143,9 @@ def activate_gpu_models(action):
138
  if MODELS["translate"] is None: MODELS["translate"] = "active"
139
 
140
  def release_gpu_models():
141
- """v110: Quiet Release"""
142
  global MODELS
143
- print("🧹 [v110] Releasing.")
144
  try:
145
  if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
146
  del MODELS["stt"]
@@ -148,44 +153,37 @@ def release_gpu_models():
148
  if MODELS["tts"]:
149
  try: MODELS["tts"].to("cpu")
150
  except: pass
151
- chatterbox_utils.load_chatterbox(device="cpu")
152
  except: pass
153
  gc.collect()
154
  if torch.cuda.is_available(): torch.cuda.empty_cache()
155
  time.sleep(0.5)
156
 
157
  def warmup_task():
158
- """v110: Resilient Warmup (Independent Downloads)"""
159
  global WARMUP_STATUS
160
  if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
161
  WARMUP_STATUS["in_progress"] = True
162
- print("\nπŸ”₯ --- V110: ZEROGPU RESILIENT WARMUP ---")
163
-
164
- # 1. Faster-Whisper
165
- try: snapshot_download("Systran/faster-whisper-large-v3")
166
- except Exception as e: print(f"⚠️ Whisper download: {e}")
167
-
168
- # 2. XTTS-v2
169
- try: snapshot_download("coqui/XTTS-v2")
170
- except Exception as e: print(f"⚠️ XTTS download: {e}")
171
-
172
- # 3. Chatterbox
173
- try: chatterbox_utils.warmup_chatterbox()
174
- except Exception as e: print(f"⚠️ Chatterbox download: {e}")
175
-
176
- WARMUP_STATUS["complete"] = True
177
- print(f"βœ… --- SYSTEM READY: v110 --- \n")
178
- WARMUP_STATUS["in_progress"] = False
179
 
180
  @spaces.GPU(duration=150)
181
  def core_process(request_dict):
182
  action = request_dict.get("action")
183
- print(f"--- [v110] πŸš€ PROCESSING: {action} ---")
184
 
185
- # Wait for thermal preparation
186
  waited = 0
187
  while not WARMUP_STATUS["complete"] and waited < 60:
188
- if waited % 10 == 0: print(f"⏳ Syncing assets... ({waited}s)")
189
  time.sleep(1)
190
  waited += 1
191
 
@@ -232,8 +230,7 @@ def core_process(request_dict):
232
  res = {"audio": base64.b64encode(audio_bytes).decode()}
233
 
234
  elif action == "s2st":
235
- req_copy = request_dict.copy()
236
- req_copy["action"] = "stt"
237
  stt_res = core_process.__wrapped__(req_copy)
238
  translated = deep_translator.GoogleTranslator(source='auto', target=request_dict.get("target_lang")).translate(stt_res.get("text", ""))
239
  req_tts = {"action": "tts", "text": translated, "lang": request_dict.get("target_lang"), "speaker_wav": request_dict.get("speaker_wav")}
@@ -241,10 +238,10 @@ def core_process(request_dict):
241
  res = {"text": stt_res.get("text"), "translated": translated, "audio": tts_res.get("audio")}
242
  else: res = {"error": f"Unknown action: {action}"}
243
  except Exception as e:
244
- print(f"❌ Processing Fault: {traceback.format_exc()}")
245
  res = {"error": str(e)}
246
  finally:
247
- print(f"--- [v110] ✨ FINISHED ---")
248
  release_gpu_models()
249
  return res
250
 
@@ -262,21 +259,21 @@ async def api_process(request: Request):
262
  try:
263
  req_data = await request.json()
264
  if req_data.get("action") == "health":
265
- return {"status": "awake", "warm": WARMUP_STATUS["complete"], "v": "110"}
266
  return core_process(req_data)
267
  except Exception as e: return {"error": str(e)}
268
 
269
  @app.get("/health")
270
- def health(): return {"status": "ok", "warm": WARMUP_STATUS["complete"], "v": "110"}
271
 
272
  def gradio_fn(req_json):
273
  try: return json.dumps(core_process(json.loads(req_json)))
274
  except Exception as e: return json.dumps({"error": str(e)})
275
 
276
- demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="πŸš€ AI Engine v110")
277
  demo.queue()
278
  app = gr.mount_gradio_app(app, demo, path="/")
279
 
280
  if __name__ == "__main__":
281
- print("πŸš€ [v110] Starting Resilient Server...")
282
  uvicorn.run(app, host="0.0.0.0", port=7860, log_level="error")
 
1
+ # πŸš€ V111: ZEROGPU HARDENED ENGINE
2
  # Must be first to patch environment correctly
3
  try:
4
  import spaces
 
30
  from threading import Thread, Lock
31
  from huggingface_hub import snapshot_download, hf_hub_download
32
 
33
+ # πŸ›‘οΈ 1. SILENCE & ENV (v111)
34
  logging.getLogger("transformers").setLevel(logging.ERROR)
35
  logging.getLogger("TTS").setLevel(logging.ERROR)
36
  os.environ["CT2_VERBOSE"] = "0"
 
38
  os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
39
  os.environ["GRADIO_SERVER_PORT"] = "7860"
40
 
41
+ # πŸ› οΈ 2. COMPATIBILITY PATCHES (v111)
42
  if "torchaudio.backend" not in sys.modules:
43
  backend = types.ModuleType("torchaudio.backend")
44
  common = types.ModuleType("torchaudio.backend.common")
 
83
  from df.enhance import init_df
84
  import deep_translator
85
 
86
+ # FORCE BUILD TRIGGER: 17:45:00 Jan 21 2026
87
+ # v111: Hardened Engine. Chatterbox CPU-only. Whisper Clean Init.
88
 
89
  os.environ["COQUI_TOS_AGREED"] = "1"
90
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
 
93
  WARMUP_LOCK = Lock()
94
 
95
  def activate_gpu_models(action):
96
+ """v111: Clean Isolation Activation"""
97
  global MODELS, WARMUP_STATUS
98
+ # Always clean before loading big models to prevent MIG out-of-memory
99
+ gc.collect()
100
+ if torch.cuda.is_available(): torch.cuda.empty_cache()
101
+
102
  local_only = WARMUP_STATUS["complete"]
103
 
104
  if action in ["stt", "s2st"]:
 
106
  try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
107
  except: pass
108
  if not stt_on_gpu:
109
+ print(f"πŸŽ™οΈ [v111] Activating Whisper (High-Speed int8_float16)...")
110
  try:
111
  if MODELS["stt"]: del MODELS["stt"]; gc.collect(); torch.cuda.empty_cache()
112
  MODELS["stt"] = WhisperModel(
 
117
  local_files_only=local_only
118
  )
119
  except Exception as e:
120
+ print(f"⚠️ GPU Init failed: {e}")
121
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8", local_files_only=True)
122
 
123
  if action in ["tts", "s2st"]:
 
127
  tts_on_gpu = "cuda" in curr
128
  except: pass
129
  if MODELS["tts"] is None or not tts_on_gpu:
130
+ print(f"πŸ”Š [v111] Activating XTTS-v2 (GPU)...")
131
  try:
132
  if MODELS["tts"] is None:
133
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
134
  else: MODELS["tts"].to("cuda")
135
  except: pass
136
 
137
+ # v111: Chatterbox is now strictly CPU to avoid CUDA conflicts
138
+ chatterbox_utils.load_chatterbox(device="cpu")
139
 
140
  if MODELS["denoiser"] is None:
141
  try: MODELS["denoiser"] = init_df()
 
143
  if MODELS["translate"] is None: MODELS["translate"] = "active"
144
 
145
  def release_gpu_models():
146
+ """v111: Force Release"""
147
  global MODELS
148
+ print("🧹 [v111] Releasing GPU.")
149
  try:
150
  if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
151
  del MODELS["stt"]
 
153
  if MODELS["tts"]:
154
  try: MODELS["tts"].to("cpu")
155
  except: pass
156
+ # Chatterbox is on CPU, no release needed
157
  except: pass
158
  gc.collect()
159
  if torch.cuda.is_available(): torch.cuda.empty_cache()
160
  time.sleep(0.5)
161
 
162
def warmup_task():
    """v111: Asset Prefetch.

    Background task that downloads model weights once so the first GPU
    request does not pay the download cost. Each asset is fetched
    independently: a transient failure on one download must not abort the
    remaining prefetches, and the task always finishes by setting
    WARMUP_STATUS["complete"] — otherwise core_process's wait loop would
    burn its full 60-second cap on every single request.
    """
    global WARMUP_STATUS
    # Guard the check-and-set with the module's WARMUP_LOCK: without it,
    # two threads can both observe in_progress == False and both start a
    # full download pass.
    with WARMUP_LOCK:
        if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]:
            return
        WARMUP_STATUS["in_progress"] = True
    print("\n🔥 --- V111: ZEROGPU ASSET SYNC ---")
    try:
        # 1. Faster-Whisper weights
        try:
            snapshot_download("Systran/faster-whisper-large-v3")
        except Exception as e:
            print(f"❌ Warmup Warning: {e}")
        # 2. XTTS-v2 weights
        try:
            snapshot_download("coqui/XTTS-v2")
        except Exception as e:
            print(f"❌ Warmup Warning: {e}")
        # 3. Chatterbox assets
        try:
            chatterbox_utils.warmup_chatterbox()
        except Exception as e:
            print(f"❌ Warmup Warning: {e}")
        # Mark complete even after a partial failure (matches the prior
        # v110 behavior) so requests stop waiting on warmup.
        # NOTE(review): activate_gpu_models then passes local_files_only=True;
        # confirm its CPU fallback path covers an asset that failed to sync.
        WARMUP_STATUS["complete"] = True
        print(f"✅ --- SYSTEM READY: v111 --- \n")
    finally:
        # Always clear the in-progress flag, even on an unexpected error,
        # so a later call can retry.
        WARMUP_STATUS["in_progress"] = False
 
 
 
 
 
 
 
177
 
178
  @spaces.GPU(duration=150)
179
  def core_process(request_dict):
180
  action = request_dict.get("action")
181
+ print(f"--- [v111] πŸš€ PROCESS: {action} ---")
182
 
183
+ # Wait for assets
184
  waited = 0
185
  while not WARMUP_STATUS["complete"] and waited < 60:
186
+ if waited % 10 == 0: print(f"⏳ Syncing... ({waited}s)")
187
  time.sleep(1)
188
  waited += 1
189
 
 
230
  res = {"audio": base64.b64encode(audio_bytes).decode()}
231
 
232
  elif action == "s2st":
233
+ req_copy = request_dict.copy(); req_copy["action"] = "stt"
 
234
  stt_res = core_process.__wrapped__(req_copy)
235
  translated = deep_translator.GoogleTranslator(source='auto', target=request_dict.get("target_lang")).translate(stt_res.get("text", ""))
236
  req_tts = {"action": "tts", "text": translated, "lang": request_dict.get("target_lang"), "speaker_wav": request_dict.get("speaker_wav")}
 
238
  res = {"text": stt_res.get("text"), "translated": translated, "audio": tts_res.get("audio")}
239
  else: res = {"error": f"Unknown action: {action}"}
240
  except Exception as e:
241
+ print(f"❌ Fault: {traceback.format_exc()}")
242
  res = {"error": str(e)}
243
  finally:
244
+ print(f"--- [v111] ✨ DONE ---")
245
  release_gpu_models()
246
  return res
247
 
 
259
  try:
260
  req_data = await request.json()
261
  if req_data.get("action") == "health":
262
+ return {"status": "awake", "warm": WARMUP_STATUS["complete"], "v": "111"}
263
  return core_process(req_data)
264
  except Exception as e: return {"error": str(e)}
265
 
266
@app.get("/health")
def health():
    """Liveness probe: reports server status and whether warmup finished."""
    return {
        "status": "ok",
        "warm": WARMUP_STATUS["complete"],
        "v": "111",
    }
268
 
269
def gradio_fn(req_json):
    """Bridge the Gradio text widget to core_process: JSON string in, JSON string out."""
    try:
        request = json.loads(req_json)
        result = core_process(request)
        return json.dumps(result)
    except Exception as e:
        # Surface any failure as a JSON error payload rather than raising
        # into the Gradio layer.
        return json.dumps({"error": str(e)})
272
 
273
# Gradio front-end: a single text-in/text-out interface backed by gradio_fn.
demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="🚀 AI Engine v111")
demo.queue()

# Mount the Gradio UI onto the existing FastAPI app at the root path.
app = gr.mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    print("🚀 [v111] Starting Hardened Server...")
    # log_level="error" keeps uvicorn quiet; host/port mirror the
    # GRADIO_SERVER_* env vars set at the top of the file.
    uvicorn.run(app, host="0.0.0.0", port=7860, log_level="error")