TGPro1 committed on
Commit
8d25f5f
·
verified ·
1 Parent(s): 4bebd1d

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +42 -34
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # πŸš€ V116: ZEROGPU HOPPER ATOMIC
2
  try:
3
  import spaces
4
  except ImportError:
@@ -28,7 +28,7 @@ import traceback
28
  from threading import Thread
29
  from huggingface_hub import snapshot_download, hf_hub_download
30
 
31
- # πŸ›‘οΈ 1. SILENCE & ENV (v116)
32
  logging.getLogger("transformers").setLevel(logging.ERROR)
33
  logging.getLogger("TTS").setLevel(logging.ERROR)
34
  os.environ["CT2_VERBOSE"] = "0"
@@ -81,19 +81,23 @@ from TTS.api import TTS
81
  from df.enhance import init_df
82
  import deep_translator
83
 
84
- # FORCE BUILD TRIGGER: 19:10:00 Jan 21 2026
85
- # v116: Hopper Atomic. Device Index Fix. No Lock.
86
 
87
  os.environ["COQUI_TOS_AGREED"] = "1"
88
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
89
  READY_FLAG = os.path.expanduser("~/.engine_ready")
 
90
 
91
  def is_system_ready():
92
  return os.path.exists(READY_FLAG)
93
 
94
  def activate_gpu_models(action):
95
- """v116: Granular Activation"""
96
- global MODELS
 
 
 
97
 
98
  if action in ["stt", "s2st"]:
99
  stt_on_gpu = False
@@ -101,21 +105,20 @@ def activate_gpu_models(action):
101
  except: pass
102
 
103
  if not stt_on_gpu:
104
- print(f"πŸŽ™οΈ [v116] WHISPER CHECKPOINT: Allocation...")
105
  try:
106
  gc.collect(); torch.cuda.empty_cache()
107
- print(f"πŸŽ™οΈ [v116] WHISPER CHECKPOINT: Loading to CUDA:0 (int8_float16)...")
108
- # Removed local_files_only=True as it can hang integrity checks
109
  MODELS["stt"] = WhisperModel(
110
- "large-v3",
111
  device="cuda",
112
- device_index=0,
113
- compute_type="int8_float16",
114
  num_workers=1
115
  )
116
- print(f"πŸŽ™οΈ [v116] WHISPER CHECKPOINT: Ready.")
117
  except Exception as e:
118
- print(f"⚠️ [v116] GPU STT Fail: {e}")
119
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
120
 
121
  if action in ["tts", "s2st"]:
@@ -126,12 +129,9 @@ def activate_gpu_models(action):
126
  except: pass
127
 
128
  if MODELS["tts"] is not None and not tts_on_gpu:
129
- print(f"πŸ”Š [v116] XTTS CHECKPOINT: Promotion to GPU...")
130
- try:
131
- MODELS["tts"].to("cuda")
132
- print(f"πŸ”Š [v116] XTTS CHECKPOINT: Ready.")
133
- except Exception as e:
134
- print(f"⚠️ [v116] XTTS GPU Fail: {e}")
135
 
136
  chatterbox_utils.load_chatterbox(device="cpu")
137
  if MODELS["denoiser"] is None:
@@ -140,7 +140,7 @@ def activate_gpu_models(action):
140
  if MODELS["translate"] is None: MODELS["translate"] = "active"
141
 
142
  def release_gpu_models():
143
- """v116: Clean Release"""
144
  global MODELS
145
  try:
146
  if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
@@ -153,26 +153,34 @@ def release_gpu_models():
153
  if torch.cuda.is_available(): torch.cuda.empty_cache()
154
 
155
  def warmup_task():
156
- """v116: Asset Prep"""
157
  if os.path.exists(READY_FLAG): os.remove(READY_FLAG)
158
- print("\nπŸ”₯ --- V116: ATOMIC WARMUP ---")
159
  try:
160
- MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
 
 
 
 
 
 
161
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
 
162
  chatterbox_utils.warmup_chatterbox()
163
  chatterbox_utils.load_chatterbox(device="cpu")
 
164
  with open(READY_FLAG, "w") as f: f.write("READY")
165
- print(f"βœ… --- ENGINE ARMED: v116 --- \n")
166
  except Exception as e: print(f"❌ Warmup Error: {e}")
167
 
168
  @spaces.GPU(duration=150)
169
  def core_process(request_dict):
170
  action = request_dict.get("action")
171
- print(f"--- [v116] πŸš€ REQUEST: {action} ---")
172
 
173
  waited = 0
174
  while not is_system_ready() and waited < 300:
175
- if waited % 10 == 0: print(f"⏳ Prep stage... ({waited}s)")
176
  time.sleep(1)
177
  waited += 1
178
 
@@ -219,7 +227,7 @@ def core_process(request_dict):
219
  res = {"audio": base64.b64encode(audio_bytes).decode()}
220
 
221
  elif action == "s2st":
222
- print("πŸŽ™οΈ Phase 1: Whisper GPU...")
223
  stt_res = core_process.__wrapped__( {**request_dict, "action": "stt"} )
224
  stt_text = stt_res.get("text", "")
225
 
@@ -232,10 +240,10 @@ def core_process(request_dict):
232
  res = {"text": stt_text, "translated": translated, "audio": tts_res.get("audio")}
233
  else: res = {"error": f"Unknown action: {action}"}
234
  except Exception as e:
235
- print(f"❌ Fault: {traceback.format_exc()}")
236
  res = {"error": str(e)}
237
  finally:
238
- print(f"--- [v116] ✨ ATOMIC FINISH ({time.time()-t1:.2f}s) ---")
239
  release_gpu_models()
240
  return res
241
 
@@ -244,7 +252,7 @@ async def lifespan(app: FastAPI):
244
  Thread(target=warmup_task, daemon=True).start()
245
  yield
246
 
247
- # πŸš€ FastAPI & Gradio Unified
248
  app = FastAPI(lifespan=lifespan)
249
  app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
250
 
@@ -253,18 +261,18 @@ async def api_process(request: Request):
253
  try:
254
  req_data = await request.json()
255
  if req_data.get("action") == "health":
256
- return {"status": "awake", "warm": is_system_ready(), "v": "116"}
257
  return core_process(req_data)
258
  except Exception as e: return {"error": str(e)}
259
 
260
  @app.get("/health")
261
- def health(): return {"status": "ok", "warm": is_system_ready(), "v": "116"}
262
 
263
  def gradio_fn(req_json):
264
  try: return json.dumps(core_process(json.loads(req_json)))
265
  except Exception as e: return json.dumps({"error": str(e)})
266
 
267
- demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="πŸš€ AI Engine v116")
268
  demo.queue()
269
  app = gr.mount_gradio_app(app, demo, path="/")
270
 
 
1
+ # πŸš€ V117: ZEROGPU HOPPER DIRECT (CLEAN ACTIVATE)
2
  try:
3
  import spaces
4
  except ImportError:
 
28
  from threading import Thread
29
  from huggingface_hub import snapshot_download, hf_hub_download
30
 
31
+ # πŸ›‘οΈ 1. SILENCE & ENV (v117)
32
  logging.getLogger("transformers").setLevel(logging.ERROR)
33
  logging.getLogger("TTS").setLevel(logging.ERROR)
34
  os.environ["CT2_VERBOSE"] = "0"
 
81
  from df.enhance import init_df
82
  import deep_translator
83
 
84
+ # FORCE BUILD TRIGGER: 19:15:00 Jan 21 2026
85
+ # v117: Hopper Direct. float16 native. 2s Settle. Absolute Paths.
86
 
87
  os.environ["COQUI_TOS_AGREED"] = "1"
88
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
89
  READY_FLAG = os.path.expanduser("~/.engine_ready")
90
+ MODEL_PATHS = {"stt": None, "tts": None}
91
 
92
  def is_system_ready():
93
  return os.path.exists(READY_FLAG)
94
 
95
  def activate_gpu_models(action):
96
+ """v117: Stable Native Activation"""
97
+ global MODELS, MODEL_PATHS
98
+
99
+ # 🏎️ v117: 2s Driver Settle. Crucial for MIG partitions.
100
+ time.sleep(2)
101
 
102
  if action in ["stt", "s2st"]:
103
  stt_on_gpu = False
 
105
  except: pass
106
 
107
  if not stt_on_gpu:
108
+ print(f"πŸŽ™οΈ [v117] ACTIVATE: Whisper (Native float16, Auto-Device)...")
109
  try:
110
  gc.collect(); torch.cuda.empty_cache()
111
+ # Use absolute local path to bypass hub/integrity hangs
112
+ path = MODEL_PATHS["stt"] or "large-v3"
113
  MODELS["stt"] = WhisperModel(
114
+ path,
115
  device="cuda",
116
+ compute_type="float16", # v117: native format for H200
 
117
  num_workers=1
118
  )
119
+ print(f"πŸŽ™οΈ [v117] WHISPER: Ready.")
120
  except Exception as e:
121
+ print(f"⚠️ [v117] GPU STT Error: {e}")
122
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
123
 
124
  if action in ["tts", "s2st"]:
 
129
  except: pass
130
 
131
  if MODELS["tts"] is not None and not tts_on_gpu:
132
+ print(f"πŸ”Š [v117] ACTIVATE: Promoting XTTS to GPU...")
133
+ try: MODELS["tts"].to("cuda")
134
+ except: pass
 
 
 
135
 
136
  chatterbox_utils.load_chatterbox(device="cpu")
137
  if MODELS["denoiser"] is None:
 
140
  if MODELS["translate"] is None: MODELS["translate"] = "active"
141
 
142
  def release_gpu_models():
143
+ """v117: Clean Offload"""
144
  global MODELS
145
  try:
146
  if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
 
153
  if torch.cuda.is_available(): torch.cuda.empty_cache()
154
 
155
  def warmup_task():
156
+ """v117: Absolute Cache Warming"""
157
  if os.path.exists(READY_FLAG): os.remove(READY_FLAG)
158
+ print("\nπŸ”₯ --- V117: DIRECT WARMUP ---")
159
  try:
160
+ # Pre-fetch and store paths
161
+ MODEL_PATHS["stt"] = snapshot_download("Systran/faster-whisper-large-v3")
162
+
163
+ print("⏬ Warming Whisper to RAM...")
164
+ MODELS["stt"] = WhisperModel(MODEL_PATHS["stt"], device="cpu", compute_type="int8")
165
+
166
+ print("⏬ Warming XTTS to RAM...")
167
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
168
+
169
  chatterbox_utils.warmup_chatterbox()
170
  chatterbox_utils.load_chatterbox(device="cpu")
171
+
172
  with open(READY_FLAG, "w") as f: f.write("READY")
173
+ print(f"βœ… --- SYSTEM ARMED: v117 --- \n")
174
  except Exception as e: print(f"❌ Warmup Error: {e}")
175
 
176
  @spaces.GPU(duration=150)
177
  def core_process(request_dict):
178
  action = request_dict.get("action")
179
+ print(f"--- [v117] πŸš€ REQ: {action} ---")
180
 
181
  waited = 0
182
  while not is_system_ready() and waited < 300:
183
+ if waited % 10 == 0: print(f"⏳ Sync stage... ({waited}s)")
184
  time.sleep(1)
185
  waited += 1
186
 
 
227
  res = {"audio": base64.b64encode(audio_bytes).decode()}
228
 
229
  elif action == "s2st":
230
+ print("πŸŽ™οΈ Phase 1: Whisper GPU (H200 Native)...")
231
  stt_res = core_process.__wrapped__( {**request_dict, "action": "stt"} )
232
  stt_text = stt_res.get("text", "")
233
 
 
240
  res = {"text": stt_text, "translated": translated, "audio": tts_res.get("audio")}
241
  else: res = {"error": f"Unknown action: {action}"}
242
  except Exception as e:
243
+ print(f"❌ Error: {traceback.format_exc()}")
244
  res = {"error": str(e)}
245
  finally:
246
+ print(f"--- [v117] ✨ FINISH ({time.time()-t1:.2f}s) ---")
247
  release_gpu_models()
248
  return res
249
 
 
252
  Thread(target=warmup_task, daemon=True).start()
253
  yield
254
 
255
+ # πŸš€ Server Lifecycle
256
  app = FastAPI(lifespan=lifespan)
257
  app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
258
 
 
261
  try:
262
  req_data = await request.json()
263
  if req_data.get("action") == "health":
264
+ return {"status": "awake", "warm": is_system_ready(), "v": "117"}
265
  return core_process(req_data)
266
  except Exception as e: return {"error": str(e)}
267
 
268
  @app.get("/health")
269
+ def health(): return {"status": "ok", "warm": is_system_ready(), "v": "117"}
270
 
271
  def gradio_fn(req_json):
272
  try: return json.dumps(core_process(json.loads(req_json)))
273
  except Exception as e: return json.dumps({"error": str(e)})
274
 
275
+ demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="πŸš€ AI Engine v117")
276
  demo.queue()
277
  app = gr.mount_gradio_app(app, demo, path="/")
278