TGPro1 committed on
Commit
456b557
Β·
verified Β·
1 Parent(s): 29416ae

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +55 -100
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # πŸš€ V117: ZEROGPU HOPPER DIRECT (CLEAN ACTIVATE)
2
  try:
3
  import spaces
4
  except ImportError:
@@ -25,18 +25,26 @@ import sys
25
  import types
26
  import logging
27
  import traceback
28
- from threading import Thread
29
  from huggingface_hub import snapshot_download, hf_hub_download
30
 
31
- # πŸ›‘οΈ 1. SILENCE & ENV (v117)
32
  logging.getLogger("transformers").setLevel(logging.ERROR)
33
  logging.getLogger("TTS").setLevel(logging.ERROR)
34
  os.environ["CT2_VERBOSE"] = "0"
35
  os.environ["ORT_LOGGING_LEVEL"] = "3"
36
- os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
37
- os.environ["GRADIO_SERVER_PORT"] = "7860"
38
 
39
- # πŸ› οΈ 2. COMPATIBILITY PATCHES
 
 
 
 
 
 
 
 
 
 
40
  if "torchaudio.backend" not in sys.modules:
41
  backend = types.ModuleType("torchaudio.backend")
42
  common = types.ModuleType("torchaudio.backend.common")
@@ -58,46 +66,19 @@ if not hasattr(torchaudio, "info"):
58
  except: return SimpleNamespace(sample_rate=48000, num_frames=0, num_channels=1)
59
  torchaudio.info = mock_info
60
 
61
- try:
62
- _orig_load = torchaudio.load
63
- def patched_load(filepath, *args, **kwargs):
64
- try: return _orig_load(filepath, *args, **kwargs)
65
- except ImportError as e:
66
- if "torchcodec" in str(e).lower():
67
- import soundfile as sf
68
- data, samplerate = sf.read(filepath)
69
- t = torch.from_numpy(data).float()
70
- if len(t.shape) == 1: t = t.unsqueeze(0)
71
- else: t = t.T
72
- return t, samplerate
73
- raise e
74
- torchaudio.load = patched_load
75
- except Exception: pass
76
-
77
- # πŸ“¦ 3. AI LIBRARIES
78
  import chatterbox_utils
79
  from faster_whisper import WhisperModel
80
  from TTS.api import TTS
81
  from df.enhance import init_df
82
  import deep_translator
83
 
84
- # FORCE BUILD TRIGGER: 19:15:00 Jan 21 2026
85
- # v117: Hopper Direct. float16 native. 2s Settle. Absolute Paths.
86
-
87
- os.environ["COQUI_TOS_AGREED"] = "1"
88
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
89
- READY_FLAG = os.path.expanduser("~/.engine_ready")
90
- MODEL_PATHS = {"stt": None, "tts": None}
91
-
92
- def is_system_ready():
93
- return os.path.exists(READY_FLAG)
94
 
95
  def activate_gpu_models(action):
96
- """v117: Stable Native Activation"""
97
- global MODELS, MODEL_PATHS
98
-
99
- # 🏎️ v117: 2s Driver Settle. Crucial for MIG partitions.
100
- time.sleep(2)
101
 
102
  if action in ["stt", "s2st"]:
103
  stt_on_gpu = False
@@ -105,21 +86,13 @@ def activate_gpu_models(action):
105
  except: pass
106
 
107
  if not stt_on_gpu:
108
- print(f"πŸŽ™οΈ [v117] ACTIVATE: Whisper (Native float16, Auto-Device)...")
109
  try:
110
  gc.collect(); torch.cuda.empty_cache()
111
- # Use absolute local path to bypass hub/integrity hangs
112
- path = MODEL_PATHS["stt"] or "large-v3"
113
- MODELS["stt"] = WhisperModel(
114
- path,
115
- device="cuda",
116
- compute_type="float16", # v117: format natif pour H200
117
- num_workers=1
118
- )
119
- print(f"πŸŽ™οΈ [v117] WHISPER: Ready.")
120
  except Exception as e:
121
- print(f"⚠️ [v117] GPU STT Error: {e}")
122
- MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
123
 
124
  if action in ["tts", "s2st"]:
125
  tts_on_gpu = False
@@ -129,7 +102,7 @@ def activate_gpu_models(action):
129
  except: pass
130
 
131
  if MODELS["tts"] is not None and not tts_on_gpu:
132
- print(f"πŸ”Š [v117] ACTIVATE: Promoting XTTS to GPU...")
133
  try: MODELS["tts"].to("cuda")
134
  except: pass
135
 
@@ -140,11 +113,11 @@ def activate_gpu_models(action):
140
  if MODELS["translate"] is None: MODELS["translate"] = "active"
141
 
142
  def release_gpu_models():
143
- """v117: Clean Offload"""
144
  global MODELS
145
  try:
146
  if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
147
- MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8", local_files_only=True)
148
  if MODELS["tts"]:
149
  try: MODELS["tts"].to("cpu")
150
  except: pass
@@ -152,38 +125,11 @@ def release_gpu_models():
152
  gc.collect()
153
  if torch.cuda.is_available(): torch.cuda.empty_cache()
154
 
155
- def warmup_task():
156
- """v117: Absolute Cache Warming"""
157
- if os.path.exists(READY_FLAG): os.remove(READY_FLAG)
158
- print("\nπŸ”₯ --- V117: DIRECT WARMUP ---")
159
- try:
160
- # Pre-fetch and store paths
161
- MODEL_PATHS["stt"] = snapshot_download("Systran/faster-whisper-large-v3")
162
-
163
- print("⏬ Warming Whisper to RAM...")
164
- MODELS["stt"] = WhisperModel(MODEL_PATHS["stt"], device="cpu", compute_type="int8")
165
-
166
- print("⏬ Warming XTTS to RAM...")
167
- MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
168
-
169
- chatterbox_utils.warmup_chatterbox()
170
- chatterbox_utils.load_chatterbox(device="cpu")
171
-
172
- with open(READY_FLAG, "w") as f: f.write("READY")
173
- print(f"βœ… --- SYSTEM ARMED: v117 --- \n")
174
- except Exception as e: print(f"❌ Warmup Error: {e}")
175
-
176
  @spaces.GPU(duration=150)
177
  def core_process(request_dict):
178
  action = request_dict.get("action")
179
- print(f"--- [v117] πŸš€ REQ: {action} ---")
180
 
181
- waited = 0
182
- while not is_system_ready() and waited < 300:
183
- if waited % 10 == 0: print(f"⏳ Sync stage... ({waited}s)")
184
- time.sleep(1)
185
- waited += 1
186
-
187
  t1 = time.time()
188
  activate_gpu_models(action)
189
  try:
@@ -227,32 +173,41 @@ def core_process(request_dict):
227
  res = {"audio": base64.b64encode(audio_bytes).decode()}
228
 
229
  elif action == "s2st":
230
- print("πŸŽ™οΈ Phase 1: Whisper GPU (H200 Native)...")
231
- stt_res = core_process.__wrapped__( {**request_dict, "action": "stt"} )
232
- stt_text = stt_res.get("text", "")
233
-
234
- print(f"🌍 Phase 2: Translation...")
235
- target = request_dict.get("target_lang")
236
- translated = deep_translator.GoogleTranslator(source='auto', target=target).translate(stt_text)
237
-
238
- print("πŸ”Š Phase 3: XTTS GPU...")
239
- tts_res = core_process.__wrapped__( {"action": "tts", "text": translated, "lang": target, "speaker_wav": request_dict.get("speaker_wav")} )
240
- res = {"text": stt_text, "translated": translated, "audio": tts_res.get("audio")}
 
 
 
 
 
241
  else: res = {"error": f"Unknown action: {action}"}
242
  except Exception as e:
243
- print(f"❌ Error: {traceback.format_exc()}")
244
  res = {"error": str(e)}
245
  finally:
246
- print(f"--- [v117] ✨ FINISH ({time.time()-t1:.2f}s) ---")
247
  release_gpu_models()
248
  return res
249
 
250
  @asynccontextmanager
251
  async def lifespan(app: FastAPI):
252
- Thread(target=warmup_task, daemon=True).start()
 
 
 
 
253
  yield
254
 
255
- # πŸš€ Server Lifecycle
256
  app = FastAPI(lifespan=lifespan)
257
  app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
258
 
@@ -260,21 +215,21 @@ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], all
260
  async def api_process(request: Request):
261
  try:
262
  req_data = await request.json()
263
- if req_data.get("action") == "health":
264
- return {"status": "awake", "warm": is_system_ready(), "v": "117"}
265
  return core_process(req_data)
266
  except Exception as e: return {"error": str(e)}
267
 
268
  @app.get("/health")
269
- def health(): return {"status": "ok", "warm": is_system_ready(), "v": "117"}
270
 
271
  def gradio_fn(req_json):
272
  try: return json.dumps(core_process(json.loads(req_json)))
273
  except Exception as e: return json.dumps({"error": str(e)})
274
 
275
- demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="πŸš€ AI Engine v117")
 
276
  demo.queue()
277
  app = gr.mount_gradio_app(app, demo, path="/")
278
 
279
  if __name__ == "__main__":
280
- uvicorn.run(app, host="0.0.0.0", port=7860, log_level="error")
 
1
+ # πŸš€ V118: ZEROGPU HOPPER STEADY (PRODUCTION GRADE)
2
  try:
3
  import spaces
4
  except ImportError:
 
25
  import types
26
  import logging
27
  import traceback
 
28
  from huggingface_hub import snapshot_download, hf_hub_download
29
 
30
+ # πŸ›‘οΈ 1. SILENCE & ENV (v118)
31
  logging.getLogger("transformers").setLevel(logging.ERROR)
32
  logging.getLogger("TTS").setLevel(logging.ERROR)
33
  os.environ["CT2_VERBOSE"] = "0"
34
  os.environ["ORT_LOGGING_LEVEL"] = "3"
35
+ os.environ["COQUI_TOS_AGREED"] = "1"
 
36
 
37
+ # πŸ› οΈ 2. TOP-LEVEL ASSET PREPARATION (Ensures HF Readiness)
38
+ print("\nπŸ“¦ [v118] TOP-LEVEL: Preparing AI Assets...")
39
+ try:
40
+ WHISPER_PATH = snapshot_download("Systran/faster-whisper-large-v3")
41
+ XTTS_PATH = snapshot_download("coqui/XTTS-v2")
42
+ print("βœ… Assets cached on disk.")
43
+ except Exception as e:
44
+ print(f"⚠️ Pre-download warning: {e}")
45
+ WHISPER_PATH = "large-v3"
46
+
47
+ # πŸ› οΈ 3. COMPATIBILITY PATCHES
48
  if "torchaudio.backend" not in sys.modules:
49
  backend = types.ModuleType("torchaudio.backend")
50
  common = types.ModuleType("torchaudio.backend.common")
 
66
  except: return SimpleNamespace(sample_rate=48000, num_frames=0, num_channels=1)
67
  torchaudio.info = mock_info
68
 
69
+ # πŸ“¦ 4. AI LIBRARIES
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  import chatterbox_utils
71
  from faster_whisper import WhisperModel
72
  from TTS.api import TTS
73
  from df.enhance import init_df
74
  import deep_translator
75
 
76
+ # v118: Hopper Steady. Persistent RAM Init. int8 GPU.
 
 
 
77
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
 
 
 
 
 
78
 
79
  def activate_gpu_models(action):
80
+ """v118: Robust GPU Promotion"""
81
+ global MODELS
 
 
 
82
 
83
  if action in ["stt", "s2st"]:
84
  stt_on_gpu = False
 
86
  except: pass
87
 
88
  if not stt_on_gpu:
89
+ print(f"πŸŽ™οΈ [v118] PROMOTE: Whisper (GPU, int8)...")
90
  try:
91
  gc.collect(); torch.cuda.empty_cache()
92
+ MODELS["stt"] = WhisperModel(WHISPER_PATH, device="cuda", compute_type="int8", num_workers=1)
 
 
 
 
 
 
 
 
93
  except Exception as e:
94
+ print(f"⚠️ GPU STT Fail: {e}")
95
+ MODELS["stt"] = WhisperModel(WHISPER_PATH, device="cpu", compute_type="int8")
96
 
97
  if action in ["tts", "s2st"]:
98
  tts_on_gpu = False
 
102
  except: pass
103
 
104
  if MODELS["tts"] is not None and not tts_on_gpu:
105
+ print(f"πŸ”Š [v118] PROMOTE: XTTS to GPU...")
106
  try: MODELS["tts"].to("cuda")
107
  except: pass
108
 
 
113
  if MODELS["translate"] is None: MODELS["translate"] = "active"
114
 
115
  def release_gpu_models():
116
+ """v118: Graceful Offload"""
117
  global MODELS
118
  try:
119
  if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
120
+ MODELS["stt"] = WhisperModel(WHISPER_PATH, device="cpu", compute_type="int8", local_files_only=True)
121
  if MODELS["tts"]:
122
  try: MODELS["tts"].to("cpu")
123
  except: pass
 
125
  gc.collect()
126
  if torch.cuda.is_available(): torch.cuda.empty_cache()
127
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  @spaces.GPU(duration=150)
129
  def core_process(request_dict):
130
  action = request_dict.get("action")
131
+ print(f"--- [v118] πŸš€ REQUEST: {action} ---")
132
 
 
 
 
 
 
 
133
  t1 = time.time()
134
  activate_gpu_models(action)
135
  try:
 
173
  res = {"audio": base64.b64encode(audio_bytes).decode()}
174
 
175
  elif action == "s2st":
176
+ # Direct logic sequence in v118 (No recursion)
177
+ audio_bytes = base64.b64decode(request_dict.get("file"))
178
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
179
+ f.write(audio_bytes); temp_path = f.name
180
+ try:
181
+ # 1. STT
182
+ segs, _ = MODELS["stt"].transcribe(temp_path, language=request_dict.get("lang"), beam_size=1)
183
+ stt_text = " ".join([s.text for s in segs]).strip()
184
+ # 2. Translate
185
+ target = request_dict.get("target_lang")
186
+ translated = deep_translator.GoogleTranslator(source='auto', target=target).translate(stt_text)
187
+ # 3. TTS
188
+ final_res = core_process.__wrapped__({"action": "tts", "text": translated, "lang": target, "speaker_wav": request_dict.get("speaker_wav")})
189
+ res = {"text": stt_text, "translated": translated, "audio": final_res.get("audio")}
190
+ finally:
191
+ if os.path.exists(temp_path): os.unlink(temp_path)
192
  else: res = {"error": f"Unknown action: {action}"}
193
  except Exception as e:
194
+ print(f"❌ Fault: {traceback.format_exc()}")
195
  res = {"error": str(e)}
196
  finally:
197
+ print(f"--- [v118] ✨ FINISH ({time.time()-t1:.2f}s) ---")
198
  release_gpu_models()
199
  return res
200
 
201
  @asynccontextmanager
202
  async def lifespan(app: FastAPI):
203
+ print("πŸ”₯ [v118] RAM Warming...")
204
+ MODELS["stt"] = WhisperModel(WHISPER_PATH, device="cpu", compute_type="int8")
205
+ MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
206
+ chatterbox_utils.warmup_chatterbox()
207
+ print("βœ… [v118] ENGINE READY.")
208
  yield
209
 
210
+ # πŸš€ FastAPI
211
  app = FastAPI(lifespan=lifespan)
212
  app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
213
 
 
215
  async def api_process(request: Request):
216
  try:
217
  req_data = await request.json()
218
+ if req_data.get("action") == "health": return {"status": "awake", "v": "118"}
 
219
  return core_process(req_data)
220
  except Exception as e: return {"error": str(e)}
221
 
222
@app.get("/health")
def health():
    """Liveness probe: report service status and engine version."""
    return {"status": "ok", "v": "118"}
224
 
225
def gradio_fn(req_json):
    """Gradio bridge: parse a JSON request string, run core_process, return JSON text.

    Any failure (bad JSON, processing error) is reported as a JSON error object.
    """
    try:
        request = json.loads(req_json)
        result = core_process(request)
        return json.dumps(result)
    except Exception as e:
        return json.dumps({"error": str(e)})
228
 
229
+ # Unified UI mount
230
+ demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="πŸš€ AI Engine v118")
231
  demo.queue()
232
  app = gr.mount_gradio_app(app, demo, path="/")
233
 
234
  if __name__ == "__main__":
235
+ uvicorn.run(app, host="0.0.0.0", port=7860, log_level="warning")