TGPro1 committed on
Commit
4bebd1d
·
verified ·
1 Parent(s): b9d3269

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +66 -60
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # πŸš€ V115: ZEROGPU HOPPER STABILITY (STABLE-FAST)
2
  try:
3
  import spaces
4
  except ImportError:
@@ -25,10 +25,10 @@ import sys
25
  import types
26
  import logging
27
  import traceback
28
- from threading import Thread, Lock
29
  from huggingface_hub import snapshot_download, hf_hub_download
30
 
31
- # πŸ›‘οΈ 1. SILENCE & ENV (v115)
32
  logging.getLogger("transformers").setLevel(logging.ERROR)
33
  logging.getLogger("TTS").setLevel(logging.ERROR)
34
  os.environ["CT2_VERBOSE"] = "0"
@@ -81,61 +81,66 @@ from TTS.api import TTS
81
  from df.enhance import init_df
82
  import deep_translator
83
 
84
- # FORCE BUILD TRIGGER: 18:55:00 Jan 21 2026
85
- # v115: Hopper Stability. num_workers=1. Serial Activation.
86
 
87
  os.environ["COQUI_TOS_AGREED"] = "1"
88
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
89
  READY_FLAG = os.path.expanduser("~/.engine_ready")
90
- ACTIVATE_LOCK = Lock()
91
 
92
  def is_system_ready():
93
  return os.path.exists(READY_FLAG)
94
 
95
  def activate_gpu_models(action):
96
- """v115: Serial Safe Activation"""
97
  global MODELS
98
- with ACTIVATE_LOCK:
99
- if action in ["stt", "s2st"]:
100
- stt_on_gpu = False
101
- try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
102
- except: pass
103
-
104
- if not stt_on_gpu:
105
- print(f"πŸŽ™οΈ [v115] Activating Whisper (Stable: 1 Worker)...")
106
- try:
107
- gc.collect(); torch.cuda.empty_cache()
108
- MODELS["stt"] = WhisperModel(
109
- "large-v3",
110
- device="cuda",
111
- compute_type="int8_float16",
112
- num_workers=1,
113
- local_files_only=True
114
- )
115
- except Exception as e:
116
- print(f"⚠️ GPU STT Init crash: {e}")
117
- MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8", local_files_only=True)
 
 
 
118
 
119
- if action in ["tts", "s2st"]:
120
- tts_on_gpu = False
 
 
 
 
 
 
 
121
  try:
122
- params = next(MODELS["tts"].synthesizer.tts_model.parameters())
123
- tts_on_gpu = "cuda" in str(params.device)
124
- except: pass
125
-
126
- if MODELS["tts"] is not None and not tts_on_gpu:
127
- print(f"πŸ”Š [v115] Activating XTTS-v2 (GPU)...")
128
- try: MODELS["tts"].to("cuda")
129
- except: pass
130
 
131
- chatterbox_utils.load_chatterbox(device="cpu")
132
- if MODELS["denoiser"] is None:
133
- try: MODELS["denoiser"] = init_df()
134
- except: pass
135
- if MODELS["translate"] is None: MODELS["translate"] = "active"
136
 
137
  def release_gpu_models():
138
- """v115: Safe Offload"""
139
  global MODELS
140
  try:
141
  if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
@@ -148,26 +153,26 @@ def release_gpu_models():
148
  if torch.cuda.is_available(): torch.cuda.empty_cache()
149
 
150
  def warmup_task():
151
- """v115: RAM Warming"""
152
  if os.path.exists(READY_FLAG): os.remove(READY_FLAG)
153
- print("\nπŸ”₯ --- V115: HOPPER WARMUP ---")
154
  try:
155
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
156
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
157
  chatterbox_utils.warmup_chatterbox()
158
  chatterbox_utils.load_chatterbox(device="cpu")
159
  with open(READY_FLAG, "w") as f: f.write("READY")
160
- print(f"βœ… --- ENGINE ON: v115 --- \n")
161
- except Exception as e: print(f"❌ Warmup Fault: {e}")
162
 
163
  @spaces.GPU(duration=150)
164
  def core_process(request_dict):
165
  action = request_dict.get("action")
166
- print(f"--- [v115] πŸš€ START: {action} ---")
167
 
168
  waited = 0
169
  while not is_system_ready() and waited < 300:
170
- if waited % 10 == 0: print(f"⏳ Waiting for Engine... ({waited}s)")
171
  time.sleep(1)
172
  waited += 1
173
 
@@ -214,22 +219,23 @@ def core_process(request_dict):
214
  res = {"audio": base64.b64encode(audio_bytes).decode()}
215
 
216
  elif action == "s2st":
217
- print("πŸŽ™οΈ Step 1: Transcription...")
218
  stt_res = core_process.__wrapped__( {**request_dict, "action": "stt"} )
219
  stt_text = stt_res.get("text", "")
220
 
221
- print(f"🌍 Step 2: Translation ({request_dict.get('target_lang')})...")
222
- translated = deep_translator.GoogleTranslator(source='auto', target=request_dict.get("target_lang")).translate(stt_text)
 
223
 
224
- print("πŸ”Š Step 3: Synthesis...")
225
- tts_res = core_process.__wrapped__( {"action": "tts", "text": translated, "lang": request_dict.get("target_lang"), "speaker_wav": request_dict.get("speaker_wav")} )
226
  res = {"text": stt_text, "translated": translated, "audio": tts_res.get("audio")}
227
  else: res = {"error": f"Unknown action: {action}"}
228
  except Exception as e:
229
- print(f"❌ Error: {traceback.format_exc()}")
230
  res = {"error": str(e)}
231
  finally:
232
- print(f"--- [v115] ✨ FINISH ({time.time()-t1:.2f}s) ---")
233
  release_gpu_models()
234
  return res
235
 
@@ -238,7 +244,7 @@ async def lifespan(app: FastAPI):
238
  Thread(target=warmup_task, daemon=True).start()
239
  yield
240
 
241
- # πŸš€ Server
242
  app = FastAPI(lifespan=lifespan)
243
  app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
244
 
@@ -247,18 +253,18 @@ async def api_process(request: Request):
247
  try:
248
  req_data = await request.json()
249
  if req_data.get("action") == "health":
250
- return {"status": "awake", "warm": is_system_ready(), "v": "115"}
251
  return core_process(req_data)
252
  except Exception as e: return {"error": str(e)}
253
 
254
  @app.get("/health")
255
- def health(): return {"status": "ok", "warm": is_system_ready(), "v": "115"}
256
 
257
  def gradio_fn(req_json):
258
  try: return json.dumps(core_process(json.loads(req_json)))
259
  except Exception as e: return json.dumps({"error": str(e)})
260
 
261
- demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="πŸš€ AI Engine v115")
262
  demo.queue()
263
  app = gr.mount_gradio_app(app, demo, path="/")
264
 
 
1
+ # πŸš€ V116: ZEROGPU HOPPER ATOMIC
2
  try:
3
  import spaces
4
  except ImportError:
 
25
  import types
26
  import logging
27
  import traceback
28
+ from threading import Thread
29
  from huggingface_hub import snapshot_download, hf_hub_download
30
 
31
+ # πŸ›‘οΈ 1. SILENCE & ENV (v116)
32
  logging.getLogger("transformers").setLevel(logging.ERROR)
33
  logging.getLogger("TTS").setLevel(logging.ERROR)
34
  os.environ["CT2_VERBOSE"] = "0"
 
81
  from df.enhance import init_df
82
  import deep_translator
83
 
84
+ # FORCE BUILD TRIGGER: 19:10:00 Jan 21 2026
85
+ # v116: Hopper Atomic. Device Index Fix. No Lock.
86
 
87
  os.environ["COQUI_TOS_AGREED"] = "1"
88
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
89
  READY_FLAG = os.path.expanduser("~/.engine_ready")
 
90
 
91
  def is_system_ready():
92
  return os.path.exists(READY_FLAG)
93
 
94
  def activate_gpu_models(action):
95
+ """v116: Granular Activation"""
96
  global MODELS
97
+
98
+ if action in ["stt", "s2st"]:
99
+ stt_on_gpu = False
100
+ try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
101
+ except: pass
102
+
103
+ if not stt_on_gpu:
104
+ print(f"πŸŽ™οΈ [v116] WHISPER CHECKPOINT: Allocation...")
105
+ try:
106
+ gc.collect(); torch.cuda.empty_cache()
107
+ print(f"πŸŽ™οΈ [v116] WHISPER CHECKPOINT: Loading to CUDA:0 (int8_float16)...")
108
+ # Removed local_files_only=True as it can hang integrity checks
109
+ MODELS["stt"] = WhisperModel(
110
+ "large-v3",
111
+ device="cuda",
112
+ device_index=0,
113
+ compute_type="int8_float16",
114
+ num_workers=1
115
+ )
116
+ print(f"πŸŽ™οΈ [v116] WHISPER CHECKPOINT: Ready.")
117
+ except Exception as e:
118
+ print(f"⚠️ [v116] GPU STT Fail: {e}")
119
+ MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
120
 
121
+ if action in ["tts", "s2st"]:
122
+ tts_on_gpu = False
123
+ try:
124
+ params = next(MODELS["tts"].synthesizer.tts_model.parameters())
125
+ tts_on_gpu = "cuda" in str(params.device)
126
+ except: pass
127
+
128
+ if MODELS["tts"] is not None and not tts_on_gpu:
129
+ print(f"πŸ”Š [v116] XTTS CHECKPOINT: Promotion to GPU...")
130
  try:
131
+ MODELS["tts"].to("cuda")
132
+ print(f"πŸ”Š [v116] XTTS CHECKPOINT: Ready.")
133
+ except Exception as e:
134
+ print(f"⚠️ [v116] XTTS GPU Fail: {e}")
 
 
 
 
135
 
136
+ chatterbox_utils.load_chatterbox(device="cpu")
137
+ if MODELS["denoiser"] is None:
138
+ try: MODELS["denoiser"] = init_df()
139
+ except: pass
140
+ if MODELS["translate"] is None: MODELS["translate"] = "active"
141
 
142
  def release_gpu_models():
143
+ """v116: Clean Release"""
144
  global MODELS
145
  try:
146
  if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
 
153
  if torch.cuda.is_available(): torch.cuda.empty_cache()
154
 
155
  def warmup_task():
156
+ """v116: Asset Prep"""
157
  if os.path.exists(READY_FLAG): os.remove(READY_FLAG)
158
+ print("\nπŸ”₯ --- V116: ATOMIC WARMUP ---")
159
  try:
160
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
161
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
162
  chatterbox_utils.warmup_chatterbox()
163
  chatterbox_utils.load_chatterbox(device="cpu")
164
  with open(READY_FLAG, "w") as f: f.write("READY")
165
+ print(f"βœ… --- ENGINE ARMED: v116 --- \n")
166
+ except Exception as e: print(f"❌ Warmup Error: {e}")
167
 
168
  @spaces.GPU(duration=150)
169
  def core_process(request_dict):
170
  action = request_dict.get("action")
171
+ print(f"--- [v116] πŸš€ REQUEST: {action} ---")
172
 
173
  waited = 0
174
  while not is_system_ready() and waited < 300:
175
+ if waited % 10 == 0: print(f"⏳ Prep stage... ({waited}s)")
176
  time.sleep(1)
177
  waited += 1
178
 
 
219
  res = {"audio": base64.b64encode(audio_bytes).decode()}
220
 
221
  elif action == "s2st":
222
+ print("πŸŽ™οΈ Phase 1: Whisper GPU...")
223
  stt_res = core_process.__wrapped__( {**request_dict, "action": "stt"} )
224
  stt_text = stt_res.get("text", "")
225
 
226
+ print(f"🌍 Phase 2: Translation...")
227
+ target = request_dict.get("target_lang")
228
+ translated = deep_translator.GoogleTranslator(source='auto', target=target).translate(stt_text)
229
 
230
+ print("πŸ”Š Phase 3: XTTS GPU...")
231
+ tts_res = core_process.__wrapped__( {"action": "tts", "text": translated, "lang": target, "speaker_wav": request_dict.get("speaker_wav")} )
232
  res = {"text": stt_text, "translated": translated, "audio": tts_res.get("audio")}
233
  else: res = {"error": f"Unknown action: {action}"}
234
  except Exception as e:
235
+ print(f"❌ Fault: {traceback.format_exc()}")
236
  res = {"error": str(e)}
237
  finally:
238
+ print(f"--- [v116] ✨ ATOMIC FINISH ({time.time()-t1:.2f}s) ---")
239
  release_gpu_models()
240
  return res
241
 
 
244
  Thread(target=warmup_task, daemon=True).start()
245
  yield
246
 
247
+ # πŸš€ FastAPI & Gradio Unified
248
  app = FastAPI(lifespan=lifespan)
249
  app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
250
 
 
253
  try:
254
  req_data = await request.json()
255
  if req_data.get("action") == "health":
256
+ return {"status": "awake", "warm": is_system_ready(), "v": "116"}
257
  return core_process(req_data)
258
  except Exception as e: return {"error": str(e)}
259
 
260
  @app.get("/health")
261
+ def health(): return {"status": "ok", "warm": is_system_ready(), "v": "116"}
262
 
263
  def gradio_fn(req_json):
264
  try: return json.dumps(core_process(json.loads(req_json)))
265
  except Exception as e: return json.dumps({"error": str(e)})
266
 
267
+ demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="πŸš€ AI Engine v116")
268
  demo.queue()
269
  app = gr.mount_gradio_app(app, demo, path="/")
270