TGPro1 committed on
Commit
855133f
·
verified ·
1 Parent(s): 8d4fa1d

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +46 -47
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # πŸš€ V108: ZEROGPU REQUEST-WAIT PROTOCOL
2
  # Must be first to patch environment correctly
3
  try:
4
  import spaces
@@ -27,9 +27,9 @@ import sys
27
  import types
28
  import logging
29
  from threading import Thread, Lock
30
- from huggingface_hub import snapshot_download
31
 
32
- # πŸ›‘οΈ 1. SILENCE & ENV (v108)
33
  logging.getLogger("transformers").setLevel(logging.ERROR)
34
  logging.getLogger("TTS").setLevel(logging.ERROR)
35
  os.environ["CT2_VERBOSE"] = "0"
@@ -37,7 +37,7 @@ os.environ["ORT_LOGGING_LEVEL"] = "3"
37
  os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
38
  os.environ["GRADIO_SERVER_PORT"] = "7860"
39
 
40
- # πŸ› οΈ 2. COMPATIBILITY PATCHES (v108)
41
  if "torchaudio.backend" not in sys.modules:
42
  backend = types.ModuleType("torchaudio.backend")
43
  common = types.ModuleType("torchaudio.backend.common")
@@ -77,13 +77,14 @@ except Exception: pass
77
 
78
  # πŸ“¦ 3. AI LIBRARIES
79
  import chatterbox_utils
 
80
  from faster_whisper import WhisperModel
81
  from TTS.api import TTS
82
  from df.enhance import init_df
83
  import deep_translator
84
 
85
- # FORCE REBUILD: 17:18:00 Jan 21 2026
86
- # v108: Rebuilt with full requirements. Wait protocol active.
87
 
88
  os.environ["COQUI_TOS_AGREED"] = "1"
89
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
@@ -92,7 +93,7 @@ WARMUP_STATUS = {"complete": False, "in_progress": False}
92
  WARMUP_LOCK = Lock()
93
 
94
  def activate_gpu_models(action):
95
- """v108: Optimized Activation"""
96
  global MODELS, WARMUP_STATUS
97
  local_only = WARMUP_STATUS["complete"]
98
 
@@ -101,11 +102,9 @@ def activate_gpu_models(action):
101
  try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
102
  except: pass
103
  if not stt_on_gpu:
104
- print(f"πŸŽ™οΈ [v108] Activating Whisper (GPU: int8_float16)...")
105
  try:
106
- if MODELS["stt"]: del MODELS["stt"]
107
- gc.collect(); torch.cuda.empty_cache()
108
- time.sleep(0.5)
109
  MODELS["stt"] = WhisperModel(
110
  "large-v3",
111
  device="cuda",
@@ -114,7 +113,7 @@ def activate_gpu_models(action):
114
  local_files_only=local_only
115
  )
116
  except Exception as e:
117
- print(f"⚠️ GPU Init failed: {e}")
118
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8", local_files_only=True)
119
 
120
  if action in ["tts", "s2st"]:
@@ -124,14 +123,15 @@ def activate_gpu_models(action):
124
  tts_on_gpu = "cuda" in curr
125
  except: pass
126
  if MODELS["tts"] is None or not tts_on_gpu:
127
- print(f"πŸ”Š [v108] Activating XTTS-v2 (GPU)...")
128
  try:
129
  if MODELS["tts"] is None:
130
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
131
  else: MODELS["tts"].to("cuda")
132
  except: pass
133
 
134
- chatterbox_utils.load_chatterbox(device="cuda" if torch.cuda.is_available() else "cpu")
 
135
 
136
  if MODELS["denoiser"] is None:
137
  try: MODELS["denoiser"] = init_df()
@@ -139,9 +139,9 @@ def activate_gpu_models(action):
139
  if MODELS["translate"] is None: MODELS["translate"] = "active"
140
 
141
  def release_gpu_models():
142
- """v108: Resilient Release"""
143
  global MODELS
144
- print("🧹 [v108] Releasing GPU resources.")
145
  try:
146
  if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
147
  del MODELS["stt"]
@@ -156,39 +156,43 @@ def release_gpu_models():
156
  time.sleep(0.5)
157
 
158
  def warmup_task():
159
- """v108: System Preparation"""
160
  global WARMUP_STATUS
161
- with WARMUP_LOCK:
162
- if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
163
- WARMUP_STATUS["in_progress"] = True
164
- print("\nπŸ”₯ --- V108: ZEROGPU WARMUP STARTED ---")
165
  try:
166
- # Pre-download everything to CPU first
167
- MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
168
- MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
169
- MODELS["denoiser"] = init_df()
 
 
 
 
 
170
  chatterbox_utils.warmup_chatterbox()
 
 
 
 
171
  WARMUP_STATUS["complete"] = True
172
- print(f"βœ… --- SYSTEM READY: v108 --- \n")
173
  except Exception as e:
174
- print(f"❌ Warmup Error: {e}")
175
  finally: WARMUP_STATUS["in_progress"] = False
176
 
177
  @spaces.GPU(duration=150)
178
  def core_process(request_dict):
179
  action = request_dict.get("action")
180
- print(f"--- [v108] πŸš€ REQUEST: {action} ---")
181
 
182
- # πŸ”₯ v108: WAIT PROTOCOL
183
- max_wait = 180 # 3 minutes for absolute first-time build/download
184
  waited = 0
185
- while not WARMUP_STATUS["complete"] and waited < max_wait:
186
- if waited % 5 == 0: print(f"⏳ System warming up... (waited {waited}s)")
187
  time.sleep(1)
188
  waited += 1
189
-
190
- if not WARMUP_STATUS["complete"]:
191
- return {"error": "System still warming up. Please try again in 30 seconds."}
192
 
193
  t1 = time.time()
194
  activate_gpu_models(action)
@@ -234,23 +238,19 @@ def core_process(request_dict):
234
  res = {"audio": base64.b64encode(audio_bytes).decode()}
235
 
236
  elif action == "s2st":
237
- # Combined logic
238
  req_copy = request_dict.copy()
239
  req_copy["action"] = "stt"
240
- stt_res = core_process.__wrapped__(req_copy) # Recursive but without double GPU wrapper
241
-
242
  translated = deep_translator.GoogleTranslator(source='auto', target=request_dict.get("target_lang")).translate(stt_res.get("text", ""))
243
-
244
  req_tts = {"action": "tts", "text": translated, "lang": request_dict.get("target_lang"), "speaker_wav": request_dict.get("speaker_wav")}
245
  tts_res = core_process.__wrapped__(req_tts)
246
-
247
  res = {"text": stt_res.get("text"), "translated": translated, "audio": tts_res.get("audio")}
248
  else: res = {"error": f"Unknown action: {action}"}
249
  except Exception as e:
250
- print(f"❌ Error in core_process: {traceback.format_exc()}")
251
  res = {"error": str(e)}
252
  finally:
253
- print(f"--- [v108] ✨ DONE ({time.time()-t1:.2f}s) ---")
254
  release_gpu_models()
255
  return res
256
 
@@ -268,22 +268,21 @@ async def api_process(request: Request):
268
  try:
269
  req_data = await request.json()
270
  if req_data.get("action") == "health":
271
- return {"status": "awake", "warm": WARMUP_STATUS["complete"], "v": "108"}
272
  return core_process(req_data)
273
  except Exception as e: return {"error": str(e)}
274
 
275
  @app.get("/health")
276
- def health(): return {"status": "ok", "warm": WARMUP_STATUS["complete"], "v": "108"}
277
 
278
  def gradio_fn(req_json):
279
  try: return json.dumps(core_process(json.loads(req_json)))
280
  except Exception as e: return json.dumps({"error": str(e)})
281
 
282
- demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="πŸš€ AI Engine v108")
283
  demo.queue()
284
-
285
  app = gr.mount_gradio_app(app, demo, path="/")
286
 
287
  if __name__ == "__main__":
288
- print("πŸš€ [v108] Starting System...")
289
  uvicorn.run(app, host="0.0.0.0", port=7860, log_level="error")
 
1
+ # πŸš€ V109: ZEROGPU STERILE STARTUP
2
  # Must be first to patch environment correctly
3
  try:
4
  import spaces
 
27
  import types
28
  import logging
29
  from threading import Thread, Lock
30
+ from huggingface_hub import snapshot_download, hf_hub_download
31
 
32
+ # πŸ›‘οΈ 1. SILENCE & ENV (v109)
33
  logging.getLogger("transformers").setLevel(logging.ERROR)
34
  logging.getLogger("TTS").setLevel(logging.ERROR)
35
  os.environ["CT2_VERBOSE"] = "0"
 
37
  os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
38
  os.environ["GRADIO_SERVER_PORT"] = "7860"
39
 
40
+ # πŸ› οΈ 2. COMPATIBILITY PATCHES (v109)
41
  if "torchaudio.backend" not in sys.modules:
42
  backend = types.ModuleType("torchaudio.backend")
43
  common = types.ModuleType("torchaudio.backend.common")
 
77
 
78
  # πŸ“¦ 3. AI LIBRARIES
79
  import chatterbox_utils
80
+ # We import types/classes but do NOT instantiate them at top-level
81
  from faster_whisper import WhisperModel
82
  from TTS.api import TTS
83
  from df.enhance import init_df
84
  import deep_translator
85
 
86
+ # FORCE BUILD TRIGGER: 17:35:00 Jan 21 2026
87
+ # v109: Sterile Startup. defer all AI init to GPU session.
88
 
89
  os.environ["COQUI_TOS_AGREED"] = "1"
90
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
 
93
  WARMUP_LOCK = Lock()
94
 
95
  def activate_gpu_models(action):
96
+ """v109: Deferred Activation (Inside GPU)"""
97
  global MODELS, WARMUP_STATUS
98
  local_only = WARMUP_STATUS["complete"]
99
 
 
102
  try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
103
  except: pass
104
  if not stt_on_gpu:
105
+ print(f"πŸŽ™οΈ [v109] Initializing Whisper (GPU: int8_float16)...")
106
  try:
107
+ if MODELS["stt"]: del MODELS["stt"]; gc.collect(); torch.cuda.empty_cache()
 
 
108
  MODELS["stt"] = WhisperModel(
109
  "large-v3",
110
  device="cuda",
 
113
  local_files_only=local_only
114
  )
115
  except Exception as e:
116
+ print(f"⚠️ GPU STT Init failed: {e}")
117
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8", local_files_only=True)
118
 
119
  if action in ["tts", "s2st"]:
 
123
  tts_on_gpu = "cuda" in curr
124
  except: pass
125
  if MODELS["tts"] is None or not tts_on_gpu:
126
+ print(f"πŸ”Š [v109] Initializing XTTS-v2 (GPU)...")
127
  try:
128
  if MODELS["tts"] is None:
129
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
130
  else: MODELS["tts"].to("cuda")
131
  except: pass
132
 
133
+ # In v109, we only load chatterbox into CUDA when needed
134
+ chatterbox_utils.load_chatterbox(device="cuda")
135
 
136
  if MODELS["denoiser"] is None:
137
  try: MODELS["denoiser"] = init_df()
 
139
  if MODELS["translate"] is None: MODELS["translate"] = "active"
140
 
141
  def release_gpu_models():
142
+ """v109: GPU Release"""
143
  global MODELS
144
+ print("🧹 [v109] Releasing GPU.")
145
  try:
146
  if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
147
  del MODELS["stt"]
 
156
  time.sleep(0.5)
157
 
158
  def warmup_task():
159
+ """v109: Sterile Warmup (Downloads only)"""
160
  global WARMUP_STATUS
161
+ if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
162
+ WARMUP_STATUS["in_progress"] = True
163
+ print("\nπŸ”₯ --- V109: ZEROGPU STERILE WARMUP ---")
 
164
  try:
165
+ # Pre-fetch files WITHOUT loading them into memory/CUDA
166
+ print("⏬ Pre-fetching Whisper Large-v3...")
167
+ snapshot_download("Systran/faster-whisper-large-v3")
168
+
169
+ print("⏬ Pre-fetching XTTS-v2...")
170
+ # TTS internal download trigger (folder structure varies)
171
+ snapshot_download("coqui/XTTS-v2")
172
+
173
+ print("⏬ Pre-fetching Chatterbox...")
174
  chatterbox_utils.warmup_chatterbox()
175
+
176
+ # DeepFilterNet
177
+ hf_hub_download(repo_id="R_S/DeepFilterNet3", filename="config.json")
178
+
179
  WARMUP_STATUS["complete"] = True
180
+ print(f"βœ… --- CACHE SYNCED: v109 --- \n")
181
  except Exception as e:
182
+ print(f"❌ Warmup Warning (might be fine): {e}")
183
  finally: WARMUP_STATUS["in_progress"] = False
184
 
185
  @spaces.GPU(duration=150)
186
  def core_process(request_dict):
187
  action = request_dict.get("action")
188
+ print(f"--- [v109] πŸš€ REQUEST: {action} ---")
189
 
190
+ # Wait for downloads if necessary
 
191
  waited = 0
192
+ while not WARMUP_STATUS["complete"] and waited < 300: # 5 min limit
193
+ if waited % 10 == 0: print(f"⏳ Downloading models... ({waited}s)")
194
  time.sleep(1)
195
  waited += 1
 
 
 
196
 
197
  t1 = time.time()
198
  activate_gpu_models(action)
 
238
  res = {"audio": base64.b64encode(audio_bytes).decode()}
239
 
240
  elif action == "s2st":
 
241
  req_copy = request_dict.copy()
242
  req_copy["action"] = "stt"
243
+ stt_res = core_process.__wrapped__(req_copy)
 
244
  translated = deep_translator.GoogleTranslator(source='auto', target=request_dict.get("target_lang")).translate(stt_res.get("text", ""))
 
245
  req_tts = {"action": "tts", "text": translated, "lang": request_dict.get("target_lang"), "speaker_wav": request_dict.get("speaker_wav")}
246
  tts_res = core_process.__wrapped__(req_tts)
 
247
  res = {"text": stt_res.get("text"), "translated": translated, "audio": tts_res.get("audio")}
248
  else: res = {"error": f"Unknown action: {action}"}
249
  except Exception as e:
250
+ print(f"❌ Core Error: {traceback.format_exc()}")
251
  res = {"error": str(e)}
252
  finally:
253
+ print(f"--- [v109] ✨ DONE ({time.time()-t1:.2f}s) ---")
254
  release_gpu_models()
255
  return res
256
 
 
268
  try:
269
  req_data = await request.json()
270
  if req_data.get("action") == "health":
271
+ return {"status": "awake", "warm": WARMUP_STATUS["complete"], "v": "109"}
272
  return core_process(req_data)
273
  except Exception as e: return {"error": str(e)}
274
 
275
  @app.get("/health")
276
+ def health(): return {"status": "ok", "warm": WARMUP_STATUS["complete"], "v": "109"}
277
 
278
  def gradio_fn(req_json):
279
  try: return json.dumps(core_process(json.loads(req_json)))
280
  except Exception as e: return json.dumps({"error": str(e)})
281
 
282
+ demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="πŸš€ AI Engine v109")
283
  demo.queue()
 
284
  app = gr.mount_gradio_app(app, demo, path="/")
285
 
286
  if __name__ == "__main__":
287
+ print("πŸš€ [v109] Starting Sterile Server...")
288
  uvicorn.run(app, host="0.0.0.0", port=7860, log_level="error")