TGPro1 committed on
Commit
7bb29ef
·
verified ·
1 Parent(s): 9e4730f

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +72 -64
app.py CHANGED
@@ -13,10 +13,11 @@ import torchaudio
13
  import gc
14
  import sys
15
  import types
 
16
  from huggingface_hub import snapshot_download
17
 
18
- # πŸ›‘οΈ 1. CRITICAL COMPATIBILITY MONKEYPATCHES
19
- print("πŸ› οΈ Applying compatibility monkeypatches...")
20
  if "torchaudio.backend" not in sys.modules:
21
  backend = types.ModuleType("torchaudio.backend")
22
  common = types.ModuleType("torchaudio.backend.common")
@@ -52,21 +53,17 @@ try:
52
  return t, samplerate
53
  raise e
54
  torchaudio.load = patched_load
55
- print("βœ… Torchaudio patched")
56
- except Exception as e: print(f"⚠️ Patch failed: {e}")
57
 
58
- # πŸ“¦ 2. TOP-LEVEL IMPORTS (No engine initialization yet)
59
- print("πŸ“¦ Importing AI Libraries...")
60
  import chatterbox_utils
61
- # Note: We import the classes, but DO NOT instantiate them on the CPU
62
  from faster_whisper import WhisperModel
63
  from TTS.api import TTS
64
  from df.enhance import init_df
65
- print("βœ… Imports Complete")
66
 
67
  try:
68
  import spaces
69
- print("βœ… ZeroGPU/Spaces detected")
70
  except ImportError:
71
  class spaces:
72
  @staticmethod
@@ -74,49 +71,46 @@ except ImportError:
74
  if f is None: return lambda x: x
75
  return f
76
 
77
- # FORCE BUILD TRIGGER: 11:05:00 Jan 21 2026
78
- # v91: No-Instance Startup (Resolved CUDA std::system_error)
79
 
80
  os.environ["COQUI_TOS_AGREED"] = "1"
81
- # MODELS starts empty to ensure a clean CUDA handoff
82
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
83
 
 
 
 
 
84
  def activate_gpu_models(action):
85
- """v91: Direct GPU initialization (Safe & Clean)"""
86
- global MODELS
87
 
88
- # 1. Faster-Whisper: Initialize directly on GPU
 
 
 
 
 
 
 
 
 
89
  if action in ["stt", "s2st"]:
90
- if MODELS["stt"] is None:
91
- print(f"πŸŽ™οΈ [v91] Initializing Whisper directly on GPU for {action}...")
92
- # No CPU instance should exist at this point
93
- MODELS["stt"] = WhisperModel(
94
- "large-v3",
95
- device="cuda",
96
- compute_type="float16"
97
- )
98
- print("✨ Whisper GPU Engine Ready")
99
- elif MODELS["stt"].model.device != "cuda":
100
- # This case shouldn't happen with No-Instance Startup, but for safety:
101
- print("⚠️ Switching Whisper to GPU...")
102
- del MODELS["stt"]
103
- gc.collect()
104
- torch.cuda.empty_cache()
105
  MODELS["stt"] = WhisperModel("large-v3", device="cuda", compute_type="float16")
106
 
107
- # 2. XTTS-v2: Initialize directly on GPU
108
  if action in ["tts", "s2st"]:
109
  if MODELS["tts"] is None:
110
- print(f"πŸ”Š [v91] Initializing XTTS directly on GPU for {action}...")
111
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
112
- print("✨ XTTS GPU Engine Ready")
113
- else:
114
- try:
115
- current_dev = str(next(MODELS["tts"].synthesizer.tts_model.parameters()).device)
116
- if "cuda" not in current_dev:
117
- print("πŸš€ Moving XTTS-v2 to GPU...")
118
- MODELS["tts"].to("cuda")
119
- except: MODELS["tts"].to("cuda")
120
 
121
  # 3. Helpers
122
  if MODELS["denoiser"] is None:
@@ -124,32 +118,39 @@ def activate_gpu_models(action):
124
  except: pass
125
  if MODELS["translate"] is None: MODELS["translate"] = "active"
126
  chatterbox_utils.load_chatterbox(device="cuda" if torch.cuda.is_available() else "cpu")
 
 
127
 
128
- def warmup_models():
129
- """v91: DOWNLOAD ONLY (No engine initialization)"""
130
- print("\nπŸ”₯ --- SYSTEM STARTUP: NO-INSTANCE WARMUP (v91) ---")
 
 
 
 
 
131
  start = time.time()
132
  try:
133
- # 1. Download Whisper (CTranslate2 format)
134
- print("πŸ“₯ Pre-downloading Whisper large-v3 weights...")
135
- snapshot_download(repo_id="Systran/faster-whisper-large-v3")
 
136
 
137
- # 2. Download XTTS-v2
138
- print("πŸ“₯ Pre-downloading XTTS-v2 weights...")
139
- snapshot_download(repo_id="coqui/XTTS-v2")
140
 
141
- # 3. Download DeepFilterNet
142
- print("πŸ“₯ Pre-downloading DeepFilterNet...")
143
- # DeepFilterNet downloads usually happen via init_df, but we can try to force it
144
- # snapshot_download(repo_id="RVoice/DeepFilterNet3")
145
-
146
- # 4. Chatterbox Warmup
147
  chatterbox_utils.warmup_chatterbox()
148
 
149
- print(f"βœ… --- STARTUP COMPLETE: DATA ON DISK ({time.time()-start:.2f}s) --- \n")
150
- print("⚠️ NOTE: No engine instances created on CPU to prevent CUDA conflicts.")
151
  except Exception as e:
152
- print(f"⚠️ Startup warning: {e}")
 
 
 
 
 
 
153
 
154
  def _stt_logic(request_dict):
155
  audio_bytes = base64.b64decode(request_dict.get("file"))
@@ -163,8 +164,7 @@ def _stt_logic(request_dict):
163
  if os.path.exists(temp_path): os.unlink(temp_path)
164
 
165
  def _translate_logic(text, target_lang):
166
- from deep_translator import GoogleTranslator
167
- return GoogleTranslator(source='auto', target=target_lang).translate(text)
168
 
169
  def _tts_logic(text, lang, speaker_wav_b64):
170
  if not text or not text.strip(): return {"error": "Input empty"}
@@ -203,7 +203,7 @@ def _tts_logic(text, lang, speaker_wav_b64):
203
  def core_process(request_dict):
204
  action = request_dict.get("action")
205
  t0 = time.time()
206
- print(f"--- [v91] πŸš€ GPU SESSION START: {action} ---")
207
  activate_gpu_models(action)
208
  try:
209
  if action == "stt": res = _stt_logic(request_dict)
@@ -217,7 +217,7 @@ def core_process(request_dict):
217
  elif action == "health": res = {"status": "awake"}
218
  else: res = {"error": f"Unknown action: {action}"}
219
  finally:
220
- print(f"--- [v91] ✨ END: {action} ({time.time()-t0:.2f}s) ---")
221
  gc.collect()
222
  if torch.cuda.is_available(): torch.cuda.empty_cache()
223
  return res
@@ -229,7 +229,14 @@ async def api_process(request: Request):
229
  except Exception as e: traceback.print_exc(); return {"error": str(e)}
230
 
231
  @app.get("/health")
232
- def health(): return {"status": "ok", "gpu": torch.cuda.is_available(), "time": time.ctime()}
 
 
 
 
 
 
 
233
 
234
  @app.post("/api/v1/clear_cache")
235
  async def clear_cache():
@@ -252,5 +259,6 @@ demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="πŸš€ AI
252
  app = gr.mount_gradio_app(app, demo, path="/")
253
 
254
  if __name__ == "__main__":
255
- warmup_models()
 
256
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
13
  import gc
14
  import sys
15
  import types
16
+ from threading import Thread, Lock
17
  from huggingface_hub import snapshot_download
18
 
19
+ # πŸ› οΈ 1. CRITICAL COMPATIBILITY MONKEYPATCHES
20
+ # These MUST happen before any AI imports
21
  if "torchaudio.backend" not in sys.modules:
22
  backend = types.ModuleType("torchaudio.backend")
23
  common = types.ModuleType("torchaudio.backend.common")
 
53
  return t, samplerate
54
  raise e
55
  torchaudio.load = patched_load
56
+ except Exception: pass
 
57
 
58
+ # πŸ“¦ 2. AI LIBRARIES (No engines yet)
 
59
  import chatterbox_utils
 
60
  from faster_whisper import WhisperModel
61
  from TTS.api import TTS
62
  from df.enhance import init_df
63
+ import deep_translator
64
 
65
  try:
66
  import spaces
 
67
  except ImportError:
68
  class spaces:
69
  @staticmethod
 
71
  if f is None: return lambda x: x
72
  return f
73
 
74
+ # FORCE BUILD TRIGGER: 11:35:00 Jan 21 2026
75
+ # v92: Background Warmup (Fixes infinite reload loop and redundant downloads)
76
 
77
  os.environ["COQUI_TOS_AGREED"] = "1"
 
78
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
79
 
80
+ # --- THREAD SAFETY & STATUS ---
81
+ WARMUP_STATUS = {"complete": False, "in_progress": False, "error": None}
82
+ WARMUP_LOCK = Lock()
83
+
84
  def activate_gpu_models(action):
85
+ """v92: Safety wait for background download"""
86
+ global MODELS, WARMUP_STATUS
87
 
88
+ # If warmup is still running, wait for it (simple polling to avoid complex locks)
89
+ wait_start = time.time()
90
+ while WARMUP_STATUS["in_progress"] and not WARMUP_STATUS["complete"]:
91
+ if time.time() - wait_start > 120: # 2 min max wait
92
+ print("⚠️ Warmup taking too long, proceeding anyway...")
93
+ break
94
+ print(f"⏳ Waiting for background model download to finish for {action}...")
95
+ time.sleep(5)
96
+
97
+ # 1. Faster-Whisper GPU Activation
98
  if action in ["stt", "s2st"]:
99
+ if MODELS["stt"] is None or MODELS["stt"].model.device != "cuda":
100
+ print(f"πŸŽ™οΈ [v92] Activating Whisper on GPU for {action}...")
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  MODELS["stt"] = WhisperModel("large-v3", device="cuda", compute_type="float16")
102
 
103
+ # 2. XTTS-v2 GPU Activation
104
  if action in ["tts", "s2st"]:
105
  if MODELS["tts"] is None:
106
+ print("πŸ”Š Initializing XTTS directly to GPU...")
107
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
108
+ try:
109
+ current_dev = str(next(MODELS["tts"].synthesizer.tts_model.parameters()).device)
110
+ if "cuda" not in current_dev:
111
+ print("πŸš€ Moving XTTS-v2 to GPU...")
112
+ MODELS["tts"].to("cuda")
113
+ except: MODELS["tts"].to("cuda")
 
 
114
 
115
  # 3. Helpers
116
  if MODELS["denoiser"] is None:
 
118
  except: pass
119
  if MODELS["translate"] is None: MODELS["translate"] = "active"
120
  chatterbox_utils.load_chatterbox(device="cuda" if torch.cuda.is_available() else "cpu")
121
+ gc.collect()
122
+ if torch.cuda.is_available(): torch.cuda.empty_cache()
123
 
124
+ def warmup_task():
125
+ """Background thread to handle heavy downloads (v92)"""
126
+ global WARMUP_STATUS
127
+ with WARMUP_LOCK:
128
+ if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
129
+ WARMUP_STATUS["in_progress"] = True
130
+
131
+ print("\nπŸ”₯ --- BACKGROUND WARMUP STARTED (v92) ---")
132
  start = time.time()
133
  try:
134
+ # Check if local files exist to skip slow verification if possible
135
+ # snapshot_download is quite smart, but we'll log it clearly
136
+ print("πŸ“₯ Caching Whisper large-v3 weights...")
137
+ snapshot_download(repo_id="Systran/faster-whisper-large-v3", local_files_only=False)
138
 
139
+ print("πŸ“₯ Caching XTTS-v2 weights...")
140
+ snapshot_download(repo_id="coqui/XTTS-v2", local_files_only=False)
 
141
 
 
 
 
 
 
 
142
  chatterbox_utils.warmup_chatterbox()
143
 
144
+ WARMUP_STATUS["complete"] = True
145
+ print(f"βœ… --- BACKGROUND WARMUP COMPLETE ({time.time()-start:.2f}s) --- \n")
146
  except Exception as e:
147
+ print(f"❌ Warmup error: {e}")
148
+ WARMUP_STATUS["error"] = str(e)
149
+ finally:
150
+ WARMUP_STATUS["in_progress"] = False
151
+
152
+ def start_background_warmup():
153
+ Thread(target=warmup_task, daemon=True).start()
154
 
155
  def _stt_logic(request_dict):
156
  audio_bytes = base64.b64decode(request_dict.get("file"))
 
164
  if os.path.exists(temp_path): os.unlink(temp_path)
165
 
166
  def _translate_logic(text, target_lang):
167
+ return deep_translator.GoogleTranslator(source='auto', target=target_lang).translate(text)
 
168
 
169
  def _tts_logic(text, lang, speaker_wav_b64):
170
  if not text or not text.strip(): return {"error": "Input empty"}
 
203
  def core_process(request_dict):
204
  action = request_dict.get("action")
205
  t0 = time.time()
206
+ print(f"--- [v92] πŸš€ GPU SESSION START: {action} ---")
207
  activate_gpu_models(action)
208
  try:
209
  if action == "stt": res = _stt_logic(request_dict)
 
217
  elif action == "health": res = {"status": "awake"}
218
  else: res = {"error": f"Unknown action: {action}"}
219
  finally:
220
+ print(f"--- [v92] ✨ END: {action} ({time.time()-t0:.2f}s) ---")
221
  gc.collect()
222
  if torch.cuda.is_available(): torch.cuda.empty_cache()
223
  return res
 
229
  except Exception as e: traceback.print_exc(); return {"error": str(e)}
230
 
231
  @app.get("/health")
232
+ def health():
233
+ return {
234
+ "status": "ok",
235
+ "gpu": torch.cuda.is_available(),
236
+ "warmup_complete": WARMUP_STATUS["complete"],
237
+ "warmup_in_progress": WARMUP_STATUS["in_progress"],
238
+ "time": time.ctime()
239
+ }
240
 
241
  @app.post("/api/v1/clear_cache")
242
  async def clear_cache():
 
259
  app = gr.mount_gradio_app(app, demo, path="/")
260
 
261
  if __name__ == "__main__":
262
+ start_background_warmup()
263
+ print("πŸš€ Starting FastAPI Server...")
264
  uvicorn.run(app, host="0.0.0.0", port=7860)