TGPro1 committed on
Commit
2b7cc23
·
verified ·
1 Parent(s): b2f29d9

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +62 -48
app.py CHANGED
@@ -13,11 +13,18 @@ import torchaudio
13
  import gc
14
  import sys
15
  import types
 
16
  from threading import Thread, Lock
17
  from huggingface_hub import snapshot_download
18
 
19
- # πŸ› οΈ 1. CRITICAL COMPATIBILITY MONKEYPATCHES
20
- # These MUST happen before any AI imports
 
 
 
 
 
 
21
  if "torchaudio.backend" not in sys.modules:
22
  backend = types.ModuleType("torchaudio.backend")
23
  common = types.ModuleType("torchaudio.backend.common")
@@ -55,7 +62,7 @@ try:
55
  torchaudio.load = patched_load
56
  except Exception: pass
57
 
58
- # πŸ“¦ 2. AI LIBRARIES (No engines yet)
59
  import chatterbox_utils
60
  from faster_whisper import WhisperModel
61
  from TTS.api import TTS
@@ -71,80 +78,89 @@ except ImportError:
71
  if f is None: return lambda x: x
72
  return f
73
 
74
- # FORCE BUILD TRIGGER: 11:35:00 Jan 21 2026
75
- # v92: Background Warmup (Fixes infinite reload loop and redundant downloads)
76
 
77
  os.environ["COQUI_TOS_AGREED"] = "1"
78
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
79
 
80
- # --- THREAD SAFETY & STATUS ---
81
  WARMUP_STATUS = {"complete": False, "in_progress": False, "error": None}
82
  WARMUP_LOCK = Lock()
83
 
84
  def activate_gpu_models(action):
85
- """v92: Safety wait for background download"""
86
  global MODELS, WARMUP_STATUS
87
 
88
- # If warmup is still running, wait for it (simple polling to avoid complex locks)
89
- wait_start = time.time()
90
- while WARMUP_STATUS["in_progress"] and not WARMUP_STATUS["complete"]:
91
- if time.time() - wait_start > 120: # 2 min max wait
92
- print("⚠️ Warmup taking too long, proceeding anyway...")
93
- break
94
- print(f"⏳ Waiting for background model download to finish for {action}...")
95
- time.sleep(5)
96
-
97
  # 1. Faster-Whisper GPU Activation
98
  if action in ["stt", "s2st"]:
99
- if MODELS["stt"] is None or MODELS["stt"].model.device != "cuda":
100
- print(f"πŸŽ™οΈ [v92] Activating Whisper on GPU for {action}...")
101
- MODELS["stt"] = WhisperModel("large-v3", device="cuda", compute_type="float16")
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
  # 2. XTTS-v2 GPU Activation
104
  if action in ["tts", "s2st"]:
105
- if MODELS["tts"] is None:
106
- print("πŸ”Š Initializing XTTS directly to GPU...")
107
- MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
108
  try:
109
  current_dev = str(next(MODELS["tts"].synthesizer.tts_model.parameters()).device)
110
- if "cuda" not in current_dev:
111
- print("πŸš€ Moving XTTS-v2 to GPU...")
 
 
 
 
 
 
112
  MODELS["tts"].to("cuda")
113
- except: MODELS["tts"].to("cuda")
114
 
115
- # 3. Helpers
116
  if MODELS["denoiser"] is None:
117
  try: MODELS["denoiser"] = init_df()
118
  except: pass
119
  if MODELS["translate"] is None: MODELS["translate"] = "active"
120
  chatterbox_utils.load_chatterbox(device="cuda" if torch.cuda.is_available() else "cpu")
121
- gc.collect()
122
- if torch.cuda.is_available(): torch.cuda.empty_cache()
123
 
124
  def warmup_task():
125
- """Background thread to handle heavy downloads (v92)"""
126
  global WARMUP_STATUS
127
  with WARMUP_LOCK:
128
  if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
129
  WARMUP_STATUS["in_progress"] = True
130
 
131
- print("\nπŸ”₯ --- BACKGROUND WARMUP STARTED (v92) ---")
 
132
  start = time.time()
133
  try:
134
- # Check if local files exist to skip slow verification if possible
135
- # snapshot_download is quite smart, but we'll log it clearly
136
- print("πŸ“₯ Caching Whisper large-v3 weights...")
137
- snapshot_download(repo_id="Systran/faster-whisper-large-v3", local_files_only=False)
138
 
139
- print("πŸ“₯ Caching XTTS-v2 weights...")
140
- snapshot_download(repo_id="coqui/XTTS-v2", local_files_only=False)
 
141
 
 
142
  chatterbox_utils.warmup_chatterbox()
143
 
144
  WARMUP_STATUS["complete"] = True
145
- print(f"βœ… --- BACKGROUND WARMUP COMPLETE ({time.time()-start:.2f}s) --- \n")
146
  except Exception as e:
147
- print(f"❌ Warmup error: {e}")
148
  WARMUP_STATUS["error"] = str(e)
149
  finally:
150
  WARMUP_STATUS["in_progress"] = False
@@ -203,7 +219,8 @@ def _tts_logic(text, lang, speaker_wav_b64):
203
  def core_process(request_dict):
204
  action = request_dict.get("action")
205
  t0 = time.time()
206
- print(f"--- [v92] πŸš€ GPU SESSION START: {action} ---")
 
207
  activate_gpu_models(action)
208
  try:
209
  if action == "stt": res = _stt_logic(request_dict)
@@ -217,7 +234,7 @@ def core_process(request_dict):
217
  elif action == "health": res = {"status": "awake"}
218
  else: res = {"error": f"Unknown action: {action}"}
219
  finally:
220
- print(f"--- [v92] ✨ END: {action} ({time.time()-t0:.2f}s) ---")
221
  gc.collect()
222
  if torch.cuda.is_available(): torch.cuda.empty_cache()
223
  return res
@@ -226,23 +243,21 @@ app = FastAPI()
226
  @app.post("/api/v1/process")
227
  async def api_process(request: Request):
228
  try: return core_process(await request.json())
229
- except Exception as e: traceback.print_exc(); return {"error": str(e)}
230
 
231
  @app.get("/health")
232
  def health():
233
  return {
234
  "status": "ok",
235
- "gpu": torch.cuda.is_available(),
236
- "warmup_complete": WARMUP_STATUS["complete"],
237
- "warmup_in_progress": WARMUP_STATUS["in_progress"],
238
  "time": time.ctime()
239
  }
240
 
241
  @app.post("/api/v1/clear_cache")
242
  async def clear_cache():
243
  try:
244
- gc.collect()
245
- if torch.cuda.is_available(): torch.cuda.empty_cache()
246
  temp_dir = tempfile.gettempdir()
247
  for f in os.listdir(temp_dir):
248
  if f.endswith(".wav") or f.startswith("tm"):
@@ -260,5 +275,4 @@ app = gr.mount_gradio_app(app, demo, path="/")
260
 
261
  if __name__ == "__main__":
262
  start_background_warmup()
263
- print("πŸš€ Starting FastAPI Server...")
264
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
13
  import gc
14
  import sys
15
  import types
16
+ import logging
17
  from threading import Thread, Lock
18
  from huggingface_hub import snapshot_download
19
 
20
+ # πŸ›‘οΈ 1. SILENCE VERBOSE LOGGING
21
+ # Minimize "steps" in logs as requested by user
22
+ logging.getLogger("transformers").setLevel(logging.ERROR)
23
+ logging.getLogger("TTS").setLevel(logging.ERROR)
24
+ os.environ["CT2_VERBOSE"] = "0"
25
+ os.environ["KMP_WARNINGS"] = "0"
26
+
27
+ # πŸ› οΈ 2. CRITICAL COMPATIBILITY MONKEYPATCHES
28
  if "torchaudio.backend" not in sys.modules:
29
  backend = types.ModuleType("torchaudio.backend")
30
  common = types.ModuleType("torchaudio.backend.common")
 
62
  torchaudio.load = patched_load
63
  except Exception: pass
64
 
65
+ # πŸ“¦ 3. AI LIBRARIES
66
  import chatterbox_utils
67
  from faster_whisper import WhisperModel
68
  from TTS.api import TTS
 
78
  if f is None: return lambda x: x
79
  return f
80
 
81
+ # FORCE BUILD TRIGGER: 11:45:00 Jan 21 2026
82
+ # v93: Silent Local-Only Mode. Forces instant retrieval after warmup.
83
 
84
  os.environ["COQUI_TOS_AGREED"] = "1"
85
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
86
 
 
87
  WARMUP_STATUS = {"complete": False, "in_progress": False, "error": None}
88
  WARMUP_LOCK = Lock()
89
 
90
def activate_gpu_models(action):
    """v93: Silent Instant Activation.

    Lazily promotes the models required by *action* onto the GPU, replacing
    any CPU-resident copies that the background warmup pre-loaded.

    Args:
        action: request type; "stt"/"s2st" activates Whisper, "tts"/"s2st"
            activates XTTS-v2. Any action also initializes the helpers below.

    Side effects: mutates the module-level MODELS dict and allocates GPU
    memory. NOTE(review): this runs without holding WARMUP_LOCK while
    warmup_task() may still be writing MODELS from another thread — the v92
    wait-for-warmup loop was removed in v93; confirm requests cannot race
    the warmup thread here.
    """
    global MODELS, WARMUP_STATUS

    # Force local-only if warmup is done: once weights are cached on disk,
    # skip any remote hub lookups so activation is instant.
    local_only = WARMUP_STATUS["complete"]

    # 1. Faster-Whisper GPU Activation
    if action in ["stt", "s2st"]:
        stt_ready = False
        # NOTE(review): bare except hides attribute errors; assumes
        # WhisperModel exposes .model.device as a plain string — confirm
        # against the installed faster-whisper version.
        try: stt_ready = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
        except: pass

        if not stt_ready:
            print(f"πŸŽ™οΈ [v93] Activating Whisper (Local Mode={local_only})...")
            # Explicitly clear any CPU crumbs to prevent CUDA conflicts
            # (warmup_task may have left a CPU int8 model in this slot).
            if MODELS["stt"]: del MODELS["stt"]
            gc.collect(); torch.cuda.empty_cache()

            MODELS["stt"] = WhisperModel(
                "large-v3",
                device="cuda",
                compute_type="float16",
                local_files_only=local_only
            )

    # 2. XTTS-v2 GPU Activation
    if action in ["tts", "s2st"]:
        tts_on_gpu = False
        # Probe the device of the first model parameter; any failure
        # (model absent, attribute shape changed) leaves tts_on_gpu False.
        try:
            current_dev = str(next(MODELS["tts"].synthesizer.tts_model.parameters()).device)
            tts_on_gpu = "cuda" in current_dev
        except: pass

        if MODELS["tts"] is None or not tts_on_gpu:
            print(f"πŸ”Š [v93] Activating XTTS-v2 (Local Mode={local_only})...")
            if MODELS["tts"] is None:
                # No instance yet: construct directly on the GPU.
                MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
            else:
                # CPU copy from warmup exists: move it in place.
                MODELS["tts"].to("cuda")

    # 3. Helpers (Always Local)
    if MODELS["denoiser"] is None:
        # Denoiser is best-effort; a failed init_df() leaves it disabled.
        try: MODELS["denoiser"] = init_df()
        except: pass
    if MODELS["translate"] is None: MODELS["translate"] = "active"
    chatterbox_utils.load_chatterbox(device="cuda" if torch.cuda.is_available() else "cpu")
 
 
137
 
138
def warmup_task():
    """Silent Background Warmup (v93).

    Runs once in a background thread: downloads/loads each heavy model to
    CPU so later GPU activation in activate_gpu_models() is local-only and
    fast. Progress is tracked via the module-level WARMUP_STATUS dict.

    Concurrency: WARMUP_LOCK only guards the entry check-and-set, so at most
    one warmup runs; the loads themselves happen outside the lock.
    """
    global WARMUP_STATUS
    with WARMUP_LOCK:
        # Idempotent entry: bail out if already done or another thread runs.
        if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
        WARMUP_STATUS["in_progress"] = True

    # We load them to CPU RAM first to ensure weights are in OS page cache
    print("\nπŸ”₯ --- SILENT WARMUP STARTED (v93) ---")
    start = time.time()
    try:
        # 1. Faster-Whisper — cheap int8 CPU load just to fetch/cache weights.
        print("πŸ“₯ Pre-loading Whisper to System RAM...")
        MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")

        # 2. XTTS-v2 — CPU instance; activation later moves it to CUDA.
        print("πŸ“₯ Pre-loading XTTS-v2 to System RAM...")
        MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)

        # 3. Chatterbox
        chatterbox_utils.warmup_chatterbox()

        # Only mark complete on full success; activate_gpu_models() uses this
        # flag to switch into local_files_only mode.
        WARMUP_STATUS["complete"] = True
        print(f"βœ… --- SYSTEM OPTIMIZED: INSTANT RETRIEVAL READY ({time.time()-start:.2f}s) --- \n")
    except Exception as e:
        print(f"❌ Warmup fail: {e}")
        WARMUP_STATUS["error"] = str(e)
    finally:
        # Always clear the in-progress flag so a failed warmup can be retried.
        WARMUP_STATUS["in_progress"] = False
 
219
  def core_process(request_dict):
220
  action = request_dict.get("action")
221
  t0 = time.time()
222
+ # v93: Optimized logs (less "steps")
223
+ print(f"--- [v93] πŸš€ GPU SESSION START: {action} ---")
224
  activate_gpu_models(action)
225
  try:
226
  if action == "stt": res = _stt_logic(request_dict)
 
234
  elif action == "health": res = {"status": "awake"}
235
  else: res = {"error": f"Unknown action: {action}"}
236
  finally:
237
+ print(f"--- [v93] ✨ END: {action} ({time.time()-t0:.2f}s) ---")
238
  gc.collect()
239
  if torch.cuda.is_available(): torch.cuda.empty_cache()
240
  return res
 
243
@app.post("/api/v1/process")
async def api_process(request: Request):
    """JSON API entry point: forwards the request body to core_process.

    Any exception is converted into an {"error": ...} payload rather than
    a 500, so clients always receive JSON.
    """
    try:
        payload = await request.json()
        return core_process(payload)
    except Exception as e:
        return {"error": str(e)}
247
 
248
  @app.get("/health")
249
  def health():
250
  return {
251
  "status": "ok",
252
+ "optimized": WARMUP_STATUS["complete"],
253
+ "gpu_available": torch.cuda.is_available(),
 
254
  "time": time.ctime()
255
  }
256
 
257
  @app.post("/api/v1/clear_cache")
258
  async def clear_cache():
259
  try:
260
+ gc.collect(); torch.cuda.empty_cache()
 
261
  temp_dir = tempfile.gettempdir()
262
  for f in os.listdir(temp_dir):
263
  if f.endswith(".wav") or f.startswith("tm"):
 
275
 
276
if __name__ == "__main__":
    # Kick off heavy model pre-loading in a background thread before serving
    # so the first request does not pay the full download cost.
    start_background_warmup()
    # log_level="error" keeps uvicorn quiet, matching the v93 "silent" goal.
    uvicorn.run(app, host="0.0.0.0", port=7860, log_level="error")