TGPro1 committed on
Commit
7905c18
·
verified ·
1 Parent(s): 6ac1ef6

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +45 -31
app.py CHANGED
@@ -17,11 +17,10 @@ import logging
17
  from threading import Thread, Lock
18
  from huggingface_hub import snapshot_download
19
 
20
- # πŸ›‘οΈ 1. SILENCE VERBOSE LOGGING
21
  logging.getLogger("transformers").setLevel(logging.ERROR)
22
  logging.getLogger("TTS").setLevel(logging.ERROR)
23
  os.environ["CT2_VERBOSE"] = "0"
24
- os.environ["KMP_WARNINGS"] = "0"
25
 
26
  # πŸ› οΈ 2. COMPATIBILITY PATCHES
27
  if "torchaudio.backend" not in sys.modules:
@@ -77,68 +76,88 @@ except ImportError:
77
  if f is None: return lambda x: x
78
  return f
79
 
80
- # FORCE BUILD TRIGGER: 11:55:00 Jan 21 2026
81
- # v94: Startup Event Warmup + Fix Port Conflict. Final Stabilization.
82
 
83
  os.environ["COQUI_TOS_AGREED"] = "1"
84
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
85
 
86
- WARMUP_STATUS = {"complete": False, "in_progress": False, "error": None}
87
  WARMUP_LOCK = Lock()
88
 
89
  def activate_gpu_models(action):
90
- """v94: Direct GPU Activation"""
91
  global MODELS, WARMUP_STATUS
92
  local_only = WARMUP_STATUS["complete"]
93
 
94
  # 1. Faster-Whisper
95
  if action in ["stt", "s2st"]:
96
- stt_ready = False
97
- try: stt_ready = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
98
  except: pass
99
- if not stt_ready:
100
- print(f"πŸŽ™οΈ [v94] Activating Whisper (Local={local_only})...")
101
- if MODELS["stt"]: del MODELS["stt"]; gc.collect(); torch.cuda.empty_cache()
 
102
  MODELS["stt"] = WhisperModel("large-v3", device="cuda", compute_type="float16", local_files_only=local_only)
103
 
104
  # 2. XTTS-v2
105
  if action in ["tts", "s2st"]:
106
- tts_ready = False
107
  try:
108
  curr = str(next(MODELS["tts"].synthesizer.tts_model.parameters()).device)
109
- tts_ready = "cuda" in curr
110
  except: pass
111
- if MODELS["tts"] is None or not tts_ready:
112
- print(f"πŸ”Š [v94] Activating XTTS-v2 (Local={local_only})...")
113
  if MODELS["tts"] is None:
114
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
115
  else: MODELS["tts"].to("cuda")
116
 
117
- # 3. Helpers
118
  if MODELS["denoiser"] is None:
119
  try: MODELS["denoiser"] = init_df()
120
  except: pass
121
  if MODELS["translate"] is None: MODELS["translate"] = "active"
122
- chatterbox_utils.load_chatterbox(device="cuda" if torch.cuda.is_available() else "cpu")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
 
124
  def warmup_task():
125
- """Silent Background Warmup (Threaded)"""
126
  global WARMUP_STATUS
127
  with WARMUP_LOCK:
128
  if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
129
  WARMUP_STATUS["in_progress"] = True
130
 
131
- print("\nπŸ”₯ --- SILENT WARMUP STARTED (v94) ---")
132
  start = time.time()
133
  try:
134
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
135
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
136
  chatterbox_utils.warmup_chatterbox()
137
  WARMUP_STATUS["complete"] = True
138
- print(f"βœ… --- SYSTEM READY ({time.time()-start:.2f}s) --- \n")
139
  except Exception as e:
140
  print(f"❌ Warmup fail: {e}")
141
- WARMUP_STATUS["error"] = str(e)
142
  finally:
143
  WARMUP_STATUS["in_progress"] = False
144
 
@@ -191,7 +210,7 @@ def _tts_logic(text, lang, speaker_wav_b64):
191
  def core_process(request_dict):
192
  action = request_dict.get("action")
193
  t1 = time.time()
194
- print(f"--- [v94] πŸš€ GPU SESSION START: {action} ---")
195
  activate_gpu_models(action)
196
  try:
197
  if action == "stt": res = _stt_logic(request_dict)
@@ -205,17 +224,14 @@ def core_process(request_dict):
205
  elif action == "health": res = {"status": "awake"}
206
  else: res = {"error": f"Unknown action: {action}"}
207
  finally:
208
- print(f"--- [v94] ✨ END: {action} ({time.time()-t1:.2f}s) ---")
209
- gc.collect()
210
- if torch.cuda.is_available(): torch.cuda.empty_cache()
211
  return res
212
 
213
  app = FastAPI()
214
 
215
  @app.on_event("startup")
216
  async def startup_event():
217
- """Ensure warmup starts regardless of entry point (v94)"""
218
- print("πŸš€ App Startup Event: Launching Background Warmup")
219
  Thread(target=warmup_task, daemon=True).start()
220
 
221
  @app.post("/api/v1/process")
@@ -225,12 +241,12 @@ async def api_process(request: Request):
225
 
226
  @app.get("/health")
227
  def health():
228
- return {"status": "ok", "optimized": WARMUP_STATUS["complete"], "time": time.ctime()}
229
 
230
  @app.post("/api/v1/clear_cache")
231
  async def clear_cache():
232
  try:
233
- gc.collect(); torch.cuda.empty_cache()
234
  temp_dir = tempfile.gettempdir()
235
  for f in os.listdir(temp_dir):
236
  if f.endswith(".wav") or f.startswith("tm"):
@@ -246,7 +262,5 @@ def gradio_fn(req_json):
246
  demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="πŸš€ AI Engine")
247
  app = gr.mount_gradio_app(app, demo, path="/")
248
 
249
- # Note: if __name__ == "__main__" is skipped if launched via 'uvicorn app:app'
250
  if __name__ == "__main__":
251
- print("πŸ› οΈ Manual Start detected")
252
  uvicorn.run(app, host="0.0.0.0", port=7860, log_level="error")
 
17
  from threading import Thread, Lock
18
  from huggingface_hub import snapshot_download
19
 
20
+ # πŸ›‘οΈ 1. SILENCE LOGGING
21
  logging.getLogger("transformers").setLevel(logging.ERROR)
22
  logging.getLogger("TTS").setLevel(logging.ERROR)
23
  os.environ["CT2_VERBOSE"] = "0"
 
24
 
25
  # πŸ› οΈ 2. COMPATIBILITY PATCHES
26
  if "torchaudio.backend" not in sys.modules:
 
76
  if f is None: return lambda x: x
77
  return f
78
 
79
+ # FORCE BUILD TRIGGER: 12:00:00 Jan 21 2026
80
+ # v95: Serverless GPU Efficiency. Auto-release GPU, models stay WARM in RAM.
81
 
82
  os.environ["COQUI_TOS_AGREED"] = "1"
83
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
84
 
85
+ WARMUP_STATUS = {"complete": False, "in_progress": False}
86
  WARMUP_LOCK = Lock()
87
 
88
  def activate_gpu_models(action):
89
+ """v95: Optimized GPU Activation"""
90
  global MODELS, WARMUP_STATUS
91
  local_only = WARMUP_STATUS["complete"]
92
 
93
  # 1. Faster-Whisper
94
  if action in ["stt", "s2st"]:
95
+ stt_on_gpu = False
96
+ try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
97
  except: pass
98
+ if not stt_on_gpu:
99
+ print(f"πŸŽ™οΈ [v95] Activating Whisper (GPU)...")
100
+ if MODELS["stt"]: del MODELS["stt"]
101
+ gc.collect(); torch.cuda.empty_cache()
102
  MODELS["stt"] = WhisperModel("large-v3", device="cuda", compute_type="float16", local_files_only=local_only)
103
 
104
  # 2. XTTS-v2
105
  if action in ["tts", "s2st"]:
106
+ tts_on_gpu = False
107
  try:
108
  curr = str(next(MODELS["tts"].synthesizer.tts_model.parameters()).device)
109
+ tts_on_gpu = "cuda" in curr
110
  except: pass
111
+ if MODELS["tts"] is None or not tts_on_gpu:
112
+ print(f"πŸ”Š [v95] Activating XTTS-v2 (GPU)...")
113
  if MODELS["tts"] is None:
114
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
115
  else: MODELS["tts"].to("cuda")
116
 
117
+ # 3. Helpers (Chatterbox stays on CPU for faster session startup)
118
  if MODELS["denoiser"] is None:
119
  try: MODELS["denoiser"] = init_df()
120
  except: pass
121
  if MODELS["translate"] is None: MODELS["translate"] = "active"
122
+ chatterbox_utils.load_chatterbox(device="cpu")
123
+
124
+ def release_gpu_models():
125
+ """v95: PERSISTENT RAM LOADING - Move models back to CPU to save GPU quota"""
126
+ global MODELS
127
+ print("🧹 [v95] Releasing GPU resources. Returning models to System RAM...")
128
+
129
+ # 1. Whisper: Re-init on CPU (int8) to free GPU handles
130
+ if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
131
+ del MODELS["stt"]
132
+ MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8", local_files_only=True)
133
+
134
+ # 2. XTTS: Move weights to CPU
135
+ if MODELS["tts"]:
136
+ try: MODELS["tts"].to("cpu")
137
+ except: pass
138
+
139
+ gc.collect()
140
+ if torch.cuda.is_available():
141
+ torch.cuda.empty_cache()
142
+ print("βœ… GPU quota saved. Session is Warm but Idle.")
143
 
144
  def warmup_task():
145
+ """Silent Background Warmup (Resident RAM)"""
146
  global WARMUP_STATUS
147
  with WARMUP_LOCK:
148
  if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
149
  WARMUP_STATUS["in_progress"] = True
150
 
151
+ print("\nπŸ”₯ --- SILENT WARMUP: RESIDENT RAM LOADING (v95) ---")
152
  start = time.time()
153
  try:
154
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
155
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
156
  chatterbox_utils.warmup_chatterbox()
157
  WARMUP_STATUS["complete"] = True
158
+ print(f"βœ… --- SYSTEM READY: MODELS RESIDENT IN RAM ({time.time()-start:.2f}s) --- \n")
159
  except Exception as e:
160
  print(f"❌ Warmup fail: {e}")
 
161
  finally:
162
  WARMUP_STATUS["in_progress"] = False
163
 
 
210
  def core_process(request_dict):
211
  action = request_dict.get("action")
212
  t1 = time.time()
213
+ print(f"--- [v95] πŸš€ GPU SESSION: {action} ---")
214
  activate_gpu_models(action)
215
  try:
216
  if action == "stt": res = _stt_logic(request_dict)
 
224
  elif action == "health": res = {"status": "awake"}
225
  else: res = {"error": f"Unknown action: {action}"}
226
  finally:
227
+ print(f"--- [v95] ✨ SUCCESS: {action} ({time.time()-t1:.2f}s) ---")
228
+ release_gpu_models()
 
229
  return res
230
 
231
  app = FastAPI()
232
 
233
  @app.on_event("startup")
234
  async def startup_event():
 
 
235
  Thread(target=warmup_task, daemon=True).start()
236
 
237
  @app.post("/api/v1/process")
 
241
 
242
  @app.get("/health")
243
  def health():
244
+ return {"status": "ok", "warm": WARMUP_STATUS["complete"], "time": time.ctime()}
245
 
246
  @app.post("/api/v1/clear_cache")
247
  async def clear_cache():
248
  try:
249
+ release_gpu_models()
250
  temp_dir = tempfile.gettempdir()
251
  for f in os.listdir(temp_dir):
252
  if f.endswith(".wav") or f.startswith("tm"):
 
262
  demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="πŸš€ AI Engine")
263
  app = gr.mount_gradio_app(app, demo, path="/")
264
 
 
265
  if __name__ == "__main__":
 
266
  uvicorn.run(app, host="0.0.0.0", port=7860, log_level="error")