TGPro1 committed
Commit 66d68db · verified
1 Parent(s): fc295c3

Upload app.py with huggingface_hub
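A commit message like this is the default one produced by the huggingface_hub upload API. A minimal sketch of such an upload via HfApi.upload_file; the repo id and repo type are hypothetical placeholders, not taken from this page:

from huggingface_hub import HfApi

api = HfApi()  # authenticates via a cached login or the HF_TOKEN environment variable
api.upload_file(
    path_or_fileobj="app.py",        # local file to push
    path_in_repo="app.py",           # destination path inside the repo
    repo_id="TGPro1/<space-name>",   # hypothetical: the actual repo id is not shown on this page
    repo_type="space",               # assumption: app.py here serves a Space
    commit_message="Upload app.py with huggingface_hub",
)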

Files changed (1)
app.py +37 -24
app.py CHANGED
@@ -1,5 +1,6 @@
 from fastapi import FastAPI, Request, Response
 from fastapi.responses import StreamingResponse
+from contextlib import asynccontextmanager
 import gradio as gr
 import uvicorn
 import base64
@@ -17,10 +18,12 @@ import logging
 from threading import Thread, Lock
 from huggingface_hub import snapshot_download
 
-# 🛡️ 1. SILENCE LOGS
+# 🛡️ 1. SILENCE LOGS & WARNINGS
 logging.getLogger("transformers").setLevel(logging.ERROR)
 logging.getLogger("TTS").setLevel(logging.ERROR)
+logging.getLogger("onnxruntime").setLevel(logging.ERROR)
 os.environ["CT2_VERBOSE"] = "0"
+os.environ["ORT_LOGGING_LEVEL"] = "3"  # Silence ONNX discovery noise
 
 # 🛠️ 2. COMPATIBILITY PATCHES
 if "torchaudio.backend" not in sys.modules:
@@ -76,8 +79,8 @@ except ImportError:
     if f is None: return lambda x: x
     return f
 
-# FORCE BUILD TRIGGER: 12:40:00 Jan 21 2026
-# v98: Peak Performance Re-enabled (4-Workers).
+# FORCE BUILD TRIGGER: 12:45:00 Jan 21 2026
+# v99: Fix Queue Loop. Health on CPU. Re-enable 4 Workers. Lifespan events.
 
 os.environ["COQUI_TOS_AGREED"] = "1"
 MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
@@ -86,17 +89,17 @@ WARMUP_STATUS = {"complete": False, "in_progress": False}
 WARMUP_LOCK = Lock()
 
 def activate_gpu_models(action):
-    """v98: High-Parallelism Peak Mode"""
+    """v99: Optimized GPU Session Activation"""
     global MODELS, WARMUP_STATUS
    local_only = WARMUP_STATUS["complete"]
 
-    # 1. Faster-Whisper: Peak Performance with 4 workers
+    # 1. Faster-Whisper: Peak Performance Requested
     if action in ["stt", "s2st"]:
         stt_on_gpu = False
         try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
         except: pass
         if not stt_on_gpu:
-            print(f"🎙️ [v98] Peak Performance Activation: Whisper (GPU) with 4-Workers...")
+            print(f"🎙️ [v99] Activating Whisper (GPU: 4-Workers)...")
             try:
                 if MODELS["stt"]: del MODELS["stt"]
                 gc.collect(); torch.cuda.empty_cache()
@@ -104,11 +107,11 @@ def activate_gpu_models(action):
                     "large-v3",
                     device="cuda",
                     compute_type="int8_float16",
-                    num_workers=4, # RESTORED (User request)
+                    num_workers=4,
                     local_files_only=local_only
                 )
             except Exception as e:
-                print(f"⚠️ Whisper GPU Peak failed: {e}. Falling back to standard mode.")
+                print(f"⚠️ GPU Init failed: {e}. Falling back to standard.")
                 MODELS["stt"] = WhisperModel("large-v3", device="cuda", compute_type="float16", local_files_only=local_only)
 
     # 2. XTTS-v2
@@ -119,12 +122,12 @@ def activate_gpu_models(action):
         tts_on_gpu = "cuda" in curr
     except: pass
     if MODELS["tts"] is None or not tts_on_gpu:
-        print(f"🔊 [v98] Activating XTTS-v2 (GPU)...")
+        print(f"🔊 [v99] Activating XTTS-v2 (GPU)...")
         if MODELS["tts"] is None:
             MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
         else: MODELS["tts"].to("cuda")
 
-    # 3. Chatterbox Accelerated
+    # 3. Chatterbox GPU-Mode
     chatterbox_utils.load_chatterbox(device="cuda" if torch.cuda.is_available() else "cpu")
 
     # 4. Helpers
@@ -134,9 +137,9 @@ def activate_gpu_models(action):
     if MODELS["translate"] is None: MODELS["translate"] = "active"
 
 def release_gpu_models():
-    """v98: Clean Idle (Resident RAM)"""
+    """v99: Persistence in RAM (CPU)"""
     global MODELS
-    print("🧹 [v98] Session complete. Releasing GPU...")
+    print("🧹 [v99] Releasing GPU. Engines staying WARM in RAM.")
     try:
         if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
             del MODELS["stt"]
@@ -150,19 +153,20 @@ def release_gpu_models():
     if torch.cuda.is_available(): torch.cuda.empty_cache()
 
 def warmup_task():
-    """Silent Power-Warmup"""
+    """V99: Resident RAM Pre-loading"""
     global WARMUP_STATUS
     with WARMUP_LOCK:
         if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
         WARMUP_STATUS["in_progress"] = True
-    print("\n🔥 --- SILENT POWER-WARMUP STARTED (v98) ---")
+    print("\n🔥 --- V99: PEAK WARMUP STARTED ---")
     try:
         MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
         MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
         chatterbox_utils.warmup_chatterbox()
         WARMUP_STATUS["complete"] = True
-        print(f"✅ --- PEAK READY -- \n")
-    except: pass
+        print(f"✅ --- SYSTEM READY --- \n")
+    except Exception as e:
+        print(f"❌ Warmup fail: {e}")
     finally: WARMUP_STATUS["in_progress"] = False
 
 def _stt_logic(request_dict):
@@ -171,7 +175,6 @@ def _stt_logic(request_dict):
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
         f.write(audio_bytes); temp_path = f.name
     try:
-        # Multi-worker Power (4 workers)
         segments, _ = MODELS["stt"].transcribe(temp_path, language=lang, beam_size=1)
         return {"text": " ".join([s.text for s in segments]).strip()}
     finally:
@@ -215,7 +218,7 @@ def _tts_logic(text, lang, speaker_wav_b64):
 def core_process(request_dict):
     action = request_dict.get("action")
     t1 = time.time()
-    print(f"--- [v98] 🚀 PEAK GPU SESSION: {action} ---")
+    print(f"--- [v99] 🚀 GPU SESSION: {action} ---")
     activate_gpu_models(action)
     try:
         if action == "stt": res = _stt_logic(request_dict)
@@ -226,21 +229,31 @@ def core_process(request_dict):
             translated = _translate_logic(stt_res.get("text", ""), request_dict.get("target_lang"))
             tts_res = _tts_logic(translated, request_dict.get("target_lang"), request_dict.get("speaker_wav"))
             res = {"text": stt_res.get("text"), "translated": translated, "audio": tts_res.get("audio")}
-        elif action == "health": res = {"status": "awake"}
         else: res = {"error": f"Unknown action: {action}"}
     finally:
-        print(f"--- [v98] ✨ PEAK-DONE: {action} ({time.time()-t1:.2f}s) ---")
+        print(f"--- [v99] ✨ SUCCESS: {action} ({time.time()-t1:.2f}s) ---")
         release_gpu_models()
     return res
 
-app = FastAPI()
-@app.on_event("startup")
-async def startup_event():
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    # Startup: Background Warmup
     Thread(target=warmup_task, daemon=True).start()
+    yield
+    # Shutdown logic (optional)
+    pass
+
+app = FastAPI(lifespan=lifespan)
 
 @app.post("/api/v1/process")
 async def api_process(request: Request):
-    try: return core_process(await request.json())
+    try:
+        req_data = await request.json()
+        action = req_data.get("action")
+        # 🔥 V99 CRITICAL FIX: Handle 'health' on CPU to prevent GPU queue loops
+        if action == "health":
+            return {"status": "awake", "warm": WARMUP_STATUS["complete"]}
+        return core_process(req_data)
     except Exception as e: return {"error": str(e)}
 
 @app.get("/health")
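The key v99 fix above short-circuits {"action": "health"} inside api_process, so health probes are answered on the CPU and never start a GPU session. A minimal probe sketch, assuming the Space exposes the route shown above; the host below is a hypothetical placeholder:

import requests

# Route and payload shape come from app.py above; the base URL is hypothetical.
resp = requests.post(
    "https://<your-space-host>/api/v1/process",
    json={"action": "health"},
    timeout=10,
)
print(resp.json())  # expected shape: {"status": "awake", "warm": <true|false>}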