TGPro1 committed on
Commit
5e62ae0
·
verified ·
1 Parent(s): 1b24af3

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +50 -62
app.py CHANGED
@@ -17,12 +17,10 @@ import logging
17
  from threading import Thread, Lock
18
  from huggingface_hub import snapshot_download
19
 
20
- # ๐Ÿ›ก๏ธ 1. SILENCE LOGS (User requested zero clutter)
21
  logging.getLogger("transformers").setLevel(logging.ERROR)
22
  logging.getLogger("TTS").setLevel(logging.ERROR)
23
- logging.getLogger("onnxruntime").setLevel(logging.ERROR)
24
  os.environ["CT2_VERBOSE"] = "0"
25
- os.environ["KMP_WARNINGS"] = "0"
26
 
27
  # ๐Ÿ› ๏ธ 2. COMPATIBILITY PATCHES
28
  if "torchaudio.backend" not in sys.modules:
@@ -78,8 +76,8 @@ except ImportError:
78
  if f is None: return lambda x: x
79
  return f
80
 
81
- # FORCE BUILD TRIGGER: 12:25:00 Jan 21 2026
82
- # v96: Power-Worker & Zero-Latency Chatterbox (GPU). 4x Workers for Whisper.
83
 
84
  os.environ["COQUI_TOS_AGREED"] = "1"
85
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
@@ -88,28 +86,30 @@ WARMUP_STATUS = {"complete": False, "in_progress": False}
88
  WARMUP_LOCK = Lock()
89
 
90
  def activate_gpu_models(action):
91
- """v96: Power-Mode Activation"""
92
  global MODELS, WARMUP_STATUS
93
  local_only = WARMUP_STATUS["complete"]
94
 
95
- device = "cuda" if torch.cuda.is_available() else "cpu"
96
-
97
- # 1. Faster-Whisper Power-Mode
98
  if action in ["stt", "s2st"]:
99
  stt_on_gpu = False
100
  try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
101
  except: pass
102
  if not stt_on_gpu:
103
- print(f"๐ŸŽ™๏ธ [v96] Power-Mode Activation: Whisper (GPU) with 4-Workers...")
104
- if MODELS["stt"]: del MODELS["stt"]
105
- gc.collect(); torch.cuda.empty_cache()
106
- # Optimized for H200 MIG 3g
107
- MODELS["stt"] = WhisperModel(
108
- "large-v3",
109
- device="cuda",
110
- compute_type="int8_float16",
111
- local_files_only=local_only
112
- )
 
 
 
 
113
 
114
  # 2. XTTS-v2
115
  if action in ["tts", "s2st"]:
@@ -119,14 +119,18 @@ def activate_gpu_models(action):
119
  tts_on_gpu = "cuda" in curr
120
  except: pass
121
  if MODELS["tts"] is None or not tts_on_gpu:
122
- print(f"๐Ÿ”Š [v96] Activating XTTS-v2 (GPU)...")
123
- if MODELS["tts"] is None:
124
- MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
125
- else: MODELS["tts"].to("cuda")
 
 
 
 
 
126
 
127
- # 3. Chatterbox GPU-Mode (Zero-Latency)
128
- # v96: Moving to GPU during active sessions to eliminate CPU delays
129
- chatterbox_utils.load_chatterbox(device=device)
130
 
131
  # 4. Helpers
132
  if MODELS["denoiser"] is None:
@@ -135,47 +139,36 @@ def activate_gpu_models(action):
135
  if MODELS["translate"] is None: MODELS["translate"] = "active"
136
 
137
  def release_gpu_models():
138
- """v96: Persistent RAM Handoff (Serverless)"""
139
  global MODELS
140
- print("๐Ÿงน [v96] Releasing GPU resources. Moving models to System RAM (CPU)...")
141
-
142
- # Whisper: Switch to CPU (Fast transition)
143
- if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
144
- del MODELS["stt"]
145
- MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8", local_files_only=True)
146
-
147
- # XTTS: Move to CPU
148
- if MODELS["tts"]:
149
- try: MODELS["tts"].to("cpu")
150
- except: pass
151
-
152
- # Chatterbox: Move session to CPU for idle
153
- chatterbox_utils.load_chatterbox(device="cpu")
154
-
155
  gc.collect()
156
  if torch.cuda.is_available(): torch.cuda.empty_cache()
157
- print("โœ… System in Warm-Idle (System RAM).")
158
 
159
  def warmup_task():
160
- """Silent Power-Warmup"""
161
  global WARMUP_STATUS
162
  with WARMUP_LOCK:
163
  if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
164
  WARMUP_STATUS["in_progress"] = True
165
-
166
- print("\n๐Ÿ”ฅ --- SILENT POWER-WARMUP STARTED (v96) ---")
167
- start = time.time()
168
  try:
169
- # Pre-load to RAM
170
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
171
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
172
  chatterbox_utils.warmup_chatterbox()
173
  WARMUP_STATUS["complete"] = True
174
- print(f"โœ… --- PEAK STABILITY: SYSTEM WARM ({time.time()-start:.2f}s) --- \n")
175
- except Exception as e:
176
- print(f"โŒ Warmup fail: {e}")
177
- finally:
178
- WARMUP_STATUS["in_progress"] = False
179
 
180
  def _stt_logic(request_dict):
181
  audio_bytes = base64.b64decode(request_dict.get("file"))
@@ -183,7 +176,7 @@ def _stt_logic(request_dict):
183
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
184
  f.write(audio_bytes); temp_path = f.name
185
  try:
186
- # v96: num_workers=4 for extreme speed
187
  segments, _ = MODELS["stt"].transcribe(temp_path, language=lang, beam_size=1)
188
  return {"text": " ".join([s.text for s in segments]).strip()}
189
  finally:
@@ -197,7 +190,6 @@ def _tts_logic(text, lang, speaker_wav_b64):
197
  XTTS_MAP = {"en": "en", "de": "de", "fr": "fr", "es": "es", "it": "it", "pl": "pl", "pt": "pt", "tr": "tr", "ru": "ru", "nl": "nl", "cs": "cs", "ar": "ar", "hu": "hu", "ko": "ko", "hi": "hi", "zh": "zh-cn"}
198
  clean_lang = lang.strip().lower().split('-')[0]
199
  mapped_lang = XTTS_MAP.get(clean_lang) or ("zh-cn" if clean_lang == "zh" else None)
200
-
201
  if mapped_lang:
202
  speaker_wav_path = None
203
  if speaker_wav_b64:
@@ -213,8 +205,6 @@ def _tts_logic(text, lang, speaker_wav_b64):
213
  finally:
214
  if speaker_wav_path and "default_speaker" not in speaker_wav_path and os.path.exists(speaker_wav_path): os.unlink(speaker_wav_path)
215
  if 'output_path' in locals() and os.path.exists(output_path): os.unlink(output_path)
216
-
217
- # Fallback to Chatterbox (Already on GPU if in core_process)
218
  try:
219
  temp_ref = None
220
  if speaker_wav_b64:
@@ -230,7 +220,7 @@ def _tts_logic(text, lang, speaker_wav_b64):
230
  def core_process(request_dict):
231
  action = request_dict.get("action")
232
  t1 = time.time()
233
- print(f"--- [v96] ๐Ÿš€ POWER-MODE GPU SESSION: {action} ---")
234
  activate_gpu_models(action)
235
  try:
236
  if action == "stt": res = _stt_logic(request_dict)
@@ -244,12 +234,11 @@ def core_process(request_dict):
244
  elif action == "health": res = {"status": "awake"}
245
  else: res = {"error": f"Unknown action: {action}"}
246
  finally:
247
- print(f"--- [v96] โœจ POWER-DONE: {action} ({time.time()-t1:.2f}s) ---")
248
  release_gpu_models()
249
  return res
250
 
251
  app = FastAPI()
252
-
253
  @app.on_event("startup")
254
  async def startup_event():
255
  Thread(target=warmup_task, daemon=True).start()
@@ -260,8 +249,7 @@ async def api_process(request: Request):
260
  except Exception as e: return {"error": str(e)}
261
 
262
  @app.get("/health")
263
- def health():
264
- return {"status": "ok", "power_warm": WARMUP_STATUS["complete"], "time": time.ctime()}
265
 
266
  @app.post("/api/v1/clear_cache")
267
  async def clear_cache():
 
17
  from threading import Thread, Lock
18
  from huggingface_hub import snapshot_download
19
 
20
+ # ๐Ÿ›ก๏ธ 1. SILENCE LOGS
21
  logging.getLogger("transformers").setLevel(logging.ERROR)
22
  logging.getLogger("TTS").setLevel(logging.ERROR)
 
23
  os.environ["CT2_VERBOSE"] = "0"
 
24
 
25
  # ๐Ÿ› ๏ธ 2. COMPATIBILITY PATCHES
26
  if "torchaudio.backend" not in sys.modules:
 
76
  if f is None: return lambda x: x
77
  return f
78
 
79
+ # FORCE BUILD TRIGGER: 12:35:00 Jan 21 2026
80
+ # v97: Bulletproof Handoff (Loop Prevention). Stability Focus (1-Worker).
81
 
82
  os.environ["COQUI_TOS_AGREED"] = "1"
83
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
 
86
  WARMUP_LOCK = Lock()
87
 
88
  def activate_gpu_models(action):
89
+ """v97: Stability-First Activation"""
90
  global MODELS, WARMUP_STATUS
91
  local_only = WARMUP_STATUS["complete"]
92
 
93
+ # 1. Faster-Whisper: Stability Focus
 
 
94
  if action in ["stt", "s2st"]:
95
  stt_on_gpu = False
96
  try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
97
  except: pass
98
  if not stt_on_gpu:
99
+ print(f"๐ŸŽ™๏ธ [v97] Activating Whisper on GPU (Stability Mode)...")
100
+ try:
101
+ if MODELS["stt"]: del MODELS["stt"]
102
+ gc.collect(); torch.cuda.empty_cache()
103
+ # Reduced workers to 1 to prevent MIG OOM/Crash loops
104
+ MODELS["stt"] = WhisperModel(
105
+ "large-v3",
106
+ device="cuda",
107
+ compute_type="float16", # Stable standard
108
+ local_files_only=local_only
109
+ )
110
+ except Exception as e:
111
+ print(f"โš ๏ธ Whisper GPU failed: {e}. Falling back to CPU.")
112
+ MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8", local_files_only=True)
113
 
114
  # 2. XTTS-v2
115
  if action in ["tts", "s2st"]:
 
119
  tts_on_gpu = "cuda" in curr
120
  except: pass
121
  if MODELS["tts"] is None or not tts_on_gpu:
122
+ print(f"๐Ÿ”Š [v97] Activating XTTS-v2 (GPU)...")
123
+ try:
124
+ if MODELS["tts"] is None:
125
+ MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
126
+ else: MODELS["tts"].to("cuda")
127
+ except Exception as e:
128
+ print(f"โš ๏ธ XTTS GPU failed: {e}. Staying on CPU.")
129
+ if MODELS["tts"] is None:
130
+ MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
131
 
132
+ # 3. Chatterbox: Accelerated in Session
133
+ chatterbox_utils.load_chatterbox(device="cuda" if torch.cuda.is_available() else "cpu")
 
134
 
135
  # 4. Helpers
136
  if MODELS["denoiser"] is None:
 
139
  if MODELS["translate"] is None: MODELS["translate"] = "active"
140
 
141
  def release_gpu_models():
142
+ """v97: Clean Exit Handoff"""
143
  global MODELS
144
+ print("๐Ÿงน [v97] Releasing resources...")
145
+ try:
146
+ if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
147
+ del MODELS["stt"]
148
+ MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8", local_files_only=True)
149
+ if MODELS["tts"]:
150
+ try: MODELS["tts"].to("cpu")
151
+ except: pass
152
+ chatterbox_utils.load_chatterbox(device="cpu")
153
+ except: pass
 
 
 
 
 
154
  gc.collect()
155
  if torch.cuda.is_available(): torch.cuda.empty_cache()
 
156
 
157
  def warmup_task():
158
+ """Silent Warmup (Resident RAM)"""
159
  global WARMUP_STATUS
160
  with WARMUP_LOCK:
161
  if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
162
  WARMUP_STATUS["in_progress"] = True
163
+ print("\n๐Ÿ”ฅ --- SILENT WARMUP STARTED (v97) ---")
 
 
164
  try:
 
165
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
166
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
167
  chatterbox_utils.warmup_chatterbox()
168
  WARMUP_STATUS["complete"] = True
169
+ print(f"โœ… --- SYSTEM WARM --- \n")
170
+ except: pass
171
+ finally: WARMUP_STATUS["in_progress"] = False
 
 
172
 
173
  def _stt_logic(request_dict):
174
  audio_bytes = base64.b64decode(request_dict.get("file"))
 
176
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
177
  f.write(audio_bytes); temp_path = f.name
178
  try:
179
+ # Beam size 1 for maximum speed and stability
180
  segments, _ = MODELS["stt"].transcribe(temp_path, language=lang, beam_size=1)
181
  return {"text": " ".join([s.text for s in segments]).strip()}
182
  finally:
 
190
  XTTS_MAP = {"en": "en", "de": "de", "fr": "fr", "es": "es", "it": "it", "pl": "pl", "pt": "pt", "tr": "tr", "ru": "ru", "nl": "nl", "cs": "cs", "ar": "ar", "hu": "hu", "ko": "ko", "hi": "hi", "zh": "zh-cn"}
191
  clean_lang = lang.strip().lower().split('-')[0]
192
  mapped_lang = XTTS_MAP.get(clean_lang) or ("zh-cn" if clean_lang == "zh" else None)
 
193
  if mapped_lang:
194
  speaker_wav_path = None
195
  if speaker_wav_b64:
 
205
  finally:
206
  if speaker_wav_path and "default_speaker" not in speaker_wav_path and os.path.exists(speaker_wav_path): os.unlink(speaker_wav_path)
207
  if 'output_path' in locals() and os.path.exists(output_path): os.unlink(output_path)
 
 
208
  try:
209
  temp_ref = None
210
  if speaker_wav_b64:
 
220
  def core_process(request_dict):
221
  action = request_dict.get("action")
222
  t1 = time.time()
223
+ print(f"--- [v97] ๐Ÿš€ GPU SESSION: {action} ---")
224
  activate_gpu_models(action)
225
  try:
226
  if action == "stt": res = _stt_logic(request_dict)
 
234
  elif action == "health": res = {"status": "awake"}
235
  else: res = {"error": f"Unknown action: {action}"}
236
  finally:
237
+ print(f"--- [v97] โœจ END: {action} ({time.time()-t1:.2f}s) ---")
238
  release_gpu_models()
239
  return res
240
 
241
  app = FastAPI()
 
242
  @app.on_event("startup")
243
  async def startup_event():
244
  Thread(target=warmup_task, daemon=True).start()
 
249
  except Exception as e: return {"error": str(e)}
250
 
251
  @app.get("/health")
252
+ def health(): return {"status": "ok", "warm": WARMUP_STATUS["complete"], "time": time.ctime()}
 
253
 
254
  @app.post("/api/v1/clear_cache")
255
  async def clear_cache():