TGPro1 committed on
Commit
fc295c3
·
verified ·
1 Parent(s): 5e62ae0

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +22 -27
app.py CHANGED
@@ -76,8 +76,8 @@ except ImportError:
76
  if f is None: return lambda x: x
77
  return f
78
 
79
- # FORCE BUILD TRIGGER: 12:35:00 Jan 21 2026
80
- # v97: Bulletproof Handoff (Loop Prevention). Stability Focus (1-Worker).
81
 
82
  os.environ["COQUI_TOS_AGREED"] = "1"
83
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
@@ -86,30 +86,30 @@ WARMUP_STATUS = {"complete": False, "in_progress": False}
86
  WARMUP_LOCK = Lock()
87
 
88
  def activate_gpu_models(action):
89
- """v97: Stability-First Activation"""
90
  global MODELS, WARMUP_STATUS
91
  local_only = WARMUP_STATUS["complete"]
92
 
93
- # 1. Faster-Whisper: Stability Focus
94
  if action in ["stt", "s2st"]:
95
  stt_on_gpu = False
96
  try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
97
  except: pass
98
  if not stt_on_gpu:
99
- print(f"πŸŽ™οΈ [v97] Activating Whisper on GPU (Stability Mode)...")
100
  try:
101
  if MODELS["stt"]: del MODELS["stt"]
102
  gc.collect(); torch.cuda.empty_cache()
103
- # Reduced workers to 1 to prevent MIG OOM/Crash loops
104
  MODELS["stt"] = WhisperModel(
105
  "large-v3",
106
  device="cuda",
107
- compute_type="float16", # Stable standard
 
108
  local_files_only=local_only
109
  )
110
  except Exception as e:
111
- print(f"⚠️ Whisper GPU failed: {e}. Falling back to CPU.")
112
- MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8", local_files_only=True)
113
 
114
  # 2. XTTS-v2
115
  if action in ["tts", "s2st"]:
@@ -119,17 +119,12 @@ def activate_gpu_models(action):
119
  tts_on_gpu = "cuda" in curr
120
  except: pass
121
  if MODELS["tts"] is None or not tts_on_gpu:
122
- print(f"πŸ”Š [v97] Activating XTTS-v2 (GPU)...")
123
- try:
124
- if MODELS["tts"] is None:
125
- MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
126
- else: MODELS["tts"].to("cuda")
127
- except Exception as e:
128
- print(f"⚠️ XTTS GPU failed: {e}. Staying on CPU.")
129
- if MODELS["tts"] is None:
130
- MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
131
 
132
- # 3. Chatterbox: Accelerated in Session
133
  chatterbox_utils.load_chatterbox(device="cuda" if torch.cuda.is_available() else "cpu")
134
 
135
  # 4. Helpers
@@ -139,9 +134,9 @@ def activate_gpu_models(action):
139
  if MODELS["translate"] is None: MODELS["translate"] = "active"
140
 
141
  def release_gpu_models():
142
- """v97: Clean Exit Handoff"""
143
  global MODELS
144
- print("🧹 [v97] Releasing resources...")
145
  try:
146
  if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
147
  del MODELS["stt"]
@@ -155,18 +150,18 @@ def release_gpu_models():
155
  if torch.cuda.is_available(): torch.cuda.empty_cache()
156
 
157
  def warmup_task():
158
- """Silent Warmup (Resident RAM)"""
159
  global WARMUP_STATUS
160
  with WARMUP_LOCK:
161
  if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
162
  WARMUP_STATUS["in_progress"] = True
163
- print("\nπŸ”₯ --- SILENT WARMUP STARTED (v97) ---")
164
  try:
165
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
166
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
167
  chatterbox_utils.warmup_chatterbox()
168
  WARMUP_STATUS["complete"] = True
169
- print(f"βœ… --- SYSTEM WARM --- \n")
170
  except: pass
171
  finally: WARMUP_STATUS["in_progress"] = False
172
 
@@ -176,7 +171,7 @@ def _stt_logic(request_dict):
176
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
177
  f.write(audio_bytes); temp_path = f.name
178
  try:
179
- # Beam size 1 for maximum speed and stability
180
  segments, _ = MODELS["stt"].transcribe(temp_path, language=lang, beam_size=1)
181
  return {"text": " ".join([s.text for s in segments]).strip()}
182
  finally:
@@ -220,7 +215,7 @@ def _tts_logic(text, lang, speaker_wav_b64):
220
  def core_process(request_dict):
221
  action = request_dict.get("action")
222
  t1 = time.time()
223
- print(f"--- [v97] πŸš€ GPU SESSION: {action} ---")
224
  activate_gpu_models(action)
225
  try:
226
  if action == "stt": res = _stt_logic(request_dict)
@@ -234,7 +229,7 @@ def core_process(request_dict):
234
  elif action == "health": res = {"status": "awake"}
235
  else: res = {"error": f"Unknown action: {action}"}
236
  finally:
237
- print(f"--- [v97] ✨ END: {action} ({time.time()-t1:.2f}s) ---")
238
  release_gpu_models()
239
  return res
240
 
 
76
  if f is None: return lambda x: x
77
  return f
78
 
79
+ # FORCE BUILD TRIGGER: 12:40:00 Jan 21 2026
80
+ # v98: Peak Performance Re-enabled (4-Workers).
81
 
82
  os.environ["COQUI_TOS_AGREED"] = "1"
83
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
 
86
  WARMUP_LOCK = Lock()
87
 
88
  def activate_gpu_models(action):
89
+ """v98: High-Parallelism Peak Mode"""
90
  global MODELS, WARMUP_STATUS
91
  local_only = WARMUP_STATUS["complete"]
92
 
93
+ # 1. Faster-Whisper: Peak Performance with 4 workers
94
  if action in ["stt", "s2st"]:
95
  stt_on_gpu = False
96
  try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
97
  except: pass
98
  if not stt_on_gpu:
99
+ print(f"πŸŽ™οΈ [v98] Peak Performance Activation: Whisper (GPU) with 4-Workers...")
100
  try:
101
  if MODELS["stt"]: del MODELS["stt"]
102
  gc.collect(); torch.cuda.empty_cache()
 
103
  MODELS["stt"] = WhisperModel(
104
  "large-v3",
105
  device="cuda",
106
+ compute_type="int8_float16",
107
+ num_workers=4, # RESTORED (User request)
108
  local_files_only=local_only
109
  )
110
  except Exception as e:
111
+ print(f"⚠️ Whisper GPU Peak failed: {e}. Falling back to standard mode.")
112
+ MODELS["stt"] = WhisperModel("large-v3", device="cuda", compute_type="float16", local_files_only=local_only)
113
 
114
  # 2. XTTS-v2
115
  if action in ["tts", "s2st"]:
 
119
  tts_on_gpu = "cuda" in curr
120
  except: pass
121
  if MODELS["tts"] is None or not tts_on_gpu:
122
+ print(f"πŸ”Š [v98] Activating XTTS-v2 (GPU)...")
123
+ if MODELS["tts"] is None:
124
+ MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
125
+ else: MODELS["tts"].to("cuda")
 
 
 
 
 
126
 
127
+ # 3. Chatterbox Accelerated
128
  chatterbox_utils.load_chatterbox(device="cuda" if torch.cuda.is_available() else "cpu")
129
 
130
  # 4. Helpers
 
134
  if MODELS["translate"] is None: MODELS["translate"] = "active"
135
 
136
  def release_gpu_models():
137
+ """v98: Clean Idle (Resident RAM)"""
138
  global MODELS
139
+ print("🧹 [v98] Session complete. Releasing GPU...")
140
  try:
141
  if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
142
  del MODELS["stt"]
 
150
  if torch.cuda.is_available(): torch.cuda.empty_cache()
151
 
152
  def warmup_task():
153
+ """Silent Power-Warmup"""
154
  global WARMUP_STATUS
155
  with WARMUP_LOCK:
156
  if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
157
  WARMUP_STATUS["in_progress"] = True
158
+ print("\nπŸ”₯ --- SILENT POWER-WARMUP STARTED (v98) ---")
159
  try:
160
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
161
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
162
  chatterbox_utils.warmup_chatterbox()
163
  WARMUP_STATUS["complete"] = True
164
+ print(f"βœ… --- PEAK READY -- \n")
165
  except: pass
166
  finally: WARMUP_STATUS["in_progress"] = False
167
 
 
171
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
172
  f.write(audio_bytes); temp_path = f.name
173
  try:
174
+ # Multi-worker Power (4 workers)
175
  segments, _ = MODELS["stt"].transcribe(temp_path, language=lang, beam_size=1)
176
  return {"text": " ".join([s.text for s in segments]).strip()}
177
  finally:
 
215
  def core_process(request_dict):
216
  action = request_dict.get("action")
217
  t1 = time.time()
218
+ print(f"--- [v98] πŸš€ PEAK GPU SESSION: {action} ---")
219
  activate_gpu_models(action)
220
  try:
221
  if action == "stt": res = _stt_logic(request_dict)
 
229
  elif action == "health": res = {"status": "awake"}
230
  else: res = {"error": f"Unknown action: {action}"}
231
  finally:
232
+ print(f"--- [v98] ✨ PEAK-DONE: {action} ({time.time()-t1:.2f}s) ---")
233
  release_gpu_models()
234
  return res
235