TGPro1 committed on
Commit
a5419b4
·
verified ·
1 Parent(s): 5da62fe

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +63 -55
app.py CHANGED
@@ -13,8 +13,9 @@ import torchaudio
13
  import gc
14
  import sys
15
  import types
 
16
 
17
- # 🛠️ 1. CRITICAL COMPATIBILITY MONKEYPATCHES
18
  print("🛠️ Applying compatibility monkeypatches...")
19
  if "torchaudio.backend" not in sys.modules:
20
  backend = types.ModuleType("torchaudio.backend")
@@ -54,13 +55,13 @@ try:
54
  print("✅ Torchaudio patched")
55
  except Exception as e: print(f"⚠️ Patch failed: {e}")
56
 
57
- # 📦 2. PRE-LOADING (v90 Optimization)
58
- print("📦 Pre-loading AI Engines into RAM...")
59
  import chatterbox_utils
 
60
  from faster_whisper import WhisperModel
61
  from TTS.api import TTS
62
- from df.enhance import init_df, enhance, load_audio, save_audio
63
- import deep_translator
64
  print("✅ Imports Complete")
65
 
66
  try:
@@ -73,57 +74,49 @@ except ImportError:
73
  if f is None: return lambda x: x
74
  return f
75
 
76
- # FORCE BUILD TRIGGER: 10:55:00 Jan 21 2026
77
- # v90: Fixed Whisper CUDA 'Invalid Argument' crash. (Cleaner GPU Handoff)
78
 
79
  os.environ["COQUI_TOS_AGREED"] = "1"
 
80
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
81
 
82
  def activate_gpu_models(action):
83
- """v90: Optimized GPU Activation with clean handoff"""
84
  global MODELS
85
 
86
- # 1. Faster-Whisper GPU Activation
87
  if action in ["stt", "s2st"]:
88
- stt_on_gpu = False
89
- try:
90
- if MODELS["stt"] is not None and hasattr(MODELS["stt"], "model") and MODELS["stt"].model.device == "cuda":
91
- stt_on_gpu = True
92
- except: pass
93
-
94
- if not stt_on_gpu:
95
- print(f"🎙️ Activating Whisper on GPU for {action}...")
96
- # 🧹 CRITICAL: Clear old instance to avoid "Invalid Argument" CUDA errors
97
- old_stt = MODELS.pop("stt", None)
98
- if old_stt: del old_stt
 
 
99
  gc.collect()
100
- if torch.cuda.is_available(): torch.cuda.empty_cache()
101
-
102
- # Re-init on GPU with safe parameters for ZeroGPU MIG
103
- try:
104
- MODELS["stt"] = WhisperModel(
105
- "large-v3",
106
- device="cuda",
107
- device_index=0,
108
- compute_type="int8_float16", # Better stability on H100/H200 MIG
109
- cpu_threads=4,
110
- num_workers=1
111
- )
112
- print("✨ Whisper Activated on GPU")
113
- except Exception as e:
114
- print(f"❌ Whisper GPU fail: {e}. Falling back to CPU in-session.")
115
- MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
116
-
117
- # 2. XTTS-v2 GPU Activation
118
  if action in ["tts", "s2st"]:
119
  if MODELS["tts"] is None:
120
- MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
121
- try:
122
- current_dev = str(next(MODELS["tts"].synthesizer.tts_model.parameters()).device)
123
- if "cuda" not in current_dev:
124
- print(f"🚀 Moving XTTS-v2 to GPU...")
125
- MODELS["tts"].to("cuda")
126
- except: MODELS["tts"].to("cuda")
 
 
 
127
 
128
  # 3. Helpers
129
  if MODELS["denoiser"] is None:
@@ -133,17 +126,30 @@ def activate_gpu_models(action):
133
  chatterbox_utils.load_chatterbox(device="cuda" if torch.cuda.is_available() else "cpu")
134
 
135
  def warmup_models():
136
- """PRE-LOAD MODELS INTO SYSTEM RAM"""
137
- print("\n🔥 --- SYSTEM STARTUP: RAM LOADING (v90) ---")
138
  start = time.time()
139
  try:
140
- MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
141
- MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
142
- try: MODELS["denoiser"] = init_df()
143
- except: pass
 
 
 
 
 
 
 
 
 
 
144
  chatterbox_utils.warmup_chatterbox()
145
- print(f"✅ --- SYSTEM READY ({time.time()-start:.2f}s) --- \n")
146
- except Exception as e: print(f"⚠️ Startup warning: {e}")
 
 
 
147
 
148
  def _stt_logic(request_dict):
149
  audio_bytes = base64.b64decode(request_dict.get("file"))
@@ -165,6 +171,7 @@ def _tts_logic(text, lang, speaker_wav_b64):
165
  XTTS_MAP = {"en": "en", "de": "de", "fr": "fr", "es": "es", "it": "it", "pl": "pl", "pt": "pt", "tr": "tr", "ru": "ru", "nl": "nl", "cs": "cs", "ar": "ar", "hu": "hu", "ko": "ko", "hi": "hi", "zh": "zh-cn"}
166
  clean_lang = lang.strip().lower().split('-')[0]
167
  mapped_lang = XTTS_MAP.get(clean_lang) or ("zh-cn" if clean_lang == "zh" else None)
 
168
  if mapped_lang:
169
  speaker_wav_path = None
170
  if speaker_wav_b64:
@@ -180,6 +187,7 @@ def _tts_logic(text, lang, speaker_wav_b64):
180
  finally:
181
  if speaker_wav_path and "default_speaker" not in speaker_wav_path and os.path.exists(speaker_wav_path): os.unlink(speaker_wav_path)
182
  if 'output_path' in locals() and os.path.exists(output_path): os.unlink(output_path)
 
183
  try:
184
  temp_ref = None
185
  if speaker_wav_b64:
@@ -195,7 +203,7 @@ def _tts_logic(text, lang, speaker_wav_b64):
195
  def core_process(request_dict):
196
  action = request_dict.get("action")
197
  t0 = time.time()
198
- print(f"--- [v90] 🚀 GPU SESSION START: {action} ---")
199
  activate_gpu_models(action)
200
  try:
201
  if action == "stt": res = _stt_logic(request_dict)
@@ -209,7 +217,7 @@ def core_process(request_dict):
209
  elif action == "health": res = {"status": "awake"}
210
  else: res = {"error": f"Unknown action: {action}"}
211
  finally:
212
- print(f"--- [v90] ✨ END: {action} ({time.time()-t0:.2f}s) ---")
213
  gc.collect()
214
  if torch.cuda.is_available(): torch.cuda.empty_cache()
215
  return res
 
13
  import gc
14
  import sys
15
  import types
16
+ from huggingface_hub import snapshot_download
17
 
18
+ # 🛡️ 1. CRITICAL COMPATIBILITY MONKEYPATCHES
19
  print("🛠️ Applying compatibility monkeypatches...")
20
  if "torchaudio.backend" not in sys.modules:
21
  backend = types.ModuleType("torchaudio.backend")
 
55
  print("✅ Torchaudio patched")
56
  except Exception as e: print(f"⚠️ Patch failed: {e}")
57
 
58
+ # 📦 2. TOP-LEVEL IMPORTS (No engine initialization yet)
59
+ print("📦 Importing AI Libraries...")
60
  import chatterbox_utils
61
+ # Note: We import the classes, but DO NOT instantiate them on the CPU
62
  from faster_whisper import WhisperModel
63
  from TTS.api import TTS
64
+ from df.enhance import init_df
 
65
  print("✅ Imports Complete")
66
 
67
  try:
 
74
  if f is None: return lambda x: x
75
  return f
76
 
77
+ # FORCE BUILD TRIGGER: 11:05:00 Jan 21 2026
78
+ # v91: No-Instance Startup (Resolved CUDA std::system_error)
79
 
80
  os.environ["COQUI_TOS_AGREED"] = "1"
81
+ # MODELS starts empty to ensure a clean CUDA handoff
82
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
83
 
84
  def activate_gpu_models(action):
85
+ """v91: Direct GPU initialization (Safe & Clean)"""
86
  global MODELS
87
 
88
+ # 1. Faster-Whisper: Initialize directly on GPU
89
  if action in ["stt", "s2st"]:
90
+ if MODELS["stt"] is None:
91
+ print(f"🎙️ [v91] Initializing Whisper directly on GPU for {action}...")
92
+ # No CPU instance should exist at this point
93
+ MODELS["stt"] = WhisperModel(
94
+ "large-v3",
95
+ device="cuda",
96
+ compute_type="float16"
97
+ )
98
+ print("✨ Whisper GPU Engine Ready")
99
+ elif MODELS["stt"].model.device != "cuda":
100
+ # This case shouldn't happen with No-Instance Startup, but for safety:
101
+ print("⚠️ Switching Whisper to GPU...")
102
+ del MODELS["stt"]
103
  gc.collect()
104
+ torch.cuda.empty_cache()
105
+ MODELS["stt"] = WhisperModel("large-v3", device="cuda", compute_type="float16")
106
+
107
+ # 2. XTTS-v2: Initialize directly on GPU
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  if action in ["tts", "s2st"]:
109
  if MODELS["tts"] is None:
110
+ print(f"🔊 [v91] Initializing XTTS directly on GPU for {action}...")
111
+ MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
112
+ print("✨ XTTS GPU Engine Ready")
113
+ else:
114
+ try:
115
+ current_dev = str(next(MODELS["tts"].synthesizer.tts_model.parameters()).device)
116
+ if "cuda" not in current_dev:
117
+ print("🚀 Moving XTTS-v2 to GPU...")
118
+ MODELS["tts"].to("cuda")
119
+ except: MODELS["tts"].to("cuda")
120
 
121
  # 3. Helpers
122
  if MODELS["denoiser"] is None:
 
126
  chatterbox_utils.load_chatterbox(device="cuda" if torch.cuda.is_available() else "cpu")
127
 
128
  def warmup_models():
129
+ """v91: DOWNLOAD ONLY (No engine initialization)"""
130
+ print("\n🔥 --- SYSTEM STARTUP: NO-INSTANCE WARMUP (v91) ---")
131
  start = time.time()
132
  try:
133
+ # 1. Download Whisper (CTranslate2 format)
134
+ print("📥 Pre-downloading Whisper large-v3 weights...")
135
+ snapshot_download(repo_id="Systran/faster-whisper-large-v3")
136
+
137
+ # 2. Download XTTS-v2
138
+ print("📥 Pre-downloading XTTS-v2 weights...")
139
+ snapshot_download(repo_id="coqui/XTTS-v2")
140
+
141
+ # 3. Download DeepFilterNet
142
+ print("📥 Pre-downloading DeepFilterNet...")
143
+ # DeepFilterNet downloads usually happen via init_df, but we can try to force it
144
+ # snapshot_download(repo_id="RVoice/DeepFilterNet3")
145
+
146
+ # 4. Chatterbox Warmup
147
  chatterbox_utils.warmup_chatterbox()
148
+
149
+ print(f"✅ --- STARTUP COMPLETE: DATA ON DISK ({time.time()-start:.2f}s) --- \n")
150
+ print("⚠️ NOTE: No engine instances created on CPU to prevent CUDA conflicts.")
151
+ except Exception as e:
152
+ print(f"⚠️ Startup warning: {e}")
153
 
154
  def _stt_logic(request_dict):
155
  audio_bytes = base64.b64decode(request_dict.get("file"))
 
171
  XTTS_MAP = {"en": "en", "de": "de", "fr": "fr", "es": "es", "it": "it", "pl": "pl", "pt": "pt", "tr": "tr", "ru": "ru", "nl": "nl", "cs": "cs", "ar": "ar", "hu": "hu", "ko": "ko", "hi": "hi", "zh": "zh-cn"}
172
  clean_lang = lang.strip().lower().split('-')[0]
173
  mapped_lang = XTTS_MAP.get(clean_lang) or ("zh-cn" if clean_lang == "zh" else None)
174
+
175
  if mapped_lang:
176
  speaker_wav_path = None
177
  if speaker_wav_b64:
 
187
  finally:
188
  if speaker_wav_path and "default_speaker" not in speaker_wav_path and os.path.exists(speaker_wav_path): os.unlink(speaker_wav_path)
189
  if 'output_path' in locals() and os.path.exists(output_path): os.unlink(output_path)
190
+
191
  try:
192
  temp_ref = None
193
  if speaker_wav_b64:
 
203
  def core_process(request_dict):
204
  action = request_dict.get("action")
205
  t0 = time.time()
206
+ print(f"--- [v91] 🚀 GPU SESSION START: {action} ---")
207
  activate_gpu_models(action)
208
  try:
209
  if action == "stt": res = _stt_logic(request_dict)
 
217
  elif action == "health": res = {"status": "awake"}
218
  else: res = {"error": f"Unknown action: {action}"}
219
  finally:
220
+ print(f"--- [v91] ✨ END: {action} ({time.time()-t0:.2f}s) ---")
221
  gc.collect()
222
  if torch.cuda.is_available(): torch.cuda.empty_cache()
223
  return res