TGPro1 committed on
Commit
52ea7a6
·
verified ·
1 Parent(s): 36145e9

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +107 -28
app.py CHANGED
@@ -10,6 +10,8 @@ import traceback
10
  import json
11
  import time
12
  import torchaudio
 
 
13
 
14
  # 🛡️ ZeroGPU Support (v69)
15
  # CRITICAL: @spaces.GPU MUST only be used on synchronous functions (def, not async def)
@@ -59,8 +61,8 @@ if not hasattr(torchaudio, "info"):
59
 
60
  from df.enhance import enhance, init_df, load_audio, save_audio
61
 
62
- # FORCE BUILD TRIGGER: 08:26:00 Jan 21 2026
63
- # v79: Full 16-Language XTTS Mapping Support
64
 
65
  # 🛠️ Monkeypatch torchaudio.load
66
  try:
@@ -100,6 +102,15 @@ def load_models():
100
  print("⚠️ Falling back to CPU (int8)")
101
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
102
 
 
 
 
 
 
 
 
 
 
103
  if MODELS["translate"] is None:
104
  print("🌍 Loading Google Translate...")
105
  MODELS["translate"] = "active"
@@ -168,37 +179,64 @@ def _tts_logic(text, lang, speaker_wav_b64):
168
  "zh": "zh-cn", "zh-cn": "zh-cn", "zh-tw": "zh-cn"
169
  }
170
 
 
 
 
171
  if lang:
172
  lang_key = lang.strip().lower()
173
- # 1. Try exact match (e.g. 'zh-cn')
174
- # 2. Try the sub-code split match (e.g. 'en-US' -> 'en')
175
- # 3. Fallback to the original key if not in map
176
- lang = XTTS_MAP.get(lang_key) or XTTS_MAP.get(lang_key.split('-')[0]) or lang_key
177
-
178
- print(f"[v79] TTS mapped language: {lang}")
179
- speaker_wav_path = None
180
- if speaker_wav_b64:
181
- sb = base64.b64decode(speaker_wav_b64)
182
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
183
- f.write(sb)
184
- speaker_wav_path = f.name
185
- else:
186
- speaker_wav_path = "default_speaker.wav"
187
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  try:
189
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
190
- output_path = output_file.name
 
 
 
 
 
 
 
 
191
 
192
- # 🎙️ XTTS Inference
193
- MODELS["tts"].tts_to_file(text=text, language=lang, file_path=output_path, speaker_wav=speaker_wav_path)
194
 
195
- with open(output_path, "rb") as f:
196
- audio_b64 = base64.b64encode(f.read()).decode()
197
  return {"audio": audio_b64}
198
- finally:
199
- if speaker_wav_path and "default_speaker" not in speaker_wav_path:
200
- if os.path.exists(speaker_wav_path): os.unlink(speaker_wav_path)
201
- if 'output_path' in locals() and os.path.exists(output_path): os.unlink(output_path)
202
 
203
  @spaces.GPU
204
  def core_process(request_dict):
@@ -233,7 +271,12 @@ def core_process(request_dict):
233
  else:
234
  res = {"error": f"Unknown action: {action}"}
235
 
236
- print(f"--- [v77] ✨ GPU SESSION END: {action} (Total: {time.time()-t0:.2f}s) ---")
 
 
 
 
 
237
  return res
238
 
239
  return {"error": f"Unknown action: {action}"}
@@ -274,6 +317,9 @@ def gpu_tts_generator(text, lang, speaker_wav_path):
274
  finally:
275
  if speaker_wav_path and "default_speaker" not in speaker_wav_path:
276
  if os.path.exists(speaker_wav_path): os.unlink(speaker_wav_path)
 
 
 
277
 
278
  # --- FastAPI Entry Points ---
279
  app = FastAPI()
@@ -314,7 +360,40 @@ async def api_tts_stream(request: Request):
314
 
315
  @app.get("/health")
316
  def health():
317
- return {"status": "ok", "gpu": torch.cuda.is_available()}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
318
 
319
  # --- Gradio UI ---
320
  def gradio_fn(req_json):
 
10
  import json
11
  import time
12
  import torchaudio
13
+ import chatterbox_utils
14
+ import gc
15
 
16
  # 🛡️ ZeroGPU Support (v69)
17
  # CRITICAL: @spaces.GPU MUST only be used on synchronous functions (def, not async def)
 
61
 
62
  from df.enhance import enhance, init_df, load_audio, save_audio
63
 
64
+ # FORCE BUILD TRIGGER: 09:10:00 Jan 21 2026
65
+ # v81: Stability Optimizations (Memory Management + Cache Clearing)
66
 
67
  # 🛠️ Monkeypatch torchaudio.load
68
  try:
 
102
  print("⚠️ Falling back to CPU (int8)")
103
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
104
 
105
+ # 🧹 Proactive Memory Cleanup
106
+ gc.collect()
107
+ if torch.cuda.is_available():
108
+ torch.cuda.empty_cache()
109
+
110
+ # Initialize Chatterbox ONNX (High-Speed Fallback)
111
+ # This will load the model if not already loaded internally by chatterbox_utils
112
+ chatterbox_utils.load_chatterbox(device="cuda" if torch.cuda.is_available() else "cpu")
113
+
114
  if MODELS["translate"] is None:
115
  print("🌍 Loading Google Translate...")
116
  MODELS["translate"] = "active"
 
179
  "zh": "zh-cn", "zh-cn": "zh-cn", "zh-tw": "zh-cn"
180
  }
181
 
182
+ XTTS_LANG_CODES = set(XTTS_MAP.values())
183
+
184
+ mapped_lang = None
185
  if lang:
186
  lang_key = lang.strip().lower()
187
+ mapped_lang = XTTS_MAP.get(lang_key) or XTTS_MAP.get(lang_key.split('-')[0])
 
 
 
 
 
 
 
 
 
 
 
 
 
188
 
189
+ print(f"[v80] TTS Request - Original: {lang}, Mapped: {mapped_lang}")
190
+
191
+ # 🛣️ INTELLIGENT ROUTING
192
+ # Case A: XTTS Support (Voice Cloning)
193
+ if mapped_lang and mapped_lang in XTTS_LANG_CODES:
194
+ print(f"[v80] Using XTTS-v2 for '{mapped_lang}'")
195
+ speaker_wav_path = None
196
+ if speaker_wav_b64:
197
+ sb = base64.b64decode(speaker_wav_b64)
198
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
199
+ f.write(sb)
200
+ speaker_wav_path = f.name
201
+ else:
202
+ speaker_wav_path = "default_speaker.wav"
203
+
204
+ try:
205
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
206
+ output_path = output_file.name
207
+
208
+ # 🎙️ XTTS Inference
209
+ MODELS["tts"].tts_to_file(text=text, language=mapped_lang, file_path=output_path, speaker_wav=speaker_wav_path)
210
+
211
+ with open(output_path, "rb") as f:
212
+ audio_b64 = base64.b64encode(f.read()).decode()
213
+ return {"audio": audio_b64}
214
+ finally:
215
+ if speaker_wav_path and "default_speaker" not in speaker_wav_path:
216
+ if os.path.exists(speaker_wav_path): os.unlink(speaker_wav_path)
217
+ if 'output_path' in locals() and os.path.exists(output_path): os.unlink(output_path)
218
+
219
+ # Case B: Chatterbox ONNX Support (High-Quality Fast Fallback)
220
+ print(f"[v80] Using Chatterbox ONNX Fallback for '{lang}'")
221
  try:
222
+ # Use local file if available for cloning in Chatterbox too
223
+ temp_ref = None
224
+ if speaker_wav_b64:
225
+ sb = base64.b64decode(speaker_wav_b64)
226
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
227
+ f.write(sb); temp_ref = f.name
228
+
229
+ # Chatterbox supports codes like 'fi', 'el', 'da', etc.
230
+ chatter_lang = lang.strip().lower().split('-')[0]
231
+ audio_bytes = chatterbox_utils.run_chatterbox_inference(text, chatter_lang, speaker_wav_path=temp_ref)
232
 
233
+ if temp_ref and os.path.exists(temp_ref): os.unlink(temp_ref)
 
234
 
235
+ audio_b64 = base64.b64encode(audio_bytes).decode()
 
236
  return {"audio": audio_b64}
237
+ except Exception as e:
238
+ print(f" Chatterbox Fallback failed: {e}")
239
+ return {"error": f"TTS Failure: '{lang}' not supported by XTTS or Chatterbox."}
 
240
 
241
  @spaces.GPU
242
  def core_process(request_dict):
 
271
  else:
272
  res = {"error": f"Unknown action: {action}"}
273
 
274
+ finally:
275
+ print(f"--- [v81] ✨ SESSION END: {action} ---")
276
+ gc.collect()
277
+ if torch.cuda.is_available():
278
+ torch.cuda.empty_cache()
279
+
280
  return res
281
 
282
  return {"error": f"Unknown action: {action}"}
 
317
  finally:
318
  if speaker_wav_path and "default_speaker" not in speaker_wav_path:
319
  if os.path.exists(speaker_wav_path): os.unlink(speaker_wav_path)
320
+ gc.collect()
321
+ if torch.cuda.is_available():
322
+ torch.cuda.empty_cache()
323
 
324
  # --- FastAPI Entry Points ---
325
  app = FastAPI()
 
360
 
361
  @app.get("/health")
362
  def health():
363
+ return {"status": "ok", "gpu": torch.cuda.is_available(), "time": time.ctime()}
364
+
365
+ @app.post("/api/v1/clear_cache")
366
+ async def clear_cache():
367
+ """Manual deep cleanup of memory and caches"""
368
+ try:
369
+ t0 = time.time()
370
+ print("🧹 Manual Cache Clearing Triggered...")
371
+
372
+ # 1. GC collect
373
+ gc.collect()
374
+
375
+ # 2. CUDA cache
376
+ if torch.cuda.is_available():
377
+ torch.cuda.empty_cache()
378
+
379
+ # 3. Clean temp files
380
+ temp_dir = tempfile.gettempdir()
381
+ count = 0
382
+ for f in os.listdir(temp_dir):
383
+ if f.endswith(".wav") or f.startswith("tm"):
384
+ try:
385
+ os.unlink(os.path.join(temp_dir, f))
386
+ count += 1
387
+ except: pass
388
+
389
+ return {
390
+ "status": "success",
391
+ "cleaned_files": count,
392
+ "duration": f"{time.time()-t0:.2f}s",
393
+ "gpu_memory": f"{torch.cuda.memory_allocated() / 1024**2:.2f}MB" if torch.cuda.is_available() else "N/A"
394
+ }
395
+ except Exception as e:
396
+ return {"status": "error", "message": str(e)}
397
 
398
  # --- Gradio UI ---
399
  def gradio_fn(req_json):