TGPro1 committed on
Commit
d4685df
·
verified ·
1 Parent(s): 1dc8ab4

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +30 -29
app.py CHANGED
@@ -61,8 +61,8 @@ if not hasattr(torchaudio, "info"):
61
 
62
  from df.enhance import enhance, init_df, load_audio, save_audio
63
 
64
- # FORCE BUILD TRIGGER: 09:10:00 Jan 21 2026
65
- # v81: Stability Optimizations (Memory Management + Cache Clearing)
66
 
67
  # 🛠️ Monkeypatch torchaudio.load
68
  try:
@@ -186,12 +186,12 @@ def _tts_logic(text, lang, speaker_wav_b64):
186
  lang_key = lang.strip().lower()
187
  mapped_lang = XTTS_MAP.get(lang_key) or XTTS_MAP.get(lang_key.split('-')[0])
188
 
189
- print(f"[v81] TTS Request - Original: {lang}, Mapped: {mapped_lang}")
190
 
191
  # 🛣️ INTELLIGENT ROUTING
192
  # Case A: XTTS Support (Voice Cloning)
193
  if mapped_lang and mapped_lang in XTTS_LANG_CODES:
194
- print(f"[v81] Using XTTS-v2 for '{mapped_lang}'")
195
  speaker_wav_path = None
196
  if speaker_wav_b64:
197
  sb = base64.b64decode(speaker_wav_b64)
@@ -217,7 +217,7 @@ def _tts_logic(text, lang, speaker_wav_b64):
217
  if 'output_path' in locals() and os.path.exists(output_path): os.unlink(output_path)
218
 
219
  # Case B: Chatterbox ONNX Support (High-Quality Fast Fallback)
220
- print(f"[v81] Using Chatterbox ONNX Fallback for '{lang}'")
221
  try:
222
  # Use local file if available for cloning in Chatterbox too
223
  temp_ref = None
@@ -241,38 +241,39 @@ def _tts_logic(text, lang, speaker_wav_b64):
241
  @spaces.GPU
242
  def core_process(request_dict):
243
  """
244
- Unified GPU Entry Point (v81).
245
  This function handles all high-speed tasks inside a single GPU allocation.
246
  The container stays resident on CPU but triggers GPU on demand.
247
  """
248
  action = request_dict.get("action")
249
  t0 = time.time()
250
- print(f"--- [v81] 🚀 GPU SESSION START: {action} at {time.ctime()} ---")
251
  load_models()
252
 
253
- if action == "stt":
254
- res = _stt_logic(request_dict)
255
- elif action == "translate":
256
- res = {"translated": _translate_logic(request_dict.get("text"), request_dict.get("target_lang", "en"))}
257
- elif action == "tts":
258
- res = _tts_logic(request_dict.get("text"), request_dict.get("lang"), request_dict.get("speaker_wav"))
259
- elif action == "s2st":
260
- # 🔗 FULL PIPELINE (Single GPU Call)
261
- stt_res = _stt_logic({"file": request_dict.get("file"), "lang": request_dict.get("source_lang")})
262
- text = stt_res.get("text", "")
263
- if not text: return {"error": "No speech detected"}
264
-
265
- translated = _translate_logic(text, request_dict.get("target_lang"))
266
-
267
- tts_res = _tts_logic(translated, request_dict.get("target_lang"), request_dict.get("speaker_wav"))
268
- res = {"text": text, "translated": translated, "audio": tts_res.get("audio")}
269
- elif action == "health":
270
- res = {"status": "awake", "time": time.ctime()}
271
- else:
272
- res = {"error": f"Unknown action: {action}"}
273
-
 
274
  finally:
275
- print(f"--- [v81] ✨ SESSION END: {action} ---")
276
  gc.collect()
277
  if torch.cuda.is_available():
278
  torch.cuda.empty_cache()
 
61
 
62
  from df.enhance import enhance, init_df, load_audio, save_audio
63
 
64
+ # FORCE BUILD TRIGGER: 09:40:00 Jan 21 2026
65
+ # v84: Fixed SyntaxError (Missing try block in core_process)
66
 
67
  # 🛠️ Monkeypatch torchaudio.load
68
  try:
 
186
  lang_key = lang.strip().lower()
187
  mapped_lang = XTTS_MAP.get(lang_key) or XTTS_MAP.get(lang_key.split('-')[0])
188
 
189
+ print(f"[v84] TTS Request - Original: {lang}, Mapped: {mapped_lang}")
190
 
191
  # 🛣️ INTELLIGENT ROUTING
192
  # Case A: XTTS Support (Voice Cloning)
193
  if mapped_lang and mapped_lang in XTTS_LANG_CODES:
194
+ print(f"[v84] Using XTTS-v2 for '{mapped_lang}'")
195
  speaker_wav_path = None
196
  if speaker_wav_b64:
197
  sb = base64.b64decode(speaker_wav_b64)
 
217
  if 'output_path' in locals() and os.path.exists(output_path): os.unlink(output_path)
218
 
219
  # Case B: Chatterbox ONNX Support (High-Quality Fast Fallback)
220
+ print(f"[v84] Using Chatterbox ONNX Fallback for '{lang}'")
221
  try:
222
  # Use local file if available for cloning in Chatterbox too
223
  temp_ref = None
 
241
  @spaces.GPU
242
  def core_process(request_dict):
243
  """
244
+ Unified GPU Entry Point (v84).
245
  This function handles all high-speed tasks inside a single GPU allocation.
246
  The container stays resident on CPU but triggers GPU on demand.
247
  """
248
  action = request_dict.get("action")
249
  t0 = time.time()
250
+ print(f"--- [v84] 🚀 GPU SESSION START: {action} at {time.ctime()} ---")
251
  load_models()
252
 
253
+ try:
254
+ if action == "stt":
255
+ res = _stt_logic(request_dict)
256
+ elif action == "translate":
257
+ res = {"translated": _translate_logic(request_dict.get("text"), request_dict.get("target_lang", "en"))}
258
+ elif action == "tts":
259
+ res = _tts_logic(request_dict.get("text"), request_dict.get("lang"), request_dict.get("speaker_wav"))
260
+ elif action == "s2st":
261
+ # 🔗 FULL PIPELINE (Single GPU Call)
262
+ stt_res = _stt_logic({"file": request_dict.get("file"), "lang": request_dict.get("source_lang")})
263
+ text = stt_res.get("text", "")
264
+ if not text: return {"error": "No speech detected"}
265
+
266
+ translated = _translate_logic(text, request_dict.get("target_lang"))
267
+
268
+ tts_res = _tts_logic(translated, request_dict.get("target_lang"), request_dict.get("speaker_wav"))
269
+ res = {"text": text, "translated": translated, "audio": tts_res.get("audio")}
270
+ elif action == "health":
271
+ res = {"status": "awake", "time": time.ctime()}
272
+ else:
273
+ res = {"error": f"Unknown action: {action}"}
274
+
275
  finally:
276
+ print(f"--- [v84] ✨ SESSION END: {action} (Total: {time.time()-t0:.2f}s) ---")
277
  gc.collect()
278
  if torch.cuda.is_available():
279
  torch.cuda.empty_cache()