Upload app.py with huggingface_hub
Browse files
app.py
CHANGED
|
@@ -186,12 +186,12 @@ def _tts_logic(text, lang, speaker_wav_b64):
|
|
| 186 |
lang_key = lang.strip().lower()
|
| 187 |
mapped_lang = XTTS_MAP.get(lang_key) or XTTS_MAP.get(lang_key.split('-')[0])
|
| 188 |
|
| 189 |
-
print(f"[
|
| 190 |
|
| 191 |
# 🛣️ INTELLIGENT ROUTING
|
| 192 |
# Case A: XTTS Support (Voice Cloning)
|
| 193 |
if mapped_lang and mapped_lang in XTTS_LANG_CODES:
|
| 194 |
-
print(f"[
|
| 195 |
speaker_wav_path = None
|
| 196 |
if speaker_wav_b64:
|
| 197 |
sb = base64.b64decode(speaker_wav_b64)
|
|
@@ -217,7 +217,7 @@ def _tts_logic(text, lang, speaker_wav_b64):
|
|
| 217 |
if 'output_path' in locals() and os.path.exists(output_path): os.unlink(output_path)
|
| 218 |
|
| 219 |
# Case B: Chatterbox ONNX Support (High-Quality Fast Fallback)
|
| 220 |
-
print(f"[
|
| 221 |
try:
|
| 222 |
# Use local file if available for cloning in Chatterbox too
|
| 223 |
temp_ref = None
|
|
@@ -241,13 +241,13 @@ def _tts_logic(text, lang, speaker_wav_b64):
|
|
| 241 |
@spaces.GPU
|
| 242 |
def core_process(request_dict):
|
| 243 |
"""
|
| 244 |
-
Unified GPU Entry Point (
|
| 245 |
This function handles all high-speed tasks inside a single GPU allocation.
|
| 246 |
The container stays resident on CPU but triggers GPU on demand.
|
| 247 |
"""
|
| 248 |
action = request_dict.get("action")
|
| 249 |
t0 = time.time()
|
| 250 |
-
print(f"--- [
|
| 251 |
load_models()
|
| 252 |
|
| 253 |
if action == "stt":
|
|
@@ -279,8 +279,6 @@ def core_process(request_dict):
|
|
| 279 |
|
| 280 |
return res
|
| 281 |
|
| 282 |
-
return {"error": f"Unknown action: {action}"}
|
| 283 |
-
|
| 284 |
def create_wav_header(sample_rate=24000, channels=1, bit_depth=16):
|
| 285 |
"""Returns a standard WAV header as standard BYTES"""
|
| 286 |
header = bytearray(b'RIFF')
|
|
|
|
| 186 |
lang_key = lang.strip().lower()
|
| 187 |
mapped_lang = XTTS_MAP.get(lang_key) or XTTS_MAP.get(lang_key.split('-')[0])
|
| 188 |
|
| 189 |
+
print(f"[v81] TTS Request - Original: {lang}, Mapped: {mapped_lang}")
|
| 190 |
|
| 191 |
# 🛣️ INTELLIGENT ROUTING
|
| 192 |
# Case A: XTTS Support (Voice Cloning)
|
| 193 |
if mapped_lang and mapped_lang in XTTS_LANG_CODES:
|
| 194 |
+
print(f"[v81] Using XTTS-v2 for '{mapped_lang}'")
|
| 195 |
speaker_wav_path = None
|
| 196 |
if speaker_wav_b64:
|
| 197 |
sb = base64.b64decode(speaker_wav_b64)
|
|
|
|
| 217 |
if 'output_path' in locals() and os.path.exists(output_path): os.unlink(output_path)
|
| 218 |
|
| 219 |
# Case B: Chatterbox ONNX Support (High-Quality Fast Fallback)
|
| 220 |
+
print(f"[v81] Using Chatterbox ONNX Fallback for '{lang}'")
|
| 221 |
try:
|
| 222 |
# Use local file if available for cloning in Chatterbox too
|
| 223 |
temp_ref = None
|
|
|
|
| 241 |
@spaces.GPU
|
| 242 |
def core_process(request_dict):
|
| 243 |
"""
|
| 244 |
+
Unified GPU Entry Point (v81).
|
| 245 |
This function handles all high-speed tasks inside a single GPU allocation.
|
| 246 |
The container stays resident on CPU but triggers GPU on demand.
|
| 247 |
"""
|
| 248 |
action = request_dict.get("action")
|
| 249 |
t0 = time.time()
|
| 250 |
+
print(f"--- [v81] 🚀 GPU SESSION START: {action} at {time.ctime()} ---")
|
| 251 |
load_models()
|
| 252 |
|
| 253 |
if action == "stt":
|
|
|
|
| 279 |
|
| 280 |
return res
|
| 281 |
|
|
|
|
|
|
|
| 282 |
def create_wav_header(sample_rate=24000, channels=1, bit_depth=16):
|
| 283 |
"""Returns a standard WAV header as standard BYTES"""
|
| 284 |
header = bytearray(b'RIFF')
|