# test / app.py
# (Hugging Face Space file header: uploaded by IAsistemofinteres, "Upload 7 files", rev 7358212 verified)
"""
Chatterbox TTS Enhanced - Monolithic Pro Edition (UI UPDATE)
Fixes: Progress bar moved below audio output.
"""
import sys
import os
import glob
import shutil
import time
import random
import re
import numpy as np
import torch
import gradio as gr
from pathlib import Path
import gc
# ==============================================================================
# 0. PATH SETUP AND LIBRARY
# ==============================================================================
PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
SRC_PATH = os.path.join(PROJECT_ROOT, "src")
# Prefer the bundled 'src' tree when it exists; otherwise fall back to the
# project root so the 'chatterbox' package can still be resolved.
if os.path.exists(SRC_PATH):
    if SRC_PATH not in sys.path:
        sys.path.append(SRC_PATH)
        print(f"✅ Path 'src' añadido: {SRC_PATH}")
elif PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)
# Try to import the real library; if it is unavailable, fall back to a mock
# implementation so the UI can still be exercised (no real audio produced).
try:
    from chatterbox.tts import ChatterboxTTS
    from chatterbox.vc import ChatterboxVC
    from chatterbox.mtl_tts import ChatterboxMultilingualTTS, SUPPORTED_LANGUAGES
    from chatterbox.tts_turbo import ChatterboxTurboTTS
    print("✅ Librería 'chatterbox' importada correctamente.")
except ImportError as e:
    print(f"❌ ERROR CRÍTICO: No se pudo importar 'chatterbox'. Detalle: {e}")
    print("⚠️ EJECUTANDO EN MODO MOCK (SIN AUDIO REAL)")
    SUPPORTED_LANGUAGES = {"en": "English", "es": "Spanish"}

    class MockModel:
        """Stand-in model: yields two seconds of silence at 24 kHz."""

        def __init__(self, *args, **kwargs):
            self.sr = 24000

        @classmethod
        def from_pretrained(cls, device):
            return cls()

        def generate(self, *args, **kwargs):
            time.sleep(1)  # simulate inference latency
            return torch.zeros(1, 48000)

    # Every model class points at the same mock in fallback mode.
    ChatterboxTTS = ChatterboxVC = ChatterboxMultilingualTTS = ChatterboxTurboTTS = MockModel
# ==============================================================================
# 1. CONFIGURATION AND UI UTILITIES
# ==============================================================================
# Root folder holding the cloned-voice WAV library (created on first run).
VOICE_WAV_ROOT = os.path.join(PROJECT_ROOT, "modules", "voice_wav")
os.makedirs(VOICE_WAV_ROOT, exist_ok=True)
# Pick the compute device once at startup.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
# --- HTML PROGRESS BAR GENERATOR ---
def make_progress_html(percentage, message="Ready"):
    """Render an inline HTML progress bar with a status line underneath.

    `percentage` is expected in [0, 100]; at exactly 100 the bar turns green,
    otherwise it uses an orange gradient fill.
    """
    fill = "#22c55e" if percentage == 100 else "linear-gradient(90deg, #f97316 0%, #fbbf24 100%)"
    return f"""
<div style="display: flex; flex-direction: column; gap: 5px; width: 100%; margin-top: 10px;">
<div style="width: 100%; background-color: #334155; border-radius: 99px; height: 24px; border: 1px solid #475569; overflow: hidden; position: relative; box-shadow: inset 0 2px 4px rgba(0,0,0,0.3);">
<div style="width: {percentage}%; background: {fill}; height: 100%; transition: width 0.3s ease-out, background 0.3s ease;"></div>
<div style="position: absolute; top: 0; left: 0; width: 100%; height: 100%; display: flex; align-items: center; justify-content: center; color: white; font-size: 11px; font-weight: bold; text-shadow: 0 1px 2px rgba(0,0,0,0.5);">
{int(percentage)}%
</div>
</div>
<p style="color: #cbd5e1; font-size: 0.9em; margin: 0; padding: 0; white-space: pre-wrap; font-family: monospace;">{message}</p>
</div>
"""
# ==============================================================================
# 2. MODEL MANAGER & VOICE MANAGER
# ==============================================================================
class ModelManager:
    """Lazily loads one Chatterbox model at a time, freeing the previous one.

    Only a single model type is resident; switching types unloads everything
    first to keep VRAM/RAM usage bounded.
    """

    def __init__(self):
        self.tts_model = None
        self.mtl_model = None
        self.vc_model = None
        self.turbo_model = None
        self.current_model_type = None  # one of "tts", "mtl", "vc", "turbo"

    def unload_all(self):
        """Drop every loaded model and reclaim memory."""
        self.tts_model = None
        self.mtl_model = None
        self.vc_model = None
        self.turbo_model = None
        # Fix: the old code only ran gc.collect() on CUDA, and ran it *after*
        # torch.cuda.empty_cache(). Collect unconditionally, and before
        # emptying the cache, so freed tensors are actually returned.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        self.current_model_type = None

    def get_model(self, type_key):
        """Return the model for ``type_key``, loading it on first use.

        Returns None (leaving nothing loaded) when loading fails.
        """
        if self.current_model_type != type_key:
            print(f"🔄 Switching to {type_key.upper()} model...")
            self.unload_all()
            try:
                if type_key == "tts":
                    self.tts_model = ChatterboxTTS.from_pretrained(DEVICE)
                elif type_key == "mtl":
                    self.mtl_model = ChatterboxMultilingualTTS.from_pretrained(DEVICE)
                elif type_key == "vc":
                    self.vc_model = ChatterboxVC.from_pretrained(DEVICE)
                elif type_key == "turbo":
                    self.turbo_model = ChatterboxTurboTTS.from_pretrained(device=DEVICE)
                self.current_model_type = type_key
                print(f"✅ {type_key.upper()} loaded.")
            except Exception as e:
                print(f"❌ Error loading {type_key}: {e}")
                return None
        return getattr(self, f"{type_key}_model")


model_manager = ModelManager()
# --- VOICE DB ---
# Nested index: VOICE_DB[category][lang][display_name] -> absolute wav path.
VOICE_DB = {}
CATEGORY_LIST = []
FLAT_PATH_MAP = {}          # display_name -> absolute path (all categories)
ALL_VOICES_FLAT_LIST = []   # every display name, insertion order

def scan_voice_wav_structure():
    """Rebuild all voice indices by walking VOICE_WAV_ROOT.

    Expected layout: <category>[/<2-letter lang>]/<voice>.wav|.mp3. A folder
    between category and file whose name is exactly two characters is taken
    as a language code; otherwise the voice defaults to "en".
    """
    global VOICE_DB, CATEGORY_LIST, FLAT_PATH_MAP, ALL_VOICES_FLAT_LIST
    VOICE_DB = {}
    CATEGORY_LIST = []
    FLAT_PATH_MAP = {}
    ALL_VOICES_FLAT_LIST = []
    if not os.path.exists(VOICE_WAV_ROOT):
        return
    print(f"📂 Escaneando voces en: {VOICE_WAV_ROOT}...")
    for folder, _dirs, files in os.walk(VOICE_WAV_ROOT):
        for fname in files:
            if not fname.endswith((".wav", ".mp3")):
                continue
            full_path = os.path.join(folder, fname)
            parts = os.path.relpath(full_path, VOICE_WAV_ROOT).split(os.sep)
            if len(parts) < 2:
                continue  # files directly under the root have no category
            raw_cat = parts[0]
            # Normalise folder names like "ElevenLabs_narrators_female"
            # into a display category such as "Narrators".
            clean_cat = (
                raw_cat.replace("ElevenLabs_", "")
                .replace("_female", "")
                .replace("_male", "")
                .replace("_", " ")
                .strip()
                .title()
            )
            if clean_cat not in VOICE_DB:
                VOICE_DB[clean_cat] = {}
            if clean_cat not in CATEGORY_LIST:
                CATEGORY_LIST.append(clean_cat)
            lang = "en"
            if len(parts) >= 3 and len(parts[1]) == 2:
                lang = parts[1].lower()
            low_cat = raw_cat.lower()
            if "_female" in low_cat:
                icon = "♀️"
            elif "_male" in low_cat:
                icon = "♂️"
            else:
                icon = "🎙️"
            display = f"{icon} {os.path.splitext(fname)[0]}"
            VOICE_DB[clean_cat].setdefault(lang, {})[display] = full_path
            FLAT_PATH_MAP[display] = full_path
            if display not in ALL_VOICES_FLAT_LIST:
                ALL_VOICES_FLAT_LIST.append(display)
    CATEGORY_LIST.sort()
    print(f"✅ {len(ALL_VOICES_FLAT_LIST)} voces encontradas.")
def resolve_voice_path(voice_name, lang="en"):
    """Map a UI display name to its wav path, or None when unknown.

    Falls back to the first entry whose display name contains the query.
    NOTE: ``lang`` is currently unused; kept for interface compatibility.
    """
    if not voice_name or voice_name == "None":
        return None
    direct = FLAT_PATH_MAP.get(voice_name)
    if direct is not None:
        return direct
    for display, path in FLAT_PATH_MAP.items():
        if voice_name in display:
            return path
    return None
def get_available_languages(category):
    """Return (label, code) dropdown choices for a category's languages."""
    if not category or category not in VOICE_DB:
        return []
    choices = []
    for code in sorted(VOICE_DB[category]):
        label = SUPPORTED_LANGUAGES.get(code, code)
        choices.append((f"{label} ({code})", code))
    return choices
def get_voices_for_ui(category, lang):
    """Sorted display names for one category/language pair ([] if missing)."""
    lang_map = VOICE_DB.get(category, {})
    if lang not in lang_map:
        return []
    return sorted(lang_map[lang])
def get_all_voices_list():
    """Every known voice display name, alphabetically sorted."""
    voices = list(ALL_VOICES_FLAT_LIST)
    voices.sort()
    return voices

# Build the voice indices once at import time.
scan_voice_wav_structure()
# ==============================================================================
# 3. GENERATION FUNCTIONS
# ==============================================================================
def set_seed(seed):
    """Seed random, numpy and torch (CPU + CUDA) for reproducible output."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
def format_time(seconds):
    """Render a duration as '12.3s' below one minute, else '2m 5.0s'."""
    if seconds >= 60:
        minutes, rem = divmod(seconds, 60)
        return f"{int(minutes)}m {rem:.1f}s"
    return f"{seconds:.1f}s"
def estimate_generation_time(text_length):
    """Rough ETA in seconds: ~2 s per 50 characters plus 1 s of overhead."""
    return text_length / 50 * 2 + 1
def smart_chunk_text(text, max_words=40):
    """Split text into sentence-aligned chunks of at most ``max_words`` units.

    For CJK text a "word" is a non-space character and chunks are joined
    without separators; otherwise words are whitespace-delimited and joined
    with spaces. A single oversized sentence still becomes its own chunk.
    Returns ``[text]`` when nothing could be split out.
    """
    is_cjk = bool(re.search(r'[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff]', text))
    sep = '' if is_cjk else ' '
    # Split after terminal punctuation (incl. fullwidth) or on newlines.
    pieces = re.split(r'(?<=[.!?。!?।؟])\s*|\n+', text)
    chunks = []
    buffer = []
    buffered = 0
    for piece in pieces:
        piece = piece.strip()
        if not piece:
            continue
        size = len(re.sub(r'\s+', '', piece)) if is_cjk else len(piece.split())
        if buffered + size > max_words and buffer:
            chunks.append(sep.join(buffer))
            buffer = []
            buffered = 0
        buffer.append(piece)
        buffered += size
    if buffer:
        chunks.append(sep.join(buffer))
    return chunks or [text]
def generate_speech(text, voice_name, exaggeration, temperature, seed_num, cfgw, min_p, top_p, repetition_penalty):
    """Standard TTS generator: yields (progress_html, audio_or_None) updates.

    The final yield carries (sample_rate, np.ndarray); every failure path
    yields an error progress bar with audio None.
    """
    try:
        t0 = time.time()
        if not text.strip():
            yield make_progress_html(0, "❌ Error: Texto vacío"), None
            return
        ref_path = resolve_voice_path(voice_name, "en")
        if not ref_path:
            yield make_progress_html(0, "❌ Error: Voz no encontrada"), None
            return
        yield make_progress_html(20, "⚙️ Cargando modelo TTS..."), None
        model = model_manager.get_model("tts")
        if model is None:
            yield make_progress_html(0, "❌ Error modelo"), None
            return
        if seed_num != 0:
            set_seed(int(seed_num))
        chunks = smart_chunk_text(text)
        total = len(chunks)
        pieces = []
        for idx, chunk in enumerate(chunks):
            yield make_progress_html(30 + int((idx / total) * 60), f"🎙️ Generando parte {idx+1}/{total}..."), None
            pieces.append(model.generate(
                chunk,
                audio_prompt_path=ref_path,
                exaggeration=exaggeration,
                temperature=temperature,
                cfg_weight=cfgw,
                min_p=min_p,
                top_p=top_p,
                repetition_penalty=repetition_penalty,
            ))
        yield make_progress_html(95, "🎹 Uniendo audio..."), None
        full_wav = pieces[0] if len(pieces) == 1 else torch.cat(pieces, dim=-1)
        yield make_progress_html(100, f"✅ Listo ({format_time(time.time() - t0)})"), (model.sr, full_wav.squeeze(0).numpy())
    except Exception as e:
        yield make_progress_html(0, f"❌ Error: {str(e)}"), None
def generate_turbo_speech(text, voice_name):
    """Turbo TTS generator: yields (progress_html, audio_or_None) updates.

    Same streaming contract as generate_speech, but using the faster model
    and no tuning parameters.
    """
    try:
        t0 = time.time()
        if not text.strip():
            yield make_progress_html(0, "❌ Error: Texto vacío"), None
            return
        ref_path = resolve_voice_path(voice_name, "en")
        if not ref_path:
            yield make_progress_html(0, "❌ Error: Voz no encontrada"), None
            return
        yield make_progress_html(20, "⚡ Cargando Turbo..."), None
        model = model_manager.get_model("turbo")
        if model is None:
            yield make_progress_html(0, "❌ Error Turbo"), None
            return
        chunks = smart_chunk_text(text)
        total = len(chunks)
        pieces = []
        for idx, chunk in enumerate(chunks):
            yield make_progress_html(30 + int((idx / total) * 60), f"⚡ Turbo chunk {idx+1}/{total}..."), None
            pieces.append(model.generate(chunk, audio_prompt_path=ref_path))
        full_wav = pieces[0] if len(pieces) == 1 else torch.cat(pieces, dim=-1)
        yield make_progress_html(100, f"✅ Turbo listo ({format_time(time.time() - t0)})"), (model.sr, full_wav.squeeze(0).numpy())
    except Exception as e:
        yield make_progress_html(0, f"❌ Error: {str(e)}"), None
def generate_multilingual_speech(text, voice_name, lang_code, exaggeration, temperature, seed_num, cfgw):
    """Multilingual TTS generator: yields (progress_html, audio_or_None).

    Fix: validate empty text and an unresolved voice up-front — matching
    generate_speech/generate_turbo_speech — instead of silently passing
    audio_prompt_path=None to the model. Also drops an unused timer local.
    """
    try:
        if not text.strip():
            yield make_progress_html(0, "❌ Error: Texto vacío"), None
            return
        path = resolve_voice_path(voice_name, lang_code)
        if not path:
            yield make_progress_html(0, "❌ Error: Voz no encontrada"), None
            return
        yield make_progress_html(20, "🌍 Cargando Multi-TTS..."), None
        model = model_manager.get_model("mtl")
        if model is None:
            yield make_progress_html(0, "❌ Error modelo"), None
            return
        if seed_num != 0:
            set_seed(int(seed_num))
        chunks = smart_chunk_text(text)
        wavs = []
        for i, chunk in enumerate(chunks):
            pct = 30 + int((i / len(chunks)) * 60)
            yield make_progress_html(pct, f"🌍 Generando ({lang_code}) {i+1}..."), None
            w = model.generate(chunk, language_id=lang_code, audio_prompt_path=path, exaggeration=exaggeration, temperature=temperature, cfg_weight=cfgw)
            wavs.append(w)
        full_wav = torch.cat(wavs, dim=-1) if len(wavs) > 1 else wavs[0]
        yield make_progress_html(100, "✅ Listo"), (model.sr, full_wav.squeeze(0).numpy())
    except Exception as e:
        yield make_progress_html(0, f"❌ Error: {str(e)}"), None
def convert_voice(audio, target_voice):
    """Voice-conversion generator: yields (progress_html, audio_or_None).

    Fix: reject a missing input recording up-front instead of passing None
    to model.generate and surfacing a cryptic exception message.
    """
    try:
        start_time = time.time()
        if not audio:
            yield make_progress_html(0, "❌ Error: Audio de entrada requerido"), None
            return
        path = resolve_voice_path(target_voice, "en")
        if not path:
            yield make_progress_html(0, "❌ Error: Voz destino no válida"), None
            return
        yield make_progress_html(50, "🔄 Cargando VC..."), None
        model = model_manager.get_model("vc")
        if model is None:
            yield make_progress_html(0, "❌ Error VC"), None
            return
        yield make_progress_html(70, "🔄 Convirtiendo..."), None
        w = model.generate(audio, target_voice_path=path)
        yield make_progress_html(100, f"✅ Listo ({format_time(time.time()-start_time)})"), (model.sr, w.squeeze(0).numpy())
    except Exception as e:
        yield make_progress_html(0, f"❌ Error: {str(e)}"), None
def clone_voice_wrapper(ref_audio, name, cat, lang, gender):
    """Register a reference recording as a new cloned voice.

    Copies ``ref_audio`` into VOICE_WAV_ROOT/<category>_<gender>/<lang>/ and
    rescans the library. Returns (status_message, dropdown_update).

    Fix: validate the reference audio and category too (previously a missing
    recording fell through to shutil.copy(None, ...) and only surfaced as a
    generic exception message).
    """
    try:
        if not name:
            return "Nombre requerido", gr.update()
        if not ref_audio:
            return "❌ Audio de referencia requerido", gr.update()
        if not cat:
            return "❌ Categoría requerida", gr.update()
        cat_slug = cat.lower().replace(" ", "_")
        gender_slug = gender.lower()
        target_dir = os.path.join(VOICE_WAV_ROOT, f"{cat_slug}_{gender_slug}", lang)
        os.makedirs(target_dir, exist_ok=True)
        # Keep only alphanumerics, spaces, dashes and underscores in the name.
        clean_name = "".join(x for x in name if x.isalnum() or x in " -_").strip()
        dest = os.path.join(target_dir, f"{clean_name}.wav")
        if os.path.exists(dest):
            return "❌ La voz ya existe", gr.update()
        shutil.copy(ref_audio, dest)
        scan_voice_wav_structure()
        return f"✅ Clonada: {clean_name}", gr.update(choices=get_all_voices_list())
    except Exception as e:
        return f"❌ Error: {e}", gr.update()
def delete_voice_wrapper(voice_name):
    """Delete a cloned voice file and refresh the dropdown choices.

    Returns (status_message, dropdown_update).
    """
    try:
        path = resolve_voice_path(voice_name, "en")
        if not path or not os.path.exists(path):
            return "❌ Archivo no encontrado", gr.update()
        os.remove(path)
        scan_voice_wav_structure()
        return f"✅ Eliminada: {voice_name}", gr.update(choices=get_all_voices_list(), value=None)
    except Exception as e:
        return f"❌ Error: {e}", gr.update()
# ==============================================================================
# 4. GRAPHICAL INTERFACE (UI)
# ==============================================================================
def create_header():
    """Render the static banner at the top of the content area."""
    banner = """
<div style="text-align: center; margin-bottom: 1rem; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 25px; border-radius: 16px; color: white; box-shadow: 0 4px 15px rgba(0,0,0,0.2);">
<h1 style="font-size: 2.8em; margin: 0; font-weight: 800;">⚡ Chatterbox Turbo</h1>
<p style="font-size: 1.1em; opacity: 0.9; margin-top: 10px;">Pro Audio Synthesis Suite</p>
</div>
"""
    gr.HTML(banner)
# Initial state shown in every progress area before a generation starts.
INITIAL_PROGRESS = make_progress_html(0, "Esperando inicio...")

def create_turbo_tab():
    """Build the Turbo TTS panel; returns a dict of its live components."""
    with gr.Row():
        with gr.Column(scale=1):
            text = gr.Textbox(label="Texto a sintetizar", value="Hello! [laugh] This is Turbo speed!", lines=4, elem_id="turbo_textbox")
            with gr.Row(elem_classes="tag-container"):
                # Quick-insert buttons for inline expression tags.
                tags = [gr.Button(t, size="sm", elem_classes="tag-btn") for t in ["[laugh]", "[sigh]", "[cough]", "[clear throat]", "[gasp]", "[chuckle]"]]
            voice = gr.Dropdown(label="Seleccionar Voz", choices=[], interactive=True)
            preview = gr.Audio(label="Preview", interactive=False, visible=True, type="filepath")
            btn = gr.Button("⚡ Generar Audio (Turbo)", variant="primary", size="lg")
        with gr.Column(scale=1):
            # MODIFIED: audio output first, then the progress bar below it.
            audio_out = gr.Audio(label="Resultado Final", autoplay=True, show_download_button=True)
            progress_html = gr.HTML(value=INITIAL_PROGRESS, label="Estado")
    return {"text": text, "voice": voice, "prev": preview, "btn": btn, "prog": progress_html, "out": audio_out, "tags": tags}
def create_tts_tab():
    """Build the standard TTS panel; returns a dict of its live components."""
    with gr.Row():
        with gr.Column(scale=1):
            text = gr.Textbox(label="Texto", value="Hello world!", lines=4)
            voice = gr.Dropdown(label="Voz Clonada", choices=[], interactive=True)
            preview = gr.Audio(label="Preview", interactive=False)
            with gr.Accordion("⚙️ Opciones Avanzadas", open=False):
                # Sampling controls forwarded verbatim to generate_speech.
                exag = gr.Slider(0.25, 2, value=.5, step=0.05, label="Exaggeration")
                cfg = gr.Slider(0, 1, value=.5, step=0.05, label="CFG")
                temp = gr.Slider(0.05, 5, value=.8, step=0.05, label="Temp")
                seed = gr.Number(0, label="Seed")
                min_p = gr.Slider(0, 1, value=0.05, label="Min P")
                top_p = gr.Slider(0, 1, value=1.0, label="Top P")
                rep = gr.Slider(1, 2, value=1.2, label="Repetition")
            btn = gr.Button("🎙️ Generar Audio", variant="primary", size="lg")
        with gr.Column(scale=1):
            # MODIFIED: audio output first, then the progress bar below it.
            audio_out = gr.Audio(label="Resultado", autoplay=True)
            progress_html = gr.HTML(value=INITIAL_PROGRESS)
    # "opts" order must match generate_speech's parameters after voice_name.
    return {"text": text, "voice": voice, "prev": preview, "btn": btn, "prog": progress_html, "out": audio_out, "opts": [exag, temp, seed, cfg, min_p, top_p, rep]}
def create_mtl_tab():
    """Build the multilingual TTS panel; returns a dict of its components."""
    with gr.Row():
        with gr.Column(scale=1):
            text = gr.Textbox(label="Texto", value="Hola mundo", lines=4)
            lang_choices = [(f"{v} ({k})", k) for k,v in SUPPORTED_LANGUAGES.items()]
            lang = gr.Dropdown(label="Idioma", choices=lang_choices, value="es")
            voice = gr.Dropdown(label="Voz", choices=[])
            preview = gr.Audio(label="Preview", interactive=False)
            with gr.Accordion("⚙️ Opciones", open=False):
                # "opts" order must match generate_multilingual_speech's tail params.
                exag = gr.Slider(0.25, 2, value=.5, label="Exaggeration")
                temp = gr.Slider(0.05, 5, value=.8, label="Temp")
                seed = gr.Number(0, label="Seed")
                cfg = gr.Slider(0, 1, value=.5, label="CFG")
            btn = gr.Button("🌍 Generar", variant="primary", size="lg")
        with gr.Column(scale=1):
            # MODIFIED: audio output first, then the progress bar below it.
            audio_out = gr.Audio(label="Resultado", autoplay=True)
            progress_html = gr.HTML(value=INITIAL_PROGRESS)
    return {"text": text, "lang": lang, "voice": voice, "prev": preview, "btn": btn, "prog": progress_html, "out": audio_out, "opts": [exag, temp, seed, cfg]}
def create_vc_tab():
    """Build the voice-conversion panel; returns a dict of its components."""
    with gr.Row():
        with gr.Column(scale=1):
            inp = gr.Audio(label="Entrada", sources=["upload", "microphone"], type="filepath")
            voice = gr.Dropdown(label="Voz Objetivo", choices=[])
            preview = gr.Audio(label="Preview", interactive=False)
            btn = gr.Button("🔄 Convertir", variant="primary", size="lg")
        with gr.Column(scale=1):
            # MODIFIED: audio output first, then the progress bar below it.
            audio_out = gr.Audio(label="Resultado", autoplay=True)
            progress_html = gr.HTML(value=INITIAL_PROGRESS)
    return {"inp": inp, "voice": voice, "prev": preview, "btn": btn, "prog": progress_html, "out": audio_out}
def create_clone_tab():
    """Build the voice cloning / deletion panel; returns its components."""
    with gr.Row():
        with gr.Column():
            gr.Markdown("### 🧬 Clonar Voz")
            name = gr.Textbox(label="Nombre")
            with gr.Row():
                gender = gr.Radio(["Male", "Female"], value="Male", label="Género")
                lang_choices = [(f"{v} ({k})", k) for k,v in SUPPORTED_LANGUAGES.items()]
                lang = gr.Dropdown(label="Idioma", choices=lang_choices, value="es")
            cat = gr.Dropdown(label="Categoría", choices=CATEGORY_LIST, allow_custom_value=False)
            ref = gr.Audio(label="Referencia", type="filepath")
            btn = gr.Button("💾 Clonar", variant="primary")
            status = gr.Textbox(label="Estado")
        with gr.Column():
            gr.Markdown("### 🗑️ Borrar")
            del_sel = gr.Dropdown(label="Seleccionar Voz", choices=[])
            del_btn = gr.Button("🗑️ Eliminar", variant="stop")
            del_stat = gr.Textbox(label="Estado")
    return {"name": name, "gender": gender, "cat": cat, "lang": lang, "ref": ref, "btn": btn, "stat": status, "del_sel": del_sel, "del_btn": del_btn, "del_stat": del_stat}
# Global dark-theme stylesheet for the app (slate palette, orange accent).
CSS = """
body, .gradio-container { background-color: #0f172a; font-family: 'Segoe UI', sans-serif; }
.sidebar-container { background-color: #1e293b; padding: 20px; border-right: 1px solid #334155; }
.nav-btn { background: transparent; border: none; color: #94a3b8; text-align: left; padding: 15px; font-weight: 600; width: 100%; border-radius: 8px; margin-bottom: 5px; transition: all 0.2s; }
.nav-btn:hover { background: #334155; color: white; padding-left: 20px; }
.active-btn { background: #334155; color: white; border-left: 4px solid #f97316; padding-left: 20px; box-shadow: 0 4px 6px rgba(0,0,0,0.1); }
.content-panel { background: #1e293b; border: 1px solid #334155; border-radius: 16px; padding: 30px; margin-top: 20px; box-shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.1); }
input, textarea, select { background-color: #0f172a !important; border: 1px solid #334155 !important; color: white !important; }
.tag-btn { background: #334155; color: #e2e8f0; border: 1px solid #475569; margin-right: 5px; }
"""
# App layout + event wiring. NOTE: component creation and .click/.change
# registration must stay inside the Blocks context and in this order.
with gr.Blocks(title="Chatterbox Pro", css=CSS, theme=gr.themes.Base()) as demo:
    with gr.Row(elem_classes="main-layout", equal_height=True):
        # --- Sidebar: global library filters + view navigation ---
        with gr.Column(scale=1, min_width=250, elem_classes="sidebar-container"):
            gr.Markdown("### 🎛️ CONTROL PANEL")
            gr.Markdown("---")
            initial_cat = CATEGORY_LIST[0] if CATEGORY_LIST else None
            cat_filter = gr.Dropdown(label="📚 Librería", choices=CATEGORY_LIST, value=initial_cat)
            lang_filter = gr.Dropdown(label="🌐 Idioma", choices=[])
            gr.Markdown("---")
            btn_turbo = gr.Button("🚀 Turbo Mode", elem_classes=["nav-btn", "active-btn"])
            btn_tts = gr.Button("🎤 TTS Pro", elem_classes=["nav-btn"])
            btn_mtl = gr.Button("🌍 Multilingual", elem_classes=["nav-btn"])
            btn_vc = gr.Button("🔄 Converter", elem_classes=["nav-btn"])
            btn_clone = gr.Button("🧬 Cloning Lab", elem_classes=["nav-btn"])
        # --- Main content: one hidden panel per mode, toggled by nav buttons ---
        with gr.Column(scale=4, elem_classes="content-area"):
            create_header()
            with gr.Column(visible=True, elem_classes="content-panel") as v_turbo: ui_turbo = create_turbo_tab()
            with gr.Column(visible=False, elem_classes="content-panel") as v_tts: ui_tts = create_tts_tab()
            with gr.Column(visible=False, elem_classes="content-panel") as v_mtl: ui_mtl = create_mtl_tab()
            with gr.Column(visible=False, elem_classes="content-panel") as v_vc: ui_vc = create_vc_tab()
            with gr.Column(visible=False, elem_classes="content-panel") as v_clone: ui_clone = create_clone_tab()
    views = [v_turbo, v_tts, v_mtl, v_vc, v_clone]; btns = [btn_turbo, btn_tts, btn_mtl, btn_vc, btn_clone]
    def switch_view(idx):
        # Show only panel `idx` and highlight the matching nav button.
        return [gr.update(visible=(i==idx)) for i in range(len(views))] + [gr.update(elem_classes=["nav-btn", "active-btn"] if i==idx else ["nav-btn"]) for i in range(len(btns))]
    # `idx=i` binds the loop variable at definition time (late-binding fix).
    for i, b in enumerate(btns): b.click(lambda idx=i: switch_view(idx), outputs=views+btns)
    def update_ui_lists(cat, lang_code=None):
        # Refresh language + voice dropdowns on every tab for the chosen
        # category; falls back to the first language when lang_code is invalid.
        langs = get_available_languages(cat)
        valid_codes = [c[1] for c in langs]
        curr_lang = lang_code if lang_code in valid_codes else (valid_codes[0] if valid_codes else None)
        voices = get_voices_for_ui(cat, curr_lang)
        v_val = voices[0] if voices else None
        return (gr.update(choices=langs, value=curr_lang), gr.update(choices=voices, value=v_val), gr.update(choices=voices, value=v_val), gr.update(choices=voices, value=v_val), gr.update(choices=voices, value=v_val), gr.update(choices=CATEGORY_LIST, value=cat), gr.update(choices=get_all_voices_list()))
    cat_filter.change(update_ui_lists, inputs=[cat_filter], outputs=[lang_filter, ui_turbo["voice"], ui_tts["voice"], ui_mtl["voice"], ui_vc["voice"], ui_clone["cat"], ui_clone["del_sel"]])
    lang_filter.change(lambda c, l: update_ui_lists(c, l), inputs=[cat_filter, lang_filter], outputs=[lang_filter, ui_turbo["voice"], ui_tts["voice"], ui_mtl["voice"], ui_vc["voice"], ui_clone["cat"], ui_clone["del_sel"]])
    demo.load(lambda: update_ui_lists(initial_cat), outputs=[lang_filter, ui_turbo["voice"], ui_tts["voice"], ui_mtl["voice"], ui_vc["voice"], ui_clone["cat"], ui_clone["del_sel"]])
    # Selecting a voice loads its wav file into the tab's preview player.
    ui_turbo["voice"].change(lambda v: resolve_voice_path(v, "en"), inputs=ui_turbo["voice"], outputs=ui_turbo["prev"])
    ui_tts["voice"].change(lambda v: resolve_voice_path(v, "en"), inputs=ui_tts["voice"], outputs=ui_tts["prev"])
    ui_mtl["voice"].change(lambda v, l: resolve_voice_path(v, l), inputs=[ui_mtl["voice"], ui_mtl["lang"]], outputs=ui_mtl["prev"])
    ui_vc["voice"].change(lambda v: resolve_voice_path(v, "en"), inputs=ui_vc["voice"], outputs=ui_vc["prev"])
    # Generation handlers are generators: they stream (progress_html, audio).
    ui_turbo["btn"].click(generate_turbo_speech, inputs=[ui_turbo["text"], ui_turbo["voice"]], outputs=[ui_turbo["prog"], ui_turbo["out"]])
    ui_tts["btn"].click(generate_speech, inputs=[ui_tts["text"], ui_tts["voice"]] + ui_tts["opts"], outputs=[ui_tts["prog"], ui_tts["out"]])
    ui_mtl["btn"].click(generate_multilingual_speech, inputs=[ui_mtl["text"], ui_mtl["voice"], ui_mtl["lang"]] + ui_mtl["opts"], outputs=[ui_mtl["prog"], ui_mtl["out"]])
    ui_vc["btn"].click(convert_voice, inputs=[ui_vc["inp"], ui_vc["voice"]], outputs=[ui_vc["prog"], ui_vc["out"]])
    ui_clone["btn"].click(clone_voice_wrapper, inputs=[ui_clone["ref"], ui_clone["name"], ui_clone["cat"], ui_clone["lang"], ui_clone["gender"]], outputs=[ui_clone["stat"], ui_clone["del_sel"]])
    ui_clone["del_btn"].click(delete_voice_wrapper, inputs=[ui_clone["del_sel"]], outputs=[ui_clone["del_stat"], ui_clone["del_sel"]])
    # Client-side JS: inserts the clicked tag at the caret in the turbo textbox.
    JS_TAGS = """(tag, text) => { var el = document.querySelector('#turbo_textbox textarea'); if(el) { var start = el.selectionStart; var end = el.selectionEnd; return text.slice(0, start) + " " + tag + " " + text.slice(end); } return text + " " + tag; }"""
    for btn in ui_turbo["tags"]: btn.click(None, inputs=[btn, ui_turbo["text"]], outputs=ui_turbo["text"], js=JS_TAGS)
if __name__ == "__main__":
    # queue() is required so generator callbacks can stream progress updates.
    demo.queue().launch(inbrowser=True)