# ⚡ Chatterbox Turbo

"""
Chatterbox TTS Enhanced - Monolithic Pro Edition (UI UPDATE)
Fixes: Progress bar moved below audio output.
"""
import sys
import os
import glob
import shutil
import time
import random
import re
import numpy as np
import torch
import gradio as gr
from pathlib import Path
import gc

# ==============================================================================
# 0. SETUP DE RUTAS Y LIBRERÍA
# ==============================================================================

PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
SRC_PATH = os.path.join(PROJECT_ROOT, "src")

# Put the bundled 'src' directory on the import search path when it exists;
# otherwise fall back to the project root itself.
if not os.path.exists(SRC_PATH):
    if PROJECT_ROOT not in sys.path:
        sys.path.append(PROJECT_ROOT)
elif SRC_PATH not in sys.path:
    sys.path.append(SRC_PATH)
    print(f"✅ Path 'src' añadido: {SRC_PATH}")

# Try to import the REAL library; fall back to a silent mock when it is missing
try:
    from chatterbox.tts import ChatterboxTTS
    from chatterbox.vc import ChatterboxVC
    from chatterbox.mtl_tts import ChatterboxMultilingualTTS, SUPPORTED_LANGUAGES
    from chatterbox.tts_turbo import ChatterboxTurboTTS
except ImportError as import_error:
    print(f"❌ ERROR CRÍTICO: No se pudo importar 'chatterbox'. Detalle: {import_error}")
    print("⚠️  EJECUTANDO EN MODO MOCK (SIN AUDIO REAL)")

    SUPPORTED_LANGUAGES = {"en": "English", "es": "Spanish"}

    class MockModel:
        """Stand-in for every Chatterbox model; produces silence instead of speech."""

        def __init__(self, *args, **kwargs):
            self.sr = 24000

        @classmethod
        def from_pretrained(cls, device):
            # The device is accepted for signature compatibility and ignored.
            return cls()

        def generate(self, *args, **kwargs):
            # Pause for one second, then return an all-zero (1, 48000) tensor.
            time.sleep(1)
            return torch.zeros(1, 48000)

    # All four model classes resolve to the same mock.
    ChatterboxTTS = MockModel
    ChatterboxVC = MockModel
    ChatterboxMultilingualTTS = MockModel
    ChatterboxTurboTTS = MockModel
else:
    print("✅ Librería 'chatterbox' importada correctamente.")

# ==============================================================================
# 1. CONFIGURACIÓN Y UTILIDADES UI
# ==============================================================================

# Root folder of the voice library; created on first run if it is missing.
VOICE_WAV_ROOT = os.path.join(PROJECT_ROOT, "modules", "voice_wav")
os.makedirs(VOICE_WAV_ROOT, exist_ok=True)

# Run on the GPU whenever CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# --- HTML PROGRESS BAR GENERATOR ---
def make_progress_html(percentage, message="Ready"):
    """Render the progress bar widget as an HTML snippet.

    Args:
        percentage: Completion value; clamped to the 0-100 range so it always
            yields a valid CSS width. The bar turns green at exactly 100.
        message: Status text shown under the bar. It is HTML-escaped because
            callers pass exception text through it.

    Returns:
        The HTML markup as a string.
    """
    # Function-scope import keeps the block self-contained.
    from html import escape

    percentage = max(0, min(100, percentage))
    color = "linear-gradient(90deg, #f97316 0%, #fbbf24 100%)"
    if percentage == 100:
        color = "#22c55e"

    # Escape so characters such as '<' or '&' in the message cannot break the markup.
    safe_message = escape(str(message))

    markup = f"""
    <div style="display: flex; flex-direction: column; gap: 5px; width: 100%; margin-top: 10px;">
        <div style="width: 100%; background-color: #334155; border-radius: 99px; height: 24px; border: 1px solid #475569; overflow: hidden; position: relative; box-shadow: inset 0 2px 4px rgba(0,0,0,0.3);">
            <div style="width: {percentage}%; background: {color}; height: 100%; transition: width 0.3s ease-out, background 0.3s ease;"></div>
            <div style="position: absolute; top: 0; left: 0; width: 100%; height: 100%; display: flex; align-items: center; justify-content: center; color: white; font-size: 11px; font-weight: bold; text-shadow: 0 1px 2px rgba(0,0,0,0.5);">
                {int(percentage)}%
            </div>
        </div>
        <p style="color: #cbd5e1; font-size: 0.9em; margin: 0; padding: 0; white-space: pre-wrap; font-family: monospace;">{safe_message}</p>
    </div>
    """
    return markup

# ==============================================================================
# 2. MODEL MANAGER & VOICE MANAGER
# ==============================================================================

class ModelManager:
    """Keep at most one Chatterbox model loaded at a time.

    Requesting a model kind different from the one currently loaded unloads
    everything first and then loads the requested kind.
    """

    # Kinds accepted by get_model(); each one maps to a "<kind>_model" attribute.
    MODEL_TYPES = ("tts", "mtl", "vc", "turbo")

    def __init__(self):
        self.tts_model = None
        self.mtl_model = None
        self.vc_model = None
        self.turbo_model = None
        self.current_model_type = None

    def unload_all(self):
        """Drop every model reference and release the memory they held."""
        self.tts_model = None
        self.mtl_model = None
        self.vc_model = None
        self.turbo_model = None
        self.current_model_type = None
        # Collect first so objects kept alive only by reference cycles are freed
        # before the CUDA cache is emptied; this also reclaims RAM on CPU runs.
        gc.collect()
        if DEVICE == "cuda":
            torch.cuda.empty_cache()

    def get_model(self, type_key):
        """Return the model for ``type_key``, loading it if it is not current.

        Args:
            type_key: One of ``MODEL_TYPES``.

        Returns:
            The loaded model, or ``None`` when the key is unknown or loading
            fails (the error is printed).
        """
        # Reject unknown kinds up front so a bad key cannot evict the model
        # that is currently loaded.
        if type_key not in self.MODEL_TYPES:
            print(f"❌ Error loading {type_key}: unknown model type")
            return None

        if self.current_model_type != type_key:
            print(f"🔄 Switching to {type_key.upper()} model...")
            self.unload_all()
            try:
                if type_key == "tts":
                    self.tts_model = ChatterboxTTS.from_pretrained(DEVICE)
                elif type_key == "mtl":
                    self.mtl_model = ChatterboxMultilingualTTS.from_pretrained(DEVICE)
                elif type_key == "vc":
                    self.vc_model = ChatterboxVC.from_pretrained(DEVICE)
                elif type_key == "turbo":
                    self.turbo_model = ChatterboxTurboTTS.from_pretrained(device=DEVICE)
                self.current_model_type = type_key
                print(f"✅ {type_key.upper()} loaded.")
            except Exception as e:
                print(f"❌ Error loading {type_key}: {e}")
                return None
        return getattr(self, f"{type_key}_model")


model_manager = ModelManager()

# --- VOICE DB ---
# VOICE_DB:             {category: {lang: {display_name: file_path}}}
# CATEGORY_LIST:        sorted category names
# FLAT_PATH_MAP:        {display_name: file_path} across all categories
# ALL_VOICES_FLAT_LIST: unique display names in scan order
VOICE_DB = {}; CATEGORY_LIST = []; FLAT_PATH_MAP = {}; ALL_VOICES_FLAT_LIST = []

def scan_voice_wav_structure():
    """Rebuild the module-level voice tables from the files under VOICE_WAV_ROOT.

    Expected layout is ``<category>/[<lang>/]<voice>.wav|.mp3``. Files sitting
    directly in the root have no category and are ignored. A two-character
    folder directly below the category is taken as the language code;
    otherwise the language defaults to "en". The extension match is
    case-insensitive so files such as ``Voice.WAV`` are picked up too.
    """
    global VOICE_DB, CATEGORY_LIST, FLAT_PATH_MAP, ALL_VOICES_FLAT_LIST
    VOICE_DB = {}; CATEGORY_LIST = []; FLAT_PATH_MAP = {}; ALL_VOICES_FLAT_LIST = []

    if not os.path.exists(VOICE_WAV_ROOT):
        return

    print(f"📂 Escaneando voces en: {VOICE_WAV_ROOT}...")
    for root, _dirs, files in os.walk(VOICE_WAV_ROOT):
        for f in files:
            if not f.lower().endswith((".wav", ".mp3")):
                continue

            full_path = os.path.join(root, f)
            parts = os.path.relpath(full_path, VOICE_WAV_ROOT).split(os.sep)
            if len(parts) < 2:
                continue

            raw_cat = parts[0]
            # Strip provider/gender markers to get a human-readable category name.
            clean_cat = raw_cat.replace("ElevenLabs_", "").replace("_female", "").replace("_male", "").replace("_", " ").strip().title()

            lang = "en"
            if len(parts) >= 3:
                possible_lang = parts[1].lower()
                if len(possible_lang) == 2:
                    lang = possible_lang

            raw_cat_lower = raw_cat.lower()
            icon = "♀️" if "_female" in raw_cat_lower else ("♂️" if "_male" in raw_cat_lower else "🎙️")
            display = f"{icon} {os.path.splitext(f)[0]}"

            VOICE_DB.setdefault(clean_cat, {}).setdefault(lang, {})[display] = full_path
            # Later files with the same display name overwrite earlier ones here.
            FLAT_PATH_MAP[display] = full_path

    # Derive the list views from the dicts: dict keys are already unique and
    # keep first-insertion order, so no per-item membership scans are needed.
    CATEGORY_LIST = sorted(VOICE_DB)
    ALL_VOICES_FLAT_LIST = list(FLAT_PATH_MAP)
    print(f"✅ {len(ALL_VOICES_FLAT_LIST)} voces encontradas.")

def resolve_voice_path(voice_name, lang="en"):
    """Map a voice display name to its audio file path.

    An exact match in FLAT_PATH_MAP wins; otherwise the first entry whose
    display name contains ``voice_name`` is used. ``lang`` is accepted but not
    consulted. Returns ``None`` for an empty name, the literal "None", or when
    nothing matches.
    """
    if not voice_name or voice_name == "None":
        return None

    try:
        return FLAT_PATH_MAP[voice_name]
    except KeyError:
        pass

    # Substring fallback, in insertion order of the map.
    partial_hits = (path for name, path in FLAT_PATH_MAP.items() if voice_name in name)
    return next(partial_hits, None)

def get_available_languages(category):
    """Return ``(label, code)`` dropdown options for the languages of a category.

    The label is "<language name> (<code>)", falling back to the code itself
    when SUPPORTED_LANGUAGES has no name for it. Options are sorted by code.
    An unknown or empty category yields an empty list.
    """
    if not category or category not in VOICE_DB:
        return []

    options = []
    for code in sorted(VOICE_DB[category].keys()):
        language_name = SUPPORTED_LANGUAGES.get(code, code)
        options.append((f"{language_name} ({code})", code))
    return options

def get_voices_for_ui(category, lang):
    """Return the sorted voice display names for a category/language pair.

    Yields an empty list when either the category or the language is unknown.
    """
    try:
        voices_by_name = VOICE_DB[category][lang]
    except KeyError:
        return []
    return sorted(voices_by_name)

def get_all_voices_list():
    """Return a new, alphabetically sorted list of every known voice display name."""
    ordered_names = list(ALL_VOICES_FLAT_LIST)
    ordered_names.sort()
    return ordered_names

# Initialize: populate the voice tables once when the module is loaded
scan_voice_wav_structure()

# ==============================================================================
# 3. FUNCIONES DE GENERACIÓN
# ==============================================================================

def set_seed(seed):
    """Seed the PyTorch (CPU and, when present, CUDA), Python and NumPy RNGs."""
    torch.manual_seed(seed)
    cuda_ready = torch.cuda.is_available()
    if cuda_ready:
        torch.cuda.manual_seed_all(seed)
    for seeder in (random.seed, np.random.seed):
        seeder(seed)

def format_time(seconds):
    """Format a duration as "S.Ss" (under one minute) or "Mm S.Ss".

    The value is rounded to one decimal *before* it is split into minutes and
    seconds, so a carry is never lost: 119.97 renders as "2m 0.0s" instead of
    "1m 60.0s", and 59.96 as "1m 0.0s" instead of "60.0s".
    """
    # Work in integer tenths of a second to keep the split exact.
    tenths = int(round(round(seconds, 1) * 10))
    if tenths < 600:
        return f"{tenths / 10:.1f}s"
    minutes, rem_tenths = divmod(tenths, 600)
    return f"{minutes}m {rem_tenths / 10:.1f}s"

def estimate_generation_time(text_length):
    """Estimate generation time: 2 per 50 units of text length, plus 1."""
    blocks_of_fifty = text_length / 50
    return blocks_of_fifty * 2 + 1

def smart_chunk_text(text, max_words=40):
    """Split text into sentence-aligned chunks of roughly ``max_words`` each.

    Sentences are packed greedily: a chunk is closed as soon as adding the
    next sentence would exceed ``max_words``. A single sentence longer than
    the limit is kept whole. For text containing Han or kana characters the
    size is counted in non-whitespace characters and sentences are joined
    without a separator; otherwise it is counted in whitespace-separated
    words and sentences are joined with a single space.

    Args:
        text: Input text.
        max_words: Soft size limit per chunk (words, or characters for CJK).

    Returns:
        A list of chunks; ``[text]`` when no non-empty sentence is found.
    """
    is_cjk = bool(re.search(r'[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff]', text))
    if is_cjk:
        # CJK sentences are usually not followed by whitespace, so a
        # zero-width split after the terminator is required.
        boundary = r'(?<=[.!?。！？।؟])\s*|\n+'
        joiner = ''
    else:
        # Require whitespace after ASCII-style terminators; a zero-width split
        # would break "3.14" or "..." apart and the space-join below would
        # then rewrite them as "3. 14" / ". . .".
        boundary = r'(?<=[.!?।؟])\s+|(?<=[。！？])\s*|\n+'
        joiner = ' '

    chunks = []
    current_chunk = []
    current_count = 0
    for sentence in re.split(boundary, text):
        sentence = sentence.strip()
        if not sentence:
            continue
        count = len(re.sub(r'\s+', '', sentence)) if is_cjk else len(sentence.split())
        if current_chunk and current_count + count > max_words:
            chunks.append(joiner.join(current_chunk))
            current_chunk = []
            current_count = 0
        current_chunk.append(sentence)
        current_count += count
    if current_chunk:
        chunks.append(joiner.join(current_chunk))
    return chunks if chunks else [text]

def generate_speech(text, voice_name, exaggeration, temperature, seed_num, cfgw, min_p, top_p, repetition_penalty):
    """Generate speech with the standard TTS model, streaming progress to the UI.

    Generator event handler: every yield is a ``(progress_html, audio)`` pair.
    ``audio`` is ``None`` until the final step, where it becomes
    ``(sample_rate, samples)``. Any exception is reported through the progress
    widget instead of being raised.

    Args:
        text: Text to synthesize; split into chunks with smart_chunk_text().
        voice_name: Display name of the reference voice.
        exaggeration, temperature, cfgw, min_p, top_p, repetition_penalty:
            Forwarded to ``model.generate`` as keyword arguments.
        seed_num: RNG seed; 0 or an empty field leaves the RNGs untouched.
    """
    try:
        start_time = time.time()
        # Guard against None as well as blank text.
        if not text or not text.strip():
            yield make_progress_html(0, "❌ Error: Texto vacío"), None
            return
        path = resolve_voice_path(voice_name, "en")
        if not path:
            yield make_progress_html(0, "❌ Error: Voz no encontrada"), None
            return

        yield make_progress_html(20, "⚙️ Cargando modelo TTS..."), None
        model = model_manager.get_model("tts")
        if model is None:
            yield make_progress_html(0, "❌ Error modelo"), None
            return

        # A cleared seed field may arrive as None; treat it like 0 (no fixed seed).
        if seed_num:
            set_seed(int(seed_num))
        chunks = smart_chunk_text(text)
        wavs = []

        for i, chunk in enumerate(chunks):
            # Generation occupies the 30-90% span of the bar.
            pct = 30 + int((i / len(chunks)) * 60)
            yield make_progress_html(pct, f"🎙️ Generando parte {i+1}/{len(chunks)}..."), None
            w = model.generate(chunk, audio_prompt_path=path, exaggeration=exaggeration, temperature=temperature, cfg_weight=cfgw, min_p=min_p, top_p=top_p, repetition_penalty=repetition_penalty)
            wavs.append(w)

        yield make_progress_html(95, "🎹 Uniendo audio..."), None
        full_wav = torch.cat(wavs, dim=-1) if len(wavs) > 1 else wavs[0]
        # detach()/cpu() are no-ops for plain CPU tensors but keep .numpy()
        # working if the model returns a CUDA or grad-tracking tensor.
        samples = full_wav.squeeze(0).detach().cpu().numpy()
        yield make_progress_html(100, f"✅ Listo ({format_time(time.time()-start_time)})"), (model.sr, samples)
    except Exception as e:
        yield make_progress_html(0, f"❌ Error: {str(e)}"), None

def generate_turbo_speech(text, voice_name):
    """Generate speech with the Turbo model, streaming progress to the UI.

    Generator event handler: every yield is a ``(progress_html, audio)`` pair.
    ``audio`` is ``None`` until the final step, where it becomes
    ``(sample_rate, samples)``. Any exception is reported through the progress
    widget instead of being raised.

    Args:
        text: Text to synthesize; split into chunks with smart_chunk_text().
        voice_name: Display name of the reference voice.
    """
    try:
        start_time = time.time()
        # Guard against None as well as blank text.
        if not text or not text.strip():
            yield make_progress_html(0, "❌ Error: Texto vacío"), None
            return
        path = resolve_voice_path(voice_name, "en")
        if not path:
            yield make_progress_html(0, "❌ Error: Voz no encontrada"), None
            return

        yield make_progress_html(20, "⚡ Cargando Turbo..."), None
        model = model_manager.get_model("turbo")
        if model is None:
            yield make_progress_html(0, "❌ Error Turbo"), None
            return

        chunks = smart_chunk_text(text)
        wavs = []
        for i, chunk in enumerate(chunks):
            # Generation occupies the 30-90% span of the bar.
            pct = 30 + int((i / len(chunks)) * 60)
            yield make_progress_html(pct, f"⚡ Turbo chunk {i+1}/{len(chunks)}..."), None
            w = model.generate(chunk, audio_prompt_path=path)
            wavs.append(w)

        full_wav = torch.cat(wavs, dim=-1) if len(wavs) > 1 else wavs[0]
        # detach()/cpu() are no-ops for plain CPU tensors but keep .numpy()
        # working if the model returns a CUDA or grad-tracking tensor.
        samples = full_wav.squeeze(0).detach().cpu().numpy()
        yield make_progress_html(100, f"✅ Turbo listo ({format_time(time.time()-start_time)})"), (model.sr, samples)
    except Exception as e:
        yield make_progress_html(0, f"❌ Error: {str(e)}"), None

def generate_multilingual_speech(text, voice_name, lang_code, exaggeration, temperature, seed_num, cfgw):
    """Generate speech with the multilingual model, streaming progress to the UI.

    Generator event handler: every yield is a ``(progress_html, audio)`` pair.
    ``audio`` is ``None`` until the final step, where it becomes
    ``(sample_rate, samples)``. Any exception is reported through the progress
    widget instead of being raised.

    Args:
        text: Text to synthesize; split into chunks with smart_chunk_text().
        voice_name: Display name of the reference voice.
        lang_code: Language code passed to the model as ``language_id``.
        exaggeration, temperature, cfgw: Forwarded to ``model.generate``.
        seed_num: RNG seed; 0 or an empty field leaves the RNGs untouched.
    """
    try:
        start_time = time.time()
        # Same empty-text validation as the other generation handlers.
        if not text or not text.strip():
            yield make_progress_html(0, "❌ Error: Texto vacío"), None
            return
        # NOTE(review): an unresolved voice is passed on as None, as before;
        # presumably the model then uses a built-in voice - confirm.
        path = resolve_voice_path(voice_name, lang_code)
        yield make_progress_html(20, "🌍 Cargando Multi-TTS..."), None
        model = model_manager.get_model("mtl")
        if model is None:
            yield make_progress_html(0, "❌ Error modelo"), None
            return

        # A cleared seed field may arrive as None; treat it like 0 (no fixed seed).
        if seed_num:
            set_seed(int(seed_num))
        chunks = smart_chunk_text(text)
        wavs = []
        for i, chunk in enumerate(chunks):
            # Generation occupies the 30-90% span of the bar.
            pct = 30 + int((i / len(chunks)) * 60)
            yield make_progress_html(pct, f"🌍 Generando ({lang_code}) {i+1}..."), None
            w = model.generate(chunk, language_id=lang_code, audio_prompt_path=path, exaggeration=exaggeration, temperature=temperature, cfg_weight=cfgw)
            wavs.append(w)

        full_wav = torch.cat(wavs, dim=-1) if len(wavs) > 1 else wavs[0]
        # detach()/cpu() are no-ops for plain CPU tensors but keep .numpy()
        # working if the model returns a CUDA or grad-tracking tensor.
        samples = full_wav.squeeze(0).detach().cpu().numpy()
        # Report the elapsed time like the other handlers do.
        yield make_progress_html(100, f"✅ Listo ({format_time(time.time()-start_time)})"), (model.sr, samples)
    except Exception as e:
        yield make_progress_html(0, f"❌ Error: {str(e)}"), None

def convert_voice(audio, target_voice):
    """Convert input audio to the target voice, streaming progress to the UI.

    Generator event handler: every yield is a ``(progress_html, audio)`` pair.
    ``audio`` is ``None`` until the final step, where it becomes
    ``(sample_rate, samples)``. Any exception is reported through the progress
    widget instead of being raised.

    Args:
        audio: Source audio as provided by the input component.
        target_voice: Display name of the voice to convert to.
    """
    try:
        start_time = time.time()
        # Fail fast when nothing was uploaded or recorded, before loading a model.
        if not audio:
            yield make_progress_html(0, "❌ Error: Audio de entrada requerido"), None
            return
        path = resolve_voice_path(target_voice, "en")
        if not path:
            yield make_progress_html(0, "❌ Error: Voz destino no válida"), None
            return

        yield make_progress_html(50, "🔄 Cargando VC..."), None
        model = model_manager.get_model("vc")
        if model is None:
            yield make_progress_html(0, "❌ Error VC"), None
            return

        yield make_progress_html(70, "🔄 Convirtiendo..."), None
        w = model.generate(audio, target_voice_path=path)
        # detach()/cpu() are no-ops for plain CPU tensors but keep .numpy()
        # working if the model returns a CUDA or grad-tracking tensor.
        samples = w.squeeze(0).detach().cpu().numpy()
        yield make_progress_html(100, f"✅ Listo ({format_time(time.time()-start_time)})"), (model.sr, samples)
    except Exception as e:
        yield make_progress_html(0, f"❌ Error: {str(e)}"), None

def clone_voice_wrapper(ref_audio, name, cat, lang, gender):
    """Store a reference recording as a new voice in the library.

    The file is copied to ``<VOICE_WAV_ROOT>/<category>_<gender>/<lang>/<name>.wav``
    and the voice tables are rescanned afterwards.

    Args:
        ref_audio: Path of the reference audio file.
        name: Voice name; only alphanumerics, spaces, '-' and '_' are kept.
        cat: Category name.
        lang: Language code used as the sub-folder name.
        gender: Gender label appended to the category folder name.

    Returns:
        ``(status_message, dropdown_update)``; the update carries the refreshed
        voice list on success and is a no-op otherwise.
    """
    try:
        if not name:
            return "Nombre requerido", gr.update()
        # Validate the remaining inputs up front so missing values produce a
        # clear message instead of a raw exception text.
        if not ref_audio:
            return "❌ Audio de referencia requerido", gr.update()
        if not cat or not lang or not gender:
            return "❌ Categoría, idioma y género requeridos", gr.update()

        clean_name = "".join(x for x in name if x.isalnum() or x in " -_").strip()
        if not clean_name:
            # Every character was filtered out; refuse to create ".wav".
            return "❌ Nombre no válido", gr.update()

        cat_slug = cat.lower().replace(" ", "_")
        gender_slug = gender.lower()
        target_dir = os.path.join(VOICE_WAV_ROOT, f"{cat_slug}_{gender_slug}", lang)

        # Never write outside the voice library, whatever the inputs contain.
        library_root = os.path.abspath(VOICE_WAV_ROOT)
        if os.path.commonpath([library_root, os.path.abspath(target_dir)]) != library_root:
            return "❌ Ruta de destino no válida", gr.update()

        dest = os.path.join(target_dir, f"{clean_name}.wav")
        if os.path.exists(dest):
            return "❌ La voz ya existe", gr.update()

        # Create the folder only once every check has passed.
        os.makedirs(target_dir, exist_ok=True)
        shutil.copy(ref_audio, dest)

        scan_voice_wav_structure()
        return f"✅ Clonada: {clean_name}", gr.update(choices=get_all_voices_list())
    except Exception as e:
        return f"❌ Error: {e}", gr.update()

def delete_voice_wrapper(voice_name):
    """Delete the audio file of a voice and refresh the voice tables.

    Args:
        voice_name: Exact display name of the voice to delete.

    Returns:
        ``(status_message, dropdown_update)``; on success the update carries
        the refreshed voice list and clears the selection.
    """
    try:
        # Exact lookup only. resolve_voice_path() falls back to substring
        # matching, which for a stale selection could pick - and delete - a
        # different voice whose name merely contains this one.
        path = FLAT_PATH_MAP.get(voice_name) if voice_name else None
        if path and os.path.exists(path):
            os.remove(path)
            scan_voice_wav_structure()
            return f"✅ Eliminada: {voice_name}", gr.update(choices=get_all_voices_list(), value=None)
        return "❌ Archivo no encontrado", gr.update()
    except Exception as e:
        return f"❌ Error: {e}", gr.update()

# ==============================================================================
# 4. INTERFAZ GRÁFICA (UI)
# ==============================================================================

def create_header():
    """Add the gradient title banner to the layout currently being built."""
    banner_markup = """
        <div style="text-align: center; margin-bottom: 1rem; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 25px; border-radius: 16px; color: white; box-shadow: 0 4px 15px rgba(0,0,0,0.2);">
            <h1 style="font-size: 2.8em; margin: 0; font-weight: 800;">⚡ Chatterbox Turbo</h1>
            <p style="font-size: 1.1em; opacity: 0.9; margin-top: 10px;">Pro Audio Synthesis Suite</p>
        </div>
    """
    gr.HTML(banner_markup)

# Idle-state progress bar (0%), used as the initial value of the status widget in the generation tabs.
INITIAL_PROGRESS = make_progress_html(0, "Esperando inicio...")

def create_turbo_tab():
    """Build the Turbo tab and return its components keyed by short names.

    Keys: text, voice, prev, btn, prog, out, tags (list of tag buttons).
    """
    tag_labels = ["[laugh]", "[sigh]", "[cough]", "[clear throat]", "[gasp]", "[chuckle]"]
    with gr.Row():
        # Left column: inputs.
        with gr.Column(scale=1):
            prompt_box = gr.Textbox(
                label="Texto a sintetizar",
                value="Hello! [laugh] This is Turbo speed!",
                lines=4,
                elem_id="turbo_textbox",
            )
            tag_buttons = []
            with gr.Row(elem_classes="tag-container"):
                for tag_label in tag_labels:
                    tag_buttons.append(gr.Button(tag_label, size="sm", elem_classes="tag-btn"))
            voice_picker = gr.Dropdown(label="Seleccionar Voz", choices=[], interactive=True)
            voice_preview = gr.Audio(label="Preview", interactive=False, visible=True, type="filepath")
            run_button = gr.Button("⚡ Generar Audio (Turbo)", variant="primary", size="lg")
        # Right column: audio result first, progress bar below it.
        with gr.Column(scale=1):
            result_audio = gr.Audio(label="Resultado Final", autoplay=True, show_download_button=True)
            status_bar = gr.HTML(value=INITIAL_PROGRESS, label="Estado")
    return dict(
        text=prompt_box,
        voice=voice_picker,
        prev=voice_preview,
        btn=run_button,
        prog=status_bar,
        out=result_audio,
        tags=tag_buttons,
    )

def create_tts_tab():
    """Build the TTS Pro tab and return its components keyed by short names.

    Keys: text, voice, prev, btn, prog, out, opts. ``opts`` lists the advanced
    controls in the order exaggeration, temperature, seed, CFG, min-p, top-p,
    repetition.
    """
    with gr.Row():
        # Left column: inputs and advanced options.
        with gr.Column(scale=1):
            prompt_box = gr.Textbox(label="Texto", value="Hello world!", lines=4)
            voice_picker = gr.Dropdown(label="Voz Clonada", choices=[], interactive=True)
            voice_preview = gr.Audio(label="Preview", interactive=False)
            with gr.Accordion("⚙️ Opciones Avanzadas", open=False):
                exag_slider = gr.Slider(0.25, 2, value=0.5, step=0.05, label="Exaggeration")
                cfg_slider = gr.Slider(0, 1, value=0.5, step=0.05, label="CFG")
                temp_slider = gr.Slider(0.05, 5, value=0.8, step=0.05, label="Temp")
                seed_box = gr.Number(0, label="Seed")
                min_p_slider = gr.Slider(0, 1, value=0.05, label="Min P")
                top_p_slider = gr.Slider(0, 1, value=1.0, label="Top P")
                rep_slider = gr.Slider(1, 2, value=1.2, label="Repetition")
            run_button = gr.Button("🎙️ Generar Audio", variant="primary", size="lg")
        # Right column: audio result first, progress bar below it.
        with gr.Column(scale=1):
            result_audio = gr.Audio(label="Resultado", autoplay=True)
            status_bar = gr.HTML(value=INITIAL_PROGRESS)
    advanced_controls = [
        exag_slider,
        temp_slider,
        seed_box,
        cfg_slider,
        min_p_slider,
        top_p_slider,
        rep_slider,
    ]
    return dict(
        text=prompt_box,
        voice=voice_picker,
        prev=voice_preview,
        btn=run_button,
        prog=status_bar,
        out=result_audio,
        opts=advanced_controls,
    )

def create_mtl_tab():
    """Build the Multilingual tab and return its components keyed by short names.

    Keys: text, lang, voice, prev, btn, prog, out, opts. ``opts`` lists the
    option controls in the order exaggeration, temperature, seed, CFG.
    """
    with gr.Row():
        # Left column: inputs and options.
        with gr.Column(scale=1):
            prompt_box = gr.Textbox(label="Texto", value="Hola mundo", lines=4)
            language_options = []
            for code, language_name in SUPPORTED_LANGUAGES.items():
                language_options.append((f"{language_name} ({code})", code))
            language_picker = gr.Dropdown(label="Idioma", choices=language_options, value="es")

            voice_picker = gr.Dropdown(label="Voz", choices=[])
            voice_preview = gr.Audio(label="Preview", interactive=False)
            with gr.Accordion("⚙️ Opciones", open=False):
                exag_slider = gr.Slider(0.25, 2, value=0.5, label="Exaggeration")
                temp_slider = gr.Slider(0.05, 5, value=0.8, label="Temp")
                seed_box = gr.Number(0, label="Seed")
                cfg_slider = gr.Slider(0, 1, value=0.5, label="CFG")
            run_button = gr.Button("🌍 Generar", variant="primary", size="lg")
        # Right column: audio result first, progress bar below it.
        with gr.Column(scale=1):
            result_audio = gr.Audio(label="Resultado", autoplay=True)
            status_bar = gr.HTML(value=INITIAL_PROGRESS)
    return dict(
        text=prompt_box,
        lang=language_picker,
        voice=voice_picker,
        prev=voice_preview,
        btn=run_button,
        prog=status_bar,
        out=result_audio,
        opts=[exag_slider, temp_slider, seed_box, cfg_slider],
    )

def create_vc_tab():
    """Build the voice Converter tab and return its components keyed by short names.

    Keys: inp, voice, prev, btn, prog, out.
    """
    with gr.Row():
        # Left column: source audio and target voice.
        with gr.Column(scale=1):
            source_audio = gr.Audio(
                label="Entrada",
                sources=["upload", "microphone"],
                type="filepath",
            )
            voice_picker = gr.Dropdown(label="Voz Objetivo", choices=[])
            voice_preview = gr.Audio(label="Preview", interactive=False)
            run_button = gr.Button("🔄 Convertir", variant="primary", size="lg")
        # Right column: audio result first, progress bar below it.
        with gr.Column(scale=1):
            result_audio = gr.Audio(label="Resultado", autoplay=True)
            status_bar = gr.HTML(value=INITIAL_PROGRESS)
    return dict(
        inp=source_audio,
        voice=voice_picker,
        prev=voice_preview,
        btn=run_button,
        prog=status_bar,
        out=result_audio,
    )

def create_clone_tab():
    """Build the Cloning Lab tab (clone + delete panels) and return its components.

    Keys: name, gender, cat, lang, ref, btn, stat, del_sel, del_btn, del_stat.
    """
    with gr.Row():
        # Left panel: register a new voice from a reference recording.
        with gr.Column():
            gr.Markdown("### 🧬 Clonar Voz")
            name_box = gr.Textbox(label="Nombre")
            with gr.Row():
                gender_radio = gr.Radio(["Male", "Female"], value="Male", label="Género")
                language_options = []
                for code, language_name in SUPPORTED_LANGUAGES.items():
                    language_options.append((f"{language_name} ({code})", code))
                language_picker = gr.Dropdown(label="Idioma", choices=language_options, value="es")

            category_picker = gr.Dropdown(
                label="Categoría",
                choices=CATEGORY_LIST,
                allow_custom_value=False,
            )
            reference_audio = gr.Audio(label="Referencia", type="filepath")
            clone_button = gr.Button("💾 Clonar", variant="primary")
            clone_status = gr.Textbox(label="Estado")
        # Right panel: remove an existing voice.
        with gr.Column():
            gr.Markdown("### 🗑️ Borrar")
            delete_picker = gr.Dropdown(label="Seleccionar Voz", choices=[])
            delete_button = gr.Button("🗑️ Eliminar", variant="stop")
            delete_status = gr.Textbox(label="Estado")
    return dict(
        name=name_box,
        gender=gender_radio,
        cat=category_picker,
        lang=language_picker,
        ref=reference_audio,
        btn=clone_button,
        stat=clone_status,
        del_sel=delete_picker,
        del_btn=delete_button,
        del_stat=delete_status,
    )

# Custom stylesheet handed to gr.Blocks: dark page background, sidebar and nav
# buttons (.nav-btn / .active-btn), content panels, form inputs and tag buttons.
CSS = """
body, .gradio-container { background-color: #0f172a; font-family: 'Segoe UI', sans-serif; }
.sidebar-container { background-color: #1e293b; padding: 20px; border-right: 1px solid #334155; }
.nav-btn { background: transparent; border: none; color: #94a3b8; text-align: left; padding: 15px; font-weight: 600; width: 100%; border-radius: 8px; margin-bottom: 5px; transition: all 0.2s; }
.nav-btn:hover { background: #334155; color: white; padding-left: 20px; }
.active-btn { background: #334155; color: white; border-left: 4px solid #f97316; padding-left: 20px; box-shadow: 0 4px 6px rgba(0,0,0,0.1); }
.content-panel { background: #1e293b; border: 1px solid #334155; border-radius: 16px; padding: 30px; margin-top: 20px; box-shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.1); }
input, textarea, select { background-color: #0f172a !important; border: 1px solid #334155 !important; color: white !important; }
.tag-btn { background: #334155; color: #e2e8f0; border: 1px solid #475569; margin-right: 5px; }
"""

# Top-level application layout: a sidebar (library/language filters plus
# navigation buttons) next to a content area holding one panel per mode.
with gr.Blocks(title="Chatterbox Pro", css=CSS, theme=gr.themes.Base()) as demo:
    with gr.Row(elem_classes="main-layout", equal_height=True):
        with gr.Column(scale=1, min_width=250, elem_classes="sidebar-container"):
            gr.Markdown("### 🎛️ CONTROL PANEL")
            gr.Markdown("---")
            # Preselect the first category when the library is not empty.
            initial_cat = CATEGORY_LIST[0] if CATEGORY_LIST else None
            cat_filter = gr.Dropdown(label="📚 Librería", choices=CATEGORY_LIST, value=initial_cat)
            lang_filter = gr.Dropdown(label="🌐 Idioma", choices=[])
            gr.Markdown("---")
            btn_turbo = gr.Button("🚀 Turbo Mode", elem_classes=["nav-btn", "active-btn"])
            btn_tts = gr.Button("🎤 TTS Pro", elem_classes=["nav-btn"])
            btn_mtl = gr.Button("🌍 Multilingual", elem_classes=["nav-btn"])
            btn_vc = gr.Button("🔄 Converter", elem_classes=["nav-btn"])
            btn_clone = gr.Button("🧬 Cloning Lab", elem_classes=["nav-btn"])

        with gr.Column(scale=4, elem_classes="content-area"):
            create_header()
            # Only the Turbo panel starts visible; the others are toggled by the nav buttons.
            with gr.Column(visible=True, elem_classes="content-panel") as v_turbo: ui_turbo = create_turbo_tab()
            with gr.Column(visible=False, elem_classes="content-panel") as v_tts: ui_tts = create_tts_tab()
            with gr.Column(visible=False, elem_classes="content-panel") as v_mtl: ui_mtl = create_mtl_tab()
            with gr.Column(visible=False, elem_classes="content-panel") as v_vc: ui_vc = create_vc_tab()
            with gr.Column(visible=False, elem_classes="content-panel") as v_clone: ui_clone = create_clone_tab()

    # Panels and nav buttons are kept in matching order so one index selects both.
    views = [v_turbo, v_tts, v_mtl, v_vc, v_clone]; btns = [btn_turbo, btn_tts, btn_mtl, btn_vc, btn_clone]
    def switch_view(idx):
        """Return updates that show panel ``idx`` only and mark button ``idx`` as active."""
        return [gr.update(visible=(i==idx)) for i in range(len(views))] + [gr.update(elem_classes=["nav-btn", "active-btn"] if i==idx else ["nav-btn"]) for i in range(len(btns))]

    # idx=i binds the index at definition time (avoids the late-binding closure pitfall).
    for i, b in enumerate(btns): b.click(lambda idx=i: switch_view(idx), outputs=views+btns)

    def update_ui_lists(cat, lang_code=None):
        """Compute dropdown updates for the given category and (optional) language.

        Falls back to the first available language when ``lang_code`` is not
        valid for the category. Returns seven updates, in order: language
        filter, the four voice dropdowns (turbo, tts, mtl, vc), the clone-tab
        category dropdown and the delete-voice dropdown.
        """
        langs = get_available_languages(cat)
        valid_codes = [c[1] for c in langs]
        curr_lang = lang_code if lang_code in valid_codes else (valid_codes[0] if valid_codes else None)
        voices = get_voices_for_ui(cat, curr_lang)
        v_val = voices[0] if voices else None
        return (gr.update(choices=langs, value=curr_lang), gr.update(choices=voices, value=v_val), gr.update(choices=voices, value=v_val), gr.update(choices=voices, value=v_val), gr.update(choices=voices, value=v_val), gr.update(choices=CATEGORY_LIST, value=cat), gr.update(choices=get_all_voices_list()))

    # Refresh every dependent dropdown when a filter changes and once on page load.
    cat_filter.change(update_ui_lists, inputs=[cat_filter], outputs=[lang_filter, ui_turbo["voice"], ui_tts["voice"], ui_mtl["voice"], ui_vc["voice"], ui_clone["cat"], ui_clone["del_sel"]])
    lang_filter.change(lambda c, l: update_ui_lists(c, l), inputs=[cat_filter, lang_filter], outputs=[lang_filter, ui_turbo["voice"], ui_tts["voice"], ui_mtl["voice"], ui_vc["voice"], ui_clone["cat"], ui_clone["del_sel"]])
    demo.load(lambda: update_ui_lists(initial_cat), outputs=[lang_filter, ui_turbo["voice"], ui_tts["voice"], ui_mtl["voice"], ui_vc["voice"], ui_clone["cat"], ui_clone["del_sel"]])

    # Selecting a voice loads its reference file into that tab's preview player.
    ui_turbo["voice"].change(lambda v: resolve_voice_path(v, "en"), inputs=ui_turbo["voice"], outputs=ui_turbo["prev"])
    ui_tts["voice"].change(lambda v: resolve_voice_path(v, "en"), inputs=ui_tts["voice"], outputs=ui_tts["prev"])
    ui_mtl["voice"].change(lambda v, l: resolve_voice_path(v, l), inputs=[ui_mtl["voice"], ui_mtl["lang"]], outputs=ui_mtl["prev"])
    ui_vc["voice"].change(lambda v: resolve_voice_path(v, "en"), inputs=ui_vc["voice"], outputs=ui_vc["prev"])

    # Action buttons: generation handlers output (progress HTML, audio);
    # clone/delete handlers output (status text, delete-dropdown update).
    ui_turbo["btn"].click(generate_turbo_speech, inputs=[ui_turbo["text"], ui_turbo["voice"]], outputs=[ui_turbo["prog"], ui_turbo["out"]])
    ui_tts["btn"].click(generate_speech, inputs=[ui_tts["text"], ui_tts["voice"]] + ui_tts["opts"], outputs=[ui_tts["prog"], ui_tts["out"]])
    ui_mtl["btn"].click(generate_multilingual_speech, inputs=[ui_mtl["text"], ui_mtl["voice"], ui_mtl["lang"]] + ui_mtl["opts"], outputs=[ui_mtl["prog"], ui_mtl["out"]])
    ui_vc["btn"].click(convert_voice, inputs=[ui_vc["inp"], ui_vc["voice"]], outputs=[ui_vc["prog"], ui_vc["out"]])
    ui_clone["btn"].click(clone_voice_wrapper, inputs=[ui_clone["ref"], ui_clone["name"], ui_clone["cat"], ui_clone["lang"], ui_clone["gender"]], outputs=[ui_clone["stat"], ui_clone["del_sel"]])
    ui_clone["del_btn"].click(delete_voice_wrapper, inputs=[ui_clone["del_sel"]], outputs=[ui_clone["del_stat"], ui_clone["del_sel"]])

    # Client-side handler for the tag buttons: inserts the tag text at the
    # textarea's current selection, or appends it when the textarea is not found.
    JS_TAGS = """(tag, text) => { var el = document.querySelector('#turbo_textbox textarea'); if(el) { var start = el.selectionStart; var end = el.selectionEnd; return text.slice(0, start) + " " + tag + " " + text.slice(end); } return text + " " + tag; }"""
    for btn in ui_turbo["tags"]: btn.click(None, inputs=[btn, ui_turbo["text"]], outputs=ui_turbo["text"], js=JS_TAGS)

# Script entry point: launch the app with the request queue enabled;
# inbrowser=True asks Gradio to open it in a browser tab.
if __name__ == "__main__":
    demo.queue().launch(inbrowser=True)