Spaces:
Build error
Build error
| """ | |
| Chatterbox TTS Enhanced - Monolithic Pro Edition (UI UPDATE) | |
| Fixes: Progress bar moved below audio output. | |
| """ | |
| import sys | |
| import os | |
| import glob | |
| import shutil | |
| import time | |
| import random | |
| import re | |
| import numpy as np | |
| import torch | |
| import gradio as gr | |
| from pathlib import Path | |
| import gc | |
# ==============================================================================
# 0. PATH AND LIBRARY SETUP
# ==============================================================================
# Absolute directory containing this script; used as the anchor for all
# relative resources (src/, modules/voice_wav).
PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
SRC_PATH = os.path.join(PROJECT_ROOT, "src")
# Prefer an editable-checkout layout (<root>/src) on the import path;
# otherwise fall back to the project root itself.
if os.path.exists(SRC_PATH):
    if SRC_PATH not in sys.path:
        sys.path.append(SRC_PATH)
        # NOTE(review): nesting of this print is reconstructed — presumably it
        # only fires when the path was actually added; confirm against VCS.
        print(f"✅ Path 'src' añadido: {SRC_PATH}")
else:
    if PROJECT_ROOT not in sys.path:
        sys.path.append(PROJECT_ROOT)
# Try to import the REAL library; fall back to a silent mock so the UI can
# still be exercised without the chatterbox package installed.
try:
    from chatterbox.tts import ChatterboxTTS
    from chatterbox.vc import ChatterboxVC
    from chatterbox.mtl_tts import ChatterboxMultilingualTTS, SUPPORTED_LANGUAGES
    from chatterbox.tts_turbo import ChatterboxTurboTTS
    print("✅ Librería 'chatterbox' importada correctamente.")
except ImportError as e:
    print(f"❌ ERROR CRÍTICO: No se pudo importar 'chatterbox'. Detalle: {e}")
    print("⚠️ EJECUTANDO EN MODO MOCK (SIN AUDIO REAL)")
    SUPPORTED_LANGUAGES = {"en": "English", "es": "Spanish"}

    class MockModel:
        """Stand-in for every Chatterbox model: 24 kHz sample rate, silent output."""

        def __init__(self, *args, **kwargs):
            self.sr = 24000  # sample rate expected by the Gradio Audio outputs

        @classmethod
        # FIX: was a plain method, so MockModel.from_pretrained(DEVICE) bound
        # cls=DEVICE and crashed with "'str' object is not callable".
        def from_pretrained(cls, device):
            return cls()

        def generate(self, *args, **kwargs):
            # Simulate inference latency, then return 2 s of silence (1, 48000).
            time.sleep(1)
            return torch.zeros(1, 48000)

    ChatterboxTTS = ChatterboxVC = ChatterboxMultilingualTTS = ChatterboxTurboTTS = MockModel
# ==============================================================================
# 1. CONFIGURATION AND UI UTILITIES
# ==============================================================================
# Root of the cloned-voice library, laid out as <category>/[<lang>/]<name>.wav
# (see scan_voice_wav_structure). Created eagerly so a fresh checkout works.
VOICE_WAV_ROOT = os.path.join(PROJECT_ROOT, "modules", "voice_wav")
os.makedirs(VOICE_WAV_ROOT, exist_ok=True)
# Every model is loaded onto this device; CUDA when available, else CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# --- HTML PROGRESS BAR GENERATOR ---
def make_progress_html(percentage, message="Ready"):
    """Render the custom progress widget used by every tab.

    `percentage` drives both the fill width and the centered percent label;
    exactly 100 swaps the orange gradient for solid green. `message` is the
    monospace status line shown beneath the bar.
    """
    fill = "#22c55e" if percentage == 100 else "linear-gradient(90deg, #f97316 0%, #fbbf24 100%)"
    return f"""
    <div style="display: flex; flex-direction: column; gap: 5px; width: 100%; margin-top: 10px;">
        <div style="width: 100%; background-color: #334155; border-radius: 99px; height: 24px; border: 1px solid #475569; overflow: hidden; position: relative; box-shadow: inset 0 2px 4px rgba(0,0,0,0.3);">
            <div style="width: {percentage}%; background: {fill}; height: 100%; transition: width 0.3s ease-out, background 0.3s ease;"></div>
            <div style="position: absolute; top: 0; left: 0; width: 100%; height: 100%; display: flex; align-items: center; justify-content: center; color: white; font-size: 11px; font-weight: bold; text-shadow: 0 1px 2px rgba(0,0,0,0.5);">
                {int(percentage)}%
            </div>
        </div>
        <p style="color: #cbd5e1; font-size: 0.9em; margin: 0; padding: 0; white-space: pre-wrap; font-family: monospace;">{message}</p>
    </div>
    """
# ==============================================================================
# 2. MODEL MANAGER & VOICE MANAGER
# ==============================================================================
class ModelManager:
    """Keeps at most one Chatterbox model resident at a time.

    Switching model types frees the previous model first so VRAM is not
    exhausted when the user hops between tabs.
    """

    def __init__(self):
        self.tts_model = None
        self.mtl_model = None
        self.vc_model = None
        self.turbo_model = None
        self.current_model_type = None

    def unload_all(self):
        """Drop every model reference and, on CUDA, release cached VRAM."""
        self.tts_model = None
        self.mtl_model = None
        self.vc_model = None
        self.turbo_model = None
        if DEVICE == "cuda":
            torch.cuda.empty_cache()
            gc.collect()
        self.current_model_type = None

    def get_model(self, type_key):
        """Return the model for `type_key` ("tts"/"mtl"/"vc"/"turbo").

        Loads it lazily, unloading whatever was resident before. Returns
        None when loading fails (the error is printed, not raised).
        """
        if self.current_model_type != type_key:
            print(f"🔄 Switching to {type_key.upper()} model...")
            self.unload_all()
            try:
                if type_key == "tts":
                    self.tts_model = ChatterboxTTS.from_pretrained(DEVICE)
                elif type_key == "mtl":
                    self.mtl_model = ChatterboxMultilingualTTS.from_pretrained(DEVICE)
                elif type_key == "vc":
                    self.vc_model = ChatterboxVC.from_pretrained(DEVICE)
                elif type_key == "turbo":
                    self.turbo_model = ChatterboxTurboTTS.from_pretrained(device=DEVICE)
                self.current_model_type = type_key
                print(f"✅ {type_key.upper()} loaded.")
            except Exception as e:
                print(f"❌ Error loading {type_key}: {e}")
                return None
        return getattr(self, f"{type_key}_model")


model_manager = ModelManager()
# --- VOICE DB ---
# Module-level voice registry, rebuilt from disk by scan_voice_wav_structure():
#   VOICE_DB:             {category: {lang: {display_name: abs_path}}}
#   CATEGORY_LIST:        sorted category names for the library dropdown
#   FLAT_PATH_MAP:        display_name -> abs_path (all categories merged)
#   ALL_VOICES_FLAT_LIST: every display name, in discovery order
VOICE_DB = {}; CATEGORY_LIST = []; FLAT_PATH_MAP = {}; ALL_VOICES_FLAT_LIST = []
def scan_voice_wav_structure():
    """Walk VOICE_WAV_ROOT and rebuild the four registry globals.

    Expected layout: <root>/<raw_category>/[<lang>/]<name>.wav|.mp3 — files
    sitting directly under the root (len(parts) < 2) are ignored. The raw
    category folder name is cleaned for display ("ElevenLabs_" prefix,
    "_female"/"_male" suffixes and underscores stripped, then title-cased),
    and the gender suffix picks the icon prepended to each voice name.
    """
    global VOICE_DB, CATEGORY_LIST, FLAT_PATH_MAP, ALL_VOICES_FLAT_LIST
    # Full rebuild: previous state is discarded on every rescan.
    VOICE_DB = {}; CATEGORY_LIST = []; FLAT_PATH_MAP = {}; ALL_VOICES_FLAT_LIST = []
    if not os.path.exists(VOICE_WAV_ROOT): return
    print(f"📂 Escaneando voces en: {VOICE_WAV_ROOT}...")
    for root, dirs, files in os.walk(VOICE_WAV_ROOT):
        for f in files:
            # NOTE(review): extension match is case-sensitive (".WAV" is skipped).
            if f.endswith((".wav", ".mp3")):
                full_path = os.path.join(root, f)
                rel_path = os.path.relpath(full_path, VOICE_WAV_ROOT)
                parts = rel_path.split(os.sep)
                if len(parts) >= 2:
                    raw_cat = parts[0]
                    clean_cat = raw_cat.replace("ElevenLabs_", "").replace("_female", "").replace("_male", "").replace("_", " ").strip().title()
                    if clean_cat not in VOICE_DB:
                        VOICE_DB[clean_cat] = {}
                        if clean_cat not in CATEGORY_LIST: CATEGORY_LIST.append(clean_cat)
                    # Default language; a two-letter middle folder overrides it.
                    lang = "en"
                    if len(parts) >= 3:
                        possible_lang = parts[1].lower()
                        if len(possible_lang) == 2:
                            lang = possible_lang
                    icon = "♀️" if "_female" in raw_cat.lower() else ("♂️" if "_male" in raw_cat.lower() else "🎙️")
                    raw_name = os.path.splitext(f)[0]
                    display = f"{icon} {raw_name}"
                    if lang not in VOICE_DB[clean_cat]: VOICE_DB[clean_cat][lang] = {}
                    VOICE_DB[clean_cat][lang][display] = full_path
                    # NOTE(review): identical display names in different
                    # categories overwrite each other here — last one wins.
                    FLAT_PATH_MAP[display] = full_path
                    if display not in ALL_VOICES_FLAT_LIST: ALL_VOICES_FLAT_LIST.append(display)
    CATEGORY_LIST.sort()
    print(f"✅ {len(ALL_VOICES_FLAT_LIST)} voces encontradas.")
def resolve_voice_path(voice_name, lang="en"):
    """Map a voice display name (e.g. "♀️ Alice") to its audio file path.

    Tries an exact lookup first, then falls back to substring matching so a
    name without its icon prefix still resolves. `lang` is accepted for
    call-site symmetry but is not used in the lookup. Returns None for
    empty/placeholder names or when nothing matches.
    """
    if not voice_name or voice_name == "None":
        return None
    exact = FLAT_PATH_MAP.get(voice_name)
    if exact is not None:
        return exact
    return next((p for n, p in FLAT_PATH_MAP.items() if voice_name in n), None)
def get_available_languages(category):
    """Return (label, code) dropdown choices for the languages available
    under `category`, sorted by code; [] for unknown or empty categories."""
    if not category or category not in VOICE_DB:
        return []
    choices = []
    for code in sorted(VOICE_DB[category]):
        label = SUPPORTED_LANGUAGES.get(code, code)
        choices.append((f"{label} ({code})", code))
    return choices
def get_voices_for_ui(category, lang):
    """Sorted voice display names for a (category, language) pair; [] when
    either key is missing from the voice DB."""
    try:
        voices = VOICE_DB[category][lang]
    except KeyError:
        return []
    return sorted(voices)
def get_all_voices_list():
    """Every voice display name known to the scanner, alphabetized."""
    return sorted(ALL_VOICES_FLAT_LIST)


# Populate the voice DB once at import time.
scan_voice_wav_structure()
# ==============================================================================
# 3. GENERATION FUNCTIONS
# ==============================================================================
def set_seed(seed):
    """Seed every RNG in play (random, numpy, torch CPU and — when present —
    all CUDA devices) so a non-zero UI seed reproduces the same synthesis."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
def format_time(seconds):
    """Render a duration as "12.3s" below one minute, else "Xm Y.Ys"."""
    if seconds >= 60:
        minutes, rem = divmod(seconds, 60)
        return f"{int(minutes)}m {rem:.1f}s"
    return f"{seconds:.1f}s"
def estimate_generation_time(text_length):
    """Rough synthesis ETA in seconds: ~2 s per 50 characters of input plus
    1 s of fixed model overhead."""
    seconds_per_50_chars = 2
    return (text_length / 50) * seconds_per_50_chars + 1
def smart_chunk_text(text, max_words=40):
    """Split `text` into chunks of at most ~`max_words` units, breaking only
    at sentence boundaries (Latin, CJK and Indic terminators) or newlines.

    For CJK text the budget counts non-whitespace characters and chunks are
    rejoined without spaces; otherwise it counts whitespace-separated words
    and rejoins with single spaces. A single oversized sentence still forms
    its own chunk. Returns [text] when no sentence survives stripping.
    """
    cjk = bool(re.search(r'[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff]', text))
    joiner = '' if cjk else ' '

    def weight(s):
        # Character count for CJK, word count otherwise.
        return len(re.sub(r'\s+', '', s)) if cjk else len(s.split())

    pieces = [s.strip() for s in re.split(r'(?<=[.!?。!?।؟])\s*|\n+', text)]
    chunks, pending, used = [], [], 0
    for piece in pieces:
        if not piece:
            continue
        w = weight(piece)
        if used + w > max_words and pending:
            # Budget exceeded: flush what we have and start a new chunk.
            chunks.append(joiner.join(pending))
            pending, used = [], 0
        pending.append(piece)
        used += w
    if pending:
        chunks.append(joiner.join(pending))
    return chunks or [text]
def generate_speech(text, voice_name, exaggeration, temperature, seed_num, cfgw, min_p, top_p, repetition_penalty):
    """Streaming TTS callback for the "TTS Pro" tab.

    Generator wired to Gradio: every `yield` is a (progress_html, audio)
    pair, with audio None until the final yield, which carries
    (sample_rate, mono numpy waveform). Long texts are split into
    sentence-aligned chunks and synthesized sequentially.
    """
    try:
        start_time = time.time()
        # Validate inputs before any (expensive) model work.
        if not text.strip(): yield make_progress_html(0, "❌ Error: Texto vacío"), None; return
        path = resolve_voice_path(voice_name, "en")
        if not path: yield make_progress_html(0, "❌ Error: Voz no encontrada"), None; return
        yield make_progress_html(20, "⚙️ Cargando modelo TTS..."), None
        model = model_manager.get_model("tts")
        if model is None: yield make_progress_html(0, "❌ Error modelo"), None; return
        # Seed 0 means "random"; any other value makes generation reproducible.
        if seed_num != 0: set_seed(int(seed_num))
        chunks = smart_chunk_text(text)
        wavs = []
        for i, chunk in enumerate(chunks):
            # Progress spans 30–90% across chunks; model work runs between yields.
            pct = 30 + int((i / len(chunks)) * 60)
            yield make_progress_html(pct, f"🎙️ Generando parte {i+1}/{len(chunks)}..."), None
            w = model.generate(chunk, audio_prompt_path=path, exaggeration=exaggeration, temperature=temperature, cfg_weight=cfgw, min_p=min_p, top_p=top_p, repetition_penalty=repetition_penalty)
            wavs.append(w)
        yield make_progress_html(95, "🎹 Uniendo audio..."), None
        # Chunks are concatenated along the sample axis into one waveform.
        full_wav = torch.cat(wavs, dim=-1) if len(wavs) > 1 else wavs[0]
        # NOTE(review): .numpy() requires a CPU tensor — presumably the model
        # returns one even on CUDA; confirm against ChatterboxTTS.generate.
        yield make_progress_html(100, f"✅ Listo ({format_time(time.time()-start_time)})"), (model.sr, full_wav.squeeze(0).numpy())
    except Exception as e:
        # Surface failures in the progress widget instead of crashing the UI.
        yield make_progress_html(0, f"❌ Error: {str(e)}"), None
def generate_turbo_speech(text, voice_name):
    """Streaming callback for the "Turbo" tab: fast TTS with the model's
    default sampling settings (no exposed knobs, no seeding).

    Yields (progress_html, audio) pairs for Gradio; audio stays None until
    the final yield, which carries (sample_rate, mono numpy waveform).
    """
    try:
        start_time = time.time()
        # Validate inputs before touching the model.
        if not text.strip(): yield make_progress_html(0, "❌ Error: Texto vacío"), None; return
        path = resolve_voice_path(voice_name, "en")
        if not path: yield make_progress_html(0, "❌ Error: Voz no encontrada"), None; return
        yield make_progress_html(20, "⚡ Cargando Turbo..."), None
        model = model_manager.get_model("turbo")
        if model is None: yield make_progress_html(0, "❌ Error Turbo"), None; return
        chunks = smart_chunk_text(text)
        wavs = []
        for i, chunk in enumerate(chunks):
            # 30–90% progress window distributed across chunks.
            pct = 30 + int((i / len(chunks)) * 60)
            yield make_progress_html(pct, f"⚡ Turbo chunk {i+1}/{len(chunks)}..."), None
            w = model.generate(chunk, audio_prompt_path=path)
            wavs.append(w)
        full_wav = torch.cat(wavs, dim=-1) if len(wavs) > 1 else wavs[0]
        # NOTE(review): .numpy() assumes a CPU tensor from the model — confirm.
        yield make_progress_html(100, f"✅ Turbo listo ({format_time(time.time()-start_time)})"), (model.sr, full_wav.squeeze(0).numpy())
    except Exception as e:
        yield make_progress_html(0, f"❌ Error: {str(e)}"), None
def generate_multilingual_speech(text, voice_name, lang_code, exaggeration, temperature, seed_num, cfgw):
    """Streaming callback for the "Multilingual" tab.

    Yields (progress_html, audio) pairs for Gradio; audio stays None until
    the final yield, which carries (sample_rate, mono numpy waveform).
    """
    try:
        start_time = time.time()
        # FIX: guard against empty input, consistent with the other generators
        # (previously an empty text went straight to the model).
        if not text.strip(): yield make_progress_html(0, "❌ Error: Texto vacío"), None; return
        path = resolve_voice_path(voice_name, lang_code)
        # NOTE(review): an unresolved voice leaves path=None and the model is
        # called without an audio prompt — confirm that is the intended default.
        yield make_progress_html(20, "🌍 Cargando Multi-TTS..."), None
        model = model_manager.get_model("mtl")
        if model is None: yield make_progress_html(0, "❌ Error modelo"), None; return
        # Seed 0 means "random"; any other value makes generation reproducible.
        if seed_num != 0: set_seed(int(seed_num))
        chunks = smart_chunk_text(text)
        wavs = []
        for i, chunk in enumerate(chunks):
            # 30–90% progress window distributed across chunks.
            pct = 30 + int((i / len(chunks)) * 60)
            yield make_progress_html(pct, f"🌍 Generando ({lang_code}) {i+1}..."), None
            w = model.generate(chunk, language_id=lang_code, audio_prompt_path=path, exaggeration=exaggeration, temperature=temperature, cfg_weight=cfgw)
            wavs.append(w)
        full_wav = torch.cat(wavs, dim=-1) if len(wavs) > 1 else wavs[0]
        # FIX: report elapsed time like the other tabs (start_time was
        # previously computed but never used).
        yield make_progress_html(100, f"✅ Listo ({format_time(time.time()-start_time)})"), (model.sr, full_wav.squeeze(0).numpy())
    except Exception as e:
        yield make_progress_html(0, f"❌ Error: {str(e)}"), None
def convert_voice(audio, target_voice):
    """Voice-conversion callback: re-voices `audio` (a filepath from the
    Gradio Audio input) to sound like `target_voice`.

    Yields (progress_html, audio) pairs; the final yield carries
    (sample_rate, mono numpy waveform).
    """
    try:
        start_time = time.time()
        path = resolve_voice_path(target_voice, "en")
        if not path: yield make_progress_html(0, "❌ Error: Voz destino no válida"), None; return
        yield make_progress_html(50, "🔄 Cargando VC..."), None
        model = model_manager.get_model("vc")
        if model is None: yield make_progress_html(0, "❌ Error VC"), None; return
        yield make_progress_html(70, "🔄 Convirtiendo..."), None
        # NOTE(review): `audio` is forwarded unchecked — a click with no
        # recording reaches the model as None and only fails in the except.
        w = model.generate(audio, target_voice_path=path)
        yield make_progress_html(100, f"✅ Listo ({format_time(time.time()-start_time)})"), (model.sr, w.squeeze(0).numpy())
    except Exception as e:
        yield make_progress_html(0, f"❌ Error: {str(e)}"), None
def clone_voice_wrapper(ref_audio, name, cat, lang, gender):
    """Register a new cloned voice by copying `ref_audio` into the library.

    The file lands at <root>/<cat>_<gender>/<lang>/<clean name>.wav and the
    voice DB is rescanned. Returns (status message, dropdown update) for the
    Gradio callback.
    """
    try:
        # FIX: validate every user input up front so failures produce
        # actionable messages instead of a generic "❌ Error: ..." (previously
        # ref_audio=None crashed shutil.copy and cat=None crashed .lower()).
        if not name: return "Nombre requerido", gr.update()
        if not ref_audio: return "❌ Audio de referencia requerido", gr.update()
        if not cat: return "❌ Categoría requerida", gr.update()
        cat_slug = cat.lower().replace(" ", "_")
        gender_slug = gender.lower()
        target_dir = os.path.join(VOICE_WAV_ROOT, f"{cat_slug}_{gender_slug}", lang)
        os.makedirs(target_dir, exist_ok=True)
        # Keep only filesystem-safe characters in the file name.
        clean_name = "".join(x for x in name if x.isalnum() or x in " -_").strip()
        # FIX: a name made only of unsafe characters previously produced ".wav".
        if not clean_name: return "❌ Nombre no válido", gr.update()
        dest = os.path.join(target_dir, f"{clean_name}.wav")
        if os.path.exists(dest): return "❌ La voz ya existe", gr.update()
        shutil.copy(ref_audio, dest)
        scan_voice_wav_structure()
        return f"✅ Clonada: {clean_name}", gr.update(choices=get_all_voices_list())
    except Exception as e:
        return f"❌ Error: {e}", gr.update()
def delete_voice_wrapper(voice_name):
    """Delete the audio file behind `voice_name` and rescan the library.

    Returns (status message, dropdown update) for the Gradio callback.
    """
    try:
        target = resolve_voice_path(voice_name, "en")
        if not target or not os.path.exists(target):
            return "❌ Archivo no encontrado", gr.update()
        os.remove(target)
        scan_voice_wav_structure()
        return f"✅ Eliminada: {voice_name}", gr.update(choices=get_all_voices_list(), value=None)
    except Exception as e:
        return f"❌ Error: {e}", gr.update()
# ==============================================================================
# 4. GRAPHICAL INTERFACE (UI)
# ==============================================================================
def create_header():
    """Render the static gradient banner at the top of the content area."""
    gr.HTML("""
    <div style="text-align: center; margin-bottom: 1rem; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 25px; border-radius: 16px; color: white; box-shadow: 0 4px 15px rgba(0,0,0,0.2);">
        <h1 style="font-size: 2.8em; margin: 0; font-weight: 800;">⚡ Chatterbox Turbo</h1>
        <p style="font-size: 1.1em; opacity: 0.9; margin-top: 10px;">Pro Audio Synthesis Suite</p>
    </div>
    """)
# Shared initial state for every tab's progress widget.
INITIAL_PROGRESS = make_progress_html(0, "Esperando inicio...")
def create_turbo_tab():
    """Build the "Turbo" view: text box with emotion-tag buttons, voice
    picker, preview player, and the result/progress column.

    Returns a dict of components for event wiring; "tags" holds the six
    emotion-tag buttons whose labels are inserted client-side via JS_TAGS.
    """
    with gr.Row():
        with gr.Column(scale=1):
            text = gr.Textbox(label="Texto a sintetizar", value="Hello! [laugh] This is Turbo speed!", lines=4, elem_id="turbo_textbox")
            with gr.Row(elem_classes="tag-container"):
                tags = [gr.Button(t, size="sm", elem_classes="tag-btn") for t in ["[laugh]", "[sigh]", "[cough]", "[clear throat]", "[gasp]", "[chuckle]"]]
            voice = gr.Dropdown(label="Seleccionar Voz", choices=[], interactive=True)
            preview = gr.Audio(label="Preview", interactive=False, visible=True, type="filepath")
            btn = gr.Button("⚡ Generar Audio (Turbo)", variant="primary", size="lg")
        with gr.Column(scale=1):
            # MODIFIED: audio output first, progress bar rendered below it.
            audio_out = gr.Audio(label="Resultado Final", autoplay=True, show_download_button=True)
            progress_html = gr.HTML(value=INITIAL_PROGRESS, label="Estado")
    return {"text": text, "voice": voice, "prev": preview, "btn": btn, "prog": progress_html, "out": audio_out, "tags": tags}
def create_tts_tab():
    """Build the "TTS Pro" view with full sampling-parameter control.

    Returns a dict of components for event wiring; "opts" lists the advanced
    controls in the exact order generate_speech expects them after (text,
    voice): exaggeration, temperature, seed, cfg, min_p, top_p, repetition.
    """
    with gr.Row():
        with gr.Column(scale=1):
            text = gr.Textbox(label="Texto", value="Hello world!", lines=4)
            voice = gr.Dropdown(label="Voz Clonada", choices=[], interactive=True)
            preview = gr.Audio(label="Preview", interactive=False)
            with gr.Accordion("⚙️ Opciones Avanzadas", open=False):
                exag = gr.Slider(0.25, 2, value=.5, step=0.05, label="Exaggeration")
                cfg = gr.Slider(0, 1, value=.5, step=0.05, label="CFG")
                temp = gr.Slider(0.05, 5, value=.8, step=0.05, label="Temp")
                seed = gr.Number(0, label="Seed")
                min_p = gr.Slider(0, 1, value=0.05, label="Min P")
                top_p = gr.Slider(0, 1, value=1.0, label="Top P")
                rep = gr.Slider(1, 2, value=1.2, label="Repetition")
            btn = gr.Button("🎙️ Generar Audio", variant="primary", size="lg")
        with gr.Column(scale=1):
            # MODIFIED: audio output first, progress bar rendered below it.
            audio_out = gr.Audio(label="Resultado", autoplay=True)
            progress_html = gr.HTML(value=INITIAL_PROGRESS)
    return {"text": text, "voice": voice, "prev": preview, "btn": btn, "prog": progress_html, "out": audio_out, "opts": [exag, temp, seed, cfg, min_p, top_p, rep]}
def create_mtl_tab():
    """Build the "Multilingual" view.

    Returns a dict of components for event wiring; "opts" is ordered for
    generate_multilingual_speech: exaggeration, temperature, seed, cfg.
    """
    with gr.Row():
        with gr.Column(scale=1):
            text = gr.Textbox(label="Texto", value="Hola mundo", lines=4)
            lang_choices = [(f"{v} ({k})", k) for k,v in SUPPORTED_LANGUAGES.items()]
            lang = gr.Dropdown(label="Idioma", choices=lang_choices, value="es")
            voice = gr.Dropdown(label="Voz", choices=[])
            preview = gr.Audio(label="Preview", interactive=False)
            with gr.Accordion("⚙️ Opciones", open=False):
                exag = gr.Slider(0.25, 2, value=.5, label="Exaggeration")
                temp = gr.Slider(0.05, 5, value=.8, label="Temp")
                seed = gr.Number(0, label="Seed")
                cfg = gr.Slider(0, 1, value=.5, label="CFG")
            btn = gr.Button("🌍 Generar", variant="primary", size="lg")
        with gr.Column(scale=1):
            # MODIFIED: audio output first, progress bar rendered below it.
            audio_out = gr.Audio(label="Resultado", autoplay=True)
            progress_html = gr.HTML(value=INITIAL_PROGRESS)
    return {"text": text, "lang": lang, "voice": voice, "prev": preview, "btn": btn, "prog": progress_html, "out": audio_out, "opts": [exag, temp, seed, cfg]}
def create_vc_tab():
    """Build the "Converter" view: source audio in, target voice, result out.

    Returns a dict of components for event wiring.
    """
    with gr.Row():
        with gr.Column(scale=1):
            inp = gr.Audio(label="Entrada", sources=["upload", "microphone"], type="filepath")
            voice = gr.Dropdown(label="Voz Objetivo", choices=[])
            preview = gr.Audio(label="Preview", interactive=False)
            btn = gr.Button("🔄 Convertir", variant="primary", size="lg")
        with gr.Column(scale=1):
            # MODIFIED: audio output first, progress bar rendered below it.
            audio_out = gr.Audio(label="Resultado", autoplay=True)
            progress_html = gr.HTML(value=INITIAL_PROGRESS)
    return {"inp": inp, "voice": voice, "prev": preview, "btn": btn, "prog": progress_html, "out": audio_out}
def create_clone_tab():
    """Build the "Cloning Lab" view: the left column registers a new voice
    from a reference recording, the right column deletes an existing one.

    Returns a dict of components for event wiring.
    """
    with gr.Row():
        with gr.Column():
            gr.Markdown("### 🧬 Clonar Voz")
            name = gr.Textbox(label="Nombre")
            with gr.Row():
                gender = gr.Radio(["Male", "Female"], value="Male", label="Género")
                lang_choices = [(f"{v} ({k})", k) for k,v in SUPPORTED_LANGUAGES.items()]
                lang = gr.Dropdown(label="Idioma", choices=lang_choices, value="es")
            cat = gr.Dropdown(label="Categoría", choices=CATEGORY_LIST, allow_custom_value=False)
            ref = gr.Audio(label="Referencia", type="filepath")
            btn = gr.Button("💾 Clonar", variant="primary")
            status = gr.Textbox(label="Estado")
        with gr.Column():
            gr.Markdown("### 🗑️ Borrar")
            del_sel = gr.Dropdown(label="Seleccionar Voz", choices=[])
            del_btn = gr.Button("🗑️ Eliminar", variant="stop")
            del_stat = gr.Textbox(label="Estado")
    return {"name": name, "gender": gender, "cat": cat, "lang": lang, "ref": ref, "btn": btn, "stat": status, "del_sel": del_sel, "del_btn": del_btn, "del_stat": del_stat}
# Custom dark theme: slate background, orange accent on the active nav
# button, pill-styled emotion-tag buttons. Applied via gr.Blocks(css=CSS).
CSS = """
body, .gradio-container { background-color: #0f172a; font-family: 'Segoe UI', sans-serif; }
.sidebar-container { background-color: #1e293b; padding: 20px; border-right: 1px solid #334155; }
.nav-btn { background: transparent; border: none; color: #94a3b8; text-align: left; padding: 15px; font-weight: 600; width: 100%; border-radius: 8px; margin-bottom: 5px; transition: all 0.2s; }
.nav-btn:hover { background: #334155; color: white; padding-left: 20px; }
.active-btn { background: #334155; color: white; border-left: 4px solid #f97316; padding-left: 20px; box-shadow: 0 4px 6px rgba(0,0,0,0.1); }
.content-panel { background: #1e293b; border: 1px solid #334155; border-radius: 16px; padding: 30px; margin-top: 20px; box-shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.1); }
input, textarea, select { background-color: #0f172a !important; border: 1px solid #334155 !important; color: white !important; }
.tag-btn { background: #334155; color: #e2e8f0; border: 1px solid #475569; margin-right: 5px; }
"""
# Top-level Gradio application: two-column layout with a nav sidebar on the
# left and one swappable "view" per feature on the right.
with gr.Blocks(title="Chatterbox Pro", css=CSS, theme=gr.themes.Base()) as demo:
    with gr.Row(elem_classes="main-layout", equal_height=True):
        with gr.Column(scale=1, min_width=250, elem_classes="sidebar-container"):
            gr.Markdown("### 🎛️ CONTROL PANEL")
            gr.Markdown("---")
            # Global library filters shared by every tab's voice dropdown.
            initial_cat = CATEGORY_LIST[0] if CATEGORY_LIST else None
            cat_filter = gr.Dropdown(label="📚 Librería", choices=CATEGORY_LIST, value=initial_cat)
            lang_filter = gr.Dropdown(label="🌐 Idioma", choices=[])
            gr.Markdown("---")
            # Navigation buttons; "active-btn" styling marks the visible view.
            btn_turbo = gr.Button("🚀 Turbo Mode", elem_classes=["nav-btn", "active-btn"])
            btn_tts = gr.Button("🎤 TTS Pro", elem_classes=["nav-btn"])
            btn_mtl = gr.Button("🌍 Multilingual", elem_classes=["nav-btn"])
            btn_vc = gr.Button("🔄 Converter", elem_classes=["nav-btn"])
            btn_clone = gr.Button("🧬 Cloning Lab", elem_classes=["nav-btn"])
        with gr.Column(scale=4, elem_classes="content-area"):
            create_header()
            # Only one panel is visible at a time; the nav buttons toggle them.
            with gr.Column(visible=True, elem_classes="content-panel") as v_turbo: ui_turbo = create_turbo_tab()
            with gr.Column(visible=False, elem_classes="content-panel") as v_tts: ui_tts = create_tts_tab()
            with gr.Column(visible=False, elem_classes="content-panel") as v_mtl: ui_mtl = create_mtl_tab()
            with gr.Column(visible=False, elem_classes="content-panel") as v_vc: ui_vc = create_vc_tab()
            with gr.Column(visible=False, elem_classes="content-panel") as v_clone: ui_clone = create_clone_tab()
    views = [v_turbo, v_tts, v_mtl, v_vc, v_clone]; btns = [btn_turbo, btn_tts, btn_mtl, btn_vc, btn_clone]
    def switch_view(idx):
        # Show panel `idx`, hide the rest, and restyle the nav buttons to match.
        return [gr.update(visible=(i==idx)) for i in range(len(views))] + [gr.update(elem_classes=["nav-btn", "active-btn"] if i==idx else ["nav-btn"]) for i in range(len(btns))]
    # `idx=i` binds the loop variable at definition time (late-binding pitfall).
    for i, b in enumerate(btns): b.click(lambda idx=i: switch_view(idx), outputs=views+btns)
    def update_ui_lists(cat, lang_code=None):
        # Recompute language + voice dropdown choices when a library filter changes.
        langs = get_available_languages(cat)
        valid_codes = [c[1] for c in langs]
        # Keep the current language if still valid, else fall back to the first.
        curr_lang = lang_code if lang_code in valid_codes else (valid_codes[0] if valid_codes else None)
        voices = get_voices_for_ui(cat, curr_lang)
        v_val = voices[0] if voices else None
        return (gr.update(choices=langs, value=curr_lang), gr.update(choices=voices, value=v_val), gr.update(choices=voices, value=v_val), gr.update(choices=voices, value=v_val), gr.update(choices=voices, value=v_val), gr.update(choices=CATEGORY_LIST, value=cat), gr.update(choices=get_all_voices_list()))
    cat_filter.change(update_ui_lists, inputs=[cat_filter], outputs=[lang_filter, ui_turbo["voice"], ui_tts["voice"], ui_mtl["voice"], ui_vc["voice"], ui_clone["cat"], ui_clone["del_sel"]])
    lang_filter.change(lambda c, l: update_ui_lists(c, l), inputs=[cat_filter, lang_filter], outputs=[lang_filter, ui_turbo["voice"], ui_tts["voice"], ui_mtl["voice"], ui_vc["voice"], ui_clone["cat"], ui_clone["del_sel"]])
    demo.load(lambda: update_ui_lists(initial_cat), outputs=[lang_filter, ui_turbo["voice"], ui_tts["voice"], ui_mtl["voice"], ui_vc["voice"], ui_clone["cat"], ui_clone["del_sel"]])
    # Selecting a voice previews its reference audio file.
    ui_turbo["voice"].change(lambda v: resolve_voice_path(v, "en"), inputs=ui_turbo["voice"], outputs=ui_turbo["prev"])
    ui_tts["voice"].change(lambda v: resolve_voice_path(v, "en"), inputs=ui_tts["voice"], outputs=ui_tts["prev"])
    ui_mtl["voice"].change(lambda v, l: resolve_voice_path(v, l), inputs=[ui_mtl["voice"], ui_mtl["lang"]], outputs=ui_mtl["prev"])
    ui_vc["voice"].change(lambda v: resolve_voice_path(v, "en"), inputs=ui_vc["voice"], outputs=ui_vc["prev"])
    # Generation callbacks are generators streaming (progress_html, audio) pairs.
    ui_turbo["btn"].click(generate_turbo_speech, inputs=[ui_turbo["text"], ui_turbo["voice"]], outputs=[ui_turbo["prog"], ui_turbo["out"]])
    ui_tts["btn"].click(generate_speech, inputs=[ui_tts["text"], ui_tts["voice"]] + ui_tts["opts"], outputs=[ui_tts["prog"], ui_tts["out"]])
    ui_mtl["btn"].click(generate_multilingual_speech, inputs=[ui_mtl["text"], ui_mtl["voice"], ui_mtl["lang"]] + ui_mtl["opts"], outputs=[ui_mtl["prog"], ui_mtl["out"]])
    ui_vc["btn"].click(convert_voice, inputs=[ui_vc["inp"], ui_vc["voice"]], outputs=[ui_vc["prog"], ui_vc["out"]])
    ui_clone["btn"].click(clone_voice_wrapper, inputs=[ui_clone["ref"], ui_clone["name"], ui_clone["cat"], ui_clone["lang"], ui_clone["gender"]], outputs=[ui_clone["stat"], ui_clone["del_sel"]])
    ui_clone["del_btn"].click(delete_voice_wrapper, inputs=[ui_clone["del_sel"]], outputs=[ui_clone["del_stat"], ui_clone["del_sel"]])
    # Client-side JS inserts the emotion tag at the caret of the Turbo textbox.
    # Each tag Button is also passed as an *input*, supplying its own label as
    # the tag string; js=JS_TAGS runs in the browser (fn=None on the server).
    JS_TAGS = """(tag, text) => { var el = document.querySelector('#turbo_textbox textarea'); if(el) { var start = el.selectionStart; var end = el.selectionEnd; return text.slice(0, start) + " " + tag + " " + text.slice(end); } return text + " " + tag; }"""
    for btn in ui_turbo["tags"]: btn.click(None, inputs=[btn, ui_turbo["text"]], outputs=ui_turbo["text"], js=JS_TAGS)
if __name__ == "__main__":
    # queue() is required so generator callbacks can stream progress updates.
    demo.queue().launch(inbrowser=True)