Spaces:
Build error
Build error
| """ | |
| Chatterbox TTS Enhanced - Monolithic Pro Edition (UI UPDATE) | |
| Fixes: Progress bar moved below audio output. | |
| """ | |
| import sys | |
| import os | |
| import glob | |
| import shutil | |
| import time | |
| import random | |
| import re | |
| import numpy as np | |
| import torch | |
| import gradio as gr | |
| from pathlib import Path | |
| import gc | |
# ==============================================================================
# 0. PATH AND LIBRARY SETUP
# ==============================================================================
# Absolute directory containing this script; used as the anchor for all
# relative resources (src/, modules/voice_wav).
PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
SRC_PATH = os.path.join(PROJECT_ROOT, "src")
# Prefer an editable-checkout layout (<root>/src) on the import path;
# otherwise fall back to the project root itself.
if os.path.exists(SRC_PATH):
    if SRC_PATH not in sys.path:
        sys.path.append(SRC_PATH)
        # NOTE(review): nesting of this print is reconstructed — presumably it
        # only fires when the path was actually added; confirm against VCS.
        print(f"✅ Path 'src' añadido: {SRC_PATH}")
else:
    if PROJECT_ROOT not in sys.path:
        sys.path.append(PROJECT_ROOT)
# Try to import the REAL library; fall back to a silent mock so the UI can
# still be exercised without the chatterbox package installed.
try:
    from chatterbox.tts import ChatterboxTTS
    from chatterbox.vc import ChatterboxVC
    from chatterbox.mtl_tts import ChatterboxMultilingualTTS, SUPPORTED_LANGUAGES
    from chatterbox.tts_turbo import ChatterboxTurboTTS
    print("✅ Librería 'chatterbox' importada correctamente.")
except ImportError as e:
    print(f"❌ ERROR CRÍTICO: No se pudo importar 'chatterbox'. Detalle: {e}")
    print("⚠️ EJECUTANDO EN MODO MOCK (SIN AUDIO REAL)")
    SUPPORTED_LANGUAGES = {"en": "English", "es": "Spanish"}

    class MockModel:
        """Stand-in for every Chatterbox model: 24 kHz sample rate, silent output."""

        def __init__(self, *args, **kwargs):
            self.sr = 24000  # sample rate expected by the Gradio Audio outputs

        @classmethod
        # FIX: was a plain method, so MockModel.from_pretrained(DEVICE) bound
        # cls=DEVICE and crashed with "'str' object is not callable".
        def from_pretrained(cls, device):
            return cls()

        def generate(self, *args, **kwargs):
            # Simulate inference latency, then return 2 s of silence (1, 48000).
            time.sleep(1)
            return torch.zeros(1, 48000)

    ChatterboxTTS = ChatterboxVC = ChatterboxMultilingualTTS = ChatterboxTurboTTS = MockModel
# ==============================================================================
# 1. CONFIGURATION AND UI UTILITIES
# ==============================================================================
# Root of the cloned-voice library, laid out as <category>/[<lang>/]<name>.wav
# (see scan_voice_wav_structure). Created eagerly so a fresh checkout works.
VOICE_WAV_ROOT = os.path.join(PROJECT_ROOT, "modules", "voice_wav")
os.makedirs(VOICE_WAV_ROOT, exist_ok=True)
# Every model is loaded onto this device; CUDA when available, else CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# --- HTML PROGRESS BAR GENERATOR ---
def make_progress_html(percentage, message="Ready"):
    """Render the custom progress widget used by every tab.

    `percentage` drives both the fill width and the centered percent label;
    exactly 100 swaps the orange gradient for solid green. `message` is the
    monospace status line shown beneath the bar.
    """
    fill = "#22c55e" if percentage == 100 else "linear-gradient(90deg, #f97316 0%, #fbbf24 100%)"
    return f"""
    <div style="display: flex; flex-direction: column; gap: 5px; width: 100%; margin-top: 10px;">
        <div style="width: 100%; background-color: #334155; border-radius: 99px; height: 24px; border: 1px solid #475569; overflow: hidden; position: relative; box-shadow: inset 0 2px 4px rgba(0,0,0,0.3);">
            <div style="width: {percentage}%; background: {fill}; height: 100%; transition: width 0.3s ease-out, background 0.3s ease;"></div>
            <div style="position: absolute; top: 0; left: 0; width: 100%; height: 100%; display: flex; align-items: center; justify-content: center; color: white; font-size: 11px; font-weight: bold; text-shadow: 0 1px 2px rgba(0,0,0,0.5);">
                {int(percentage)}%
            </div>
        </div>
        <p style="color: #cbd5e1; font-size: 0.9em; margin: 0; padding: 0; white-space: pre-wrap; font-family: monospace;">{message}</p>
    </div>
    """
# ==============================================================================
# 2. MODEL MANAGER & VOICE MANAGER
# ==============================================================================
class ModelManager:
    """Keeps at most one Chatterbox model resident at a time.

    Switching model types frees the previous model first so VRAM is not
    exhausted when the user hops between tabs.
    """

    def __init__(self):
        self.tts_model = None
        self.mtl_model = None
        self.vc_model = None
        self.turbo_model = None
        self.current_model_type = None

    def unload_all(self):
        """Drop every model reference and, on CUDA, release cached VRAM."""
        self.tts_model = None
        self.mtl_model = None
        self.vc_model = None
        self.turbo_model = None
        if DEVICE == "cuda":
            torch.cuda.empty_cache()
            gc.collect()
        self.current_model_type = None

    def get_model(self, type_key):
        """Return the model for `type_key` ("tts"/"mtl"/"vc"/"turbo").

        Loads it lazily, unloading whatever was resident before. Returns
        None when loading fails (the error is printed, not raised).
        """
        if self.current_model_type != type_key:
            print(f"🔄 Switching to {type_key.upper()} model...")
            self.unload_all()
            try:
                if type_key == "tts":
                    self.tts_model = ChatterboxTTS.from_pretrained(DEVICE)
                elif type_key == "mtl":
                    self.mtl_model = ChatterboxMultilingualTTS.from_pretrained(DEVICE)
                elif type_key == "vc":
                    self.vc_model = ChatterboxVC.from_pretrained(DEVICE)
                elif type_key == "turbo":
                    self.turbo_model = ChatterboxTurboTTS.from_pretrained(device=DEVICE)
                self.current_model_type = type_key
                print(f"✅ {type_key.upper()} loaded.")
            except Exception as e:
                print(f"❌ Error loading {type_key}: {e}")
                return None
        return getattr(self, f"{type_key}_model")


model_manager = ModelManager()
# --- VOICE DB ---
# Module-level voice registry, rebuilt from disk by scan_voice_wav_structure():
#   VOICE_DB:             {category: {lang: {display_name: abs_path}}}
#   CATEGORY_LIST:        sorted category names for the library dropdown
#   FLAT_PATH_MAP:        display_name -> abs_path (all categories merged)
#   ALL_VOICES_FLAT_LIST: every display name, in discovery order
VOICE_DB = {}; CATEGORY_LIST = []; FLAT_PATH_MAP = {}; ALL_VOICES_FLAT_LIST = []
def scan_voice_wav_structure():
    """Walk VOICE_WAV_ROOT and rebuild the four registry globals.

    Expected layout: <root>/<raw_category>/[<lang>/]<name>.wav|.mp3 — files
    sitting directly under the root (len(parts) < 2) are ignored. The raw
    category folder name is cleaned for display ("ElevenLabs_" prefix,
    "_female"/"_male" suffixes and underscores stripped, then title-cased),
    and the gender suffix picks the icon prepended to each voice name.
    """
    global VOICE_DB, CATEGORY_LIST, FLAT_PATH_MAP, ALL_VOICES_FLAT_LIST
    # Full rebuild: previous state is discarded on every rescan.
    VOICE_DB = {}; CATEGORY_LIST = []; FLAT_PATH_MAP = {}; ALL_VOICES_FLAT_LIST = []
    if not os.path.exists(VOICE_WAV_ROOT): return
    print(f"📂 Escaneando voces en: {VOICE_WAV_ROOT}...")
    for root, dirs, files in os.walk(VOICE_WAV_ROOT):
        for f in files:
            # NOTE(review): extension match is case-sensitive (".WAV" is skipped).
            if f.endswith((".wav", ".mp3")):
                full_path = os.path.join(root, f)
                rel_path = os.path.relpath(full_path, VOICE_WAV_ROOT)
                parts = rel_path.split(os.sep)
                if len(parts) >= 2:
                    raw_cat = parts[0]
                    clean_cat = raw_cat.replace("ElevenLabs_", "").replace("_female", "").replace("_male", "").replace("_", " ").strip().title()
                    if clean_cat not in VOICE_DB:
                        VOICE_DB[clean_cat] = {}
                        if clean_cat not in CATEGORY_LIST: CATEGORY_LIST.append(clean_cat)
                    # Default language; a two-letter middle folder overrides it.
                    lang = "en"
                    if len(parts) >= 3:
                        possible_lang = parts[1].lower()
                        if len(possible_lang) == 2:
                            lang = possible_lang
                    icon = "♀️" if "_female" in raw_cat.lower() else ("♂️" if "_male" in raw_cat.lower() else "🎙️")
                    raw_name = os.path.splitext(f)[0]
                    display = f"{icon} {raw_name}"
                    if lang not in VOICE_DB[clean_cat]: VOICE_DB[clean_cat][lang] = {}
                    VOICE_DB[clean_cat][lang][display] = full_path
                    # NOTE(review): identical display names in different
                    # categories overwrite each other here — last one wins.
                    FLAT_PATH_MAP[display] = full_path
                    if display not in ALL_VOICES_FLAT_LIST: ALL_VOICES_FLAT_LIST.append(display)
    CATEGORY_LIST.sort()
    print(f"✅ {len(ALL_VOICES_FLAT_LIST)} voces encontradas.")
def resolve_voice_path(voice_name, lang="en"):
    """Map a voice display name (e.g. "♀️ Alice") to its audio file path.

    Tries an exact lookup first, then falls back to substring matching so a
    name without its icon prefix still resolves. `lang` is accepted for
    call-site symmetry but is not used in the lookup. Returns None for
    empty/placeholder names or when nothing matches.
    """
    if not voice_name or voice_name == "None":
        return None
    exact = FLAT_PATH_MAP.get(voice_name)
    if exact is not None:
        return exact
    return next((p for n, p in FLAT_PATH_MAP.items() if voice_name in n), None)
def get_available_languages(category):
    """Return (label, code) dropdown choices for the languages available
    under `category`, sorted by code; [] for unknown or empty categories."""
    if not category or category not in VOICE_DB:
        return []
    choices = []
    for code in sorted(VOICE_DB[category]):
        label = SUPPORTED_LANGUAGES.get(code, code)
        choices.append((f"{label} ({code})", code))
    return choices
def get_voices_for_ui(category, lang):
    """Sorted voice display names for a (category, language) pair; [] when
    either key is missing from the voice DB."""
    try:
        voices = VOICE_DB[category][lang]
    except KeyError:
        return []
    return sorted(voices)
def get_all_voices_list():
    """Every voice display name known to the scanner, alphabetized."""
    return sorted(ALL_VOICES_FLAT_LIST)


# Populate the voice DB once at import time.
scan_voice_wav_structure()
# ==============================================================================
# 3. GENERATION FUNCTIONS
# ==============================================================================
def set_seed(seed):
    """Seed every RNG in play (random, numpy, torch CPU and — when present —
    all CUDA devices) so a non-zero UI seed reproduces the same synthesis."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
def format_time(seconds):
    """Render a duration as "12.3s" below one minute, else "Xm Y.Ys"."""
    if seconds >= 60:
        minutes, rem = divmod(seconds, 60)
        return f"{int(minutes)}m {rem:.1f}s"
    return f"{seconds:.1f}s"
def estimate_generation_time(text_length):
    """Rough synthesis ETA in seconds: ~2 s per 50 characters of input plus
    1 s of fixed model overhead."""
    seconds_per_50_chars = 2
    return (text_length / 50) * seconds_per_50_chars + 1
def smart_chunk_text(text, max_words=40):
    """Split `text` into chunks of at most ~`max_words` units, breaking only
    at sentence boundaries (Latin, CJK and Indic terminators) or newlines.

    For CJK text the budget counts non-whitespace characters and chunks are
    rejoined without spaces; otherwise it counts whitespace-separated words
    and rejoins with single spaces. A single oversized sentence still forms
    its own chunk. Returns [text] when no sentence survives stripping.
    """
    cjk = bool(re.search(r'[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff]', text))
    joiner = '' if cjk else ' '

    def weight(s):
        # Character count for CJK, word count otherwise.
        return len(re.sub(r'\s+', '', s)) if cjk else len(s.split())

    pieces = [s.strip() for s in re.split(r'(?<=[.!?。!?।؟])\s*|\n+', text)]
    chunks, pending, used = [], [], 0
    for piece in pieces:
        if not piece:
            continue
        w = weight(piece)
        if used + w > max_words and pending:
            # Budget exceeded: flush what we have and start a new chunk.
            chunks.append(joiner.join(pending))
            pending, used = [], 0
        pending.append(piece)
        used += w
    if pending:
        chunks.append(joiner.join(pending))
    return chunks or [text]
def generate_speech(text, voice_name, exaggeration, temperature, seed_num, cfgw, min_p, top_p, repetition_penalty):
    """Streaming TTS callback for the "TTS Pro" tab.

    Generator wired to Gradio: every `yield` is a (progress_html, audio)
    pair, with audio None until the final yield, which carries
    (sample_rate, mono numpy waveform). Long texts are split into
    sentence-aligned chunks and synthesized sequentially.
    """
    try:
        start_time = time.time()
        # Validate inputs before any (expensive) model work.
        if not text.strip(): yield make_progress_html(0, "❌ Error: Texto vacío"), None; return
        path = resolve_voice_path(voice_name, "en")
        if not path: yield make_progress_html(0, "❌ Error: Voz no encontrada"), None; return
        yield make_progress_html(20, "⚙️ Cargando modelo TTS..."), None
        model = model_manager.get_model("tts")
        if model is None: yield make_progress_html(0, "❌ Error modelo"), None; return
        # Seed 0 means "random"; any other value makes generation reproducible.
        if seed_num != 0: set_seed(int(seed_num))
        chunks = smart_chunk_text(text)
        wavs = []
        for i, chunk in enumerate(chunks):
            # Progress spans 30–90% across chunks; model work runs between yields.
            pct = 30 + int((i / len(chunks)) * 60)
            yield make_progress_html(pct, f"🎙️ Generando parte {i+1}/{len(chunks)}..."), None
            w = model.generate(chunk, audio_prompt_path=path, exaggeration=exaggeration, temperature=temperature, cfg_weight=cfgw, min_p=min_p, top_p=top_p, repetition_penalty=repetition_penalty)
            wavs.append(w)
        yield make_progress_html(95, "🎹 Uniendo audio..."), None
        # Chunks are concatenated along the sample axis into one waveform.
        full_wav = torch.cat(wavs, dim=-1) if len(wavs) > 1 else wavs[0]
        # NOTE(review): .numpy() requires a CPU tensor — presumably the model
        # returns one even on CUDA; confirm against ChatterboxTTS.generate.
        yield make_progress_html(100, f"✅ Listo ({format_time(time.time()-start_time)})"), (model.sr, full_wav.squeeze(0).numpy())
    except Exception as e:
        # Surface failures in the progress widget instead of crashing the UI.
        yield make_progress_html(0, f"❌ Error: {str(e)}"), None
def generate_turbo_speech(text, voice_name):
    """Streaming callback for the "Turbo" tab: fast TTS with the model's
    default sampling settings (no exposed knobs, no seeding).

    Yields (progress_html, audio) pairs for Gradio; audio stays None until
    the final yield, which carries (sample_rate, mono numpy waveform).
    """
    try:
        start_time = time.time()
        # Validate inputs before touching the model.
        if not text.strip(): yield make_progress_html(0, "❌ Error: Texto vacío"), None; return
        path = resolve_voice_path(voice_name, "en")
        if not path: yield make_progress_html(0, "❌ Error: Voz no encontrada"), None; return
        yield make_progress_html(20, "⚡ Cargando Turbo..."), None
        model = model_manager.get_model("turbo")
        if model is None: yield make_progress_html(0, "❌ Error Turbo"), None; return
        chunks = smart_chunk_text(text)
        wavs = []
        for i, chunk in enumerate(chunks):
            # 30–90% progress window distributed across chunks.
            pct = 30 + int((i / len(chunks)) * 60)
            yield make_progress_html(pct, f"⚡ Turbo chunk {i+1}/{len(chunks)}..."), None
            w = model.generate(chunk, audio_prompt_path=path)
            wavs.append(w)
        full_wav = torch.cat(wavs, dim=-1) if len(wavs) > 1 else wavs[0]
        # NOTE(review): .numpy() assumes a CPU tensor from the model — confirm.
        yield make_progress_html(100, f"✅ Turbo listo ({format_time(time.time()-start_time)})"), (model.sr, full_wav.squeeze(0).numpy())
    except Exception as e:
        yield make_progress_html(0, f"❌ Error: {str(e)}"), None
def generate_multilingual_speech(text, voice_name, lang_code, exaggeration, temperature, seed_num, cfgw):
    """Streaming callback for the "Multilingual" tab.

    Yields (progress_html, audio) pairs for Gradio; audio stays None until
    the final yield, which carries (sample_rate, mono numpy waveform).
    """
    try:
        start_time = time.time()
        # FIX: guard against empty input, consistent with the other generators
        # (previously an empty text went straight to the model).
        if not text.strip(): yield make_progress_html(0, "❌ Error: Texto vacío"), None; return
        path = resolve_voice_path(voice_name, lang_code)
        # NOTE(review): an unresolved voice leaves path=None and the model is
        # called without an audio prompt — confirm that is the intended default.
        yield make_progress_html(20, "🌍 Cargando Multi-TTS..."), None
        model = model_manager.get_model("mtl")
        if model is None: yield make_progress_html(0, "❌ Error modelo"), None; return
        # Seed 0 means "random"; any other value makes generation reproducible.
        if seed_num != 0: set_seed(int(seed_num))
        chunks = smart_chunk_text(text)
        wavs = []
        for i, chunk in enumerate(chunks):
            # 30–90% progress window distributed across chunks.
            pct = 30 + int((i / len(chunks)) * 60)
            yield make_progress_html(pct, f"🌍 Generando ({lang_code}) {i+1}..."), None
            w = model.generate(chunk, language_id=lang_code, audio_prompt_path=path, exaggeration=exaggeration, temperature=temperature, cfg_weight=cfgw)
            wavs.append(w)
        full_wav = torch.cat(wavs, dim=-1) if len(wavs) > 1 else wavs[0]
        # FIX: report elapsed time like the other tabs (start_time was
        # previously computed but never used).
        yield make_progress_html(100, f"✅ Listo ({format_time(time.time()-start_time)})"), (model.sr, full_wav.squeeze(0).numpy())
    except Exception as e:
        yield make_progress_html(0, f"❌ Error: {str(e)}"), None
def convert_voice(audio, target_voice):
    """Voice-conversion callback: re-voices `audio` (a filepath from the
    Gradio Audio input) to sound like `target_voice`.

    Yields (progress_html, audio) pairs; the final yield carries
    (sample_rate, mono numpy waveform).
    """
    try:
        start_time = time.time()
        path = resolve_voice_path(target_voice, "en")
        if not path: yield make_progress_html(0, "❌ Error: Voz destino no válida"), None; return
        yield make_progress_html(50, "🔄 Cargando VC..."), None
        model = model_manager.get_model("vc")
        if model is None: yield make_progress_html(0, "❌ Error VC"), None; return
        yield make_progress_html(70, "🔄 Convirtiendo..."), None
        # NOTE(review): `audio` is forwarded unchecked — a click with no
        # recording reaches the model as None and only fails in the except.
        w = model.generate(audio, target_voice_path=path)
        yield make_progress_html(100, f"✅ Listo ({format_time(time.time()-start_time)})"), (model.sr, w.squeeze(0).numpy())
    except Exception as e:
        yield make_progress_html(0, f"❌ Error: {str(e)}"), None
def clone_voice_wrapper(ref_audio, name, cat, lang, gender):
    """Register a new cloned voice by copying `ref_audio` into the library.

    The file lands at <root>/<cat>_<gender>/<lang>/<clean name>.wav and the
    voice DB is rescanned. Returns (status message, dropdown update) for the
    Gradio callback.
    """
    try:
        # FIX: validate every user input up front so failures produce
        # actionable messages instead of a generic "❌ Error: ..." (previously
        # ref_audio=None crashed shutil.copy and cat=None crashed .lower()).
        if not name: return "Nombre requerido", gr.update()
        if not ref_audio: return "❌ Audio de referencia requerido", gr.update()
        if not cat: return "❌ Categoría requerida", gr.update()
        cat_slug = cat.lower().replace(" ", "_")
        gender_slug = gender.lower()
        target_dir = os.path.join(VOICE_WAV_ROOT, f"{cat_slug}_{gender_slug}", lang)
        os.makedirs(target_dir, exist_ok=True)
        # Keep only filesystem-safe characters in the file name.
        clean_name = "".join(x for x in name if x.isalnum() or x in " -_").strip()
        # FIX: a name made only of unsafe characters previously produced ".wav".
        if not clean_name: return "❌ Nombre no válido", gr.update()
        dest = os.path.join(target_dir, f"{clean_name}.wav")
        if os.path.exists(dest): return "❌ La voz ya existe", gr.update()
        shutil.copy(ref_audio, dest)
        scan_voice_wav_structure()
        return f"✅ Clonada: {clean_name}", gr.update(choices=get_all_voices_list())
    except Exception as e:
        return f"❌ Error: {e}", gr.update()
def delete_voice_wrapper(voice_name):
    """Delete the audio file behind `voice_name` and rescan the library.

    Returns (status message, dropdown update) for the Gradio callback.
    """
    try:
        target = resolve_voice_path(voice_name, "en")
        if not target or not os.path.exists(target):
            return "❌ Archivo no encontrado", gr.update()
        os.remove(target)
        scan_voice_wav_structure()
        return f"✅ Eliminada: {voice_name}", gr.update(choices=get_all_voices_list(), value=None)
    except Exception as e:
        return f"❌ Error: {e}", gr.update()
# ==============================================================================
# 4. GRAPHICAL INTERFACE (UI)
# ==============================================================================
def create_header():
    """Render the static gradient banner at the top of the content area."""
    gr.HTML("""
    <div style="text-align: center; margin-bottom: 1rem; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 25px; border-radius: 16px; color: white; box-shadow: 0 4px 15px rgba(0,0,0,0.2);">
        <h1 style="font-size: 2.8em; margin: 0; font-weight: 800;">⚡ Chatterbox Turbo</h1>
        <p style="font-size: 1.1em; opacity: 0.9; margin-top: 10px;">Pro Audio Synthesis Suite</p>
    </div>
    """)
# Shared initial state for every tab's progress widget.
INITIAL_PROGRESS = make_progress_html(0, "Esperando inicio...")
def create_turbo_tab():
    """Build the "Turbo" view: text box with emotion-tag buttons, voice
    picker, preview player, and the result/progress column.

    Returns a dict of components for event wiring; "tags" holds the six
    emotion-tag buttons whose labels are inserted client-side via JS_TAGS.
    """
    with gr.Row():
        with gr.Column(scale=1):
            text = gr.Textbox(label="Texto a sintetizar", value="Hello! [laugh] This is Turbo speed!", lines=4, elem_id="turbo_textbox")
            with gr.Row(elem_classes="tag-container"):
                tags = [gr.Button(t, size="sm", elem_classes="tag-btn") for t in ["[laugh]", "[sigh]", "[cough]", "[clear throat]", "[gasp]", "[chuckle]"]]
            voice = gr.Dropdown(label="Seleccionar Voz", choices=[], interactive=True)
            preview = gr.Audio(label="Preview", interactive=False, visible=True, type="filepath")
            btn = gr.Button("⚡ Generar Audio (Turbo)", variant="primary", size="lg")
        with gr.Column(scale=1):
            # MODIFIED: audio output first, progress bar rendered below it.
            audio_out = gr.Audio(label="Resultado Final", autoplay=True, show_download_button=True)
            progress_html = gr.HTML(value=INITIAL_PROGRESS, label="Estado")
    return {"text": text, "voice": voice, "prev": preview, "btn": btn, "prog": progress_html, "out": audio_out, "tags": tags}
def create_tts_tab():
    """Build the "TTS Pro" view with full sampling-parameter control.

    Returns a dict of components for event wiring; "opts" lists the advanced
    controls in the exact order generate_speech expects them after (text,
    voice): exaggeration, temperature, seed, cfg, min_p, top_p, repetition.
    """
    with gr.Row():
        with gr.Column(scale=1):
            text = gr.Textbox(label="Texto", value="Hello world!", lines=4)
            voice = gr.Dropdown(label="Voz Clonada", choices=[], interactive=True)
            preview = gr.Audio(label="Preview", interactive=False)
            with gr.Accordion("⚙️ Opciones Avanzadas", open=False):
                exag = gr.Slider(0.25, 2, value=.5, step=0.05, label="Exaggeration")
                cfg = gr.Slider(0, 1, value=.5, step=0.05, label="CFG")
                temp = gr.Slider(0.05, 5, value=.8, step=0.05, label="Temp")
                seed = gr.Number(0, label="Seed")
                min_p = gr.Slider(0, 1, value=0.05, label="Min P")
                top_p = gr.Slider(0, 1, value=1.0, label="Top P")
                rep = gr.Slider(1, 2, value=1.2, label="Repetition")
            btn = gr.Button("🎙️ Generar Audio", variant="primary", size="lg")
        with gr.Column(scale=1):
            # MODIFIED: audio output first, progress bar rendered below it.
            audio_out = gr.Audio(label="Resultado", autoplay=True)
            progress_html = gr.HTML(value=INITIAL_PROGRESS)
    return {"text": text, "voice": voice, "prev": preview, "btn": btn, "prog": progress_html, "out": audio_out, "opts": [exag, temp, seed, cfg, min_p, top_p, rep]}
def create_mtl_tab():
    """Build the "Multilingual" view.

    Returns a dict of components for event wiring; "opts" is ordered for
    generate_multilingual_speech: exaggeration, temperature, seed, cfg.
    """
    with gr.Row():
        with gr.Column(scale=1):
            text = gr.Textbox(label="Texto", value="Hola mundo", lines=4)
            lang_choices = [(f"{v} ({k})", k) for k,v in SUPPORTED_LANGUAGES.items()]
            lang = gr.Dropdown(label="Idioma", choices=lang_choices, value="es")
            voice = gr.Dropdown(label="Voz", choices=[])
            preview = gr.Audio(label="Preview", interactive=False)
            with gr.Accordion("⚙️ Opciones", open=False):
                exag = gr.Slider(0.25, 2, value=.5, label="Exaggeration")
                temp = gr.Slider(0.05, 5, value=.8, label="Temp")
                seed = gr.Number(0, label="Seed")
                cfg = gr.Slider(0, 1, value=.5, label="CFG")
            btn = gr.Button("🌍 Generar", variant="primary", size="lg")
        with gr.Column(scale=1):
            # MODIFIED: audio output first, progress bar rendered below it.
            audio_out = gr.Audio(label="Resultado", autoplay=True)
            progress_html = gr.HTML(value=INITIAL_PROGRESS)
    return {"text": text, "lang": lang, "voice": voice, "prev": preview, "btn": btn, "prog": progress_html, "out": audio_out, "opts": [exag, temp, seed, cfg]}
def create_vc_tab():
    """Build the "Converter" view: source audio in, target voice, result out.

    Returns a dict of components for event wiring.
    """
    with gr.Row():
        with gr.Column(scale=1):
            inp = gr.Audio(label="Entrada", sources=["upload", "microphone"], type="filepath")
            voice = gr.Dropdown(label="Voz Objetivo", choices=[])
            preview = gr.Audio(label="Preview", interactive=False)
            btn = gr.Button("🔄 Convertir", variant="primary", size="lg")
        with gr.Column(scale=1):
            # MODIFIED: audio output first, progress bar rendered below it.
            audio_out = gr.Audio(label="Resultado", autoplay=True)
            progress_html = gr.HTML(value=INITIAL_PROGRESS)
    return {"inp": inp, "voice": voice, "prev": preview, "btn": btn, "prog": progress_html, "out": audio_out}
def create_clone_tab():
    """Build the "Cloning Lab" view: the left column registers a new voice
    from a reference recording, the right column deletes an existing one.

    Returns a dict of components for event wiring.
    """
    with gr.Row():
        with gr.Column():
            gr.Markdown("### 🧬 Clonar Voz")
            name = gr.Textbox(label="Nombre")
            with gr.Row():
                gender = gr.Radio(["Male", "Female"], value="Male", label="Género")
                lang_choices = [(f"{v} ({k})", k) for k,v in SUPPORTED_LANGUAGES.items()]
                lang = gr.Dropdown(label="Idioma", choices=lang_choices, value="es")
            cat = gr.Dropdown(label="Categoría", choices=CATEGORY_LIST, allow_custom_value=False)
            ref = gr.Audio(label="Referencia", type="filepath")
            btn = gr.Button("💾 Clonar", variant="primary")
            status = gr.Textbox(label="Estado")
        with gr.Column():
            gr.Markdown("### 🗑️ Borrar")
            del_sel = gr.Dropdown(label="Seleccionar Voz", choices=[])
            del_btn = gr.Button("🗑️ Eliminar", variant="stop")
            del_stat = gr.Textbox(label="Estado")
    return {"name": name, "gender": gender, "cat": cat, "lang": lang, "ref": ref, "btn": btn, "stat": status, "del_sel": del_sel, "del_btn": del_btn, "del_stat": del_stat}
# Custom dark theme: slate background, orange accent on the active nav
# button, pill-styled emotion-tag buttons. Applied via gr.Blocks(css=CSS).
CSS = """
body, .gradio-container { background-color: #0f172a; font-family: 'Segoe UI', sans-serif; }
.sidebar-container { background-color: #1e293b; padding: 20px; border-right: 1px solid #334155; }
.nav-btn { background: transparent; border: none; color: #94a3b8; text-align: left; padding: 15px; font-weight: 600; width: 100%; border-radius: 8px; margin-bottom: 5px; transition: all 0.2s; }
.nav-btn:hover { background: #334155; color: white; padding-left: 20px; }
.active-btn { background: #334155; color: white; border-left: 4px solid #f97316; padding-left: 20px; box-shadow: 0 4px 6px rgba(0,0,0,0.1); }
.content-panel { background: #1e293b; border: 1px solid #334155; border-radius: 16px; padding: 30px; margin-top: 20px; box-shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.1); }
input, textarea, select { background-color: #0f172a !important; border: 1px solid #334155 !important; color: white !important; }
.tag-btn { background: #334155; color: #e2e8f0; border: 1px solid #475569; margin-right: 5px; }
"""
# Top-level Gradio application: two-column layout with a nav sidebar on the
# left and one swappable "view" per feature on the right.
with gr.Blocks(title="Chatterbox Pro", css=CSS, theme=gr.themes.Base()) as demo:
    with gr.Row(elem_classes="main-layout", equal_height=True):
        with gr.Column(scale=1, min_width=250, elem_classes="sidebar-container"):
            gr.Markdown("### 🎛️ CONTROL PANEL")
            gr.Markdown("---")
            # Global library filters shared by every tab's voice dropdown.
            initial_cat = CATEGORY_LIST[0] if CATEGORY_LIST else None
            cat_filter = gr.Dropdown(label="📚 Librería", choices=CATEGORY_LIST, value=initial_cat)
            lang_filter = gr.Dropdown(label="🌐 Idioma", choices=[])
            gr.Markdown("---")
            # Navigation buttons; "active-btn" styling marks the visible view.
            btn_turbo = gr.Button("🚀 Turbo Mode", elem_classes=["nav-btn", "active-btn"])
            btn_tts = gr.Button("🎤 TTS Pro", elem_classes=["nav-btn"])
            btn_mtl = gr.Button("🌍 Multilingual", elem_classes=["nav-btn"])
            btn_vc = gr.Button("🔄 Converter", elem_classes=["nav-btn"])
            btn_clone = gr.Button("🧬 Cloning Lab", elem_classes=["nav-btn"])
        with gr.Column(scale=4, elem_classes="content-area"):
            create_header()
            # Only one panel is visible at a time; the nav buttons toggle them.
            with gr.Column(visible=True, elem_classes="content-panel") as v_turbo: ui_turbo = create_turbo_tab()
            with gr.Column(visible=False, elem_classes="content-panel") as v_tts: ui_tts = create_tts_tab()
            with gr.Column(visible=False, elem_classes="content-panel") as v_mtl: ui_mtl = create_mtl_tab()
            with gr.Column(visible=False, elem_classes="content-panel") as v_vc: ui_vc = create_vc_tab()
            with gr.Column(visible=False, elem_classes="content-panel") as v_clone: ui_clone = create_clone_tab()
    views = [v_turbo, v_tts, v_mtl, v_vc, v_clone]; btns = [btn_turbo, btn_tts, btn_mtl, btn_vc, btn_clone]
    def switch_view(idx):
        # Show panel `idx`, hide the rest, and restyle the nav buttons to match.
        return [gr.update(visible=(i==idx)) for i in range(len(views))] + [gr.update(elem_classes=["nav-btn", "active-btn"] if i==idx else ["nav-btn"]) for i in range(len(btns))]
    # `idx=i` binds the loop variable at definition time (late-binding pitfall).
    for i, b in enumerate(btns): b.click(lambda idx=i: switch_view(idx), outputs=views+btns)
    def update_ui_lists(cat, lang_code=None):
        # Recompute language + voice dropdown choices when a library filter changes.
        langs = get_available_languages(cat)
        valid_codes = [c[1] for c in langs]
        # Keep the current language if still valid, else fall back to the first.
        curr_lang = lang_code if lang_code in valid_codes else (valid_codes[0] if valid_codes else None)
        voices = get_voices_for_ui(cat, curr_lang)
        v_val = voices[0] if voices else None
        return (gr.update(choices=langs, value=curr_lang), gr.update(choices=voices, value=v_val), gr.update(choices=voices, value=v_val), gr.update(choices=voices, value=v_val), gr.update(choices=voices, value=v_val), gr.update(choices=CATEGORY_LIST, value=cat), gr.update(choices=get_all_voices_list()))
    cat_filter.change(update_ui_lists, inputs=[cat_filter], outputs=[lang_filter, ui_turbo["voice"], ui_tts["voice"], ui_mtl["voice"], ui_vc["voice"], ui_clone["cat"], ui_clone["del_sel"]])
    lang_filter.change(lambda c, l: update_ui_lists(c, l), inputs=[cat_filter, lang_filter], outputs=[lang_filter, ui_turbo["voice"], ui_tts["voice"], ui_mtl["voice"], ui_vc["voice"], ui_clone["cat"], ui_clone["del_sel"]])
    demo.load(lambda: update_ui_lists(initial_cat), outputs=[lang_filter, ui_turbo["voice"], ui_tts["voice"], ui_mtl["voice"], ui_vc["voice"], ui_clone["cat"], ui_clone["del_sel"]])
    # Selecting a voice previews its reference audio file.
    ui_turbo["voice"].change(lambda v: resolve_voice_path(v, "en"), inputs=ui_turbo["voice"], outputs=ui_turbo["prev"])
    ui_tts["voice"].change(lambda v: resolve_voice_path(v, "en"), inputs=ui_tts["voice"], outputs=ui_tts["prev"])
    ui_mtl["voice"].change(lambda v, l: resolve_voice_path(v, l), inputs=[ui_mtl["voice"], ui_mtl["lang"]], outputs=ui_mtl["prev"])
    ui_vc["voice"].change(lambda v: resolve_voice_path(v, "en"), inputs=ui_vc["voice"], outputs=ui_vc["prev"])
    # Generation callbacks are generators streaming (progress_html, audio) pairs.
    ui_turbo["btn"].click(generate_turbo_speech, inputs=[ui_turbo["text"], ui_turbo["voice"]], outputs=[ui_turbo["prog"], ui_turbo["out"]])
    ui_tts["btn"].click(generate_speech, inputs=[ui_tts["text"], ui_tts["voice"]] + ui_tts["opts"], outputs=[ui_tts["prog"], ui_tts["out"]])
    ui_mtl["btn"].click(generate_multilingual_speech, inputs=[ui_mtl["text"], ui_mtl["voice"], ui_mtl["lang"]] + ui_mtl["opts"], outputs=[ui_mtl["prog"], ui_mtl["out"]])
    ui_vc["btn"].click(convert_voice, inputs=[ui_vc["inp"], ui_vc["voice"]], outputs=[ui_vc["prog"], ui_vc["out"]])
    ui_clone["btn"].click(clone_voice_wrapper, inputs=[ui_clone["ref"], ui_clone["name"], ui_clone["cat"], ui_clone["lang"], ui_clone["gender"]], outputs=[ui_clone["stat"], ui_clone["del_sel"]])
    ui_clone["del_btn"].click(delete_voice_wrapper, inputs=[ui_clone["del_sel"]], outputs=[ui_clone["del_stat"], ui_clone["del_sel"]])
    # Client-side JS inserts the emotion tag at the caret of the Turbo textbox.
    # Each tag Button is also passed as an *input*, supplying its own label as
    # the tag string; js=JS_TAGS runs in the browser (fn=None on the server).
    JS_TAGS = """(tag, text) => { var el = document.querySelector('#turbo_textbox textarea'); if(el) { var start = el.selectionStart; var end = el.selectionEnd; return text.slice(0, start) + " " + tag + " " + text.slice(end); } return text + " " + tag; }"""
    for btn in ui_turbo["tags"]: btn.click(None, inputs=[btn, ui_turbo["text"]], outputs=ui_turbo["text"], js=JS_TAGS)
if __name__ == "__main__":
    # queue() is required so generator callbacks can stream progress updates.
    demo.queue().launch(inbrowser=True)