"""Gradio front-end for the "Clon Vocal" zero-shot voice cloning tool.

The UI exposes four tabs: registering a voice reference, converting a song
with a registered voice, managing stored models, and reading a GPU debug log.
"""

import html
import logging
import os
import shutil
import sys
import tempfile
import traceback

import gradio as gr

# Logging is configured before anything else so that failures in the optional
# start-up steps below are actually reported.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)

# Best-effort monkey patch: make gradio_client's schema helpers return "Any"
# when they are handed a schema that is not a dict, instead of calling the
# original implementation with it.
try:
    import gradio_client.utils as _gc_utils

    _orig_get_type = _gc_utils.get_type

    def _patched_get_type(schema, *args, **kwargs):
        """Return "Any" for non-dict schemas, else defer to the original."""
        if not isinstance(schema, dict):
            return "Any"
        return _orig_get_type(schema, *args, **kwargs)

    _gc_utils.get_type = _patched_get_type

    _orig_json_schema = _gc_utils._json_schema_to_python_type

    def _patched_json_schema(schema, *args, **kwargs):
        """Return "Any" for non-dict schemas, else defer to the original."""
        if not isinstance(schema, dict):
            return "Any"
        return _orig_json_schema(schema, *args, **kwargs)

    def _patched_public_json_schema(schema, defs=None):
        """Public entry point routed through the patched private helper."""
        return _patched_json_schema(schema, defs)

    _gc_utils._json_schema_to_python_type = _patched_json_schema
    _gc_utils.json_schema_to_python_type = _patched_public_json_schema
except Exception as exc:  # the patch is optional; the app must still start
    logger.warning("No se pudo aplicar el parche de gradio_client: %s", exc)

logger.info("Inicializando la aplicación...")

from pipeline.setup import setup_seed_vc
from pipeline.storage import init_storage, list_models, download_model, delete_model, get_reference_path

# A failed setup is logged but does not abort start-up.
try:
    setup_seed_vc()
except Exception as e:
    logger.exception("Error durante la configuración: %s", e)

HF_MODELS_REPO = os.environ.get("HF_MODELS_REPO", "")
if HF_MODELS_REPO:
    init_storage(HF_MODELS_REPO)
    logger.info("Almacenamiento HuggingFace configurado: %s", HF_MODELS_REPO)

# NOTE(review): these imports deliberately stay after setup_seed_vc();
# presumably the modules rely on what the setup step prepares - confirm
# before hoisting them to the top of the file.
from pipeline.training import save_voice_reference, _gpu_warmup  # noqa: F401
from pipeline.separation import separate_audio
from pipeline.inference import convert_voice

# Placeholder dropdown entry shown when no model is registered.
NO_MODEL_LABEL = "(ningún modelo)"

# Location of the log file displayed in the GPU debug tab.
DEBUG_LOG_PATH = "/home/user/app/debug_gpu.log"


def _format_error(exc, tb):
    """Build the user-facing error text: exception type, message, traceback tail."""
    return "Error: {}: {}\n\nDetalles:\n{}".format(
        type(exc).__name__, str(exc), tb[-500:]
    )


def train_voice_model(audio_file, model_name, progress=gr.Progress()):
    """Register an uploaded audio clip as a named voice reference.

    Args:
        audio_file: Path of the uploaded audio, or None when nothing was uploaded.
        model_name: Profile name typed by the user; surrounding whitespace is
            stripped and inner spaces become underscores.
        progress: Gradio progress tracker.

    Returns:
        A ``(status_message, reference_path_or_None)`` tuple. Failures are
        reported through the status message instead of being raised.
    """
    if audio_file is None:
        return "Error: Por favor suba un archivo de audio.", None
    if not model_name or not model_name.strip():
        return "Error: Por favor ingrese un nombre para el modelo.", None

    # NOTE(review): only spaces are normalised here; whether path separators
    # in the name are safe depends on save_voice_reference - confirm.
    model_name = model_name.strip().replace(" ", "_")

    def progress_callback(value, desc):
        progress(value, desc=desc)

    try:
        progress(0.0, desc="Iniciando...")
        _pth_path, ref_path = save_voice_reference(
            audio_path=audio_file,
            model_name=model_name,
            progress_callback=progress_callback,
        )
    except Exception as e:
        tb = traceback.format_exc()
        logger.error("Error en entrenamiento: %s", tb)
        return _format_error(e, tb), None
    return "Referencia de voz '{}' guardada con éxito".format(model_name), ref_path


def get_model_choices():
    """Return the registered model names, or a one-item placeholder list."""
    models = list_models()
    if not models:
        return [NO_MODEL_LABEL]
    return models


def convert_song(
    model_choice,
    song_file,
    pitch,
    similarity,
    diffusion_steps,
    vocal_volume,
    instrumental_volume,
    progress=gr.Progress(),
):
    """Convert the vocals of a song using a registered voice reference.

    The song is split with ``separate_audio``, the vocal stem is converted
    with ``convert_voice``, and the result is remixed with ``mix_audio``.

    Args:
        model_choice: Name of the selected voice profile.
        song_file: Path of the uploaded song, or None when nothing was uploaded.
        pitch: Pitch value from the UI slider; cast to int.
        similarity: Similarity value from the UI slider; cast to float.
        diffusion_steps: Step count from the UI slider; cast to int.
        vocal_volume: Vocal gain passed to the mixer; cast to float.
        instrumental_volume: Instrumental gain passed to the mixer; cast to float.
        progress: Gradio progress tracker.

    Returns:
        A ``(status, vocals_path, converted_path, final_path)`` tuple; the
        three paths are None whenever the status reports an error.
    """
    if song_file is None:
        return "Error: Por favor suba un archivo de audio.", None, None, None
    if model_choice == NO_MODEL_LABEL or not model_choice:
        return "Error: Por favor, registre una referencia de voz primero.", None, None, None

    # Imported on demand, as in the original code.
    from pipeline.mixing import mix_audio

    try:
        progress(0.05, desc="Cargando el modelo...")
        pth_path, _ref_or_index = download_model(model_choice)
        if not pth_path:
            return "Error: Modelo '{}' no encontrado.".format(model_choice), None, None, None

        reference_path = get_reference_path(model_choice)
        if not reference_path:
            return (
                "Error: Audio de referencia no encontrado para '{}'.".format(model_choice),
                None,
                None,
                None,
            )

        progress(0.10, desc="Separación de pistas (Demucs)...")
        vocals_path, instruments_path = separate_audio(song_file)

        progress(0.40, desc="Conversión de voz (Seed-VC)...")
        converted_path = convert_voice(
            audio_path=vocals_path,
            reference_path=reference_path,
            pitch=int(pitch),
            diffusion_steps=int(diffusion_steps),
            similarity=float(similarity),
        )

        progress(0.85, desc="Mezcla final...")
        final_path = mix_audio(
            vocals_path=converted_path,
            instruments_path=instruments_path,
            vocal_volume=float(vocal_volume),
            instrumental_volume=float(instrumental_volume),
        )

        progress(1.0, desc="Terminado")
        return (
            "Conversión terminada con éxito",
            vocals_path,
            converted_path,
            final_path,
        )
    except Exception as e:
        tb = traceback.format_exc()
        logger.error("Error en conversión: %s", tb)
        return _format_error(e, tb), None, None, None


def refresh_models():
    """Render the registered models as an HTML table for the ``gr.HTML`` widget.

    Returns:
        An HTML table with one row per model, or a plain message when no
        model is registered. Model names are HTML-escaped because they
        originate from user input.
    """
    models = list_models()
    if not models:
        return "Ningún modelo registrado"
    rows = "".join(
        "<tr><td>{}</td><td>Disponible</td></tr>".format(html.escape(str(m)))
        for m in models
    )
    return (
        "<table><thead><tr><th>Nombre</th><th>Estado</th></tr></thead>"
        "<tbody>{}</tbody></table>".format(rows)
    )


def delete_selected_model(model_name_to_delete):
    """Delete the chosen model and return ``(status_message, refreshed_table)``."""
    if not model_name_to_delete or model_name_to_delete == NO_MODEL_LABEL:
        return "Por favor seleccione un modelo para eliminar.", refresh_models()
    try:
        delete_model(model_name_to_delete)
    except Exception as e:
        return "Error: {}".format(e), refresh_models()
    return "Modelo '{}' eliminado.".format(model_name_to_delete), refresh_models()


def read_debug_log():
    """Return the contents of the GPU debug log, or a placeholder if absent."""
    try:
        # The context manager closes the handle; undecodable bytes are
        # replaced so a damaged log cannot break the debug tab.
        with open(DEBUG_LOG_PATH, "r", encoding="utf-8", errors="replace") as log_file:
            return log_file.read()
    except FileNotFoundError:
        return "Sin registros."


def _model_dropdown_update():
    """Return a dropdown update carrying the current model choices."""
    return gr.Dropdown(choices=get_model_choices())


DESCRIPTION = """
# Clon Vocal
Herramienta de clonación de voz zero-shot basada en Seed-VC.
"""

with gr.Blocks(title="Clon Vocal", theme=gr.themes.Soft()) as app:
    gr.Markdown(DESCRIPTION)
    with gr.Tabs():
        # --- Tab 1: register a voice reference ---
        with gr.TabItem("Mi voz"):
            gr.Markdown("Registrar su referencia de voz")
            with gr.Row():
                with gr.Column(scale=2):
                    train_audio = gr.Audio(label="Extracto de su voz", type="filepath", sources=["upload"])
                    train_model_name = gr.Textbox(label="Nombre del perfil", placeholder="ej: mi_voz")
                    train_btn = gr.Button("Guardar", variant="primary", size="lg")
                with gr.Column(scale=1):
                    train_status = gr.Textbox(label="Estado", interactive=False, lines=3)
                    train_download = gr.File(label="Archivo de referencia", interactive=False)
            train_btn.click(
                fn=train_voice_model,
                inputs=[train_audio, train_model_name],
                outputs=[train_status, train_download],
            )

        # --- Tab 2: convert a song ---
        with gr.TabItem("Convertir una canción"):
            with gr.Row():
                with gr.Column(scale=2):
                    convert_model = gr.Dropdown(choices=get_model_choices(), label="Perfil vocal")
                    refresh_btn = gr.Button("Actualizar la lista", size="sm")
                    convert_audio = gr.Audio(label="Canción a convertir", type="filepath")
                    with gr.Accordion("Parámetros avanzados", open=False):
                        convert_pitch = gr.Slider(minimum=-24, maximum=24, value=0, step=1, label="Pitch")
                        convert_similarity = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.05, label="Similitud")
                        convert_diffusion = gr.Slider(minimum=5, maximum=100, value=25, step=5, label="Calidad")
                        convert_vocal_vol = gr.Slider(minimum=0.0, maximum=2.0, value=1.0, step=0.1, label="Vol. Voz")
                        convert_inst_vol = gr.Slider(minimum=0.0, maximum=2.0, value=1.0, step=0.1, label="Vol. Inst")
                    convert_btn = gr.Button("Convertir", variant="primary", size="lg")
                with gr.Column(scale=1):
                    convert_status = gr.Textbox(label="Estado", interactive=False)
                    preview_vocals = gr.Audio(label="Original", interactive=False)
                    preview_converted = gr.Audio(label="Convertida", interactive=False)
                    final_output = gr.Audio(label="Final", interactive=False)
            refresh_btn.click(fn=_model_dropdown_update, outputs=[convert_model])
            convert_btn.click(
                fn=convert_song,
                inputs=[
                    convert_model,
                    convert_audio,
                    convert_pitch,
                    convert_similarity,
                    convert_diffusion,
                    convert_vocal_vol,
                    convert_inst_vol,
                ],
                outputs=[convert_status, preview_vocals, preview_converted, final_output],
            )

        # --- Tab 3: list / delete stored models ---
        with gr.TabItem("Mis modelos"):
            models_table = gr.HTML(value=refresh_models())
            with gr.Row():
                models_refresh_btn = gr.Button("Actualizar")
                models_delete_name = gr.Dropdown(choices=get_model_choices(), label="Modelo a eliminar")
                models_delete_btn = gr.Button("Eliminar", variant="stop")
            models_delete_status = gr.Textbox(label="Estado")
            models_refresh_btn.click(fn=refresh_models, outputs=[models_table])
            models_delete_btn.click(
                fn=delete_selected_model,
                inputs=[models_delete_name],
                outputs=[models_delete_status, models_table],
            )

        # --- Tab 4: GPU debug log viewer ---
        with gr.TabItem("Depuración GPU"):
            debug_output = gr.Textbox(label="Registros", interactive=False, lines=20)
            debug_btn = gr.Button("Leer")
            debug_btn.click(fn=read_debug_log, outputs=[debug_output])

if __name__ == "__main__":
    app.launch(server_name="0.0.0.0")