# rvc / app.py
# dimensionalpulsar's picture
# Update app.py
# e940250 verified
import os
import sys
import logging
import tempfile
import shutil
import gradio as gr
# Best-effort monkey-patch of gradio_client's schema helpers: each wrapper
# returns "Any" when handed a schema that is not a dict and otherwise defers
# to the original implementation.
# NOTE(review): presumably works around gradio_client failing on non-dict
# (e.g. boolean) JSON schemas -- confirm against the installed version.
try:
    import gradio_client.utils as _gc_utils

    _orig_get_type = _gc_utils.get_type

    def _patched_get_type(schema, *args, **kwargs):
        """Return "Any" for non-dict schemas, else call the original get_type."""
        if not isinstance(schema, dict):
            return "Any"
        return _orig_get_type(schema, *args, **kwargs)

    _gc_utils.get_type = _patched_get_type

    _orig_json_schema = _gc_utils._json_schema_to_python_type

    def _patched_json_schema(schema, *args, **kwargs):
        """Return "Any" for non-dict schemas, else call the original converter."""
        if not isinstance(schema, dict):
            return "Any"
        return _orig_json_schema(schema, *args, **kwargs)

    _gc_utils._json_schema_to_python_type = _patched_json_schema
    # The public entry point is replaced as well so it routes through the
    # guarded private converter.
    _gc_utils.json_schema_to_python_type = lambda schema, defs=None: _patched_json_schema(
        schema, defs
    )
except Exception as exc:
    # The patch stays optional (the app must still start without it), but the
    # failure is reported instead of being silently discarded. A named logger
    # is used rather than logging.warning() so the root logger is not
    # configured implicitly before the basicConfig() call further down.
    logging.getLogger(__name__).warning(
        "No se pudo aplicar el parche de gradio_client: %s: %s",
        type(exc).__name__,
        exc,
    )
# Configure the root logger (INFO level, timestamp + level + message) and
# create this module's logger, conventionally named after __name__.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
logger.info("Inicializando la aplicación...")
from pipeline.setup import setup_seed_vc
from pipeline.storage import init_storage, list_models, download_model, delete_model, get_reference_path
# One-time Seed-VC setup. A failure does not abort the import: it is logged
# at ERROR level together with the traceback so the cause can be diagnosed,
# and the application keeps starting.
try:
    setup_seed_vc()
except Exception as e:
    logger.exception("Error durante la configuración: %s", e)

# Model storage is only initialised when the HF_MODELS_REPO environment
# variable is set to a non-empty value.
HF_MODELS_REPO = os.environ.get("HF_MODELS_REPO", "")
if HF_MODELS_REPO:
    init_storage(HF_MODELS_REPO)
    logger.info("Almacenamiento HuggingFace configurado: {}".format(HF_MODELS_REPO))
from pipeline.training import save_voice_reference, _gpu_warmup
from pipeline.separation import separate_audio
from pipeline.inference import convert_voice
def train_voice_model(audio_file, model_name, progress=gr.Progress()):
    """Validate the form inputs and save a voice reference under ``model_name``.

    Args:
        audio_file: Value of the audio input; ``None`` means nothing was uploaded.
        model_name: Profile name typed by the user. It is stripped and its
            spaces are replaced by underscores before being used.
        progress: Gradio progress tracker; also handed to
            ``save_voice_reference`` through a small forwarding callback.

    Returns:
        A ``(status_message, reference_path)`` tuple. ``reference_path`` is
        ``None`` when validation fails or when saving raises.
    """
    if audio_file is None:
        return "Error: Por favor suba un archivo de audio.", None

    stripped_name = model_name.strip() if model_name else ""
    if not stripped_name:
        return "Error: Por favor ingrese un nombre para el modelo.", None
    model_name = stripped_name.replace(" ", "_")

    # Parameter names are kept as (value, desc) in case the callee passes
    # them by keyword.
    def forward_progress(value, desc):
        progress(value, desc=desc)

    try:
        progress(0.0, desc="Iniciando...")
        # Only the second element of the returned pair is surfaced to the UI.
        _, ref_path = save_voice_reference(
            audio_path=audio_file,
            model_name=model_name,
            progress_callback=forward_progress,
        )
        success_message = "Referencia de voz '{}' guardada con éxito".format(model_name)
        return success_message, ref_path
    except Exception as e:
        import traceback

        tb = traceback.format_exc()
        logger.error("Error en entrenamiento: %s", tb)
        # The status box shows the exception plus the last 500 characters of
        # the traceback.
        failure_message = "Error: {}: {}\n\nDetalles:\n{}".format(
            type(e).__name__, str(e), tb[-500:]
        )
        return failure_message, None
def get_model_choices():
    """Return the dropdown choices for the stored models.

    Returns whatever ``list_models()`` yields, or a single placeholder entry
    ``"(ningún modelo)"`` when that result is empty.
    """
    return list_models() or ["(ningún modelo)"]
def convert_song(
    model_choice,
    song_file,
    pitch,
    similarity,
    diffusion_steps,
    vocal_volume,
    instrumental_volume,
    progress=gr.Progress(),
):
    """Run the conversion pipeline: load model, separate, convert, mix.

    Args:
        model_choice: Name selected in the profile dropdown.
        song_file: Path of the uploaded song, or ``None`` if nothing was uploaded.
        pitch: Pitch setting, cast to ``int`` before use.
        similarity: Similarity setting, cast to ``float`` before use.
        diffusion_steps: Diffusion step count, cast to ``int`` before use.
        vocal_volume: Gain for the converted vocals, cast to ``float``.
        instrumental_volume: Gain for the instrumental track, cast to ``float``.
        progress: Gradio progress tracker updated between stages.

    Returns:
        ``(status, separated_vocals_path, converted_vocals_path, final_mix_path)``.
        The three paths are ``None`` whenever validation fails, a model or
        reference is missing, or a stage raises.
    """

    def failure(message):
        # Every failure path shares the same shape: a message and no audio.
        return message, None, None, None

    if song_file is None:
        return failure("Error: Por favor suba un archivo de audio.")
    if not model_choice or model_choice == "(ningún modelo)":
        return failure("Error: Por favor, registre una referencia de voz primero.")

    # Imported here, outside the try block, so an import failure propagates
    # instead of being reported as a conversion error.
    from pipeline.mixing import mix_audio

    try:
        progress(0.05, desc="Cargando el modelo...")
        pth_path, _ = download_model(model_choice)
        if not pth_path:
            return failure("Error: Modelo '{}' no encontrado.".format(model_choice))

        reference_path = get_reference_path(model_choice)
        if not reference_path:
            return failure(
                "Error: Audio de referencia no encontrado para '{}'.".format(model_choice)
            )

        progress(0.10, desc="Separación de pistas (Demucs)...")
        vocals_path, instruments_path = separate_audio(song_file)

        progress(0.40, desc="Conversión de voz (Seed-VC)...")
        converted_path = convert_voice(
            audio_path=vocals_path,
            reference_path=reference_path,
            pitch=int(pitch),
            diffusion_steps=int(diffusion_steps),
            similarity=float(similarity),
        )

        progress(0.85, desc="Mezcla final...")
        final_path = mix_audio(
            vocals_path=converted_path,
            instruments_path=instruments_path,
            vocal_volume=float(vocal_volume),
            instrumental_volume=float(instrumental_volume),
        )

        progress(1.0, desc="Terminado")
        return "Conversión terminada con éxito", vocals_path, converted_path, final_path
    except Exception as e:
        import traceback

        tb = traceback.format_exc()
        logger.error("Error en conversión: %s", tb)
        # The status box shows the exception plus the last 500 characters of
        # the traceback.
        return failure(
            "Error: {}: {}\n\nDetalles:\n{}".format(type(e).__name__, str(e), tb[-500:])
        )
def refresh_models():
    """Render the stored models as an HTML table.

    The result is displayed in a ``gr.HTML`` component, so it is built as
    markup: one table row per model, with the model name HTML-escaped.

    Returns:
        ``"Ningún modelo registrado"`` when ``list_models()`` is empty,
        otherwise an HTML ``<table>`` string with "Nombre" and "Estado" columns.
    """
    import html  # function-scope import keeps the block self-contained

    models = list_models()
    if not models:
        return "Ningún modelo registrado"

    # Names are escaped because they originate from user input and are
    # injected into markup.
    rows = "".join(
        "<tr><td>{}</td><td>Disponible</td></tr>".format(html.escape(str(m)))
        for m in models
    )
    return (
        "<table><thead><tr><th>Nombre</th><th>Estado</th></tr></thead>"
        "<tbody>{}</tbody></table>".format(rows)
    )
def delete_selected_model(model_name_to_delete):
    """Delete the selected model and return a status plus the refreshed listing.

    Args:
        model_name_to_delete: Dropdown value; an empty value or the
            ``"(ningún modelo)"`` placeholder counts as "nothing selected".

    Returns:
        ``(status_message, models_listing)`` where the listing comes from
        ``refresh_models()``.
    """
    nothing_selected = (
        not model_name_to_delete or model_name_to_delete == "(ningún modelo)"
    )
    if nothing_selected:
        return "Por favor seleccione un modelo para eliminar.", refresh_models()

    try:
        delete_model(model_name_to_delete)
        status = "Modelo '{}' eliminado.".format(model_name_to_delete)
        # Kept inside the try so a failure while refreshing is reported the
        # same way as a failure while deleting.
        listing = refresh_models()
    except Exception as e:
        status = "Error: {}".format(e)
        listing = refresh_models()
    return status, listing
# Markdown text rendered at the top of the page via gr.Markdown(DESCRIPTION).
DESCRIPTION = """
# Clon Vocal
Herramienta de clonación de voz zero-shot basada en Seed-VC.
"""
# Gradio UI: four tabs (voice registration, song conversion, model management,
# GPU debug log) wired to the handler functions defined above.
# NOTE(review): the nesting of the Accordion and Row contents is inferred --
# confirm against the intended layout.
with gr.Blocks(title="Clon Vocal", theme=gr.themes.Soft()) as app:
    gr.Markdown(DESCRIPTION)
    with gr.Tabs():
        # --- Tab 1: register a voice reference ---------------------------
        with gr.TabItem("Mi voz"):
            gr.Markdown("Registrar su referencia de voz")
            with gr.Row():
                with gr.Column(scale=2):
                    train_audio = gr.Audio(
                        label="Extracto de su voz", type="filepath", sources=["upload"]
                    )
                    train_model_name = gr.Textbox(
                        label="Nombre del perfil", placeholder="ej: mi_voz"
                    )
                    train_btn = gr.Button("Guardar", variant="primary", size="lg")
                with gr.Column(scale=1):
                    train_status = gr.Textbox(label="Estado", interactive=False, lines=3)
                    train_download = gr.File(
                        label="Archivo de referencia", interactive=False
                    )
            train_btn.click(
                fn=train_voice_model,
                inputs=[train_audio, train_model_name],
                outputs=[train_status, train_download],
            )

        # --- Tab 2: convert a song ---------------------------------------
        with gr.TabItem("Convertir una canción"):
            with gr.Row():
                with gr.Column(scale=2):
                    convert_model = gr.Dropdown(
                        choices=get_model_choices(), label="Perfil vocal"
                    )
                    refresh_btn = gr.Button("Actualizar la lista", size="sm")
                    convert_audio = gr.Audio(label="Canción a convertir", type="filepath")
                    with gr.Accordion("Parámetros avanzados", open=False):
                        convert_pitch = gr.Slider(
                            minimum=-24, maximum=24, value=0, step=1, label="Pitch"
                        )
                        convert_similarity = gr.Slider(
                            minimum=0.0, maximum=1.0, value=0.7, step=0.05, label="Similitud"
                        )
                        convert_diffusion = gr.Slider(
                            minimum=5, maximum=100, value=25, step=5, label="Calidad"
                        )
                        convert_vocal_vol = gr.Slider(
                            minimum=0.0, maximum=2.0, value=1.0, step=0.1, label="Vol. Voz"
                        )
                        convert_inst_vol = gr.Slider(
                            minimum=0.0, maximum=2.0, value=1.0, step=0.1, label="Vol. Inst"
                        )
                    convert_btn = gr.Button("Convertir", variant="primary", size="lg")
                with gr.Column(scale=1):
                    convert_status = gr.Textbox(label="Estado", interactive=False)
                    preview_vocals = gr.Audio(label="Original", interactive=False)
                    preview_converted = gr.Audio(label="Convertida", interactive=False)
                    final_output = gr.Audio(label="Final", interactive=False)
            # Re-query the model list so newly saved profiles appear.
            refresh_btn.click(
                fn=lambda: gr.Dropdown(choices=get_model_choices()),
                outputs=[convert_model],
            )
            convert_btn.click(
                fn=convert_song,
                inputs=[
                    convert_model,
                    convert_audio,
                    convert_pitch,
                    convert_similarity,
                    convert_diffusion,
                    convert_vocal_vol,
                    convert_inst_vol,
                ],
                outputs=[convert_status, preview_vocals, preview_converted, final_output],
            )

        # --- Tab 3: list / delete stored models --------------------------
        with gr.TabItem("Mis modelos"):
            models_table = gr.HTML(value=refresh_models())
            with gr.Row():
                models_refresh_btn = gr.Button("Actualizar")
                models_delete_name = gr.Dropdown(
                    choices=get_model_choices(), label="Modelo a eliminar"
                )
                models_delete_btn = gr.Button("Eliminar", variant="stop")
            models_delete_status = gr.Textbox(label="Estado")
            models_refresh_btn.click(fn=refresh_models, outputs=[models_table])
            models_delete_btn.click(
                fn=delete_selected_model,
                inputs=[models_delete_name],
                outputs=[models_delete_status, models_table],
            )

        # --- Tab 4: show the GPU debug log -------------------------------
        with gr.TabItem("Depuración GPU"):
            debug_output = gr.Textbox(label="Registros", interactive=False, lines=20)
            debug_btn = gr.Button("Leer")

            def read_debug_log():
                """Return the GPU debug log text, or a placeholder if the file is missing."""
                log_path = "/home/user/app/debug_gpu.log"
                try:
                    # The context manager guarantees the handle is closed
                    # once the content has been read.
                    with open(log_path, "r") as log_file:
                        return log_file.read()
                except FileNotFoundError:
                    return "Sin registros."

            debug_btn.click(fn=read_debug_log, outputs=[debug_output])
# Start the Gradio server only when executed as a script; "0.0.0.0" binds
# the server to all network interfaces.
if __name__ == "__main__":
    app.launch(server_name="0.0.0.0")