""" RVC Voice Conversion – HuggingFace Space Simple, fast, GPU/CPU auto-detected. Now with video upload and 5-output generation (acapella, instrumental, RVC on original, RVC on acapella). Original job queue and all tabs fully preserved. """ from __future__ import annotations import os import re import gradio as gr from lib.config import ( BUILTIN_MODELS, CSS, DEVICE_LABEL, MAX_INPUT_DURATION, logger, ) from lib.jobs import ( get_jobs_table, get_queue_info, poll_job, submit_job, submit_full_pipeline, ) from lib.models import list_models, startup_downloads from lib.ui import ( refresh_models, toggle_autotune, upload_model, create_video_section, create_five_outputs, ) # ── Startup ─────────────────────────────────────────────────────────────────── startup_status = "" default_model = "" try: default_model = startup_downloads() startup_status = f"✅ Ready · {DEVICE_LABEL}" except Exception as e: startup_status = f"⚠️ Some assets unavailable: {e} · {DEVICE_LABEL}" logger.warning("Startup download issue: %s", e) initial_models = list_models() initial_value = default_model if default_model in initial_models else ( initial_models[0] if initial_models else None ) # ── Gradio UI ───────────────────────────────────────────────────────────────── with gr.Blocks(title="RVC Voice Conversion - Full Suite", delete_cache=(3600, 3600)) as demo: gr.HTML(f"""
Conversão de voz com suporte a vídeos, extração de acapella/instrumental e 5 saídas!
{startup_status}
""") with gr.Tabs(): # ── TAB 1: Convert ──────────────────────────────────────────────────── with gr.Tab("🎤 Convert"): gr.Markdown("## Opção 1: Conversão RVC clássica (um arquivo de saída)") with gr.Row(): with gr.Column(scale=1): gr.Markdown("### 🔊 Input Audio") with gr.Tabs(): with gr.Tab("🎙️ Microphone"): inp_mic = gr.Audio( sources=["microphone"], type="filepath", label="Record", ) with gr.Tab("📁 Upload File"): inp_file = gr.Audio( sources=["upload"], type="filepath", label="Upload audio (wav / mp3 / flac / ogg …)", ) gr.Markdown("### 🤖 Model") model_dd = gr.Dropdown( choices=initial_models, value=initial_value, label="Active Voice Model", interactive=True, ) gr.Markdown("### 🎚️ Basic Settings") pitch_sl = gr.Slider( minimum=-24, maximum=24, value=0, step=1, label="Pitch Shift (semitones)", info="0 = unchanged · positive = higher · negative = lower", ) f0_radio = gr.Radio( choices=["rmvpe", "fcpe", "crepe", "crepe-tiny"], value="rmvpe", label="Pitch Extraction Method", info="rmvpe = fastest & accurate · crepe = highest quality (slower)", ) with gr.Column(scale=1): gr.Markdown("### ⚙️ Advanced Settings") with gr.Accordion("Expand advanced options", open=False): index_rate_sl = gr.Slider( 0.0, 1.0, value=0.75, step=0.05, label="Index Rate", info="How strongly the FAISS index influences timbre (0 = off)", ) protect_sl = gr.Slider( 0.0, 0.5, value=0.5, step=0.01, label="Protect Consonants", info="Protects unvoiced consonants — 0.5 = max protection", ) filter_radius_sl = gr.Slider( 0, 7, value=3, step=1, label="Respiration Filter Radius", info="Median filter on pitch — higher = smoother, reduces breath noise", ) vol_env_sl = gr.Slider( 0.0, 1.0, value=0.25, step=0.05, label="Volume Envelope Mix", info="0.25 = natural blend · 1 = preserve input loudness · 0 = model output", ) with gr.Row(): clean_cb = gr.Checkbox(value=False, label="Noise Reduction") clean_sl = gr.Slider( 0.0, 1.0, value=0.5, step=0.05, label="Reduction Strength", ) with gr.Row(): split_cb = gr.Checkbox(value=False, label="Split Long Audio") autotune_cb = gr.Checkbox(value=False, label="Autotune") autotune_sl = gr.Slider( 0.0, 1.0, value=1.0, step=0.05, label="Autotune Strength", visible=False, ) autotune_cb.change( fn=toggle_autotune, inputs=autotune_cb, outputs=autotune_sl, ) gr.Markdown("**🎛️ Reverb**") reverb_cb = gr.Checkbox(value=False, label="Enable Reverb") with gr.Group(visible=False) as reverb_group: reverb_room_sl = gr.Slider( 0.0, 1.0, value=0.15, step=0.05, label="Room Size", info="Larger = bigger sounding space", ) reverb_damp_sl = gr.Slider( 0.0, 1.0, value=0.7, step=0.05, label="Damping", info="Higher = more absorption, less echo tail", ) reverb_wet_sl = gr.Slider( 0.0, 1.0, value=0.15, step=0.05, label="Wet Level", info="How much reverb is mixed in (0.15 = subtle)", ) reverb_cb.change( fn=lambda v: gr.update(visible=v), inputs=reverb_cb, outputs=reverb_group, ) fmt_radio = gr.Radio( choices=["WAV", "MP3", "FLAC", "OPUS"], value="WAV", label="Output Format", info="WAV = lossless, large file; MP3/OPUS = smaller", ) with gr.Row(): classic_btn = gr.Button("🚀 Convert Voice (Classic)", variant="primary") classic_status = gr.Markdown(value="") classic_audio = gr.Audio(label="Converted Audio", type="filepath", interactive=False) gr.Markdown("---") gr.Markdown("## Opção 2: Pipeline completo – 5 saídas (vídeo + separação de faixas + RVC)") with gr.Row(): with gr.Column(scale=1): video_input = create_video_section() gr.Markdown("*(Ou use áudio/microfone acima – o pipeline aceita qualquer fonte)*") with gr.Column(scale=1): full_btn = gr.Button("🚀 Full Pipeline (5 Outputs)", variant="secondary") full_status = gr.Markdown(value="") # 5 componentes de saída (entrada_acapella, entrada_audio, entrada_instrumental, saida_audio, saida_acapella) = create_five_outputs() # Seção de verificação de jobs legada gr.Markdown("---") gr.Markdown("### 🔍 Verificar status de um job (clássico)") with gr.Row(): job_id_box = gr.Textbox( label="Job ID", placeholder="e.g. a3f2b1c9", scale=3, ) poll_btn = gr.Button("🔄 Check", scale=1) poll_status = gr.Markdown(value="") poll_audio = gr.Audio(label="Result", type="filepath", interactive=False) # ── TAB 2: Models (original) ────────────────────────────────────────── with gr.Tab("📦 Models"): gr.Markdown(""" ### Upload a Custom RVC Model Provide a **`.zip`** containing: - **`model.pth`** — weights (required) - **`model.index`** — FAISS index (optional, improves voice matching) **Built-in models** (pre-downloaded on startup): Vestia Zeta v1 · Vestia Zeta v2 · Ayunda Risu · Gawr Gura """) with gr.Row(): with gr.Column(scale=1): up_zip = gr.File(label="Model ZIP", file_types=[".zip"]) up_name = gr.Textbox( label="Model Name", placeholder="Leave blank to use zip filename", ) up_btn = gr.Button("📤 Load Model", variant="primary") up_status = gr.Textbox(label="Status", interactive=False, lines=2) with gr.Column(scale=1): gr.Markdown("### Loaded Models") models_table = gr.Dataframe( col_count=(1, "fixed"), value=[[m] for m in initial_models], interactive=False, label="", ) refresh_btn = gr.Button("🔄 Refresh") up_btn.click( fn=upload_model, inputs=[up_zip, up_name], outputs=[up_status, model_dd, models_table], ) refresh_btn.click( fn=refresh_models, outputs=[models_table, model_dd], ) # ── TAB 3: Jobs (original, intocada) ────────────────────────────────── with gr.Tab("📋 Jobs"): gr.Markdown("All submitted jobs, newest first. Click **Refresh** to update.") queue_status = gr.Markdown(value=get_queue_info, every=10) jobs_table = gr.Dataframe( headers=["Job ID", "Model", "Status", "Time", "Download"], col_count=(5, "fixed"), value=get_jobs_table, interactive=False, wrap=True, datatype=["str", "str", "str", "str", "markdown"], every=10, ) refresh_jobs_btn = gr.Button("🔄 Refresh") def _refresh_jobs(): return get_queue_info(), get_jobs_table() refresh_jobs_btn.click(fn=_refresh_jobs, outputs=[queue_status, jobs_table]) # ── TAB 4: Help (atualizada) ────────────────────────────────────────── with gr.Tab("ℹ️ Help"): gr.Markdown(f""" ## How it works RVC (Retrieval-Based Voice Conversion) transforms a voice recording to sound like a target speaker using only that speaker's model file. --- ## Two conversion modes ### 1. Classic Mode (single output) - Upload an audio file or record your voice - Choose a model and settings - Click **Convert Voice (Classic)** - Result is added to the job queue – you can monitor it in the **Jobs** tab ### 2. Full Pipeline Mode (5 outputs) - Works with **video (MP4)** or any audio source - Automatically extracts **acapella** (vocals) and **instrumental** using Demucs - Runs RVC conversion on **both** the original audio and the acapella - Returns 5 files: 1. `entrada_acapella.mp3` – extracted vocals 2. `entrada.mp3` – original audio 3. `entrada_instrumental.mp3` – background music 4. `saida.mp3` – RVC applied to original audio 5. `saida_acapella.mp3` – RVC applied only to vocals - All 5 files appear directly on the interface – no need to poll jobs --- **Device:** `{DEVICE_LABEL}` **Max input duration:** {MAX_INPUT_DURATION // 60} minutes --- ## Credits Engine: [Ultimate RVC](https://github.com/JackismyShephard/ultimate-rvc) Separation: [Demucs](https://github.com/facebookresearch/demucs) """) # ── Event handlers ──────────────────────────────────────────────────────── def _submit_classic(*args): status, audio = submit_job(*args) match = re.search(r"[a-f0-9]{8}", status or "") job_id = match.group(0) if match else "" return status, audio, job_id, get_queue_info(), get_jobs_table() classic_btn.click( fn=_submit_classic, inputs=[ inp_mic, inp_file, model_dd, pitch_sl, f0_radio, index_rate_sl, protect_sl, vol_env_sl, clean_cb, clean_sl, split_cb, autotune_cb, autotune_sl, filter_radius_sl, fmt_radio, reverb_cb, reverb_room_sl, reverb_damp_sl, reverb_wet_sl, ], outputs=[classic_status, classic_audio, job_id_box, queue_status, jobs_table], ) full_btn.click( fn=submit_full_pipeline, inputs=[ video_input, inp_mic, inp_file, model_dd, pitch_sl, f0_radio, index_rate_sl, protect_sl, vol_env_sl, clean_cb, clean_sl, split_cb, autotune_cb, autotune_sl, filter_radius_sl, fmt_radio, reverb_cb, reverb_room_sl, reverb_damp_sl, reverb_wet_sl, ], outputs=[ full_status, entrada_acapella, entrada_audio, entrada_instrumental, saida_audio, saida_acapella, ], ) def _poll_and_refresh(job_id): status, file = poll_job(job_id) return status, file, get_queue_info(), get_jobs_table() poll_btn.click( fn=_poll_and_refresh, inputs=[job_id_box], outputs=[poll_status, poll_audio, queue_status, jobs_table], ) # ── Launch ──────────────────────────────────────────────────────────────────── if __name__ == "__main__": demo.queue(default_concurrency_limit=5) demo.launch( server_name="0.0.0.0", server_port=int(os.getenv("PORT", 7860)), max_threads=10, ssr_mode=False, css=CSS, )