Spaces:
Running
Running
| """ | |
| RVC Voice Conversion β HuggingFace Space | |
| Simple, fast, GPU/CPU auto-detected. | |
| Now with video upload and 5-output generation (acapella, instrumental, RVC on original, RVC on acapella). | |
| Original job queue and all tabs fully preserved. | |
| """ | |
| from __future__ import annotations | |
| import os | |
| import re | |
| import gradio as gr | |
| from lib.config import ( | |
| BUILTIN_MODELS, | |
| CSS, | |
| DEVICE_LABEL, | |
| MAX_INPUT_DURATION, | |
| logger, | |
| ) | |
| from lib.jobs import ( | |
| get_jobs_table, | |
| get_queue_info, | |
| poll_job, | |
| submit_job, | |
| submit_full_pipeline, | |
| ) | |
| from lib.models import list_models, startup_downloads | |
| from lib.ui import ( | |
| refresh_models, | |
| toggle_autotune, | |
| upload_model, | |
| create_video_section, | |
| create_five_outputs, | |
| ) | |
# ── Startup ───────────────────────────────────────────────────────────────────
# Run the startup downloads once at import time. A failure is not fatal: it is
# logged and surfaced through ``startup_status`` so the app can still start.
startup_status = ""
default_model = ""
try:
    default_model = startup_downloads()
    startup_status = f"β Ready Β· {DEVICE_LABEL}"
except Exception as exc:
    startup_status = f"β οΈ Some assets unavailable: {exc} Β· {DEVICE_LABEL}"
    logger.warning("Startup download issue: %s", exc)

# Choose the dropdown's initial selection: the value returned by
# ``startup_downloads`` when it is among the listed models, otherwise the first
# listed model, otherwise nothing.
initial_models = list_models()
if default_model in initial_models:
    initial_value = default_model
elif initial_models:
    initial_value = initial_models[0]
else:
    initial_value = None
# ── Gradio UI ─────────────────────────────────────────────────────────────────
| with gr.Blocks(title="RVC Voice Conversion - Full Suite", delete_cache=(3600, 3600)) as demo: | |
| gr.HTML(f""" | |
| <div id="header"> | |
| <h1>ποΈ RVC Voice Conversion - Full Suite</h1> | |
| <p>ConversΓ£o de voz com suporte a vΓdeos, extraΓ§Γ£o de acapella/instrumental e 5 saΓdas!</p> | |
| </div> | |
| <p id="status">{startup_status}</p> | |
| """) | |
| with gr.Tabs(): | |
| # ββ TAB 1: Convert ββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| with gr.Tab("π€ Convert"): | |
| gr.Markdown("## OpΓ§Γ£o 1: ConversΓ£o RVC clΓ‘ssica (um arquivo de saΓda)") | |
| with gr.Row(): | |
| with gr.Column(scale=1): | |
| gr.Markdown("### π Input Audio") | |
| with gr.Tabs(): | |
| with gr.Tab("ποΈ Microphone"): | |
| inp_mic = gr.Audio( | |
| sources=["microphone"], | |
| type="filepath", | |
| label="Record", | |
| ) | |
| with gr.Tab("π Upload File"): | |
| inp_file = gr.Audio( | |
| sources=["upload"], | |
| type="filepath", | |
| label="Upload audio (wav / mp3 / flac / ogg β¦)", | |
| ) | |
| gr.Markdown("### π€ Model") | |
| model_dd = gr.Dropdown( | |
| choices=initial_models, | |
| value=initial_value, | |
| label="Active Voice Model", | |
| interactive=True, | |
| ) | |
| gr.Markdown("### ποΈ Basic Settings") | |
| pitch_sl = gr.Slider( | |
| minimum=-24, maximum=24, value=0, step=1, | |
| label="Pitch Shift (semitones)", | |
| info="0 = unchanged Β· positive = higher Β· negative = lower", | |
| ) | |
| f0_radio = gr.Radio( | |
| choices=["rmvpe", "fcpe", "crepe", "crepe-tiny"], | |
| value="rmvpe", | |
| label="Pitch Extraction Method", | |
| info="rmvpe = fastest & accurate Β· crepe = highest quality (slower)", | |
| ) | |
| with gr.Column(scale=1): | |
| gr.Markdown("### βοΈ Advanced Settings") | |
| with gr.Accordion("Expand advanced options", open=False): | |
| index_rate_sl = gr.Slider( | |
| 0.0, 1.0, value=0.75, step=0.05, | |
| label="Index Rate", | |
| info="How strongly the FAISS index influences timbre (0 = off)", | |
| ) | |
| protect_sl = gr.Slider( | |
| 0.0, 0.5, value=0.5, step=0.01, | |
| label="Protect Consonants", | |
| info="Protects unvoiced consonants β 0.5 = max protection", | |
| ) | |
| filter_radius_sl = gr.Slider( | |
| 0, 7, value=3, step=1, | |
| label="Respiration Filter Radius", | |
| info="Median filter on pitch β higher = smoother, reduces breath noise", | |
| ) | |
| vol_env_sl = gr.Slider( | |
| 0.0, 1.0, value=0.25, step=0.05, | |
| label="Volume Envelope Mix", | |
| info="0.25 = natural blend Β· 1 = preserve input loudness Β· 0 = model output", | |
| ) | |
| with gr.Row(): | |
| clean_cb = gr.Checkbox(value=False, label="Noise Reduction") | |
| clean_sl = gr.Slider( | |
| 0.0, 1.0, value=0.5, step=0.05, | |
| label="Reduction Strength", | |
| ) | |
| with gr.Row(): | |
| split_cb = gr.Checkbox(value=False, label="Split Long Audio") | |
| autotune_cb = gr.Checkbox(value=False, label="Autotune") | |
| autotune_sl = gr.Slider( | |
| 0.0, 1.0, value=1.0, step=0.05, | |
| label="Autotune Strength", | |
| visible=False, | |
| ) | |
| autotune_cb.change( | |
| fn=toggle_autotune, | |
| inputs=autotune_cb, | |
| outputs=autotune_sl, | |
| ) | |
| gr.Markdown("**ποΈ Reverb**") | |
| reverb_cb = gr.Checkbox(value=False, label="Enable Reverb") | |
| with gr.Group(visible=False) as reverb_group: | |
| reverb_room_sl = gr.Slider( | |
| 0.0, 1.0, value=0.15, step=0.05, | |
| label="Room Size", | |
| info="Larger = bigger sounding space", | |
| ) | |
| reverb_damp_sl = gr.Slider( | |
| 0.0, 1.0, value=0.7, step=0.05, | |
| label="Damping", | |
| info="Higher = more absorption, less echo tail", | |
| ) | |
| reverb_wet_sl = gr.Slider( | |
| 0.0, 1.0, value=0.15, step=0.05, | |
| label="Wet Level", | |
| info="How much reverb is mixed in (0.15 = subtle)", | |
| ) | |
| reverb_cb.change( | |
| fn=lambda v: gr.update(visible=v), | |
| inputs=reverb_cb, | |
| outputs=reverb_group, | |
| ) | |
| fmt_radio = gr.Radio( | |
| choices=["WAV", "MP3", "FLAC", "OPUS"], | |
| value="WAV", | |
| label="Output Format", | |
| info="WAV = lossless, large file; MP3/OPUS = smaller", | |
| ) | |
| with gr.Row(): | |
| classic_btn = gr.Button("π Convert Voice (Classic)", variant="primary") | |
| classic_status = gr.Markdown(value="") | |
| classic_audio = gr.Audio(label="Converted Audio", type="filepath", interactive=False) | |
| gr.Markdown("---") | |
| gr.Markdown("## OpΓ§Γ£o 2: Pipeline completo β 5 saΓdas (vΓdeo + separaΓ§Γ£o de faixas + RVC)") | |
| with gr.Row(): | |
| with gr.Column(scale=1): | |
| video_input = create_video_section() | |
| gr.Markdown("*(Ou use Γ‘udio/microfone acima β o pipeline aceita qualquer fonte)*") | |
| with gr.Column(scale=1): | |
| full_btn = gr.Button("π Full Pipeline (5 Outputs)", variant="secondary") | |
| full_status = gr.Markdown(value="") | |
| # 5 componentes de saΓda | |
| (entrada_acapella, entrada_audio, entrada_instrumental, | |
| saida_audio, saida_acapella) = create_five_outputs() | |
| # SeΓ§Γ£o de verificaΓ§Γ£o de jobs legada | |
| gr.Markdown("---") | |
| gr.Markdown("### π Verificar status de um job (clΓ‘ssico)") | |
| with gr.Row(): | |
| job_id_box = gr.Textbox( | |
| label="Job ID", | |
| placeholder="e.g. a3f2b1c9", | |
| scale=3, | |
| ) | |
| poll_btn = gr.Button("π Check", scale=1) | |
| poll_status = gr.Markdown(value="") | |
| poll_audio = gr.Audio(label="Result", type="filepath", interactive=False) | |
| # ββ TAB 2: Models (original) ββββββββββββββββββββββββββββββββββββββββββ | |
| with gr.Tab("π¦ Models"): | |
| gr.Markdown(""" | |
| ### Upload a Custom RVC Model | |
| Provide a **`.zip`** containing: | |
| - **`model.pth`** β weights (required) | |
| - **`model.index`** β FAISS index (optional, improves voice matching) | |
| **Built-in models** (pre-downloaded on startup): | |
| Vestia Zeta v1 Β· Vestia Zeta v2 Β· Ayunda Risu Β· Gawr Gura | |
| """) | |
| with gr.Row(): | |
| with gr.Column(scale=1): | |
| up_zip = gr.File(label="Model ZIP", file_types=[".zip"]) | |
| up_name = gr.Textbox( | |
| label="Model Name", | |
| placeholder="Leave blank to use zip filename", | |
| ) | |
| up_btn = gr.Button("π€ Load Model", variant="primary") | |
| up_status = gr.Textbox(label="Status", interactive=False, lines=2) | |
| with gr.Column(scale=1): | |
| gr.Markdown("### Loaded Models") | |
| models_table = gr.Dataframe( | |
| col_count=(1, "fixed"), | |
| value=[[m] for m in initial_models], | |
| interactive=False, | |
| label="", | |
| ) | |
| refresh_btn = gr.Button("π Refresh") | |
| up_btn.click( | |
| fn=upload_model, | |
| inputs=[up_zip, up_name], | |
| outputs=[up_status, model_dd, models_table], | |
| ) | |
| refresh_btn.click( | |
| fn=refresh_models, | |
| outputs=[models_table, model_dd], | |
| ) | |
| # ββ TAB 3: Jobs (original, intocada) ββββββββββββββββββββββββββββββββββ | |
| with gr.Tab("π Jobs"): | |
| gr.Markdown("All submitted jobs, newest first. Click **Refresh** to update.") | |
| queue_status = gr.Markdown(value=get_queue_info, every=10) | |
| jobs_table = gr.Dataframe( | |
| headers=["Job ID", "Model", "Status", "Time", "Download"], | |
| col_count=(5, "fixed"), | |
| value=get_jobs_table, | |
| interactive=False, | |
| wrap=True, | |
| datatype=["str", "str", "str", "str", "markdown"], | |
| every=10, | |
| ) | |
| refresh_jobs_btn = gr.Button("π Refresh") | |
def _refresh_jobs():
    """Return fresh values for the queue summary and the jobs table.

    Returns:
        A 2-tuple ``(queue_info, jobs_table)``, in the order of the outputs
        wired to the Jobs tab's Refresh button.
    """
    queue_info = get_queue_info()
    table_rows = get_jobs_table()
    return queue_info, table_rows
| refresh_jobs_btn.click(fn=_refresh_jobs, outputs=[queue_status, jobs_table]) | |
| # ββ TAB 4: Help (atualizada) ββββββββββββββββββββββββββββββββββββββββββ | |
| with gr.Tab("βΉοΈ Help"): | |
| gr.Markdown(f""" | |
| ## How it works | |
| RVC (Retrieval-Based Voice Conversion) transforms a voice recording to sound | |
| like a target speaker using only that speaker's model file. | |
| --- | |
| ## Two conversion modes | |
| ### 1. Classic Mode (single output) | |
| - Upload an audio file or record your voice | |
| - Choose a model and settings | |
| - Click **Convert Voice (Classic)** | |
| - Result is added to the job queue β you can monitor it in the **Jobs** tab | |
| ### 2. Full Pipeline Mode (5 outputs) | |
| - Works with **video (MP4)** or any audio source | |
| - Automatically extracts **acapella** (vocals) and **instrumental** using Demucs | |
| - Runs RVC conversion on **both** the original audio and the acapella | |
| - Returns 5 files: | |
| 1. `entrada_acapella.mp3` β extracted vocals | |
| 2. `entrada.mp3` β original audio | |
| 3. `entrada_instrumental.mp3` β background music | |
| 4. `saida.mp3` β RVC applied to original audio | |
| 5. `saida_acapella.mp3` β RVC applied only to vocals | |
| - All 5 files appear directly on the interface β no need to poll jobs | |
| --- | |
| **Device:** `{DEVICE_LABEL}` | |
| **Max input duration:** {MAX_INPUT_DURATION // 60} minutes | |
| --- | |
| ## Credits | |
| Engine: [Ultimate RVC](https://github.com/JackismyShephard/ultimate-rvc) | |
| Separation: [Demucs](https://github.com/facebookresearch/demucs) | |
| """) | |
| # ββ Event handlers ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def _submit_classic(*args):
    """Submit a classic conversion job and refresh the job-related widgets.

    All positional arguments are forwarded unchanged to ``submit_job``. The
    job ID is then extracted from the returned status text so it can be
    placed in the "Job ID" box.

    Args:
        *args: Values of the input components wired to the Classic button,
            passed straight through to ``submit_job``.

    Returns:
        A 5-tuple ``(status, audio, job_id, queue_info, jobs_table)``.
        ``job_id`` is ``""`` when no ID is found in ``status``.
    """
    status, audio = submit_job(*args)
    # Job IDs are assumed to be exactly 8 lowercase hex characters (this is
    # what the original pattern and the "Job ID" placeholder suggest). The
    # look-arounds require the run to stand alone, so an 8-character slice of a
    # longer hex string, such as a hashed temp-file path in an error message,
    # is no longer mistaken for a job ID.
    match = re.search(r"(?<![a-f0-9])[a-f0-9]{8}(?![a-f0-9])", status or "")
    job_id = match.group(0) if match else ""
    return status, audio, job_id, get_queue_info(), get_jobs_table()
| classic_btn.click( | |
| fn=_submit_classic, | |
| inputs=[ | |
| inp_mic, inp_file, model_dd, | |
| pitch_sl, f0_radio, | |
| index_rate_sl, protect_sl, vol_env_sl, | |
| clean_cb, clean_sl, | |
| split_cb, autotune_cb, autotune_sl, | |
| filter_radius_sl, | |
| fmt_radio, | |
| reverb_cb, reverb_room_sl, reverb_damp_sl, reverb_wet_sl, | |
| ], | |
| outputs=[classic_status, classic_audio, job_id_box, queue_status, jobs_table], | |
| ) | |
| full_btn.click( | |
| fn=submit_full_pipeline, | |
| inputs=[ | |
| video_input, inp_mic, inp_file, model_dd, | |
| pitch_sl, f0_radio, | |
| index_rate_sl, protect_sl, vol_env_sl, | |
| clean_cb, clean_sl, | |
| split_cb, autotune_cb, autotune_sl, | |
| filter_radius_sl, | |
| fmt_radio, | |
| reverb_cb, reverb_room_sl, reverb_damp_sl, reverb_wet_sl, | |
| ], | |
| outputs=[ | |
| full_status, | |
| entrada_acapella, entrada_audio, entrada_instrumental, | |
| saida_audio, saida_acapella, | |
| ], | |
| ) | |
def _poll_and_refresh(job_id):
    """Poll one job and return its result with refreshed queue widgets.

    Args:
        job_id: Contents of the "Job ID" textbox, passed to ``poll_job`` as is.

    Returns:
        A 4-tuple ``(status, file, queue_info, jobs_table)``, in the order of
        the outputs wired to the Check button.
    """
    job_status, job_file = poll_job(job_id)
    queue_info = get_queue_info()
    table_rows = get_jobs_table()
    return job_status, job_file, queue_info, table_rows
| poll_btn.click( | |
| fn=_poll_and_refresh, | |
| inputs=[job_id_box], | |
| outputs=[poll_status, poll_audio, queue_status, jobs_table], | |
| ) | |
# ── Launch ────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Enable the request queue before starting the server.
    demo.queue(default_concurrency_limit=5)

    # Use the port from the PORT environment variable when it is set;
    # otherwise fall back to 7860.
    listen_port = int(os.getenv("PORT", 7860))

    launch_options = {
        "server_name": "0.0.0.0",  # listen on all network interfaces
        "server_port": listen_port,
        "max_threads": 10,
        "ssr_mode": False,
        "css": CSS,
    }
    demo.launch(**launch_options)