"""RVC Voice Conversion — HuggingFace Space

Simple, fast, GPU/CPU auto-detected.
"""
from __future__ import annotations

import html
import os

import gradio as gr

from lib.config import (
    BUILTIN_MODELS,
    CSS,
    DEVICE_LABEL,
    MAX_INPUT_DURATION,
    logger,
)
from lib.jobs import (
    get_jobs_table,
    get_queue_info,
    poll_job,
    submit_job,
)
from lib.models import list_models, startup_downloads
from lib.ui import refresh_models, toggle_autotune, upload_model
|
|
| |
# Run the startup step once at import time. A failure is deliberately
# non-fatal: the UI still comes up and shows the problem in its status line.
startup_status = ""
default_model = ""
try:
    default_model = startup_downloads()
except Exception as e:  # top-level boundary: degrade instead of crashing the app
    startup_status = f"⚠️ Some assets unavailable: {e} · {DEVICE_LABEL}"
    logger.warning("Startup download issue: %s", e)
else:
    startup_status = f"✅ Ready · {DEVICE_LABEL}"

# Pre-select the model reported by startup when it is actually listed;
# otherwise fall back to the first listed model, or to no selection at all.
initial_models = list_models()
if default_model in initial_models:
    initial_value = default_model
else:
    initial_value = initial_models[0] if initial_models else None
|
|
| |
with gr.Blocks(title="RVC Voice Conversion", delete_cache=(3600, 3600)) as demo:

    # Header. The status line is escaped because it can embed a raw exception
    # message (e.g. "<urlopen error ...>") that would otherwise be parsed as a tag.
    gr.HTML(f"""
    <div id="header">
        <h1>🎙️ RVC Voice Conversion</h1>
        <p>Retrieval-Based Voice Conversion · record or upload · custom models · GPU/CPU auto</p>
    </div>
    <p id="status">{html.escape(startup_status)}</p>
    """)

    with gr.Tabs():

        # --- Convert tab ---------------------------------------------------
        with gr.Tab("🎤 Convert"):
            with gr.Row():

                # Left column: input audio, model choice, basic settings.
                with gr.Column(scale=1):
                    gr.Markdown("### 🔊 Input Audio")
                    with gr.Tabs():
                        with gr.Tab("🎙️ Microphone"):
                            inp_mic = gr.Audio(
                                sources=["microphone"],
                                type="filepath",
                                label="Record",
                            )
                        with gr.Tab("📁 Upload File"):
                            inp_file = gr.Audio(
                                sources=["upload"],
                                type="filepath",
                                label="Upload audio (wav / mp3 / flac / ogg …)",
                            )

                    gr.Markdown("### 🤖 Model")
                    model_dd = gr.Dropdown(
                        choices=initial_models,
                        value=initial_value,
                        label="Active Voice Model",
                        interactive=True,
                    )

                    gr.Markdown("### 🎛️ Basic Settings")
                    pitch_sl = gr.Slider(
                        minimum=-24, maximum=24, value=0, step=1,
                        label="Pitch Shift (semitones)",
                        info="0 = unchanged · positive = higher · negative = lower",
                    )
                    f0_radio = gr.Radio(
                        choices=["rmvpe", "fcpe", "crepe", "crepe-tiny"],
                        value="rmvpe",
                        label="Pitch Extraction Method",
                        info="rmvpe = fastest & accurate · crepe = highest quality (slower)",
                    )

                # Right column: advanced settings, output format, results.
                with gr.Column(scale=1):
                    gr.Markdown("### ⚙️ Advanced Settings")
                    with gr.Accordion("Expand advanced options", open=False):
                        index_rate_sl = gr.Slider(
                            0.0, 1.0, value=0.75, step=0.05,
                            label="Index Rate",
                            info="How strongly the FAISS index influences timbre (0 = off)",
                        )
                        protect_sl = gr.Slider(
                            0.0, 0.5, value=0.5, step=0.01,
                            label="Protect Consonants",
                            info="Protects unvoiced consonants — 0.5 = max protection",
                        )
                        filter_radius_sl = gr.Slider(
                            0, 7, value=3, step=1,
                            label="Respiration Filter Radius",
                            info="Median filter on pitch — higher = smoother, reduces breath noise",
                        )
                        vol_env_sl = gr.Slider(
                            0.0, 1.0, value=0.25, step=0.05,
                            label="Volume Envelope Mix",
                            info="0.25 = natural blend · 1 = preserve input loudness · 0 = model output",
                        )
                        with gr.Row():
                            clean_cb = gr.Checkbox(value=False, label="Noise Reduction")
                            clean_sl = gr.Slider(
                                0.0, 1.0, value=0.5, step=0.05,
                                label="Reduction Strength",
                            )
                        with gr.Row():
                            split_cb = gr.Checkbox(value=False, label="Split Long Audio")
                            autotune_cb = gr.Checkbox(value=False, label="Autotune")
                        # Hidden until the Autotune checkbox changes (see handler below).
                        autotune_sl = gr.Slider(
                            0.0, 1.0, value=1.0, step=0.05,
                            label="Autotune Strength",
                            visible=False,
                        )
                        autotune_cb.change(
                            fn=toggle_autotune,
                            inputs=autotune_cb,
                            outputs=autotune_sl,
                        )

                        gr.Markdown("**🎚️ Reverb**")
                        reverb_cb = gr.Checkbox(value=False, label="Enable Reverb")
                        # The reverb sliders are only shown while the checkbox is ticked.
                        with gr.Group(visible=False) as reverb_group:
                            reverb_room_sl = gr.Slider(
                                0.0, 1.0, value=0.15, step=0.05,
                                label="Room Size",
                                info="Larger = bigger sounding space",
                            )
                            reverb_damp_sl = gr.Slider(
                                0.0, 1.0, value=0.7, step=0.05,
                                label="Damping",
                                info="Higher = more absorption, less echo tail",
                            )
                            reverb_wet_sl = gr.Slider(
                                0.0, 1.0, value=0.15, step=0.05,
                                label="Wet Level",
                                info="How much reverb is mixed in (0.15 = subtle)",
                            )
                        reverb_cb.change(
                            fn=lambda v: gr.update(visible=v),
                            inputs=reverb_cb,
                            outputs=reverb_group,
                        )

                    fmt_radio = gr.Radio(
                        choices=["WAV", "MP3", "FLAC", "OPUS"],
                        value="WAV",
                        label="Output Format",
                        info="OPUS = small file (~64 kbps, Telegram/Discord quality)",
                    )
                    convert_btn = gr.Button(
                        "🚀 Convert Voice",
                        variant="primary",
                    )

                    gr.Markdown("### 🎧 Output")
                    out_status = gr.Markdown(value="")
                    out_audio = gr.Audio(
                        label="Result (if still on page)",
                        type="filepath",
                        interactive=False,
                    )

                    gr.Markdown("#### 🔍 Check Job Status")
                    with gr.Row():
                        job_id_box = gr.Textbox(
                            label="Job ID",
                            placeholder="e.g. a3f2b1c9",
                            scale=3,
                        )
                        poll_btn = gr.Button("🔄 Check", scale=1)
                    poll_status = gr.Markdown(value="")
                    poll_audio = gr.Audio(label="Result", type="filepath", interactive=False)

        # --- Models tab ----------------------------------------------------
        with gr.Tab("📦 Models"):
            gr.Markdown("""
            ### Upload a Custom RVC Model
            Provide a **`.zip`** containing:
            - **`model.pth`** — weights (required)
            - **`model.index`** — FAISS index (optional, improves voice matching)

            **Built-in models** (pre-downloaded on startup):
            Vestia Zeta v1 · Vestia Zeta v2 · Ayunda Risu · Gawr Gura
            """)
            with gr.Row():
                with gr.Column(scale=1):
                    up_zip = gr.File(label="Model ZIP", file_types=[".zip"])
                    up_name = gr.Textbox(
                        label="Model Name",
                        placeholder="Leave blank to use zip filename",
                    )
                    up_btn = gr.Button("📤 Load Model", variant="primary")
                    up_status = gr.Textbox(label="Status", interactive=False, lines=2)
                with gr.Column(scale=1):
                    gr.Markdown("### Loaded Models")
                    models_table = gr.Dataframe(
                        col_count=(1, "fixed"),
                        value=[[m] for m in initial_models],
                        interactive=False,
                        label="",
                    )
                    refresh_btn = gr.Button("🔄 Refresh")

            # Note the two handlers list their outputs in different orders.
            up_btn.click(
                fn=upload_model,
                inputs=[up_zip, up_name],
                outputs=[up_status, model_dd, models_table],
            )
            refresh_btn.click(
                fn=refresh_models,
                outputs=[models_table, model_dd],
            )

        # --- Jobs tab ------------------------------------------------------
        with gr.Tab("📋 Jobs"):
            gr.Markdown("All submitted jobs, newest first. Click **Refresh** to update.")
            # Both components take a callable as their value together with every=10.
            queue_status = gr.Markdown(value=get_queue_info, every=10)
            jobs_table = gr.Dataframe(
                headers=["Job ID", "Model", "Status", "Time", "Download"],
                col_count=(5, "fixed"),
                value=get_jobs_table,
                interactive=False,
                wrap=True,
                datatype=["str", "str", "str", "str", "markdown"],
                every=10,
            )
            refresh_jobs_btn = gr.Button("🔄 Refresh")

            def _refresh_jobs():
                """Return the current queue summary and jobs table for a manual refresh."""
                return get_queue_info(), get_jobs_table()

            refresh_jobs_btn.click(fn=_refresh_jobs, outputs=[queue_status, jobs_table])

        # --- Help tab ------------------------------------------------------
        with gr.Tab("ℹ️ Help"):
            gr.Markdown(f"""
            ## How it works
            RVC (Retrieval-Based Voice Conversion) transforms a voice recording to sound
            like a target speaker using only that speaker's model file.

            ---

            ## Quick Guide
            1. Open the **Convert** tab
            2. **Record** via microphone or **upload** an audio file (wav, mp3, flac, ogg …)
            3. Choose a **model** from the dropdown — 4 models are pre-loaded on startup
            4. Set **Pitch Shift** if needed (e.g. male → female: try +12 semitones)
            5. Click **🚀 Convert Voice** and wait for the result

            ---

            ## Built-in Models
            | Model | Description |
            |---|---|
            | **Vestia Zeta v1** | Hololive ID VTuber, v1 model |
            | **Vestia Zeta v2** | Hololive ID VTuber, v2 model (recommended) |
            | **Ayunda Risu** | Hololive ID VTuber |
            | **Gawr Gura** | Hololive EN VTuber |

            ---

            ## Pitch Extraction Methods
            | Method | Speed | Quality | Best for |
            |---|---|---|---|
            | **rmvpe** | ⚡⚡⚡ | ★★★★ | General use (default) |
            | **fcpe** | ⚡⚡ | ★★★★ | Singing |
            | **crepe** | ⚡ | ★★★★★ | Highest quality, slow |
            | **crepe-tiny** | ⚡⚡ | ★★★ | Low resource |

            ---

            ## Advanced Settings
            | Setting | Description |
            |---|---|
            | **Index Rate** | Influence of FAISS index on output timbre (0.75 recommended) |
            | **Protect Consonants** | Prevents artefacts on consonants (0.5 = max) |
            | **Respiration Filter Radius** | Smooths pitch curve — higher reduces breath noise (0–7, default 3) |
            | **Volume Envelope Mix** | 0.25 = natural blend · 1 = preserve input loudness |
            | **Noise Reduction** | Removes background noise before conversion |
            | **Split Long Audio** | Chunks audio for recordings > 60 s |
            | **Autotune** | Snaps pitch to nearest musical note |

            ---

            ## Output Formats
            | Format | Size | Quality |
            |---|---|---|
            | **WAV** | Large | Lossless |
            | **FLAC** | Medium | Lossless compressed |
            | **MP3** | Small | Lossy |
            | **OPUS** | Tiny (~64 kbps) | Telegram/Discord quality |

            ---

            **Device:** `{DEVICE_LABEL}`
            **Max input duration:** {MAX_INPUT_DURATION // 60} minutes

            ---

            ## Credits
            Engine: [Ultimate RVC](https://github.com/JackismyShephard/ultimate-rvc)
            """)

    # --- Event wiring --------------------------------------------------------
    def _submit_and_extract_id(*args):
        """Submit a job and pull its ID out of the returned status text.

        Forwards ``args`` unchanged to ``submit_job``. The first run of eight
        lowercase hex characters in the status (if any) is returned as the job
        ID so it pre-fills the Job ID box; otherwise the ID is an empty string.
        The queue summary and jobs table are refreshed in the same round trip.
        """
        import re
        status, audio = submit_job(*args)
        match = re.search(r"[a-f0-9]{8}", status or "")
        job_id = match.group(0) if match else ""
        return status, audio, job_id, get_queue_info(), get_jobs_table()

    # The order of `inputs` is the positional order handed to submit_job.
    convert_btn.click(
        fn=_submit_and_extract_id,
        inputs=[
            inp_mic, inp_file, model_dd,
            pitch_sl, f0_radio,
            index_rate_sl, protect_sl, vol_env_sl,
            clean_cb, clean_sl,
            split_cb, autotune_cb, autotune_sl,
            filter_radius_sl,
            fmt_radio,
            reverb_cb, reverb_room_sl, reverb_damp_sl, reverb_wet_sl,
        ],
        outputs=[out_status, out_audio, job_id_box, queue_status, jobs_table],
    )

    def _poll_and_refresh(job_id):
        """Poll one job by ID and refresh the queue summary and jobs table too."""
        status, file = poll_job(job_id)
        return status, file, get_queue_info(), get_jobs_table()

    poll_btn.click(
        fn=_poll_and_refresh,
        inputs=[job_id_box],
        outputs=[poll_status, poll_audio, queue_status, jobs_table],
    )
|
|
|
|
| |
if __name__ == "__main__":
    # Enable the request queue before launching the server.
    demo.queue(default_concurrency_limit=5)
    demo.launch(
        server_name="0.0.0.0",  # bind to all network interfaces
        # `or` also covers PORT being set but empty, which int("") would reject.
        server_port=int(os.getenv("PORT") or 7860),
        max_threads=10,
        ssr_mode=False,
        css=CSS,
    )
|
|