Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from infer_rvc_python import BaseLoader | |
| import soundfile as sf | |
| import random | |
| from urllib.request import urlretrieve | |
| import os | |
| import zipfile | |
# Remote assets fetched at start-up. Only the voice-model archive is active;
# the two base-model URLs below are intentionally left disabled.
files_to_retrieve = [
    "https://replicate.delivery/pbxt/N97QM3XNFrooJhV6Fb0meBff0aAG1rEDfvuxcdLS6fTx1vmWC/test.zip",
    # "https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt?download=true",
    # "https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/rmvpe.pt?download=true"
]

# Each asset is saved in the working directory under the last segment of its URL.
for file in files_to_retrieve:
    print(f"Downloading {file}")
    local_name = file.rsplit("/", 1)[-1]
    urlretrieve(file, local_name)

# Unpack the downloaded archive into the working directory.
with zipfile.ZipFile("test.zip", mode="r") as archive:
    archive.extractall(path=".")

# Shared converter instance used by every request; inference is forced to CPU.
converter = BaseLoader(
    only_cpu=True,
    hubert_path="./hubert_base.pt",
    rmvpe_path="./rmvpe.pt",
)

# Voice model and feature index used for all conversions.
# NOTE(review): presumably both files come out of test.zip -- confirm.
model = "test.pth"
index = "added_IVF839_Flat_nprobe_1_test_v2.index"
def voice_conversion(
    audio,
    pitch_change,
    filter_radius,
    envelope_ratio,
    index_influence,
    consonant_breath_protection,
):
    """Gradio click handler: convert one clip with the module-level model.

    Args:
        audio: Path of the recorded/uploaded clip, or None when the audio
            component is empty.
        pitch_change: Pitch value from the "Pitch" slider.
        filter_radius: Value from the "Resp. Median Filtering" slider.
        envelope_ratio: Value from the "Envelope Ratio" slider.
        index_influence: Value from the "Index Influence" slider.
        consonant_breath_protection: Value from the
            "Consonant Breath Protection" slider.

    Returns:
        Whatever ``run`` returns: a ``(sample_rate, samples)`` tuple.

    Raises:
        gr.Error: If no audio clip was provided.
    """
    # Without this guard str(None) == "None" would be handed to the converter
    # as a file path and fail later with an unrelated-looking error.
    if not audio:
        raise gr.Error("Please record or upload an audio clip before converting.")

    audio_out = run(
        [str(audio)],
        model,
        "rmvpe+",  # pitch-extraction algorithm is fixed for this app
        pitch_change,
        index,
        index_influence,
        filter_radius,
        envelope_ratio,
        consonant_breath_protection,
    )
    print(audio_out)
    return audio_out
def convert_now(audio_files, random_tag):
    """Run the shared ``converter`` on ``audio_files`` under ``random_tag``.

    Existing outputs are not overwritten and up to 8 parallel workers are
    requested. Returns the converter's result unchanged.
    """
    result = converter(
        audio_files,
        random_tag,
        overwrite=False,
        parallel_workers=8,
    )
    return result
def run(
    audio_files,
    file_m,
    pitch_alg,
    pitch_lvl,
    file_index,
    index_inf,
    r_m_f,
    e_r,
    c_b_p,
):
    """Configure the shared converter, convert the files, and load the result.

    Args:
        audio_files: Non-empty list of input audio paths. Only the first
            output is read back.
        file_m: Path of the voice model file.
        pitch_alg: Name of the pitch-extraction algorithm.
        pitch_lvl: Pitch level passed to the converter.
        file_index: Path of the feature index file.
        index_inf: Index influence value.
        r_m_f: Respiration median filtering value.
        e_r: Envelope ratio value.
        c_b_p: Consonant/breath protection value.

    Returns:
        ``(sample_rate, samples)`` for the first converted file, with the
        samples read as int32.

    Raises:
        ValueError: If ``audio_files`` is empty.
        RuntimeError: If the converter returned no output.
    """
    if not audio_files:
        raise ValueError("audio_files must contain at least one path")

    # A fresh random tag keeps each request's configuration separate.
    random_tag = "USER_" + str(random.randint(10000000, 99999999))
    print("PITCH LVL: ", pitch_lvl)

    # Compare the extension case-insensitively so "clip.MP3" is resampled the
    # same way as "clip.mp3".
    is_mp3 = audio_files[0].lower().endswith(".mp3")

    converter.apply_conf(
        tag=random_tag,
        file_model=file_m,
        pitch_algo=pitch_alg,
        pitch_lvl=pitch_lvl,
        file_index=file_index,
        index_influence=index_inf,
        respiration_median_filtering=r_m_f,
        envelope_ratio=e_r,
        consonant_breath_protection=c_b_p,
        # NOTE(review): 0 presumably means "keep the native rate" -- confirm.
        resample_sr=44100 if is_mp3 else 0,
    )

    output = convert_now(audio_files, random_tag)
    if not output:
        raise RuntimeError("voice conversion produced no output file")

    audio, sr = sf.read(output[0], dtype="int32")
    return (sr, audio)
def ui():
    """Build and return the Gradio Blocks app for voice conversion.

    The page has an audio input (microphone or upload), five tuning sliders,
    a "Convert" button and a read-only audio output. Clicking the button calls
    ``voice_conversion`` with the input path and the slider values.
    """

    def make_slider(label, low, high, default, step):
        # Every slider in this UI is interactive and differs only in these values.
        return gr.Slider(
            label=label,
            minimum=low,
            maximum=high,
            value=default,
            step=step,
            interactive=True,
        )

    with gr.Blocks() as demo:
        audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath")

        # NOTE(review): the original indentation was lost; all five sliders
        # are assumed to sit in this single row -- confirm the intended layout.
        with gr.Row():
            pitch_slider = make_slider("Pitch", -24, 24, 0, 1)
            index_influence_slider = make_slider("Index Influence", 0, 1, 0.75, 0.01)
            respiration_median_filtering = make_slider(
                "Resp. Median Filtering", 0, 10, 3, 1
            )
            envelope_ratio = make_slider("Envelope Ratio", 0, 1, 0.25, 0.01)
            consonant_breath_protection = make_slider(
                "Consonant Breath Protection", 0, 1, 0.5, 0.01
            )

        button = gr.Button("Convert")
        audio_output = gr.Audio(interactive=False, type="numpy")

        # Order must match the positional parameters of voice_conversion.
        handler_inputs = [
            audio_input,
            pitch_slider,
            respiration_median_filtering,
            envelope_ratio,
            index_influence_slider,
            consonant_breath_protection,
        ]
        button.click(
            voice_conversion,
            inputs=handler_inputs,
            outputs=[audio_output],
        )

    return demo
# Login for the app's username/password prompt. Credentials can be supplied
# through the environment so they need not live in source control; the
# defaults keep the previous hard-coded login working.
# NOTE(review): set both variables in deployment, then drop the defaults.
_auth_user = os.environ.get("APP_AUTH_USERNAME", "output")
_auth_password = os.environ.get("APP_AUTH_PASSWORD", "becreative")
ui().launch(auth=(_auth_user, _auth_password))