PhoenixStormJr committed on
Commit
dd73bc9
·
verified ·
1 Parent(s): 1c49127

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +316 -313
app.py CHANGED
@@ -1,323 +1,326 @@
1
- print("app.py is running! Congrats!")
2
- import io
3
- import os
4
- import torch
5
-
6
- # os.system("wget -P cvec/ https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt")
7
- import gradio as gr
8
- import librosa
9
- import numpy as np
10
- import soundfile
11
- import logging
12
- from fairseq import checkpoint_utils
13
- from my_utils import load_audio
14
- from vc_infer_pipeline import VC
15
- import traceback
16
- from config import Config
17
- from infer_pack.models import (
18
- SynthesizerTrnMs256NSFsid,
19
- SynthesizerTrnMs256NSFsid_nono,
20
- SynthesizerTrnMs768NSFsid,
21
- SynthesizerTrnMs768NSFsid_nono,
22
- )
23
- from i18n import I18nAuto
24
-
25
- logging.getLogger("numba").setLevel(logging.WARNING)
26
- logging.getLogger("markdown_it").setLevel(logging.WARNING)
27
- logging.getLogger("urllib3").setLevel(logging.WARNING)
28
- logging.getLogger("matplotlib").setLevel(logging.WARNING)
29
-
30
- i18n = I18nAuto()
31
- i18n.print()
32
-
33
- config = Config()
34
-
35
- weight_root = "weights"
36
- weight_uvr5_root = "uvr5_weights"
37
- index_root = "logs"
38
- names = []
39
- hubert_model = None
40
- for name in os.listdir(weight_root):
41
- if name.endswith(".pth"):
42
- names.append(name)
43
- index_paths = []
44
- for root, dirs, files in os.walk(index_root, topdown=False):
45
- for name in files:
46
- if name.endswith(".index") and "trained" not in name:
47
- index_paths.append("%s/%s" % (root, name))
48
-
49
-
50
- def get_vc(sid):
51
- global n_spk, tgt_sr, net_g, vc, cpt, version
52
- if sid == "" or sid == []:
53
- global hubert_model
54
- if hubert_model != None: # 考虑到轮询, 需要加个判断看是否 sid 是由有模型切换到无模型的
55
- print("clean_empty_cache")
56
- del net_g, n_spk, vc, hubert_model, tgt_sr # ,cpt
57
- hubert_model = net_g = n_spk = vc = hubert_model = tgt_sr = None
58
- if torch.cuda.is_available():
59
- torch.cuda.empty_cache()
60
- ###楼下不这么折腾清理不干净
61
- if_f0 = cpt.get("f0", 1)
62
- version = cpt.get("version", "v1")
63
- if version == "v1":
64
- if if_f0 == 1:
65
- net_g = SynthesizerTrnMs256NSFsid(
66
- *cpt["config"], is_half=config.is_half
67
- )
68
- else:
69
- net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
70
- elif version == "v2":
71
- if if_f0 == 1:
72
- net_g = SynthesizerTrnMs768NSFsid(
73
- *cpt["config"], is_half=config.is_half
74
- )
75
- else:
76
- net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
77
- del net_g, cpt
78
- if torch.cuda.is_available():
79
- torch.cuda.empty_cache()
80
- cpt = None
81
- return {"visible": False, "__type__": "update"}
82
- person = "%s/%s" % (weight_root, sid)
83
- print("loading %s" % person)
84
- cpt = torch.load(person, map_location="cpu")
85
- tgt_sr = cpt["config"][-1]
86
- cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk
87
- if_f0 = cpt.get("f0", 1)
88
- version = cpt.get("version", "v1")
89
- if version == "v1":
90
- if if_f0 == 1:
91
- net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
92
- else:
93
- net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
94
- elif version == "v2":
95
- if if_f0 == 1:
96
- net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=config.is_half)
97
- else:
98
- net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
99
- del net_g.enc_q
100
- print(net_g.load_state_dict(cpt["weight"], strict=False))
101
- net_g.eval().to(config.device)
102
- if config.is_half:
103
- net_g = net_g.half()
104
- else:
105
- net_g = net_g.float()
106
- vc = VC(tgt_sr, config)
107
- n_spk = cpt["config"][-3]
108
- return {"visible": True, "maximum": n_spk, "__type__": "update"}
109
-
110
-
111
- def load_hubert():
112
- global hubert_model
113
- models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
114
- ["hubert_base.pt"],
115
- suffix="",
116
  )
117
- hubert_model = models[0]
118
- hubert_model = hubert_model.to(config.device)
119
- if config.is_half:
120
- hubert_model = hubert_model.half()
121
- else:
122
- hubert_model = hubert_model.float()
123
- hubert_model.eval()
124
-
125
-
126
- def vc_single(
127
- sid,
128
- input_audio_path,
129
- f0_up_key,
130
- f0_file,
131
- f0_method,
132
- file_index,
133
- file_index2,
134
- # file_big_npy,
135
- index_rate,
136
- filter_radius,
137
- resample_sr,
138
- rms_mix_rate,
139
- protect,
140
- ): # spk_item, input_audio0, vc_transform0,f0_file,f0method0
141
- global tgt_sr, net_g, vc, hubert_model, version
142
- if input_audio_path is None:
143
- return "You need to upload an audio", None
144
- f0_up_key = int(f0_up_key)
145
- try:
146
- audio = input_audio_path[1] / 32768.0
147
- if len(audio.shape) == 2:
148
- audio = np.mean(audio, -1)
149
- audio = librosa.resample(audio, orig_sr=input_audio_path[0], target_sr=16000)
150
- audio_max = np.abs(audio).max() / 0.95
151
- if audio_max > 1:
152
- audio /= audio_max
153
- times = [0, 0, 0]
154
- if hubert_model == None:
155
- load_hubert()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  if_f0 = cpt.get("f0", 1)
157
- file_index = (
158
- (
159
- file_index.strip(" ")
160
- .strip('"')
161
- .strip("\n")
162
- .strip('"')
163
- .strip(" ")
164
- .replace("trained", "added")
165
- )
166
- if file_index != ""
167
- else file_index2
168
- ) # 防止小白写错,自动帮他替换掉
169
- # file_big_npy = (
170
- # file_big_npy.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
171
- # )
172
- audio_opt = vc.pipeline(
173
- hubert_model,
174
- net_g,
175
- sid,
176
- audio,
177
- input_audio_path,
178
- times,
179
- f0_up_key,
180
- f0_method,
181
- file_index,
182
- # file_big_npy,
183
- index_rate,
184
- if_f0,
185
- filter_radius,
186
- tgt_sr,
187
- resample_sr,
188
- rms_mix_rate,
189
- version,
190
- protect,
191
- f0_file=f0_file,
192
- )
193
- if resample_sr >= 16000 and tgt_sr != resample_sr:
194
- tgt_sr = resample_sr
195
- index_info = (
196
- "Using index:%s." % file_index
197
- if os.path.exists(file_index)
198
- else "Index not used."
199
  )
200
- return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss" % (
201
- index_info,
202
- times[0],
203
- times[1],
204
- times[2],
205
- ), (tgt_sr, audio_opt)
206
- except:
207
- info = traceback.format_exc()
208
- print(info)
209
- return info, (None, None)
210
-
211
-
212
- app = gr.Blocks()
213
- with app:
214
- with gr.Tabs():
215
- with gr.TabItem("在线demo"):
216
- gr.Markdown(
217
- value="""
218
- RVC 在线demo
219
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  )
221
- sid = gr.Dropdown(label=i18n("推理音色"), choices=sorted(names))
222
- with gr.Column():
223
- spk_item = gr.Slider(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
  minimum=0,
225
- maximum=2333,
 
 
226
  step=1,
227
- label=i18n("请选择说话人id"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  value=0,
229
- visible=False,
230
  interactive=True,
231
  )
232
- sid.change(
233
- fn=get_vc,
234
- inputs=[sid],
235
- outputs=[spk_item],
236
- )
237
- gr.Markdown(
238
- value=i18n("男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. ")
239
- )
240
- vc_input3 = gr.Audio(label="上传音频(长度小于90秒)")
241
- vc_transform0 = gr.Number(label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0)
242
- f0method0 = gr.Radio(
243
- label=i18n("选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU"),
244
- choices=["pm", "harvest", "crepe"],
245
- value="pm",
246
- interactive=True,
247
- )
248
- filter_radius0 = gr.Slider(
249
- minimum=0,
250
- maximum=7,
251
- label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"),
252
- value=3,
253
- step=1,
254
- interactive=True,
255
- )
256
- with gr.Column():
257
- file_index1 = gr.Textbox(
258
- label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"),
259
- value="",
260
- interactive=False,
261
- visible=False,
262
  )
263
- file_index2 = gr.Dropdown(
264
- label=i18n("自动检测index路径,下拉式选择(dropdown)"),
265
- choices=sorted(index_paths),
266
- interactive=True,
267
- )
268
- index_rate1 = gr.Slider(
269
- minimum=0,
270
- maximum=1,
271
- label=i18n("检索特征占比"),
272
- value=0.88,
273
- interactive=True,
274
- )
275
- resample_sr0 = gr.Slider(
276
- minimum=0,
277
- maximum=48000,
278
- label=i18n("后处理重采样至最终采样率,0为不进行重采样"),
279
- value=0,
280
- step=1,
281
- interactive=True,
282
- )
283
- rms_mix_rate0 = gr.Slider(
284
- minimum=0,
285
- maximum=1,
286
- label=i18n("输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"),
287
- value=1,
288
- interactive=True,
289
- )
290
- protect0 = gr.Slider(
291
- minimum=0,
292
- maximum=0.5,
293
- label=i18n("保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果"),
294
- value=0.33,
295
- step=0.01,
296
- interactive=True,
297
- )
298
- f0_file = gr.File(label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"))
299
- but0 = gr.Button(i18n("转换"), variant="primary")
300
- vc_output1 = gr.Textbox(label=i18n("输出信息"))
301
- vc_output2 = gr.Audio(label=i18n("输出音频(右下角三个点,点了可以下载)"))
302
- but0.click(
303
- vc_single,
304
- [
305
- spk_item,
306
- vc_input3,
307
- vc_transform0,
308
- f0_file,
309
- f0method0,
310
- file_index1,
311
- file_index2,
312
- # file_big_npy1,
313
- index_rate1,
314
- filter_radius0,
315
- resample_sr0,
316
- rms_mix_rate0,
317
- protect0,
318
- ],
319
- [vc_output1, vc_output2],
320
- )
321
-
322
-
323
- app.launch()
 
1
+ try:
2
+ print("app.py is running! Congrats!")
3
+ import io
4
+ import os
5
+ import torch
6
+
7
+ # os.system("wget -P cvec/ https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt")
8
+ import gradio as gr
9
+ import librosa
10
+ import numpy as np
11
+ import soundfile
12
+ import logging
13
+ from fairseq import checkpoint_utils
14
+ from my_utils import load_audio
15
+ from vc_infer_pipeline import VC
16
+ import traceback
17
+ from config import Config
18
+ from infer_pack.models import (
19
+ SynthesizerTrnMs256NSFsid,
20
+ SynthesizerTrnMs256NSFsid_nono,
21
+ SynthesizerTrnMs768NSFsid,
22
+ SynthesizerTrnMs768NSFsid_nono,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  )
24
+ from i18n import I18nAuto
25
+
26
+ logging.getLogger("numba").setLevel(logging.WARNING)
27
+ logging.getLogger("markdown_it").setLevel(logging.WARNING)
28
+ logging.getLogger("urllib3").setLevel(logging.WARNING)
29
+ logging.getLogger("matplotlib").setLevel(logging.WARNING)
30
+
31
+ i18n = I18nAuto()
32
+ i18n.print()
33
+
34
+ config = Config()
35
+
36
+ weight_root = "weights"
37
+ weight_uvr5_root = "uvr5_weights"
38
+ index_root = "logs"
39
+ names = []
40
+ hubert_model = None
41
+ for name in os.listdir(weight_root):
42
+ if name.endswith(".pth"):
43
+ names.append(name)
44
+ index_paths = []
45
+ for root, dirs, files in os.walk(index_root, topdown=False):
46
+ for name in files:
47
+ if name.endswith(".index") and "trained" not in name:
48
+ index_paths.append("%s/%s" % (root, name))
49
+
50
+
51
+ def get_vc(sid):
52
+ global n_spk, tgt_sr, net_g, vc, cpt, version
53
+ if sid == "" or sid == []:
54
+ global hubert_model
55
+ if hubert_model != None: # 考虑到轮询, 需要加个判断看是否 sid 是由有模型切换到无模型的
56
+ print("clean_empty_cache")
57
+ del net_g, n_spk, vc, hubert_model, tgt_sr # ,cpt
58
+ hubert_model = net_g = n_spk = vc = hubert_model = tgt_sr = None
59
+ if torch.cuda.is_available():
60
+ torch.cuda.empty_cache()
61
+ ###楼下不这么折腾清理不干净
62
+ if_f0 = cpt.get("f0", 1)
63
+ version = cpt.get("version", "v1")
64
+ if version == "v1":
65
+ if if_f0 == 1:
66
+ net_g = SynthesizerTrnMs256NSFsid(
67
+ *cpt["config"], is_half=config.is_half
68
+ )
69
+ else:
70
+ net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
71
+ elif version == "v2":
72
+ if if_f0 == 1:
73
+ net_g = SynthesizerTrnMs768NSFsid(
74
+ *cpt["config"], is_half=config.is_half
75
+ )
76
+ else:
77
+ net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
78
+ del net_g, cpt
79
+ if torch.cuda.is_available():
80
+ torch.cuda.empty_cache()
81
+ cpt = None
82
+ return {"visible": False, "__type__": "update"}
83
+ person = "%s/%s" % (weight_root, sid)
84
+ print("loading %s" % person)
85
+ cpt = torch.load(person, map_location="cpu")
86
+ tgt_sr = cpt["config"][-1]
87
+ cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk
88
  if_f0 = cpt.get("f0", 1)
89
+ version = cpt.get("version", "v1")
90
+ if version == "v1":
91
+ if if_f0 == 1:
92
+ net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
93
+ else:
94
+ net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
95
+ elif version == "v2":
96
+ if if_f0 == 1:
97
+ net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=config.is_half)
98
+ else:
99
+ net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
100
+ del net_g.enc_q
101
+ print(net_g.load_state_dict(cpt["weight"], strict=False))
102
+ net_g.eval().to(config.device)
103
+ if config.is_half:
104
+ net_g = net_g.half()
105
+ else:
106
+ net_g = net_g.float()
107
+ vc = VC(tgt_sr, config)
108
+ n_spk = cpt["config"][-3]
109
+ return {"visible": True, "maximum": n_spk, "__type__": "update"}
110
+
111
+
112
+ def load_hubert():
113
+ global hubert_model
114
+ models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
115
+ ["hubert_base.pt"],
116
+ suffix="",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  )
118
+ hubert_model = models[0]
119
+ hubert_model = hubert_model.to(config.device)
120
+ if config.is_half:
121
+ hubert_model = hubert_model.half()
122
+ else:
123
+ hubert_model = hubert_model.float()
124
+ hubert_model.eval()
125
+
126
+
127
+ def vc_single(
128
+ sid,
129
+ input_audio_path,
130
+ f0_up_key,
131
+ f0_file,
132
+ f0_method,
133
+ file_index,
134
+ file_index2,
135
+ # file_big_npy,
136
+ index_rate,
137
+ filter_radius,
138
+ resample_sr,
139
+ rms_mix_rate,
140
+ protect,
141
+ ): # spk_item, input_audio0, vc_transform0,f0_file,f0method0
142
+ global tgt_sr, net_g, vc, hubert_model, version
143
+ if input_audio_path is None:
144
+ return "You need to upload an audio", None
145
+ f0_up_key = int(f0_up_key)
146
+ try:
147
+ audio = input_audio_path[1] / 32768.0
148
+ if len(audio.shape) == 2:
149
+ audio = np.mean(audio, -1)
150
+ audio = librosa.resample(audio, orig_sr=input_audio_path[0], target_sr=16000)
151
+ audio_max = np.abs(audio).max() / 0.95
152
+ if audio_max > 1:
153
+ audio /= audio_max
154
+ times = [0, 0, 0]
155
+ if hubert_model == None:
156
+ load_hubert()
157
+ if_f0 = cpt.get("f0", 1)
158
+ file_index = (
159
+ (
160
+ file_index.strip(" ")
161
+ .strip('"')
162
+ .strip("\n")
163
+ .strip('"')
164
+ .strip(" ")
165
+ .replace("trained", "added")
166
+ )
167
+ if file_index != ""
168
+ else file_index2
169
+ ) # 防止小白写错,自动帮他替换掉
170
+ # file_big_npy = (
171
+ # file_big_npy.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
172
+ # )
173
+ audio_opt = vc.pipeline(
174
+ hubert_model,
175
+ net_g,
176
+ sid,
177
+ audio,
178
+ input_audio_path,
179
+ times,
180
+ f0_up_key,
181
+ f0_method,
182
+ file_index,
183
+ # file_big_npy,
184
+ index_rate,
185
+ if_f0,
186
+ filter_radius,
187
+ tgt_sr,
188
+ resample_sr,
189
+ rms_mix_rate,
190
+ version,
191
+ protect,
192
+ f0_file=f0_file,
193
+ )
194
+ if resample_sr >= 16000 and tgt_sr != resample_sr:
195
+ tgt_sr = resample_sr
196
+ index_info = (
197
+ "Using index:%s." % file_index
198
+ if os.path.exists(file_index)
199
+ else "Index not used."
200
  )
201
+ return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss" % (
202
+ index_info,
203
+ times[0],
204
+ times[1],
205
+ times[2],
206
+ ), (tgt_sr, audio_opt)
207
+ except:
208
+ info = traceback.format_exc()
209
+ print(info)
210
+ return info, (None, None)
211
+
212
+
213
+ app = gr.Blocks()
214
+ with app:
215
+ with gr.Tabs():
216
+ with gr.TabItem("在线demo"):
217
+ gr.Markdown(
218
+ value="""
219
+ RVC 在线demo
220
+ """
221
+ )
222
+ sid = gr.Dropdown(label=i18n("推理音色"), choices=sorted(names))
223
+ with gr.Column():
224
+ spk_item = gr.Slider(
225
+ minimum=0,
226
+ maximum=2333,
227
+ step=1,
228
+ label=i18n("请选择说话人id"),
229
+ value=0,
230
+ visible=False,
231
+ interactive=True,
232
+ )
233
+ sid.change(
234
+ fn=get_vc,
235
+ inputs=[sid],
236
+ outputs=[spk_item],
237
+ )
238
+ gr.Markdown(
239
+ value=i18n("男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. ")
240
+ )
241
+ vc_input3 = gr.Audio(label="上传音频(长度小于90秒)")
242
+ vc_transform0 = gr.Number(label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0)
243
+ f0method0 = gr.Radio(
244
+ label=i18n("选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU"),
245
+ choices=["pm", "harvest", "crepe"],
246
+ value="pm",
247
+ interactive=True,
248
+ )
249
+ filter_radius0 = gr.Slider(
250
  minimum=0,
251
+ maximum=7,
252
+ label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"),
253
+ value=3,
254
  step=1,
255
+ interactive=True,
256
+ )
257
+ with gr.Column():
258
+ file_index1 = gr.Textbox(
259
+ label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"),
260
+ value="",
261
+ interactive=False,
262
+ visible=False,
263
+ )
264
+ file_index2 = gr.Dropdown(
265
+ label=i18n("自动检测index路径,下拉式选择(dropdown)"),
266
+ choices=sorted(index_paths),
267
+ interactive=True,
268
+ )
269
+ index_rate1 = gr.Slider(
270
+ minimum=0,
271
+ maximum=1,
272
+ label=i18n("检索特征占比"),
273
+ value=0.88,
274
+ interactive=True,
275
+ )
276
+ resample_sr0 = gr.Slider(
277
+ minimum=0,
278
+ maximum=48000,
279
+ label=i18n("后处理重采样至最终采样率,0为不进行重采样"),
280
  value=0,
281
+ step=1,
282
  interactive=True,
283
  )
284
+ rms_mix_rate0 = gr.Slider(
285
+ minimum=0,
286
+ maximum=1,
287
+ label=i18n("输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"),
288
+ value=1,
289
+ interactive=True,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
  )
291
+ protect0 = gr.Slider(
292
+ minimum=0,
293
+ maximum=0.5,
294
+ label=i18n("保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果"),
295
+ value=0.33,
296
+ step=0.01,
297
+ interactive=True,
298
+ )
299
+ f0_file = gr.File(label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"))
300
+ but0 = gr.Button(i18n("转换"), variant="primary")
301
+ vc_output1 = gr.Textbox(label=i18n("输出信息"))
302
+ vc_output2 = gr.Audio(label=i18n("输出音频(右下角三个点,点了可以下载)"))
303
+ but0.click(
304
+ vc_single,
305
+ [
306
+ spk_item,
307
+ vc_input3,
308
+ vc_transform0,
309
+ f0_file,
310
+ f0method0,
311
+ file_index1,
312
+ file_index2,
313
+ # file_big_npy1,
314
+ index_rate1,
315
+ filter_radius0,
316
+ resample_sr0,
317
+ rms_mix_rate0,
318
+ protect0,
319
+ ],
320
+ [vc_output1, vc_output2],
321
+ )
322
+
323
+
324
+ app.launch()
325
+ except Exception as e:
326
+ print("Startup error:", e)