RVC_V2_Docker_Translated_might-be-broken

Sleeping

App Files Files Community

PhoenixStormJr committed on Jun 12, 2025

Commit

0bd1b69

verified ·

1 Parent(s): 430d30a

Update gui.py

Browse files

Files changed (1) hide show

gui.py +47 -47

gui.py CHANGED Viewed

@@ -1,14 +1,14 @@
 """
-0416后的更新：
-    引入config中half
-    重建npy而不用填写
-    v2支持
-    无f0模型支持
-    修复
-    int16：
-    增加无索引支持
-    f0算法改harvest(怎么看就只有这个会影响CPU占用)，但是不这么改效果不好
 """
 import os, sys, traceback, re
@@ -49,7 +49,7 @@ class RVC:
         self, key, f0_method, hubert_path, pth_path, index_path, npy_path, index_rate
     ) -> None:
         """
-        初始化
         """
         try:
             self.f0_up_key = key
@@ -172,7 +172,7 @@ class RVC:
         # Calculate f0_course and f0_bak here
         f0 *= pow(2, f0_up_key / 12)
         # with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
-        tf0 = self.sr // self.window  # 每秒f0点数
         if inp_f0 is not None:
             delta_t = np.round(
                 (inp_f0[:, 0].max() - inp_f0[:, 0].min()) * tf0 + 1
@@ -195,7 +195,7 @@ class RVC:
     def infer(self, feats: torch.Tensor) -> np.ndarray:
         """
-        推理函数
         """
         audio = feats.clone().cpu().numpy()
         assert feats.dim() == 1, feats.dim()
@@ -217,7 +217,7 @@ class RVC:
                 self.model.final_proj(logits[0]) if self.version == "v1" else logits[0]
             )
-        ####索引优化
         try:
             if (
                 hasattr(self, "index")
@@ -245,10 +245,10 @@ class RVC:
         print(feats.shape)
         if self.if_f0 == 1:
             pitch, pitchf = self.get_f0(audio, self.f0_up_key)
-            p_len = min(feats.shape[1], 13000, pitch.shape[0])  # 太大了爆显存
         else:
             pitch, pitchf = None, None
-            p_len = min(feats.shape[1], 13000)  # 太大了爆显存
         torch.cuda.synchronize()
         # print(feats.shape,pitch.shape)
         feats = feats[:, :p_len, :]
@@ -338,7 +338,7 @@ class GUI:
                     ]
                 ),
                 sg.Frame(
-                    title=i18n("加载模型"),
                     layout=[
                         [
                             sg.Input(
@@ -347,7 +347,7 @@ class GUI:
                                 disabled=True,
                             ),
                             sg.FileBrowse(
-                                i18n("Hubert模型"),
                                 initial_folder=os.path.join(os.getcwd()),
                                 file_types=((". pt"),),
                             ),
@@ -358,7 +358,7 @@ class GUI:
                                 key="pth_path",
                             ),
                             sg.FileBrowse(
-                                i18n("选择.pth文件"),
                                 initial_folder=os.path.join(os.getcwd(), "weights"),
                                 file_types=((". pth"),),
                             ),
@@ -369,19 +369,19 @@ class GUI:
                                 key="index_path",
                             ),
                             sg.FileBrowse(
-                                i18n("选择.index文件"),
                                 initial_folder=os.path.join(os.getcwd(), "logs"),
                                 file_types=((". index"),),
                             ),
                         ],
                         [
                             sg.Input(
-                                default_text="你不需要填写这个You don't need write this.",
                                 key="npy_path",
                                 disabled=True,
                             ),
                             sg.FileBrowse(
-                                i18n("选择.npy文件"),
                                 initial_folder=os.path.join(os.getcwd(), "logs"),
                                 file_types=((". npy"),),
                             ),
@@ -406,7 +406,7 @@ class GUI:
                 sg.Frame(
                     layout=[
                         [
-                            sg.Text(i18n("输入设备")),
                             sg.Combo(
                                 input_devices,
                                 key="sg_input_device",
@@ -414,7 +414,7 @@ class GUI:
                             ),
                         ],
                         [
-                            sg.Text(i18n("输出设备")),
                             sg.Combo(
                                 output_devices,
                                 key="sg_output_device",
@@ -422,14 +422,14 @@ class GUI:
                             ),
                         ],
                     ],
-                    title=i18n("音频设备(请使用同种类驱动)"),
                 )
             ],
             [
                 sg.Frame(
                     layout=[
                         [
-                            sg.Text(i18n("响应阈值")),
                             sg.Slider(
                                 range=(-60, 0),
                                 key="threhold",
@@ -439,7 +439,7 @@ class GUI:
                             ),
                         ],
                         [
-                            sg.Text(i18n("音调设置")),
                             sg.Slider(
                                 range=(-24, 24),
                                 key="pitch",
@@ -459,12 +459,12 @@ class GUI:
                             ),
                         ],
                     ],
-                    title=i18n("常规设置"),
                 ),
                 sg.Frame(
                     layout=[
                         [
-                            sg.Text(i18n("采样长度")),
                             sg.Slider(
                                 range=(0.1, 3.0),
                                 key="block_time",
@@ -474,7 +474,7 @@ class GUI:
                             ),
                         ],
                         [
-                            sg.Text(i18n("淡入淡出长度")),
                             sg.Slider(
                                 range=(0.01, 0.15),
                                 key="crossfade_length",
@@ -484,7 +484,7 @@ class GUI:
                             ),
                         ],
                         [
-                            sg.Text(i18n("额外推理时长")),
                             sg.Slider(
                                 range=(0.05, 3.00),
                                 key="extra_time",
@@ -494,17 +494,17 @@ class GUI:
                             ),
                         ],
                         [
-                            sg.Checkbox(i18n("输入降噪"), key="I_noise_reduce"),
-                            sg.Checkbox(i18n("输出降噪"), key="O_noise_reduce"),
                         ],
                     ],
-                    title=i18n("性能设置"),
                 ),
             ],
             [
-                sg.Button(i18n("开始音频转换"), key="start_vc"),
-                sg.Button(i18n("停止音频转换"), key="stop_vc"),
-                sg.Text(i18n("推理时间(ms):")),
                 sg.Text("0", key="infer_time"),
             ],
         ]
@@ -557,20 +557,20 @@ class GUI:
     def set_values(self, values):
         if len(values["pth_path"].strip()) == 0:
-            sg.popup(i18n("请选择pth文件"))
             return False
         if len(values["index_path"].strip()) == 0:
-            sg.popup(i18n("请选择index文件"))
             return False
         pattern = re.compile("[^\x00-\x7F]+")
         if pattern.findall(values["hubert_path"]):
-            sg.popup(i18n("hubert模型路径不可包含中文"))
             return False
         if pattern.findall(values["pth_path"]):
-            sg.popup(i18n("pth文件路径不可包含中文"))
             return False
         if pattern.findall(values["index_path"]):
-            sg.popup(i18n("index文件路径不可包含中文"))
             return False
         self.set_devices(values["sg_input_device"], values["sg_output_device"])
         self.config.hubert_path = os.path.join(current_dir, "hubert_base.pt")
@@ -594,7 +594,7 @@ class GUI:
         self.block_frame = int(self.config.block_time * self.config.samplerate)
         self.crossfade_frame = int(self.config.crossfade_time * self.config.samplerate)
         self.sola_search_frame = int(0.012 * self.config.samplerate)
-        self.delay_frame = int(0.01 * self.config.samplerate)  # 往前预留0.02s
         self.extra_frame = int(self.config.extra_time * self.config.samplerate)
         self.rvc = None
         self.rvc = RVC(
@@ -636,7 +636,7 @@ class GUI:
     def soundinput(self):
         """
-        接受音频输入
         """
         with sd.Stream(
             callback=self.audio_callback,
@@ -653,7 +653,7 @@ class GUI:
         self, indata: np.ndarray, outdata: np.ndarray, frames, times, status
     ):
         """
-        音频处理
         """
         start_time = time.perf_counter()
         indata = librosa.to_mono(indata.T)
@@ -733,7 +733,7 @@ class GUI:
         print("f0_method: " + str(self.config.f0_method))
     def get_devices(self, update: bool = True):
-        """获取设备列表"""
         if update:
             sd._terminate()
             sd._initialize()
@@ -770,7 +770,7 @@ class GUI:
         )
     def set_devices(self, input_device, output_device):
-        """设置输出设备"""
         (
             input_devices,
             output_devices,

 """
+Updates after 0416:
+    Use the `half` setting from config
+    Rebuild npy automatically, so it no longer needs to be filled in
+    v2 support
+    Support for models without f0
+    Fixes
+    int16:
+    Added support for running without an index
+    Changed the f0 algorithm to harvest (apparently the only change that affects CPU usage); results are poor without this change
 """
 import os, sys, traceback, re
         self, key, f0_method, hubert_path, pth_path, index_path, npy_path, index_rate
     ) -> None:
         """
+        initialization
         """
         try:
             self.f0_up_key = key
         # Calculate f0_course and f0_bak here
         f0 *= pow(2, f0_up_key / 12)
         # with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
+        tf0 = self.sr // self.window  # f0 points per second
         if inp_f0 is not None:
             delta_t = np.round(
                 (inp_f0[:, 0].max() - inp_f0[:, 0].min()) * tf0 + 1
     def infer(self, feats: torch.Tensor) -> np.ndarray:
         """
+        inference function
         """
         audio = feats.clone().cpu().numpy()
         assert feats.dim() == 1, feats.dim()
                 self.model.final_proj(logits[0]) if self.version == "v1" else logits[0]
             )
+        ####Index optimization
         try:
             if (
                 hasattr(self, "index")
         print(feats.shape)
         if self.if_f0 == 1:
             pitch, pitchf = self.get_f0(audio, self.f0_up_key)
+            p_len = min(feats.shape[1], 13000, pitch.shape[0])  # cap the length; too large a value exhausts GPU memory
         else:
             pitch, pitchf = None, None
+            p_len = min(feats.shape[1], 13000)  # cap the length; too large a value exhausts GPU memory
         torch.cuda.synchronize()
         # print(feats.shape,pitch.shape)
         feats = feats[:, :p_len, :]
                     ]
                 ),
                 sg.Frame(
+                    title=i18n("Load model"),
                     layout=[
                         [
                             sg.Input(
                                 disabled=True,
                             ),
                             sg.FileBrowse(
+                                i18n("Hubert model"),
                                 initial_folder=os.path.join(os.getcwd()),
                                 file_types=((". pt"),),
                             ),
                                 key="pth_path",
                             ),
                             sg.FileBrowse(
+                                i18n("Select .pth file"),
                                 initial_folder=os.path.join(os.getcwd(), "weights"),
                                 file_types=((". pth"),),
                             ),
                                 key="index_path",
                             ),
                             sg.FileBrowse(
+                                i18n("Select .index file"),
                                 initial_folder=os.path.join(os.getcwd(), "logs"),
                                 file_types=((". index"),),
                             ),
                         ],
                         [
                             sg.Input(
+                                default_text="You don't need to write this.",
                                 key="npy_path",
                                 disabled=True,
                             ),
                             sg.FileBrowse(
+                                i18n("Select .npy file"),
                                 initial_folder=os.path.join(os.getcwd(), "logs"),
                                 file_types=((". npy"),),
                             ),
                 sg.Frame(
                     layout=[
                         [
+                            sg.Text(i18n("input device")),
                             sg.Combo(
                                 input_devices,
                                 key="sg_input_device",
                             ),
                         ],
                         [
+                            sg.Text(i18n("output device")),
                             sg.Combo(
                                 output_devices,
                                 key="sg_output_device",
                             ),
                         ],
                     ],
+                    title=i18n("Audio device (please use the same type of driver)"),
                 )
             ],
             [
                 sg.Frame(
                     layout=[
                         [
+                            sg.Text(i18n("response threshold")),
                             sg.Slider(
                                 range=(-60, 0),
                                 key="threhold",
                             ),
                         ],
                         [
+                            sg.Text(i18n("Tone settings")),
                             sg.Slider(
                                 range=(-24, 24),
                                 key="pitch",
                             ),
                         ],
                     ],
+                    title=i18n("General settings"),
                 ),
                 sg.Frame(
                     layout=[
                         [
+                            sg.Text(i18n("Sample length")),
                             sg.Slider(
                                 range=(0.1, 3.0),
                                 key="block_time",
                             ),
                         ],
                         [
+                            sg.Text(i18n("Fade Length")),
                             sg.Slider(
                                 range=(0.01, 0.15),
                                 key="crossfade_length",
                             ),
                         ],
                         [
+                            sg.Text(i18n("Additional reasoning time")),
                             sg.Slider(
                                 range=(0.05, 3.00),
                                 key="extra_time",
                             ),
                         ],
                         [
+                            sg.Checkbox(i18n("Input noise reduction"), key="I_noise_reduce"),
+                            sg.Checkbox(i18n("Output noise reduction"), key="O_noise_reduce"),
                         ],
                     ],
+                    title=i18n("Performance settings"),
                 ),
             ],
             [
+                sg.Button(i18n("Start audio conversion"), key="start_vc"),
+                sg.Button(i18n("Stop audio conversion"), key="stop_vc"),
+                sg.Text(i18n("Inference time (ms):")),
                 sg.Text("0", key="infer_time"),
             ],
         ]
     def set_values(self, values):
         if len(values["pth_path"].strip()) == 0:
+            sg.popup(i18n("Please select pth file"))
             return False
         if len(values["index_path"].strip()) == 0:
+            sg.popup(i18n("Please select index file"))
             return False
         pattern = re.compile("[^\x00-\x7F]+")
         if pattern.findall(values["hubert_path"]):
+            sg.popup(i18n("The hubert model path cannot contain Chinese characters"))
             return False
         if pattern.findall(values["pth_path"]):
+            sg.popup(i18n("pth file path cannot contain Chinese characters"))
             return False
         if pattern.findall(values["index_path"]):
+            sg.popup(i18n("The index file path cannot contain Chinese characters"))
             return False
         self.set_devices(values["sg_input_device"], values["sg_output_device"])
         self.config.hubert_path = os.path.join(current_dir, "hubert_base.pt")
         self.block_frame = int(self.config.block_time * self.config.samplerate)
         self.crossfade_frame = int(self.config.crossfade_time * self.config.samplerate)
         self.sola_search_frame = int(0.012 * self.config.samplerate)
+        self.delay_frame = int(0.01 * self.config.samplerate)  # Reserve 0.02s forward
         self.extra_frame = int(self.config.extra_time * self.config.samplerate)
         self.rvc = None
         self.rvc = RVC(
     def soundinput(self):
         """
+        accept audio input
         """
         with sd.Stream(
             callback=self.audio_callback,
         self, indata: np.ndarray, outdata: np.ndarray, frames, times, status
     ):
         """
+        audio processing
         """
         start_time = time.perf_counter()
         indata = librosa.to_mono(indata.T)
         print("f0_method: " + str(self.config.f0_method))
     def get_devices(self, update: bool = True):
+        """Get device list"""
         if update:
             sd._terminate()
             sd._initialize()
         )
     def set_devices(self, input_device, output_device):
+        """Set the input and output devices"""
         (
             input_devices,
             output_devices,