PhoenixStormJr committed on
Commit
0bd1b69
·
verified ·
1 Parent(s): 430d30a

Update gui.py

Browse files
Files changed (1) hide show
  1. gui.py +47 -47
gui.py CHANGED
@@ -1,14 +1,14 @@
1
  """
2
- 0416后的更新:
3
- 引入config中half
4
- 重建npy而不用填写
5
- v2支持
6
- f0模型支持
7
- 修复
8
-
9
- int16:
10
- 增加无索引支持
11
- f0算法改harvest(怎么看就只有这个会影响CPU占用),但是不这么改效果不好
12
  """
13
  import os, sys, traceback, re
14
 
@@ -49,7 +49,7 @@ class RVC:
49
  self, key, f0_method, hubert_path, pth_path, index_path, npy_path, index_rate
50
  ) -> None:
51
  """
52
- 初始化
53
  """
54
  try:
55
  self.f0_up_key = key
@@ -172,7 +172,7 @@ class RVC:
172
  # Calculate f0_course and f0_bak here
173
  f0 *= pow(2, f0_up_key / 12)
174
  # with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
175
- tf0 = self.sr // self.window # 每秒f0点数
176
  if inp_f0 is not None:
177
  delta_t = np.round(
178
  (inp_f0[:, 0].max() - inp_f0[:, 0].min()) * tf0 + 1
@@ -195,7 +195,7 @@ class RVC:
195
 
196
  def infer(self, feats: torch.Tensor) -> np.ndarray:
197
  """
198
- 推理函数
199
  """
200
  audio = feats.clone().cpu().numpy()
201
  assert feats.dim() == 1, feats.dim()
@@ -217,7 +217,7 @@ class RVC:
217
  self.model.final_proj(logits[0]) if self.version == "v1" else logits[0]
218
  )
219
 
220
- ####索引优化
221
  try:
222
  if (
223
  hasattr(self, "index")
@@ -245,10 +245,10 @@ class RVC:
245
  print(feats.shape)
246
  if self.if_f0 == 1:
247
  pitch, pitchf = self.get_f0(audio, self.f0_up_key)
248
- p_len = min(feats.shape[1], 13000, pitch.shape[0]) # 太大了爆显存
249
  else:
250
  pitch, pitchf = None, None
251
- p_len = min(feats.shape[1], 13000) # 太大了爆显存
252
  torch.cuda.synchronize()
253
  # print(feats.shape,pitch.shape)
254
  feats = feats[:, :p_len, :]
@@ -338,7 +338,7 @@ class GUI:
338
  ]
339
  ),
340
  sg.Frame(
341
- title=i18n("加载模型"),
342
  layout=[
343
  [
344
  sg.Input(
@@ -347,7 +347,7 @@ class GUI:
347
  disabled=True,
348
  ),
349
  sg.FileBrowse(
350
- i18n("Hubert模型"),
351
  initial_folder=os.path.join(os.getcwd()),
352
  file_types=((". pt"),),
353
  ),
@@ -358,7 +358,7 @@ class GUI:
358
  key="pth_path",
359
  ),
360
  sg.FileBrowse(
361
- i18n("选择.pth文件"),
362
  initial_folder=os.path.join(os.getcwd(), "weights"),
363
  file_types=((". pth"),),
364
  ),
@@ -369,19 +369,19 @@ class GUI:
369
  key="index_path",
370
  ),
371
  sg.FileBrowse(
372
- i18n("选择.index文件"),
373
  initial_folder=os.path.join(os.getcwd(), "logs"),
374
  file_types=((". index"),),
375
  ),
376
  ],
377
  [
378
  sg.Input(
379
- default_text="你不需要填写这个You don't need write this.",
380
  key="npy_path",
381
  disabled=True,
382
  ),
383
  sg.FileBrowse(
384
- i18n("选择.npy文件"),
385
  initial_folder=os.path.join(os.getcwd(), "logs"),
386
  file_types=((". npy"),),
387
  ),
@@ -406,7 +406,7 @@ class GUI:
406
  sg.Frame(
407
  layout=[
408
  [
409
- sg.Text(i18n("输入设备")),
410
  sg.Combo(
411
  input_devices,
412
  key="sg_input_device",
@@ -414,7 +414,7 @@ class GUI:
414
  ),
415
  ],
416
  [
417
- sg.Text(i18n("输出设备")),
418
  sg.Combo(
419
  output_devices,
420
  key="sg_output_device",
@@ -422,14 +422,14 @@ class GUI:
422
  ),
423
  ],
424
  ],
425
- title=i18n("音频设备(请使用同种类驱动)"),
426
  )
427
  ],
428
  [
429
  sg.Frame(
430
  layout=[
431
  [
432
- sg.Text(i18n("响应阈值")),
433
  sg.Slider(
434
  range=(-60, 0),
435
  key="threhold",
@@ -439,7 +439,7 @@ class GUI:
439
  ),
440
  ],
441
  [
442
- sg.Text(i18n("音调设置")),
443
  sg.Slider(
444
  range=(-24, 24),
445
  key="pitch",
@@ -459,12 +459,12 @@ class GUI:
459
  ),
460
  ],
461
  ],
462
- title=i18n("常规设置"),
463
  ),
464
  sg.Frame(
465
  layout=[
466
  [
467
- sg.Text(i18n("采样长度")),
468
  sg.Slider(
469
  range=(0.1, 3.0),
470
  key="block_time",
@@ -474,7 +474,7 @@ class GUI:
474
  ),
475
  ],
476
  [
477
- sg.Text(i18n("淡入淡出长度")),
478
  sg.Slider(
479
  range=(0.01, 0.15),
480
  key="crossfade_length",
@@ -484,7 +484,7 @@ class GUI:
484
  ),
485
  ],
486
  [
487
- sg.Text(i18n("额外推理时长")),
488
  sg.Slider(
489
  range=(0.05, 3.00),
490
  key="extra_time",
@@ -494,17 +494,17 @@ class GUI:
494
  ),
495
  ],
496
  [
497
- sg.Checkbox(i18n("输入降噪"), key="I_noise_reduce"),
498
- sg.Checkbox(i18n("输出降噪"), key="O_noise_reduce"),
499
  ],
500
  ],
501
- title=i18n("性能设置"),
502
  ),
503
  ],
504
  [
505
- sg.Button(i18n("开始音频转换"), key="start_vc"),
506
- sg.Button(i18n("停止音频转换"), key="stop_vc"),
507
- sg.Text(i18n("推理时间(ms):")),
508
  sg.Text("0", key="infer_time"),
509
  ],
510
  ]
@@ -557,20 +557,20 @@ class GUI:
557
 
558
  def set_values(self, values):
559
  if len(values["pth_path"].strip()) == 0:
560
- sg.popup(i18n("请选择pth文件"))
561
  return False
562
  if len(values["index_path"].strip()) == 0:
563
- sg.popup(i18n("请选择index文件"))
564
  return False
565
  pattern = re.compile("[^\x00-\x7F]+")
566
  if pattern.findall(values["hubert_path"]):
567
- sg.popup(i18n("hubert模型路径不可包含中文"))
568
  return False
569
  if pattern.findall(values["pth_path"]):
570
- sg.popup(i18n("pth文件路径不可包含中文"))
571
  return False
572
  if pattern.findall(values["index_path"]):
573
- sg.popup(i18n("index文件路径不可包含中文"))
574
  return False
575
  self.set_devices(values["sg_input_device"], values["sg_output_device"])
576
  self.config.hubert_path = os.path.join(current_dir, "hubert_base.pt")
@@ -594,7 +594,7 @@ class GUI:
594
  self.block_frame = int(self.config.block_time * self.config.samplerate)
595
  self.crossfade_frame = int(self.config.crossfade_time * self.config.samplerate)
596
  self.sola_search_frame = int(0.012 * self.config.samplerate)
597
- self.delay_frame = int(0.01 * self.config.samplerate) # 往前预留0.02s
598
  self.extra_frame = int(self.config.extra_time * self.config.samplerate)
599
  self.rvc = None
600
  self.rvc = RVC(
@@ -636,7 +636,7 @@ class GUI:
636
 
637
  def soundinput(self):
638
  """
639
- 接受音频输入
640
  """
641
  with sd.Stream(
642
  callback=self.audio_callback,
@@ -653,7 +653,7 @@ class GUI:
653
  self, indata: np.ndarray, outdata: np.ndarray, frames, times, status
654
  ):
655
  """
656
- 音频处理
657
  """
658
  start_time = time.perf_counter()
659
  indata = librosa.to_mono(indata.T)
@@ -733,7 +733,7 @@ class GUI:
733
  print("f0_method: " + str(self.config.f0_method))
734
 
735
  def get_devices(self, update: bool = True):
736
- """获取设备列表"""
737
  if update:
738
  sd._terminate()
739
  sd._initialize()
@@ -770,7 +770,7 @@ class GUI:
770
  )
771
 
772
  def set_devices(self, input_device, output_device):
773
- """设置输出设备"""
774
  (
775
  input_devices,
776
  output_devices,
 
1
  """
2
+ Updates after 0416:
3
+ Use the half setting from config
4
+ Rebuild npy without filling
5
+ v2 support
6
+ No f0 model support
7
+ Fix
8
+
9
+ int16:
10
+ Added support for no index
11
+ Changed f0 algorithm to harvest (seems like this is the only thing that affects CPU usage), but the effect is not good without this change
12
  """
13
  import os, sys, traceback, re
14
 
 
49
  self, key, f0_method, hubert_path, pth_path, index_path, npy_path, index_rate
50
  ) -> None:
51
  """
52
+ initialization
53
  """
54
  try:
55
  self.f0_up_key = key
 
172
  # Calculate f0_course and f0_bak here
173
  f0 *= pow(2, f0_up_key / 12)
174
  # with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
175
+ tf0 = self.sr // self.window # f0 points per second
176
  if inp_f0 is not None:
177
  delta_t = np.round(
178
  (inp_f0[:, 0].max() - inp_f0[:, 0].min()) * tf0 + 1
 
195
 
196
  def infer(self, feats: torch.Tensor) -> np.ndarray:
197
  """
198
+ inference function
199
  """
200
  audio = feats.clone().cpu().numpy()
201
  assert feats.dim() == 1, feats.dim()
 
217
  self.model.final_proj(logits[0]) if self.version == "v1" else logits[0]
218
  )
219
 
220
+ ####Index optimization
221
  try:
222
  if (
223
  hasattr(self, "index")
 
245
  print(feats.shape)
246
  if self.if_f0 == 1:
247
  pitch, pitchf = self.get_f0(audio, self.f0_up_key)
248
+ p_len = min(feats.shape[1], 13000, pitch.shape[0]) # Capped: anything larger exhausts GPU memory
249
  else:
250
  pitch, pitchf = None, None
251
+ p_len = min(feats.shape[1], 13000) # Capped: anything larger exhausts GPU memory
252
  torch.cuda.synchronize()
253
  # print(feats.shape,pitch.shape)
254
  feats = feats[:, :p_len, :]
 
338
  ]
339
  ),
340
  sg.Frame(
341
+ title=i18n("Load model"),
342
  layout=[
343
  [
344
  sg.Input(
 
347
  disabled=True,
348
  ),
349
  sg.FileBrowse(
350
+ i18n("Hubert model"),
351
  initial_folder=os.path.join(os.getcwd()),
352
  file_types=((". pt"),),
353
  ),
 
358
  key="pth_path",
359
  ),
360
  sg.FileBrowse(
361
+ i18n("Select .pth file"),
362
  initial_folder=os.path.join(os.getcwd(), "weights"),
363
  file_types=((". pth"),),
364
  ),
 
369
  key="index_path",
370
  ),
371
  sg.FileBrowse(
372
+ i18n("Select .index file"),
373
  initial_folder=os.path.join(os.getcwd(), "logs"),
374
  file_types=((". index"),),
375
  ),
376
  ],
377
  [
378
  sg.Input(
379
+ default_text="You don't need to write this.",
380
  key="npy_path",
381
  disabled=True,
382
  ),
383
  sg.FileBrowse(
384
+ i18n("Select .npy file"),
385
  initial_folder=os.path.join(os.getcwd(), "logs"),
386
  file_types=((". npy"),),
387
  ),
 
406
  sg.Frame(
407
  layout=[
408
  [
409
+ sg.Text(i18n("input device")),
410
  sg.Combo(
411
  input_devices,
412
  key="sg_input_device",
 
414
  ),
415
  ],
416
  [
417
+ sg.Text(i18n("output device")),
418
  sg.Combo(
419
  output_devices,
420
  key="sg_output_device",
 
422
  ),
423
  ],
424
  ],
425
+ title=i18n("Audio device (please use the same type of driver)"),
426
  )
427
  ],
428
  [
429
  sg.Frame(
430
  layout=[
431
  [
432
+ sg.Text(i18n("response threshold")),
433
  sg.Slider(
434
  range=(-60, 0),
435
  key="threhold",
 
439
  ),
440
  ],
441
  [
442
+ sg.Text(i18n("Tone settings")),
443
  sg.Slider(
444
  range=(-24, 24),
445
  key="pitch",
 
459
  ),
460
  ],
461
  ],
462
+ title=i18n("General settings"),
463
  ),
464
  sg.Frame(
465
  layout=[
466
  [
467
+ sg.Text(i18n("Sample length")),
468
  sg.Slider(
469
  range=(0.1, 3.0),
470
  key="block_time",
 
474
  ),
475
  ],
476
  [
477
+ sg.Text(i18n("Fade Length")),
478
  sg.Slider(
479
  range=(0.01, 0.15),
480
  key="crossfade_length",
 
484
  ),
485
  ],
486
  [
487
+ sg.Text(i18n("Additional reasoning time")),
488
  sg.Slider(
489
  range=(0.05, 3.00),
490
  key="extra_time",
 
494
  ),
495
  ],
496
  [
497
+ sg.Checkbox(i18n("Input noise reduction"), key="I_noise_reduce"),
498
+ sg.Checkbox(i18n("Output noise reduction"), key="O_noise_reduce"),
499
  ],
500
  ],
501
+ title=i18n("Performance settings"),
502
  ),
503
  ],
504
  [
505
+ sg.Button(i18n("Start audio conversion"), key="start_vc"),
506
+ sg.Button(i18n("Stop audio conversion"), key="stop_vc"),
507
+ sg.Text(i18n("Inference time (ms):")),
508
  sg.Text("0", key="infer_time"),
509
  ],
510
  ]
 
557
 
558
  def set_values(self, values):
559
  if len(values["pth_path"].strip()) == 0:
560
+ sg.popup(i18n("Please select pth file"))
561
  return False
562
  if len(values["index_path"].strip()) == 0:
563
+ sg.popup(i18n("Please select index file"))
564
  return False
565
  pattern = re.compile("[^\x00-\x7F]+")
566
  if pattern.findall(values["hubert_path"]):
567
+ sg.popup(i18n("The hubert model path cannot contain Chinese characters"))
568
  return False
569
  if pattern.findall(values["pth_path"]):
570
+ sg.popup(i18n("pth file path cannot contain Chinese characters"))
571
  return False
572
  if pattern.findall(values["index_path"]):
573
+ sg.popup(i18n("The index file path cannot contain Chinese characters"))
574
  return False
575
  self.set_devices(values["sg_input_device"], values["sg_output_device"])
576
  self.config.hubert_path = os.path.join(current_dir, "hubert_base.pt")
 
594
  self.block_frame = int(self.config.block_time * self.config.samplerate)
595
  self.crossfade_frame = int(self.config.crossfade_time * self.config.samplerate)
596
  self.sola_search_frame = int(0.012 * self.config.samplerate)
597
+ self.delay_frame = int(0.01 * self.config.samplerate) # Reserve 0.02s forward
598
  self.extra_frame = int(self.config.extra_time * self.config.samplerate)
599
  self.rvc = None
600
  self.rvc = RVC(
 
636
 
637
  def soundinput(self):
638
  """
639
+ accept audio input
640
  """
641
  with sd.Stream(
642
  callback=self.audio_callback,
 
653
  self, indata: np.ndarray, outdata: np.ndarray, frames, times, status
654
  ):
655
  """
656
+ audio processing
657
  """
658
  start_time = time.perf_counter()
659
  indata = librosa.to_mono(indata.T)
 
733
  print("f0_method: " + str(self.config.f0_method))
734
 
735
  def get_devices(self, update: bool = True):
736
+ """Get device list"""
737
  if update:
738
  sd._terminate()
739
  sd._initialize()
 
770
  )
771
 
772
  def set_devices(self, input_device, output_device):
773
+ """Set up output device"""
774
  (
775
  input_devices,
776
  output_devices,