R-Kentaren committed on
Commit
573a3b9
·
verified ·
1 Parent(s): 60982d5

Update vc_infer_pipeline.py

Browse files
Files changed (1) hide show
  1. vc_infer_pipeline.py +1 -57
vc_infer_pipeline.py CHANGED
@@ -11,7 +11,6 @@ import parselmouth
11
  import pyworld
12
  import torch
13
  import torch.nn.functional as F
14
- import torchcrepe
15
  from scipy import signal
16
 
17
  now_dir = os.getcwd()
@@ -22,19 +21,6 @@ bh, ah = signal.butter(N=5, Wn=48, btype="high", fs=16000)
22
  input_audio_path2wav = {}
23
 
24
 
25
- @lru_cache
26
- def cache_harvest_f0(input_audio_path, fs, f0max, f0min, frame_period):
27
- audio = input_audio_path2wav[input_audio_path]
28
- f0, t = pyworld.harvest(
29
- audio,
30
- fs=fs,
31
- f0_ceil=f0max,
32
- f0_floor=f0min,
33
- frame_period=frame_period,
34
- )
35
- f0 = pyworld.stonemask(audio, f0, t, fs)
36
- return f0
37
-
38
 
39
  def change_rms(data1, sr1, data2, sr2, rate): # 1是输入音频,2是输出音频,rate是2的占比
40
  # print(data1.max(),data2.max())
@@ -93,49 +79,7 @@ class VC(object):
93
  f0_max = 1100
94
  f0_mel_min = 1127 * np.log(1 + f0_min / 700)
95
  f0_mel_max = 1127 * np.log(1 + f0_max / 700)
96
- if f0_method == "pm":
97
- f0 = (
98
- parselmouth.Sound(x, self.sr)
99
- .to_pitch_ac(
100
- time_step=time_step / 1000,
101
- voicing_threshold=0.6,
102
- pitch_floor=f0_min,
103
- pitch_ceiling=f0_max,
104
- )
105
- .selected_array["frequency"]
106
- )
107
- pad_size = (p_len - len(f0) + 1) // 2
108
- if pad_size > 0 or p_len - len(f0) - pad_size > 0:
109
- f0 = np.pad(
110
- f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant"
111
- )
112
- elif f0_method == "harvest":
113
- input_audio_path2wav[input_audio_path] = x.astype(np.double)
114
- f0 = cache_harvest_f0(input_audio_path, self.sr, f0_max, f0_min, 10)
115
- if filter_radius > 2:
116
- f0 = signal.medfilt(f0, 3)
117
- elif f0_method == "crepe":
118
- model = "full"
119
- # Pick a batch size that doesn't cause memory errors on your gpu
120
- batch_size = 512
121
- # Compute pitch using first gpu
122
- audio = torch.tensor(np.copy(x))[None].float()
123
- f0, pd = torchcrepe.predict(
124
- audio,
125
- self.sr,
126
- self.window,
127
- f0_min,
128
- f0_max,
129
- model,
130
- batch_size=batch_size,
131
- device=self.device,
132
- return_periodicity=True,
133
- )
134
- pd = torchcrepe.filter.median(pd, 3)
135
- f0 = torchcrepe.filter.mean(f0, 3)
136
- f0[pd < 0.1] = 0
137
- f0 = f0[0].cpu().numpy()
138
- elif f0_method == "rmvpe":
139
  if hasattr(self, "model_rmvpe") == False:
140
  from rmvpe import RMVPE
141
 
 
11
  import pyworld
12
  import torch
13
  import torch.nn.functional as F
 
14
  from scipy import signal
15
 
16
  now_dir = os.getcwd()
 
21
  input_audio_path2wav = {}
22
 
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  def change_rms(data1, sr1, data2, sr2, rate): # 1是输入音频,2是输出音频,rate是2的占比
26
  # print(data1.max(),data2.max())
 
79
  f0_max = 1100
80
  f0_mel_min = 1127 * np.log(1 + f0_min / 700)
81
  f0_mel_max = 1127 * np.log(1 + f0_max / 700)
82
+ if f0_method == "rmvpe":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  if hasattr(self, "model_rmvpe") == False:
84
  from rmvpe import RMVPE
85