Yoshitaka16 committed on
Commit
1d438ef
·
verified ·
1 Parent(s): 9cd255d

Update F0Extractor.py

Browse files
Files changed (1) hide show
  1. F0Extractor.py +70 -54
F0Extractor.py CHANGED
@@ -43,57 +43,73 @@ class F0Extractor:
43
  return resampy.resample(self.x, self.sample_rate, 16000)
44
 
45
  def extract_f0(self):
46
- f0 = None
47
- method = self.method
48
- if method == "crepe":
49
- wav16k_torch = torch.FloatTensor(self.wav16k).unsqueeze(0).to(config.device)
50
- f0 = torchcrepe.predict(
51
- wav16k_torch,
52
- sample_rate=16000,
53
- hop_length=160,
54
- batch_size=512,
55
- fmin=self.f0_min,
56
- fmax=self.f0_max,
57
- device=config.device,
58
- )
59
- f0 = f0[0].cpu().numpy()
60
- elif method == "fcpe":
61
- audio = librosa.to_mono(self.x)
62
- audio_length = len(audio)
63
- f0_target_length = (audio_length // self.hop_length) + 1
64
- audio = (
65
- torch.from_numpy(audio)
66
- .float()
67
- .unsqueeze(0)
68
- .unsqueeze(-1)
69
- .to(config.device)
70
- )
71
- model = torchfcpe.spawn_bundled_infer_model(device=config.device)
72
-
73
- f0 = model.infer(
74
- audio,
75
- sr=self.sample_rate,
76
- decoder_mode="local_argmax",
77
- threshold=0.006,
78
- f0_min=self.f0_min,
79
- f0_max=self.f0_max,
80
- interp_uv=False,
81
- output_interp_target_length=f0_target_length,
82
- )
83
- f0 = f0.squeeze().cpu().numpy()
84
- elif method == "rmvpe":
85
- model_rmvpe = RMVPE0Predictor(
86
- os.path.join(str(RVC_MODELS_DIR), "predictors", "rmvpe.pt"),
87
- device=config.device,
88
- )
89
- f0 = model_rmvpe.infer_from_audio(self.wav16k, thred=0.03)
90
- elif method == "djcm":
91
- from ultimate_rvc.rvc.lib.predictors.djcm_module import DJCM
92
- model_djcm = DJCM(
93
- model_path=os.path.join(str(RVC_MODELS_DIR), "predictors", "djcm.pt"),
94
- device=config.device
95
- )
96
- f0 = model_djcm.infer_from_audio(self.wav16k)
97
- else:
98
- raise ValueError(f"Unknown method: {self.method}")
99
- return self.hz_to_cents(f0, librosa.midi_to_hz(0))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  return resampy.resample(self.x, self.sample_rate, 16000)
44
 
45
  def extract_f0(self):
46
+ f0 = None
47
+ method = self.method
48
+ if method == "crepe":
49
+ wav16k_torch = torch.FloatTensor(self.wav16k).unsqueeze(0).to(config.device)
50
+ f0 = torchcrepe.predict(
51
+ wav16k_torch,
52
+ sample_rate=16000,
53
+ hop_length=160,
54
+ batch_size=512,
55
+ fmin=self.f0_min,
56
+ fmax=self.f0_max,
57
+ device=config.device,
58
+ )
59
+ f0 = f0[0].cpu().numpy()
60
+ elif method == "fcpe":
61
+ audio = librosa.to_mono(self.x)
62
+ audio_length = len(audio)
63
+ f0_target_length = (audio_length // self.hop_length) + 1
64
+ audio = (
65
+ torch.from_numpy(audio)
66
+ .float()
67
+ .unsqueeze(0)
68
+ .unsqueeze(-1)
69
+ .to(config.device)
70
+ )
71
+ model = torchfcpe.spawn_bundled_infer_model(device=config.device)
72
+
73
+ f0 = model.infer(
74
+ audio,
75
+ sr=self.sample_rate,
76
+ decoder_mode="local_argmax",
77
+ threshold=0.006,
78
+ f0_min=self.f0_min,
79
+ f0_max=self.f0_max,
80
+ interp_uv=False,
81
+ output_interp_target_length=f0_target_length,
82
+ )
83
+ f0 = f0.squeeze().cpu().numpy()
84
+ elif method == "rmvpe":
85
+ model_rmvpe = RMVPE0Predictor(
86
+ os.path.join(str(RVC_MODELS_DIR), "predictors", "rmvpe.pt"),
87
+ device=config.device,
88
+ )
89
+ f0 = model_rmvpe.infer_from_audio(self.wav16k, thred=0.03)
90
+ elif method == "djcm":
91
+ from ultimate_rvc.rvc.lib.predictors.djcm_module import DJCM
92
+ model_djcm = DJCM(
93
+ model_path=os.path.join(str(RVC_MODELS_DIR), "predictors", "djcm.pt"),
94
+ device=config.device
95
+ )
96
+ f0 = model_djcm.infer_from_audio(self.wav16k)
97
+ else:
98
+ raise ValueError(f"Unknown method: {self.method}")
99
+ return self.hz_to_cents(f0, librosa.midi_to_hz(0))
100
+
101
def plot_f0(self, f0):
    """Show *f0* as a line plot titled with the extraction method.

    Args:
        f0: Sequence of per-frame values to draw; the y-axis is labelled
            "F0 (cents)".
    """
    # Imported lazily so matplotlib is only needed when plotting.
    import matplotlib.pyplot as plt

    plt.figure(figsize=(10, 4))
    plt.plot(f0)
    # Apply the title and axis labels in the same order as individual calls.
    annotations = (
        (plt.title, self.method),
        (plt.xlabel, "Time (frames)"),
        (plt.ylabel, "F0 (cents)"),
    )
    for annotate, text in annotations:
        annotate(text)
    plt.show()
110
+
111
@staticmethod
def hz_to_cents(F, F_ref=55.0):
    """Convert frequencies in Hz to cents relative to a reference frequency.

    Declared as a static method because the function takes no ``self``;
    without the decorator, a call such as ``self.hz_to_cents(f0, ref)``
    binds the instance to ``F`` and fails with a ``TypeError``.

    Args:
        F: Scalar or array-like of frequencies in Hz. Entries equal to 0
            are treated as missing and become NaN in the output.
        F_ref: Reference frequency in Hz that maps to 0 cents.

    Returns:
        ``1200 * log2(F / F_ref)`` computed element-wise as floats, with
        NaN wherever the input was 0. The input is not modified.
    """
    freqs = np.array(F, dtype=float)
    # Mask zeros before the logarithm so they yield NaN rather than -inf.
    voiced = np.where(freqs == 0, np.nan, freqs)
    return 1200 * np.log2(voiced / F_ref)