Yoshitaka16 committed on
Commit
97ea5e0
·
verified ·
1 Parent(s): 8421a1f

Upload F0Extractor.py

Browse files
Files changed (1) hide show
  1. F0Extractor.py +105 -0
F0Extractor.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import dataclasses
2
+ import pathlib
3
+ import librosa
4
+ import numpy as np
5
+ import resampy
6
+ import torch
7
+ import torchcrepe
8
+ import torchfcpe
9
+ import os
10
+
11
+ # from tools.anyf0.rmvpe import RMVPE
12
+ from rvc.lib.predictors.RMVPE import RMVPE0Predictor
13
+ from rvc.configs.config import Config
14
+
15
+ config = Config()  # module-level RVC Config instance; extract_f0 reads config.device from it
16
+
17
+
18
+ @dataclasses.dataclass
19
+ class F0Extractor:
20
+ wav_path: pathlib.Path
21
+ sample_rate: int = 44100
22
+ hop_length: int = 512
23
+ f0_min: int = 50
24
+ f0_max: int = 1600
25
+ method: str = "rmvpe"
26
+ x: np.ndarray = dataclasses.field(init=False)
27
+
28
+ def __post_init__(self):
29
+ self.x, self.sample_rate = librosa.load(self.wav_path, sr=self.sample_rate)
30
+
31
+ @property
32
+ def hop_size(self):
33
+ return self.hop_length / self.sample_rate
34
+
35
+ @property
36
+ def wav16k(self):
37
+ return resampy.resample(self.x, self.sample_rate, 16000)
38
+
39
+ def extract_f0(self):
40
+ f0 = None
41
+ method = self.method
42
+ if method == "crepe":
43
+ wav16k_torch = torch.FloatTensor(self.wav16k).unsqueeze(0).to(config.device)
44
+ f0 = torchcrepe.predict(
45
+ wav16k_torch,
46
+ sample_rate=16000,
47
+ hop_length=160,
48
+ batch_size=512,
49
+ fmin=self.f0_min,
50
+ fmax=self.f0_max,
51
+ device=config.device,
52
+ )
53
+ f0 = f0[0].cpu().numpy()
54
+ elif method == "fcpe":
55
+ audio = librosa.to_mono(self.x)
56
+ audio_length = len(audio)
57
+ f0_target_length = (audio_length // self.hop_length) + 1
58
+ audio = (
59
+ torch.from_numpy(audio)
60
+ .float()
61
+ .unsqueeze(0)
62
+ .unsqueeze(-1)
63
+ .to(config.device)
64
+ )
65
+ model = torchfcpe.spawn_bundled_infer_model(device=config.device)
66
+
67
+ f0 = model.infer(
68
+ audio,
69
+ sr=self.sample_rate,
70
+ decoder_mode="local_argmax",
71
+ threshold=0.006,
72
+ f0_min=self.f0_min,
73
+ f0_max=self.f0_max,
74
+ interp_uv=False,
75
+ output_interp_target_length=f0_target_length,
76
+ )
77
+ f0 = f0.squeeze().cpu().numpy()
78
+ elif method == "rmvpe":
79
+ model_rmvpe = RMVPE0Predictor(
80
+ os.path.join("rvc", "models", "predictors", "rmvpe.pt"),
81
+ device=config.device,
82
+ # hop_length=80
83
+ )
84
+ f0 = model_rmvpe.infer_from_audio(self.wav16k, thred=0.03)
85
+
86
+ else:
87
+ raise ValueError(f"Unknown method: {self.method}")
88
+ return self.hz_to_cents(f0, librosa.midi_to_hz(0))
89
+
90
+ def plot_f0(self, f0):
91
+ from matplotlib import pyplot as plt
92
+
93
+ plt.figure(figsize=(10, 4))
94
+ plt.plot(f0)
95
+ plt.title(self.method)
96
+ plt.xlabel("Time (frames)")
97
+ plt.ylabel("F0 (cents)")
98
+ plt.show()
99
+
100
+ @staticmethod
101
+ def hz_to_cents(F, F_ref=55.0):
102
+ F_temp = np.array(F).astype(float)
103
+ F_temp[F_temp == 0] = np.nan
104
+ F_cents = 1200 * np.log2(F_temp / F_ref)
105
+ return F_cents