Yoshitaka16 committed on
Commit
8421a1f
·
verified ·
1 Parent(s): f7d8262

Delete F0Extractor.py

Browse files
Files changed (1) hide show
  1. F0Extractor.py +0 -115
F0Extractor.py DELETED
@@ -1,115 +0,0 @@
1
- import dataclasses
2
- import os
3
- import pathlib
4
-
5
- import resampy
6
- import torchfcpe
7
-
8
- import numpy as np
9
-
10
- import torch
11
- import torchcrepe
12
-
13
- import librosa
14
-
15
- from ultimate_rvc.common import RVC_MODELS_DIR
16
- from ultimate_rvc.rvc.configs.config import Config
17
-
18
- # from tools.anyf0.rmvpe import RMVPE
19
- from ultimate_rvc.rvc.lib.predictors.RMVPE import RMVPE0Predictor
20
-
21
- config = Config()  # module-level configuration instance; extract_f0 reads `config.device` from it
22
-
23
-
24
@dataclasses.dataclass
class F0Extractor:
    """Load an audio file and estimate its fundamental-frequency (F0) contour.

    The waveform is read with ``librosa.load`` as soon as the instance is
    created. ``extract_f0`` then runs the predictor selected by ``method``
    and returns the result converted to cents.

    Attributes:
        wav_path: Path of the audio file to analyse.
        sample_rate: Rate requested from ``librosa.load``; overwritten with
            the rate that call returns.
        hop_length: Hop in samples; used by ``hop_size`` and to size the
            output of the "fcpe" method.
        f0_min: Lower frequency bound passed to the "crepe" and "fcpe"
            predictors.
        f0_max: Upper frequency bound passed to the "crepe" and "fcpe"
            predictors.
        method: Predictor name: "crepe", "fcpe", "rmvpe" or "djcm".
        x: Samples returned by ``librosa.load`` (set in ``__post_init__``,
            not accepted by the constructor).
    """

    wav_path: pathlib.Path
    sample_rate: int = 44100
    hop_length: int = 512
    f0_min: int = 50
    f0_max: int = 1600
    method: str = "rmvpe"
    x: np.ndarray = dataclasses.field(init=False)

    def __post_init__(self) -> None:
        """Read the audio file and store its samples and sample rate."""
        self.x, self.sample_rate = librosa.load(self.wav_path, sr=self.sample_rate)

    @property
    def hop_size(self) -> float:
        """Return ``hop_length`` divided by ``sample_rate``."""
        return self.hop_length / self.sample_rate

    @property
    def wav16k(self) -> np.ndarray:
        """Return the loaded samples resampled to 16000 Hz.

        The resampling is recomputed on every access.
        """
        return resampy.resample(self.x, self.sample_rate, 16000)

    def extract_f0(self) -> np.ndarray:
        """Run the configured predictor and return its output in cents.

        The cents values are relative to ``librosa.midi_to_hz(0)``; entries
        the predictor reports as 0 become NaN (see ``hz_to_cents``).

        Returns:
            Array of cents values produced by ``hz_to_cents``.

        Raises:
            ValueError: If ``method`` is not one of the supported names.
        """
        method = self.method
        if method == "crepe":
            # torchcrepe is fed the 16 kHz signal with a fixed hop of 160 samples.
            wav16k_torch = torch.FloatTensor(self.wav16k).unsqueeze(0).to(config.device)
            f0 = torchcrepe.predict(
                wav16k_torch,
                sample_rate=16000,
                hop_length=160,
                batch_size=512,
                fmin=self.f0_min,
                fmax=self.f0_max,
                device=config.device,
            )
            f0 = f0[0].cpu().numpy()
        elif method == "fcpe":
            audio = librosa.to_mono(self.x)
            audio_length = len(audio)
            # Ask the model to interpolate its output to one value per hop (+1).
            f0_target_length = (audio_length // self.hop_length) + 1
            audio = (
                torch.from_numpy(audio)
                .float()
                .unsqueeze(0)
                .unsqueeze(-1)
                .to(config.device)
            )
            model = torchfcpe.spawn_bundled_infer_model(device=config.device)

            f0 = model.infer(
                audio,
                sr=self.sample_rate,
                decoder_mode="local_argmax",
                threshold=0.006,
                f0_min=self.f0_min,
                f0_max=self.f0_max,
                interp_uv=False,
                output_interp_target_length=f0_target_length,
            )
            f0 = f0.squeeze().cpu().numpy()
        elif method == "rmvpe":
            model_rmvpe = RMVPE0Predictor(
                os.path.join(str(RVC_MODELS_DIR), "predictors", "rmvpe.pt"),
                device=config.device,
            )
            f0 = model_rmvpe.infer_from_audio(self.wav16k, thred=0.03)
        elif method == "djcm":
            # Imported lazily so the module is only needed when this method is chosen.
            from ultimate_rvc.rvc.lib.predictors.djcm_module import DJCM

            model_djcm = DJCM(
                model_path=os.path.join(str(RVC_MODELS_DIR), "predictors", "djcm.pt"),
                device=config.device,
            )
            f0 = model_djcm.infer_from_audio(self.wav16k)
        else:
            raise ValueError(f"Unknown method: {self.method}")
        return self.hz_to_cents(f0, librosa.midi_to_hz(0))

    def plot_f0(self, f0) -> None:
        """Plot an F0 contour with matplotlib, titled with the current method.

        Args:
            f0: Sequence of values to plot, one per frame.
        """
        # Imported lazily so matplotlib is only required when plotting.
        from matplotlib import pyplot as plt

        plt.figure(figsize=(10, 4))
        plt.plot(f0)
        plt.title(self.method)
        plt.xlabel("Time (frames)")
        plt.ylabel("F0 (cents)")
        plt.show()

    @staticmethod
    def hz_to_cents(F, F_ref=55.0):
        """Convert frequencies to cents relative to ``F_ref``.

        Declared static because it takes no instance argument yet is invoked
        as ``self.hz_to_cents(...)``; without the decorator that call passes
        the instance as ``F`` and fails with a ``TypeError``.

        Args:
            F: Frequency value(s); anything ``np.array`` accepts.
            F_ref: Reference frequency that maps to 0 cents.

        Returns:
            Float array of ``1200 * log2(F / F_ref)``. Entries equal to 0 in
            ``F`` are returned as NaN. The input is not modified.
        """
        F_temp = np.array(F).astype(float)
        # Replace zeros with NaN so the logarithm yields NaN rather than -inf.
        F_temp[F_temp == 0] = np.nan
        F_cents = 1200 * np.log2(F_temp / F_ref)
        return F_cents