Spaces:
Paused
Paused
| import logging | |
| import numpy as np | |
| import librosa | |
| from .rmvpe import RMVPE | |
| from ..constants import SR_16K | |
| logger = logging.getLogger(__name__) | |
class F0Extractor:
    """Extract a pitch (F0) track from audio and quantize it into coarse bins.

    The pitch track itself comes from an RMVPE model; this class rescales it
    and maps it onto ``f0_bin - 1`` integer bins (``1 .. f0_bin - 1``) that
    are evenly spaced on the mel scale between ``f0_min`` and ``f0_max``.
    """

    def __init__(
        self,
        rmvpe: RMVPE = None,
        sr=SR_16K,
        f0_bin=256,
        f0_max=1100.0,
        f0_min=50.0,
    ):
        """Store the quantization settings and optionally attach a model.

        Args:
            rmvpe: Pitch model to use. When ``None`` the extractor is created
                unloaded and ``load`` must be called before extracting.
            sr: Sample rate used when reading audio files in ``extract_f0``.
            f0_bin: Number of coarse pitch bins; valid outputs are
                ``1 .. f0_bin - 1``.
            f0_max: Frequency (Hz) mapped to the highest bin.
            f0_min: Frequency (Hz) mapped to the lowest bin.
        """
        self.sr = sr
        self.f0_bin = f0_bin
        self.f0_max = f0_max
        self.f0_min = f0_min
        # Mel-scale equivalents of the frequency limits, computed once.
        self.f0_mel_min = 1127 * np.log(1 + f0_min / 700)
        self.f0_mel_max = 1127 * np.log(1 + f0_max / 700)
        if rmvpe is not None:
            self.load(rmvpe)

    def load(self, rmvpe: RMVPE):
        """Attach ``rmvpe`` and record the device its parameters live on."""
        self.rmvpe = rmvpe
        self.device = next(rmvpe.parameters()).device
        # Lazy %-formatting: the message is only built if INFO is enabled.
        logger.info("RMVPE model is on %s", self.device)

    def is_loaded(self) -> bool:
        """Return ``True`` once ``load`` has attached a model."""
        return hasattr(self, "rmvpe")

    def calculate_f0_from_f0nsf(self, f0nsf: np.ndarray):
        """Quantize a pitch track in Hz into integer coarse-pitch bins.

        Args:
            f0nsf: Array-like of frequencies in Hz. Values whose mel
                conversion is not positive (e.g. ``0``) map to bin ``1``.

        Returns:
            Integer array of the same shape with values in
            ``1 .. f0_bin - 1``. An empty input yields an empty output.
        """
        # Accept lists/tuples as well as arrays; dtype is left untouched.
        f0nsf = np.asarray(f0nsf)
        f0_mel = 1127 * np.log(1 + f0nsf / 700)

        # Rescale only frames with a positive mel value onto [1, f0_bin - 1].
        voiced = f0_mel > 0
        f0_mel[voiced] = (f0_mel[voiced] - self.f0_mel_min) * (
            self.f0_bin - 2
        ) / (self.f0_mel_max - self.f0_mel_min) + 1

        # Clamp everything else (and out-of-range pitches) into valid bins.
        top_bin = self.f0_bin - 1
        f0_mel[f0_mel <= 1] = 1
        f0_mel[f0_mel > top_bin] = top_bin
        f0 = np.rint(f0_mel).astype(int)

        # Internal invariant. The bound follows f0_bin instead of a
        # hard-coded 255, and empty arrays are skipped because max()/min()
        # raise on zero-size input.
        if f0.size:
            assert f0.max() <= top_bin and f0.min() >= 1, (
                f0.max(),
                f0.min(),
            )
        return f0

    def extract_f0_from(self, y: np.ndarray, modification=0.0):
        """Run the pitch model on a waveform and quantize the result.

        Args:
            y: Audio samples passed straight to ``rmvpe.infer_from_audio``.
            modification: Pitch shift in semitones; the raw track is
                multiplied by ``2 ** (modification / 12)``.

        Returns:
            Tuple ``(f0nsf, f0)``: the shifted pitch track and its coarse
            integer quantization from ``calculate_f0_from_f0nsf``.
        """
        # NOTE(review): thred=0.03 is forwarded as-is to RMVPE; its exact
        # meaning is defined by that model - confirm before changing.
        f0nsf = self.rmvpe.infer_from_audio(y, thred=0.03)
        # Out-of-place multiply so the model's returned buffer is not
        # mutated and non-float tracks do not fail on in-place casting.
        f0nsf = f0nsf * pow(2, modification / 12)
        f0 = self.calculate_f0_from_f0nsf(f0nsf)
        return f0nsf, f0

    def extract_f0(self, wav_file: str, modification=0.0):
        """Load ``wav_file`` at ``self.sr`` and extract its pitch.

        Args:
            wav_file: Path of the audio file to read with ``librosa.load``.
            modification: Optional pitch shift in semitones, forwarded to
                ``extract_f0_from`` (defaults to no shift).

        Returns:
            The ``(f0nsf, f0)`` tuple produced by ``extract_f0_from``.
        """
        y, _ = librosa.load(wav_file, sr=self.sr)
        return self.extract_f0_from(y, modification=modification)