Spaces:
Runtime error
Runtime error
| import argparse | |
| import os | |
| import os.path as P | |
| from copy import deepcopy | |
| from functools import partial | |
| from glob import glob | |
| from multiprocessing import Pool | |
| from pathlib import Path | |
| import librosa | |
| import numpy as np | |
| import torchvision | |
class MelSpectrogram:
    """Callable transform between audio and mel spectrograms via librosa.

    With ``inverse=False`` the input is run through ``librosa.stft``, the
    magnitude is raised to ``spec_power`` and projected onto a mel filterbank
    that is built once in the constructor.  With ``inverse=True`` the input is
    treated as a mel spectrogram, mapped back to an STFT magnitude with
    ``librosa.feature.inverse.mel_to_stft`` and turned into a signal with
    Griffin-Lim.

    Args:
        sr: Sampling rate forwarded to librosa.
        nfft: FFT size forwarded to librosa as ``n_fft``.
        fmin: Lowest frequency of the mel filterbank.
        fmax: Highest frequency of the mel filterbank.
        nmels: Number of mel bands in the filterbank.
        hoplen: Hop length used by the STFT and by Griffin-Lim.
        spec_power: Exponent applied to the STFT magnitude (also passed as
            ``power`` when inverting).
        inverse: Select the mel -> waveform direction when true.
    """

    def __init__(self, sr, nfft, fmin, fmax, nmels, hoplen, spec_power, inverse=False):
        self.sr, self.nfft = sr, nfft
        self.fmin, self.fmax = fmin, fmax
        self.nmels, self.hoplen = nmels, hoplen
        self.spec_power = spec_power
        self.inverse = inverse
        # Precompute the filterbank so every forward call reuses it.
        self.mel_basis = librosa.filters.mel(
            sr=sr, n_fft=nfft, fmin=fmin, fmax=fmax, n_mels=nmels
        )

    def __call__(self, x):
        """Apply the transform to ``x`` in the configured direction.

        NOTE(review): ``x`` is presumably a waveform array in forward mode and
        a mel spectrogram in inverse mode -- confirm against callers.
        """
        if not self.inverse:
            magnitude = np.abs(
                librosa.stft(x, n_fft=self.nfft, hop_length=self.hoplen)
            )
            powered = magnitude ** self.spec_power
            return np.dot(self.mel_basis, powered)

        linear_spec = librosa.feature.inverse.mel_to_stft(
            x,
            sr=self.sr,
            n_fft=self.nfft,
            fmin=self.fmin,
            fmax=self.fmax,
            power=self.spec_power,
        )
        return librosa.griffinlim(linear_spec, hop_length=self.hoplen)
class LowerThresh:
    """Callable transform that floors values at ``min_val``.

    Args:
        min_val: Lower bound applied element-wise in forward mode.
        inverse: When true the transform is a no-op and returns its input.
    """

    def __init__(self, min_val, inverse=False):
        self.min_val, self.inverse = min_val, inverse

    def __call__(self, x):
        """Return ``x`` with every value below ``min_val`` raised to it."""
        # The floor cannot be undone, so the inverse passes the input through.
        return x if self.inverse else np.maximum(self.min_val, x)
class Add:
    """Callable transform that adds a constant.

    Args:
        val: Value added in forward mode and subtracted in inverse mode.
        inverse: Select the inverse (subtracting) direction when true.
    """

    def __init__(self, val, inverse=False):
        self.val, self.inverse = val, inverse

    def __call__(self, x):
        """Return ``x + val``, or ``x - val`` when running as the inverse."""
        return x - self.val if self.inverse else x + self.val
class Subtract(Add):
    """Callable transform that subtracts a constant.

    Args:
        val: Value subtracted in forward mode and added in inverse mode.
        inverse: Select the inverse (adding) direction when true.
    """

    def __init__(self, val, inverse=False):
        # The parent constructor stores the same two attributes.
        super().__init__(val, inverse)

    def __call__(self, x):
        """Return ``x - val``, or ``x + val`` when running as the inverse."""
        return x + self.val if self.inverse else x - self.val
class Multiply:
    """Callable transform that scales by a constant.

    Args:
        val: Factor multiplied in forward mode and divided out in inverse mode.
        inverse: Select the inverse (dividing) direction when true.
    """

    def __init__(self, val, inverse=False) -> None:
        self.val, self.inverse = val, inverse

    def __call__(self, x):
        """Return ``x * val``, or ``x / val`` when running as the inverse."""
        return x / self.val if self.inverse else x * self.val
class Divide(Multiply):
    """Callable transform that divides by a constant.

    Args:
        val: Divisor used in forward mode and multiplied back in inverse mode.
        inverse: Select the inverse (multiplying) direction when true.
    """

    def __init__(self, val, inverse=False):
        # The parent constructor stores the same two attributes.
        super().__init__(val, inverse)

    def __call__(self, x):
        """Return ``x / val``, or ``x * val`` when running as the inverse."""
        return x * self.val if self.inverse else x / self.val
class Log10:
    """Callable transform applying a base-10 logarithm.

    Args:
        inverse: When true, compute ``10 ** x`` instead of ``log10(x)``.
    """

    def __init__(self, inverse=False):
        self.inverse = inverse

    def __call__(self, x):
        """Return ``log10(x)``, or ``10 ** x`` when running as the inverse."""
        if not self.inverse:
            return np.log10(x)
        return 10 ** x
class Clip:
    """Callable transform that clamps values to ``[min_val, max_val]``.

    Args:
        min_val: Lower bound passed to ``np.clip``.
        max_val: Upper bound passed to ``np.clip``.
        inverse: When true the transform is a no-op and returns its input.
    """

    def __init__(self, min_val, max_val, inverse=False):
        self.min_val, self.max_val = min_val, max_val
        self.inverse = inverse

    def __call__(self, x):
        """Return ``x`` clamped to the configured range."""
        # Clipping cannot be undone, so the inverse passes the input through.
        if self.inverse:
            return x
        lower, upper = self.min_val, self.max_val
        return np.clip(x, lower, upper)
class TrimSpec:
    """Callable transform that keeps the first ``max_len`` columns.

    Args:
        max_len: Number of leading entries kept along the second axis.
        inverse: When true the transform is a no-op and returns its input.
    """

    def __init__(self, max_len, inverse=False):
        self.max_len, self.inverse = max_len, inverse

    def __call__(self, x):
        """Return ``x`` truncated along its second axis."""
        # Dropped columns cannot be restored, so the inverse is a pass-through.
        return x if self.inverse else x[:, : self.max_len]
class MaxNorm:
    """Callable transform that divides the input by its maximum value.

    Args:
        inverse: When true the transform is a no-op and returns its input.
    """

    def __init__(self, inverse=False):
        self.inverse = inverse
        # Added to the maximum before dividing, so an all-zero input does not
        # divide by zero.
        self.eps = 1e-10

    def __call__(self, x):
        """Return ``x / (x.max() + eps)``."""
        # The peak is not stored, so the inverse passes the input through.
        if self.inverse:
            return x
        peak = x.max()
        return x / (peak + self.eps)
# Waveform -> normalised log-mel pipeline configured with sr=16000.
# After the mel projection the remaining steps evaluate
#   clip((20 * log10(max(mel, 1e-5)) - 20 + 100) / 100, 0, 1)
TRANSFORMS_16000 = torchvision.transforms.Compose(
    [
        MelSpectrogram(
            sr=16000,
            nfft=1024,
            fmin=125,
            fmax=7600,
            nmels=80,
            hoplen=256,  # nfft // 4
            spec_power=1,
        ),
        LowerThresh(1e-5),  # floor first so log10 never receives a value below 1e-5
        Log10(),
        Multiply(20),
        Subtract(20),
        Add(100),
        Divide(100),
        Clip(0, 1.0),
        # TrimSpec(860)  -- left disabled, exactly as before
    ]
)