| |
|
|
| """Griffin-Lim related modules.""" |
|
|
| |
| |
|
|
| import logging |
|
|
| from distutils.version import LooseVersion |
| from functools import partial |
| from typeguard import check_argument_types |
| from typing import Optional |
|
|
| import librosa |
| import numpy as np |
|
|
| EPS = 1e-10 |
|
|
|
|
def logmel2linear(
    lmspc: np.ndarray,
    fs: int,
    n_fft: int,
    n_mels: int,
    fmin: Optional[int] = None,
    fmax: Optional[int] = None,
) -> np.ndarray:
    """Convert log Mel filterbank to linear spectrogram.

    The base-10 log is undone, the Mel filterbank is inverted with the
    pseudo-inverse of the Mel basis, and the result is floored at ``EPS``.

    Args:
        lmspc: Log Mel filterbank (T, n_mels).
        fs: Sampling frequency.
        n_fft: The number of FFT points.
        n_mels: The number of mel basis.
        fmin: Minimum frequency to analyze. ``None`` means 0.
        fmax: Maximum frequency to analyze. ``None`` means ``fs / 2``.

    Returns:
        Linear spectrogram (T, n_fft // 2 + 1).

    """
    assert lmspc.shape[1] == n_mels
    fmin = 0 if fmin is None else fmin
    fmax = fs / 2 if fmax is None else fmax
    mspc = np.power(10.0, lmspc)
    # Pass everything by keyword: librosa >= 0.10 made these parameters
    # keyword-only, and older releases accept the keyword form as well.
    mel_basis = librosa.filters.mel(
        sr=fs, n_fft=n_fft, n_mels=n_mels, fmin=fmin, fmax=fmax
    )
    inv_mel_basis = np.linalg.pinv(mel_basis)
    # The pseudo-inverse can yield non-positive values; clamp them to EPS.
    return np.maximum(EPS, np.dot(inv_mel_basis, mspc.T).T)
|
|
|
|
def griffin_lim(
    spc: np.ndarray,
    n_fft: int,
    n_shift: int,
    win_length: Optional[int] = None,
    window: Optional[str] = "hann",
    n_iter: Optional[int] = 32,
) -> np.ndarray:
    """Convert linear spectrogram into waveform using Griffin-Lim.

    Uses ``librosa.griffinlim`` when librosa >= 0.7.0 is installed and falls
    back to an explicit STFT/ISTFT iteration (with a warning) otherwise.

    Args:
        spc: Linear spectrogram (T, n_fft // 2 + 1).
        n_fft: The number of FFT points.
        n_shift: Shift size in points.
        win_length: Window length in points.
        window: Window function type.
        n_iter: The number of iterations.

    Returns:
        Reconstructed waveform (N,).

    """
    assert spc.shape[1] == n_fft // 2 + 1

    if LooseVersion(librosa.__version__) >= LooseVersion("0.7.0"):
        # librosa works on (n_fft // 2 + 1, T), hence the transpose.
        mag = np.abs(spc.T)
        return librosa.griffinlim(
            S=mag,
            n_iter=n_iter,
            hop_length=n_shift,
            win_length=win_length,
            window=window,
            # Centering is switched off when there is only a single frame.
            center=mag.shape[1] > 1,
        )

    logging.warning(
        "librosa version is old. use slow version of Griffin-Lim algorithm. "
        "if you want to use fast Griffin-Lim, please update librosa via "
        "`source ./path.sh && pip install librosa==0.7.0`."
    )
    # ``np.complex`` was removed from NumPy; the builtin ``complex`` is the
    # same dtype (complex128).
    cspc = np.abs(spc).astype(complex).T
    # Start from a random phase and refine it iteratively.
    angles = np.exp(2j * np.pi * np.random.rand(*cspc.shape))
    y = librosa.istft(
        cspc * angles, hop_length=n_shift, win_length=win_length, window=window
    )
    for _ in range(n_iter):
        # Keep the target magnitude, adopt the phase of the current estimate.
        estimate = librosa.stft(
            y, n_fft=n_fft, hop_length=n_shift, win_length=win_length, window=window
        )
        angles = np.exp(1j * np.angle(estimate))
        y = librosa.istft(
            cspc * angles, hop_length=n_shift, win_length=win_length, window=window
        )

    return y
|
|
|
|
| |
class Spectrogram2Waveform:
    """Spectrogram to waveform conversion module.

    Binds the analysis parameters once so that an instance can be called
    directly on a spectrogram. When ``n_mels`` is given the input is treated
    as a log Mel filterbank and converted to a linear spectrogram first.
    """

    def __init__(
        self,
        n_fft: int,
        n_shift: int,
        fs: Optional[int] = None,
        n_mels: Optional[int] = None,
        win_length: Optional[int] = None,
        window: Optional[str] = "hann",
        fmin: Optional[int] = None,
        fmax: Optional[int] = None,
        griffin_lim_iters: Optional[int] = 32,
    ):
        """Initialize module.

        Args:
            n_fft: The number of FFT points.
            n_shift: Shift size in points.
            fs: Sampling frequency. Forwarded to ``logmel2linear`` when
                ``n_mels`` is given.
            n_mels: The number of mel basis. ``None`` disables the
                log-Mel-to-linear conversion step.
            win_length: Window length in points.
            window: Window function type.
            fmin: Minimum frequency to analyze.
            fmax: Maximum frequency to analyze.
            griffin_lim_iters: The number of iterations.

        """
        assert check_argument_types()
        self.fs = fs
        # The Mel inversion is only set up when the input is a Mel filterbank.
        self.logmel2linear = (
            partial(
                logmel2linear, fs=fs, n_fft=n_fft, n_mels=n_mels, fmin=fmin, fmax=fmax
            )
            if n_mels is not None
            else None
        )
        self.griffin_lim = partial(
            griffin_lim,
            n_fft=n_fft,
            n_shift=n_shift,
            win_length=win_length,
            window=window,
            n_iter=griffin_lim_iters,
        )
        # Parameters shown by __repr__.
        self.params = dict(
            n_fft=n_fft,
            n_shift=n_shift,
            win_length=win_length,
            window=window,
            n_iter=griffin_lim_iters,
        )
        if n_mels is not None:
            self.params.update(fs=fs, n_mels=n_mels, fmin=fmin, fmax=fmax)

    def __repr__(self):
        """Return ``ClassName(key=value, ...)`` built from ``self.params``."""
        # join() avoids the dangling ", " before the closing parenthesis.
        args = ", ".join(f"{k}={v}" for k, v in self.params.items())
        return f"{self.__class__.__name__}({args})"

    def __call__(self, spc):
        """Convert spectrogram to waveform.

        Args:
            spc: Log Mel filterbank (T, n_mels)
                or linear spectrogram (T, n_fft // 2 + 1).

        Returns:
            Reconstructed waveform (N,).

        """
        if self.logmel2linear is not None:
            spc = self.logmel2linear(spc)
        return self.griffin_lim(spc)
|
|