| from functools import lru_cache |
|
|
| from scipy import signal |
| import numpy as np |
| import librosa |
|
|
|
|
| @lru_cache() |
| def mel_basis(hp): |
| assert hp.fmax <= hp.sample_rate // 2 |
| return librosa.filters.mel( |
| sr=hp.sample_rate, |
| n_fft=hp.n_fft, |
| n_mels=hp.num_mels, |
| fmin=hp.fmin, |
| fmax=hp.fmax) |
|
|
|
|
| def preemphasis(wav, hp): |
| assert hp.preemphasis != 0 |
| wav = signal.lfilter([1, -hp.preemphasis], [1], wav) |
| wav = np.clip(wav, -1, 1) |
| return wav |
|
|
|
|
| def melspectrogram(wav, hp, pad=True): |
| |
| if hp.preemphasis > 0: |
| wav = preemphasis(wav, hp) |
| assert np.abs(wav).max() - 1 < 1e-07 |
|
|
| |
| spec_complex = _stft(wav, hp, pad=pad) |
|
|
| |
| spec_magnitudes = np.abs(spec_complex) |
|
|
| if hp.mel_power != 1.0: |
| spec_magnitudes **= hp.mel_power |
|
|
| |
| mel = np.dot(mel_basis(hp), spec_magnitudes) |
| if hp.mel_type == "db": |
| mel = _amp_to_db(mel, hp) |
|
|
| |
| if hp.normalized_mels: |
| mel = _normalize(mel, hp).astype(np.float32) |
|
|
| assert not pad or mel.shape[1] == 1 + len(wav) // hp.hop_size |
| return mel |
|
|
|
|
| def _stft(y, hp, pad=True): |
| |
| |
| return librosa.stft( |
| y, |
| n_fft=hp.n_fft, |
| hop_length=hp.hop_size, |
| win_length=hp.win_size, |
| center=pad, |
| pad_mode="reflect", |
| ) |
|
|
|
|
| def _amp_to_db(x, hp): |
| return 20 * np.log10(np.maximum(hp.stft_magnitude_min, x)) |
|
|
|
|
| def _db_to_amp(x): |
| return np.power(10.0, x * 0.05) |
|
|
|
|
| def _normalize(s, hp, headroom_db=15): |
| min_level_db = 20 * np.log10(hp.stft_magnitude_min) |
| s = (s - min_level_db) / (-min_level_db + headroom_db) |
| return s |
|
|