# File size: 1,799 Bytes | 2e62044
import numpy as np
import librosa, math
# Sample rate passed to librosa when loading audio and used when saving it.
sample_rate = 22050
# FFT size shared by stft() and the mel filterbank.
n_fft = 2048
# Number of bins in a one-sided n_fft-point spectrum.
fft_bins = n_fft // 2 + 1
# Number of mel bands requested from the mel filterbank.
num_mels = 80
hop_length = int(sample_rate * 0.0125) # 12.5 ms hop, truncated to 275 samples
win_length = int(sample_rate * 0.05) # 50 ms window, truncated to 1102 samples
# Passed as ``fmin`` to librosa.filters.mel in build_mel_basis().
fmin = 40
# Lower end of the dB range that normalize()/denormalize() map to [0, 1].
min_level_db = -100
# dB offset subtracted from the linear spectrogram in spectrogram().
ref_level_db = 20
def load_wav(filename, encode=True):
    """Load an audio file with librosa at the module's ``sample_rate``.

    Args:
        filename: Path (or whatever ``librosa.load`` accepts) of the audio.
        encode: When truthy (the default), the loaded samples are passed
            through ``encode_16bits`` before being returned.

    Returns:
        The first element of ``librosa.load``'s result (the samples),
        converted by ``encode_16bits`` if ``encode`` is set.
    """
    x = librosa.load(filename, sr=sample_rate)[0]
    # Plain truthiness instead of ``== True``.
    if encode:
        x = encode_16bits(x)
    return x
def save_wav(y, filename):
    """Write ``y`` to ``filename`` as a 16-bit WAV at ``sample_rate``.

    Args:
        y: Array of samples. Anything whose dtype is not int16 is first
            converted with ``encode_16bits``.
        filename: Destination passed to ``scipy.io.wavfile.write``.
    """
    # Local import: ``librosa.output.write_wav`` no longer exists in
    # librosa >= 0.8. scipy is a librosa dependency, so this adds no new
    # requirement.
    from scipy.io import wavfile

    if y.dtype != 'int16':
        y = encode_16bits(y)
    pcm = y.astype(np.int16)
    # scipy expects (samples, channels); mirror the old librosa writer,
    # which transposed two-channel (2, samples) input.
    if pcm.ndim > 1 and pcm.shape[0] == 2:
        pcm = pcm.T
    wavfile.write(filename, sample_rate, pcm)
def split_signal(x):
    """Split signed 16-bit sample values into coarse and fine 8-bit parts.

    The input is shifted by 2**15 to make it non-negative, then divided
    into its high byte (``coarse``) and low byte (``fine``).

    Args:
        x: Integer sample value(s); scalars and NumPy arrays both work.

    Returns:
        Tuple ``(coarse, fine)`` with ``coarse = (x + 2**15) // 256`` and
        ``fine = (x + 2**15) % 256``.
    """
    # Use an int32 offset: adding the bare Python int 2**15 to an int16
    # array raises OverflowError on NumPy >= 2, because the int no longer
    # triggers an upcast. With an int32 operand the sum is int32 on every
    # NumPy version.
    unsigned = x + np.int32(2**15)
    coarse = unsigned // 256
    fine = unsigned % 256
    return coarse, fine
def combine_signal(coarse, fine):
    """Rebuild signed sample values from coarse and fine parts.

    Computes ``coarse * 256 + fine - 2**15``, the inverse of the split
    done by ``split_signal``.
    """
    unsigned = coarse * 256 + fine
    return unsigned - 2**15
def encode_16bits(x):
    """Scale ``x`` by 2**15, clip to the int16 range and cast to int16.

    Values whose scaled result falls outside ``[-2**15, 2**15 - 1]`` are
    saturated before the cast.
    """
    full_scale = 2**15
    scaled = x * full_scale
    limited = np.clip(scaled, -full_scale, full_scale - 1)
    return limited.astype(np.int16)
# Lazily built mel filterbank; filled in on the first linear_to_mel() call.
mel_basis = None


def linear_to_mel(spectrogram):
    """Project a linear-frequency spectrogram onto the mel filterbank.

    The filterbank is created once via ``build_mel_basis`` and stored in
    the module-level ``mel_basis``; later calls reuse it.

    Returns:
        ``np.dot(mel_basis, spectrogram)``.
    """
    global mel_basis
    basis = mel_basis
    if basis is None:
        basis = mel_basis = build_mel_basis()
    return np.dot(basis, spectrogram)
def build_mel_basis():
    """Build the mel filterbank from the module-level settings.

    Returns:
        The result of ``librosa.filters.mel`` for ``sample_rate``,
        ``n_fft``, ``num_mels`` bands and lower frequency ``fmin``.
    """
    # ``sr`` and ``n_fft`` are passed by keyword: they are keyword-only in
    # librosa >= 0.10, and the keyword form also works on older releases.
    return librosa.filters.mel(
        sr=sample_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin
    )
def normalize(S):
    """Map dB values from ``[min_level_db, 0]`` onto ``[0, 1]`` with clipping.

    Values below ``min_level_db`` become 0 and values above 0 dB become 1.
    """
    span = -min_level_db
    scaled = (S - min_level_db) / span
    return np.clip(scaled, 0, 1)
def denormalize(S):
    """Map values in ``[0, 1]`` back to dB in ``[min_level_db, 0]``.

    The input is clipped to ``[0, 1]`` before scaling, so out-of-range
    values saturate at the ends of the dB range.
    """
    clipped = np.clip(S, 0, 1)
    return clipped * -min_level_db + min_level_db
def amp_to_db(x):
    """Convert amplitudes to decibels as ``20 * log10(x)``.

    Inputs are floored at 1e-5 before the logarithm, so the result never
    goes below -100 dB.
    """
    floored = np.maximum(1e-5, x)
    return 20 * np.log10(floored)
def db_to_amp(x):
    """Convert decibels to amplitudes as ``10 ** (x * 0.05)``."""
    exponent = x * 0.05
    return np.power(10.0, exponent)
def spectrogram(y):
    """Compute a normalized linear-frequency spectrogram of ``y``.

    Takes the STFT magnitude, converts it to dB, subtracts
    ``ref_level_db`` and passes the result through ``normalize``.
    """
    magnitudes = np.abs(stft(y))
    level_db = amp_to_db(magnitudes) - ref_level_db
    return normalize(level_db)
def melspectrogram(y):
    """Compute a normalized mel spectrogram of ``y``.

    Takes the STFT magnitude, projects it onto the mel filterbank with
    ``linear_to_mel``, converts it to dB and passes it through
    ``normalize``.
    """
    magnitudes = np.abs(stft(y))
    # NOTE(review): unlike spectrogram(), ref_level_db is not subtracted
    # here -- confirm this is intentional before changing it.
    mel_db = amp_to_db(linear_to_mel(magnitudes))
    return normalize(mel_db)
def stft(y):
    """Return ``librosa.stft`` of ``y`` using the module's STFT settings.

    Uses the module-level ``n_fft``, ``hop_length`` and ``win_length``.
    """
    return librosa.stft(
        y=y, n_fft=n_fft, hop_length=hop_length, win_length=win_length
    )