File size: 6,513 Bytes
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt
# Example species dictionary: species name -> integer class label.
# It is the default lookup table of read_filelist_and_labels().
species_dict = dict(zip(
    (
        'Eptesicus_nilssonii',
        'Pipistrellus_nathusii',
        'Pipistrellus_pipistrellus',
        'Pipistrellus_pygmaeus',
    ),
    range(4),
))
def read_species_dict(file):
    """Read a species-to-label mapping from a whitespace-separated text file.

    Each non-blank line must start with two fields: the species name and an
    integer label.  Any further fields on the line are ignored.  Blank or
    whitespace-only lines are skipped.

    Args:
        file: path of the text file to read.

    Returns:
        dict mapping species name (str) to label (int).  If a species occurs
        more than once, the last occurrence wins.

    Raises:
        ValueError: if a non-blank line has fewer than two fields or the
            label is not an integer.
    """
    mapping = {}
    with open(file) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # tolerate empty lines (e.g. a trailing newline at end of file)
                continue
            species, label = fields[:2]
            mapping[species] = int(label)
    return mapping
def read_filelist(file):
    """Read a list of file names from a text file.

    Only the first whitespace-separated field of each line is used, so any
    additional columns are ignored.  Blank or whitespace-only lines are
    skipped.

    Args:
        file: path of the text file to read.

    Returns:
        list of file names (str) in the order they appear in the file.
    """
    filelist = []
    with open(file) as f:
        for line in f:
            fields = line.split()
            if fields:
                # an empty line has no first field; ignore it
                filelist.append(fields[0])
    return filelist
def read_filelist_and_labels(file, species_dict=species_dict, flim=True):
    """Read an annotation list: file name, start time, duration, species.

    Each non-blank line must contain exactly four whitespace-separated
    fields (``fname start dur species``) or, when ``flim`` is true, exactly
    six (``fname start dur species fmin fmax``).  Blank or whitespace-only
    lines are skipped.

    Args:
        file: path of the annotation text file.
        species_dict: mapping from species name to integer label.  Species
            missing from the mapping get the label -1.
        flim: whether each line also carries the two frequency-limit columns.

    Returns:
        ``(filelist, start_times, durations, labels, fmins, fmaxs)`` if
        ``flim`` is true, otherwise ``(filelist, start_times, durations,
        labels)``.  All elements are lists of equal length; times, durations
        and frequency limits are floats.

    Raises:
        ValueError: if a non-blank line has the wrong number of fields or a
            numeric field cannot be parsed as float.
    """
    filelist = []
    start_times = []
    durations = []
    labels = []
    fmins = []
    fmaxs = []
    with open(file) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # tolerate empty lines (e.g. a trailing newline at end of file)
                continue
            if flim:
                fname, start, dur, sp, f1, f2 = fields
            else:
                fname, start, dur, sp = fields
            filelist.append(fname)
            start_times.append(float(start))
            durations.append(float(dur))
            # unknown species are kept, but flagged with label -1
            labels.append(species_dict.get(sp, -1))
            if flim:
                fmins.append(float(f1))
                fmaxs.append(float(f2))
    if flim:
        return filelist, start_times, durations, labels, fmins, fmaxs
    return filelist, start_times, durations, labels
def wav2image(wavfile, start_time, dur=1.5, ntime=750, nfreq=128):
    """Compute a log-scaled mel spectrogram of one clip of an audio file.

    The clip is loaded at a sampling rate of 384000 Hz (librosa resamples if
    needed) and converted to a mel spectrogram with ``n_fft=1024``,
    ``hop_length=768`` and ``nfreq`` mel bands between 9 kHz and 150 kHz.

    Args:
        wavfile: path of the audio file.
        start_time: offset of the clip from the start of the file, in seconds.
        dur: clip duration in seconds.
        ntime: maximum number of time frames to return.
        nfreq: number of mel bands.

    Returns:
        2-D array, time-by-frequency, holding ``log10(S + 1e-6)``.  It has at
        most ``ntime`` rows; fewer if the loaded clip yields fewer frames.
    """
    y, sr = librosa.load(wavfile, sr=384000, offset=start_time, duration=dur)
    # note: librosa returns freq-by-time matrix -> transpose it!!!
    S = librosa.feature.melspectrogram(
        y=y, sr=sr, n_fft=1024, hop_length=768, n_mels=nfreq,
        fmin=9000, fmax=150000).T
    # the small offset avoids log10(0) for empty bins
    return np.log10(S[:ntime, :] + 1e-6)
def normalize(data):
    """Standardize a spectrogram and suppress its per-column background.

    The data is z-scored using its global mean and standard deviation, the
    median along axis 0 is subtracted from every column, and the result is
    clipped to the range [0, 6].

    Constant input (standard deviation exactly 0) has no defined z-score;
    it is mapped to all zeros instead of NaN.

    Args:
        data: array-like with at least one dimension.

    Returns:
        Array of the same shape as ``data`` with values in [0, 6].
    """
    centered = data - np.mean(data)
    spread = np.std(data)
    if spread == 0:
        # avoid 0/0 -> NaN for constant input (e.g. a digitally silent clip)
        x = np.zeros_like(centered)
    else:
        x = centered / spread
    return np.clip(x - np.median(x, axis=0), 0.0, 6.0)
def freq2mel(f, f2mel):
    """Map a frequency (or array of frequencies) to an index into ``f2mel``.

    ``f2mel`` must be sorted in ascending order.  The result is the
    left-most insertion index, i.e. the index of the first entry that is
    greater than or equal to ``f``.

    A suitable table can be produced and stored like this:
        a = librosa.mel_frequencies(n_mels=128, fmin=9000, fmax=150000)
        np.savetxt('mel128_freq9k_150k.txt', a, fmt='%.1f')
        f2mel = np.loadtxt('mel128_freq9k_150k.txt')
    """
    table = np.asarray(f2mel)
    return table.searchsorted(f, side='left')
def extract_band(S, fmin, fmax, f2mel):
    """Return a copy of ``S`` with everything outside a frequency band flattened.

    ``S`` is indexed as [time, frequency bin].  The band limits ``fmin`` and
    ``fmax`` are converted to bin indices by a sorted lookup in ``f2mel``
    (the same lookup freq2mel() performs).  Bins below the lower index and
    from the upper index onwards are overwritten with the 10% quantile of
    the values inside the band.  ``S`` itself is not modified.
    """
    lo = np.searchsorted(f2mel, fmin)
    hi = np.searchsorted(f2mel, fmax)
    # low quantile of the in-band values serves as the fill level
    fill = np.quantile(S[:, lo:hi], 0.1)
    banded = np.array(S, copy=True)
    banded[:, hi:] = fill
    banded[:, :lo] = fill
    return banded
def compute_spectrograms(filelist, start_times, durations, ntime=750, nfreq=128, fmin=[], fmax=[], f2mel=[]):
    """Build a stack of normalized spectrograms, one per listed clip.

    Clip ``i`` is read from ``filelist[i]`` starting at ``start_times[i]``
    with length ``durations[i]``, converted with wav2image() and passed
    through normalize().  If ``fmin`` is non-empty, each spectrogram is
    additionally restricted to the band ``fmin[i]``..``fmax[i]`` with
    extract_band(); in that case ``f2mel`` must have ``nfreq`` entries.

    Returns:
        float32 array of shape (len(filelist), ntime, nfreq).
    """
    band_limited = len(fmin) > 0
    if band_limited:
        assert len(f2mel) == nfreq, f"length of f2mel must equal to nfreq {nfreq}"
    count = len(filelist)
    data = np.ndarray((count, ntime, nfreq), dtype='float32')
    for idx, wav in enumerate(filelist):
        image = wav2image(wav, start_times[idx], durations[idx], ntime, nfreq)
        data[idx] = normalize(image)
        if band_limited:
            # band extraction works on the float32 copy already stored in data
            data[idx] = extract_band(data[idx], fmin[idx], fmax[idx], f2mel)
    return data
def wav2spectrograms(wavfile, ntime=512, nhop=250, nfreq=128):
    """Chop a whole recording into overlapping, normalized log-mel segments.

    The file is loaded as mono at 384000 Hz and converted to a mel
    spectrogram (``n_fft=1024``, ``hop_length=768``, ``nfreq`` bands between
    9 kHz and 150 kHz).  Segments of ``ntime`` frames are then taken every
    ``nhop`` frames; each segment is log10-scaled and passed through
    normalize().

    * A recording shorter than ``ntime`` frames yields a single segment that
      is zero-padded at the end (before the log scaling).
    * Otherwise segments start at 0, nhop, 2*nhop, ...  If the last of them
      would run past the end of the recording, it is shifted left so that it
      ends exactly at the last frame.  The whole recording is thus covered.

    Args:
        wavfile: path of the audio file.
        ntime: segment length in spectrogram frames.
        nhop: step between segment starts in frames (with the hop length and
            sampling rate used here, 250 frames == 0.5 s).
        nfreq: number of mel bands.

    Returns:
        float32 array of shape (n_segments, ntime, nfreq).
    """
    # note: if original sampling rate not 384000Hz, librosa default resampling
    # method is SLOW, better use faster method
    y, sr = librosa.load(wavfile, sr=384000, mono=True, res_type='kaiser_fast')
    # librosa returns freq-by-time -> transpose to time-by-freq
    S = librosa.feature.melspectrogram(
        y=y, sr=sr, n_fft=1024, hop_length=768, n_mels=nfreq,
        fmin=9000, fmax=150000).T
    nframes = len(S)

    if nframes < ntime:
        # recording shorter than desired segment length, do zero padding
        padded = np.zeros((ntime, nfreq), dtype='float32')
        padded[:nframes] = S
        data = np.empty((1, ntime, nfreq), dtype='float32')
        data[0] = normalize(np.log10(padded + 1e-6))
        return data

    # Last admissible start index; one extra segment (the +1) is needed so
    # that the frames after the last full hop are not dropped.
    last_start = nframes - ntime
    n = int(np.ceil(last_start / nhop)) + 1
    data = np.empty((n, ntime, nfreq), dtype='float32')
    for i in range(n):
        # last segment too short -> include data from left
        start_i = min(i * nhop, last_start)
        data[i] = normalize(np.log10(S[start_i:start_i + ntime] + 1e-6))
    return data
def plot_spectrograms(data,labels=[],ny=1,nx=1,start_index=0,vmi=None,vma=None,num=None):
    """Show a grid of spectrograms, each with its own colorbar.

    Panels are filled row by row with ``data[start_index]``,
    ``data[start_index + 1]``, ...; panels beyond the end of ``data`` are
    left empty.  Each item is drawn transposed, with the origin at the
    bottom, using the 'gray_r' colormap.

    Args:
        data: sequence of 2-D arrays.
        labels: optional sequence indexed like ``data``; if non-empty, each
            panel is titled ``"<index> (<label>)"``.
        ny, nx: number of panel rows and columns.
        start_index: index into ``data`` of the first panel.
        vmi, vma: color scale limits.  ``None`` means the minimum / maximum
            of the individual spectrogram is used; an explicit 0 is honored.
        num: if given, draw into the existing figure with this identifier
            (cleared first) instead of creating a new figure.
    """
    cm = 'gray_r'
    # don't create new figure but overdraw to existing one when num is given
    fig_kw = {} if num is None else {'num': num, 'clear': True}
    # squeeze=False always yields a 2-D (ny, nx) array of axes, so one code
    # path serves single panels, single rows, single columns and full grids
    fig, axes = plt.subplots(ny, nx, squeeze=False, **fig_kw)
    k = start_index
    for j in range(ny):
        for i in range(nx):
            if k < len(data):
                v1 = np.min(data[k]) if vmi is None else vmi
                v2 = np.max(data[k]) if vma is None else vma
                ax = axes[j, i]
                img = ax.imshow(data[k].T, origin='lower', cmap=cm, aspect='auto', vmin=v1, vmax=v2)
                fig.colorbar(img, ax=ax)
                if len(labels):
                    ax.set_title(f'{k} ({labels[k]})')
            k = k + 1
    # tight_layout sometimes good sometimes not
    plt.tight_layout()
    plt.show(block=False)
def plot_probabilities(logits,species=[],num=None):
    """Show sigmoid probabilities of a logit matrix as a gray-scale image.

    The logits are mapped through the logistic function and drawn
    transposed, so the second axis of ``logits`` runs along the y axis.
    The color scale is fixed to [0, 1].

    Args:
        logits: 2-D array-like of logits (presumably one row per segment and
            one column per species -- confirm against the caller).
        species: optional sequence of tick labels for the y axis, one per
            column of ``logits``.
        num: if given, draw into the existing figure with this identifier
            (cleared first); otherwise draw into the current figure.
    """
    if num is not None:
        # don't create new figure but overdraw to existing one
        plt.subplots(1, 1, num=num, clear=True)
    # asarray lets plain nested lists be negated and transposed
    X = 1 / (1 + np.exp(-np.asarray(logits)))
    cm = 'gray_r'
    plt.imshow(X.T, origin='lower', cmap=cm, aspect='auto', vmin=0, vmax=1)
    if len(species) > 0:
        plt.yticks(np.arange(X.shape[1]), species)
    plt.tight_layout()
    plt.show(block=False)
|