gradio inference
Browse files
- app.py +93 -0
- lda.pkl +3 -0
- requirements.txt +4 -0
app.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import librosa
|
| 3 |
+
import pickle
|
| 4 |
+
import numpy as np
|
| 5 |
+
import spafe
|
| 6 |
+
from spafe.frequencies import dominant_frequencies
|
| 7 |
+
from spafe.features.mfcc import mfcc, imfcc
|
| 8 |
+
from spafe.features.bfcc import bfcc
|
| 9 |
+
from spafe.features.cqcc import cqcc
|
| 10 |
+
from spafe.features.gfcc import erb_spectrogram
|
| 11 |
+
from spafe.features.lfcc import linear_spectrogram
|
| 12 |
+
from spafe.features.msrcc import msrcc
|
| 13 |
+
from spafe.features.ngcc import ngcc
|
| 14 |
+
from spafe.utils.preprocessing import SlidingWindow
|
| 15 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
| 16 |
+
def dominant_freq_density(min_dom_freq,max_dom_freq,signal,sr):
    """Return a normalised histogram of the signal's dominant frequencies.

    Dominant frequencies are obtained from spafe's
    ``get_dominant_frequencies`` (``nfft=512``, Butterworth filtering
    enabled), restricted to the open interval
    ``(min_dom_freq, max_dom_freq)`` and binned with 100-wide bins.

    Args:
        min_dom_freq: Integer lower bound; also the first bin edge.
        max_dom_freq: Integer upper bound. It is the *exclusive* stop of the
            ``range`` that generates the bin edges, so the last edge is the
            largest ``min_dom_freq + 100 * k`` strictly below it.
        signal: Audio samples handed to spafe.
        sr: Sampling rate of ``signal``.

    Returns:
        The density values from ``np.histogram(..., density=True)``; the bin
        edges are discarded.
    """
    detected = dominant_frequencies.get_dominant_frequencies(
        signal,
        sr,
        nfft=512,
        butter_filter=True,
    )
    # Strict inequalities: values equal to either bound are dropped.
    in_band = (detected > min_dom_freq) & (detected < max_dom_freq)
    # Values above the last generated edge pass the mask but are not counted.
    bin_edges = range(min_dom_freq, max_dom_freq, 100)
    # NOTE(review): when nothing falls inside the bins, density
    # normalisation presumably yields NaNs -- confirm and decide whether
    # callers need a guard.
    density, _ = np.histogram(detected[in_band], bins=bin_edges, density=True)
    return density
|
| 21 |
+
|
| 22 |
+
def dominant_freq(x):
    """Compute the dominant-frequency density for one clip.

    Args:
        x: Mapping with the samples under ``'y'`` and the sampling rate
            under ``'sr'``.

    Returns:
        Whatever ``dominant_freq_density`` returns for bounds 100 and 1000.
    """
    return dominant_freq_density(
        min_dom_freq=100,
        max_dom_freq=1000,
        signal=x['y'],
        sr=x['sr'],
    )
|
| 24 |
+
def apply_mfcc(x):
    """Return the axis-0 mean of the spafe MFCC output for one clip.

    Args:
        x: Mapping with the samples under ``'y'`` and the sampling rate
            under ``'sr'``.

    Returns:
        ``np.mean(..., axis=0)`` of the MFCC output after NaN and +/-inf
        entries have been replaced by 0.
    """
    samples, rate = x['y'], x['sr']
    coeffs = mfcc(
        samples,
        fs=rate,
        pre_emph=1,
        pre_emph_coeff=0.97,
        # presumably a 30 ms window with a 15 ms hop -- confirm against spafe
        window=SlidingWindow(0.03, 0.015, "hamming"),
        nfilts=128,
        nfft=512,
        low_freq=50,
        high_freq=4000,
        normalize="mvn",
    )
    # Neutralise non-finite values before averaging so they cannot poison
    # the mean.
    cleaned = np.nan_to_num(coeffs, posinf=0, neginf=0)
    return np.mean(cleaned, axis=0)
|
| 26 |
+
|
| 27 |
+
def apply_bfcc(x):
    """Return the axis-0 mean of the spafe BFCC output for one clip.

    Args:
        x: Mapping with the samples under ``'y'`` and the sampling rate
            under ``'sr'``.

    Returns:
        ``np.mean(..., axis=0)`` of the BFCC output after NaN and +/-inf
        entries have been replaced by 0.
    """
    samples, rate = x['y'], x['sr']
    coeffs = bfcc(
        samples,
        fs=rate,
        pre_emph=1,
        pre_emph_coeff=0.97,
        # presumably a 30 ms window with a 15 ms hop -- confirm against spafe
        window=SlidingWindow(0.03, 0.015, "hamming"),
        nfilts=128,
        nfft=512,
        low_freq=50,
        high_freq=4000,
        normalize="mvn",
    )
    # Neutralise non-finite values before averaging.
    cleaned = np.nan_to_num(coeffs, posinf=0, neginf=0)
    return np.mean(cleaned, axis=0)
|
| 29 |
+
def apply_cqcc(x):
    """Return the axis-0 mean of the spafe CQCC output for one clip.

    Args:
        x: Mapping with the samples under ``'y'`` and the sampling rate
            under ``'sr'``.

    Returns:
        ``np.mean(..., axis=0)`` of the CQCC output after NaN and +/-inf
        entries have been replaced by 0.
    """
    samples, rate = x['y'], x['sr']
    coeffs = cqcc(
        samples,
        fs=rate,
        pre_emph=True,
        pre_emph_coeff=0.97,
        window=SlidingWindow(0.03, 0.015, "hamming"),
        nfft=512,
        low_freq=0,
        high_freq=None,
        number_of_octaves=7,
        number_of_bins_per_octave=24,
        spectral_threshold=0.005,
        f0=120,
        q_rate=1.0,
    )
    # Neutralise non-finite values before averaging.
    cleaned = np.nan_to_num(coeffs, posinf=0, neginf=0)
    return np.mean(cleaned, axis=0)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def apply_gfcc(x):
    """Return the axis-0 mean of spafe's ERB spectrogram for one clip.

    Despite the name, this calls ``erb_spectrogram`` rather than a cepstral
    coefficient function, and only element ``[0]`` of its result is used.

    Args:
        x: Mapping with the samples under ``'y'`` and the sampling rate
            under ``'sr'``.

    Returns:
        ``np.mean(..., axis=0)`` of that element after NaN and +/-inf
        entries have been replaced by 0.
    """
    samples, rate = x['y'], x['sr']
    result = erb_spectrogram(
        samples,
        fs=rate,
        pre_emph=True,
        pre_emph_coeff=0.97,
        window=SlidingWindow(0.03, 0.015, "hamming"),
        nfilts=24,
        nfft=512,
        low_freq=0,
        high_freq=None,
        scale='constant',
        fbanks=None,
        conversion_approach='Glasberg',
    )
    # presumably the first element is the feature matrix -- confirm against
    # the spafe API
    spectrogram = result[0]
    cleaned = np.nan_to_num(spectrogram, posinf=0, neginf=0)
    return np.mean(cleaned, axis=0)
|
| 35 |
+
|
| 36 |
+
def apply_lfcc(x):
    """Return the axis-0 mean of spafe's linear spectrogram for one clip.

    Despite the name, this calls ``linear_spectrogram`` rather than a
    cepstral coefficient function, and only element ``[0]`` of its result is
    used.

    Args:
        x: Mapping with the samples under ``'y'`` and the sampling rate
            under ``'sr'``.

    Returns:
        ``np.mean(..., axis=0)`` of that element after NaN and +/-inf
        entries have been replaced by 0.
    """
    samples, rate = x['y'], x['sr']
    result = linear_spectrogram(
        samples,
        fs=rate,
        pre_emph=True,
        pre_emph_coeff=0.97,
        window=SlidingWindow(0.03, 0.015, "hamming"),
        nfilts=24,
        nfft=512,
        low_freq=0,
        high_freq=None,
        scale='constant',
        fbanks=None,
    )
    # presumably the first element is the feature matrix -- confirm against
    # the spafe API
    spectrogram = result[0]
    cleaned = np.nan_to_num(spectrogram, posinf=0, neginf=0)
    return np.mean(cleaned, axis=0)
|
| 38 |
+
|
| 39 |
+
def apply_msrcc(x):
    """Return the axis-0 mean of the spafe MSRCC output for one clip.

    Args:
        x: Mapping with the samples under ``'y'`` and the sampling rate
            under ``'sr'``.

    Returns:
        ``np.mean(..., axis=0)`` of the MSRCC output (13 cepstra requested)
        after NaN and +/-inf entries have been replaced by 0.
    """
    samples, rate = x['y'], x['sr']
    coeffs = msrcc(
        samples,
        fs=rate,
        num_ceps=13,
        pre_emph=True,
        pre_emph_coeff=0.97,
        window=SlidingWindow(0.03, 0.015, "hamming"),
        nfilts=24,
        nfft=512,
        low_freq=0,
        high_freq=None,
        scale='ascendant',
        gamma=-0.14285714285714285,  # numerically -1/7
        dct_type=2,
        use_energy=False,
        lifter=None,
        normalize=None,
        fbanks=None,
        conversion_approach='Oshaghnessy',
    )
    # Neutralise non-finite values before averaging.
    cleaned = np.nan_to_num(coeffs, posinf=0, neginf=0)
    return np.mean(cleaned, axis=0)
|
| 41 |
+
|
| 42 |
+
def apply_ngcc(x):
    """Return the axis-0 mean of the spafe NGCC output for one clip.

    Args:
        x: Mapping with the samples under ``'y'`` and the sampling rate
            under ``'sr'``.

    Returns:
        ``np.mean(..., axis=0)`` of the NGCC output (13 cepstra requested)
        after NaN and +/-inf entries have been replaced by 0.
    """
    samples, rate = x['y'], x['sr']
    coeffs = ngcc(
        samples,
        fs=rate,
        num_ceps=13,
        pre_emph=True,
        pre_emph_coeff=0.97,
        window=SlidingWindow(0.03, 0.015, "hamming"),
        nfilts=24,
        nfft=512,
        low_freq=0,
        high_freq=None,
        scale='constant',
        dct_type=2,
        use_energy=False,
        lifter=None,
        normalize=None,
        fbanks=None,
        conversion_approach='Glasberg',
    )
    # Neutralise non-finite values before averaging.
    cleaned = np.nan_to_num(coeffs, posinf=0, neginf=0)
    return np.mean(cleaned, axis=0)
|
| 44 |
+
|
| 45 |
+
def load_model(checkpoint):
    """Load a pickled model object from disk.

    Args:
        checkpoint: Path to the pickle file.

    Returns:
        The object stored in the pickle.

    Raises:
        FileNotFoundError: If ``checkpoint`` does not exist.
        pickle.UnpicklingError: If the file is not a valid pickle.
    """
    # SECURITY: unpickling executes arbitrary code embedded in the file.
    # Only load checkpoints that ship with the application or come from a
    # trusted source -- never a user-supplied upload.
    # The context manager closes the handle even if unpickling fails; the
    # previous bare ``open`` left it to the garbage collector.
    with open(checkpoint, 'rb') as handle:
        return pickle.load(handle)
|
| 48 |
+
|
| 49 |
+
def extract_features(audio):
    """Build the flat feature vector for one audio file.

    The file is decoded with ``librosa.load`` using its default arguments,
    then every extractor is run on the same samples and the results are
    concatenated in a fixed order: dominant-frequency density, MFCC, BFCC,
    CQCC, GFCC, LFCC, MSRCC, NGCC.

    Args:
        audio: Whatever ``librosa.load`` accepts as its first argument (the
            Gradio inputs in this file supply a file path).

    Returns:
        The concatenated, flattened array of all extractor outputs.
    """
    # NOTE(review): librosa's defaults presumably resample to a fixed rate
    # and downmix to mono -- confirm this matches how the model's training
    # features were produced.
    waveform, sample_rate = librosa.load(audio)

    # Order matters: it fixes the layout of the final vector.
    extractors = (
        dominant_freq,
        apply_mfcc,
        apply_bfcc,
        apply_cqcc,
        apply_gfcc,
        apply_lfcc,
        apply_msrcc,
        apply_ngcc,
    )
    # Each extractor gets its own fresh mapping, as in the original.
    parts = [
        extractor({'y': waveform, 'sr': sample_rate})
        for extractor in extractors
    ]
    return np.concatenate(parts).flatten()
|
| 71 |
+
|
| 72 |
+
def inference_Verification(audio_1, audio_2):
    """Score how similar two audio clips are in the model's embedding space.

    Both clips are turned into feature vectors, projected with the
    ``transform`` method of the model pickled in ``lda.pkl``, and compared
    with cosine similarity.

    Args:
        audio_1: First clip, as accepted by ``extract_features``.
        audio_2: Second clip, as accepted by ``extract_features``.

    Returns:
        The cosine similarity of the two embeddings, rounded to 4 decimals.

    Raises:
        gr.Error: If either clip is missing.
    """
    # Gradio passes None for an input the user left empty; fail early with a
    # readable message instead of an opaque error from the audio loader.
    if audio_1 is None or audio_2 is None:
        raise gr.Error("Please upload both audio clips before submitting.")

    model = load_model('lda.pkl')

    features1 = extract_features(audio_1)
    features2 = extract_features(audio_2)

    # transform() is given a one-row batch per clip.
    embed1 = model.transform([features1])
    embed2 = model.transform([features2])

    # cosine_similarity yields a 1x1 matrix here; unwrap it to a scalar.
    score = cosine_similarity(embed1, embed2).flatten()[0]
    return score.round(4)
|
| 83 |
+
|
| 84 |
+
# Gradio UI: two uploaded clips in, one similarity score out.
audio_1 = gr.Audio(sources="upload", type="filepath", label="Audio 1")
audio_2 = gr.Audio(sources="upload", type="filepath", label="Audio 2")
text_output = gr.Textbox(label="Similarity Score")

demo = gr.Interface(
    fn=inference_Verification,
    inputs=[audio_1, audio_2],
    outputs=text_output,
    title="Speaker Verification",
    description="Speaker Verification on Multilingual dataset.",
)
# Launching at import time is what lets this file run as the app entry point.
demo.launch()
|
lda.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d7f8ec4727c15b390439f9a81a6a30098ba680af872a61d81b7a7f51e1822b3a
|
| 3 |
+
size 59305
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
numpy
|
| 2 |
+
spafe
|
| 3 |
+
librosa
|
| 4 |
+
scikit-learn
|