File size: 1,261 Bytes
2b4f358
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151e68e
2b4f358
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import gradio as gr
from fastai.vision.all import *
from fastai.vision.all import *
import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram
import librosa
import numpy as np
import librosa.display

def is_cat(x):
    """Return True when the first element of `x` equals the character 'c'.

    NOTE(review): this function is not called anywhere in this file; it is
    presumably the labelling function the pickled learner refers to, so it
    must stay importable under this exact name -- confirm before removing.
    """
    first_item = x[0]
    return first_item == 'c'

def audio_to_spectrogram(audio_file):
    """Render the mel spectrogram of an audio file to a PNG in the working directory.

    Args:
        audio_file: Path of an audio file that ``librosa.load`` can read. An
            object that exposes its path through a ``name`` attribute (for
            example an upload wrapper) is accepted as well.

    Returns:
        The file name (without directory) of the PNG that was written: the
        input's base name with its extension replaced by ``.png``.
    """
    # NOTE(review): depending on the Gradio version, a File input may hand over
    # a temp-file wrapper instead of a plain path -- confirm. Unwrap it so the
    # path handling below works either way. Real paths are left untouched.
    if not isinstance(audio_file, (str, Path)) and hasattr(audio_file, 'name'):
        audio_file = audio_file.name

    samples, sample_rate = librosa.load(audio_file)

    # Replace only the real extension. A plain substring replace of 'mp3' would
    # also rewrite 'mp3' inside the stem and would leave other extensions
    # (e.g. '.wav') in place, which savefig cannot write as an image.
    filename = Path(audio_file).with_suffix('.png').name

    # Tiny figure with axes and frame hidden so the saved image contains only
    # the spectrogram itself.
    fig = plt.figure(figsize=[0.72, 0.72])
    try:
        ax = fig.add_subplot(111)
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        ax.set_frame_on(False)
        S = librosa.feature.melspectrogram(y=samples, sr=sample_rate)
        # Draw on this figure's axes explicitly instead of relying on the
        # implicit "current axes" of pyplot.
        librosa.display.specshow(librosa.power_to_db(S, ref=np.max), ax=ax)
        fig.savefig(filename, dpi=400, bbox_inches='tight', pad_inches=0)
    finally:
        # Close only the figure created here, and do so even if rendering or
        # saving fails, so figures do not pile up across calls.
        plt.close(fig)
    return filename

# Label names paired positionally with the probabilities returned by the learner.
# NOTE(review): assumes the learner's class order is (not-cat, cat) -- confirm
# against the vocabulary of the model stored in model.pkl.
categories = ('Dog','Cat')

# Learner loaded from model.pkl on first use and reused by later calls, so the
# model file is not read again for every prediction.
_learner = None


def catvsdogsoundclassification(audio_file):
    """Classify an audio file and return a probability for each category.

    The audio is first converted to a spectrogram image by
    ``audio_to_spectrogram``; the learner loaded from ``model.pkl`` then
    predicts on that image.

    Args:
        audio_file: The audio input, passed through to ``audio_to_spectrogram``.

    Returns:
        A dict mapping each name in ``categories`` to the corresponding
        predicted probability as a float.
    """
    global _learner
    filename = audio_to_spectrogram(audio_file)
    if _learner is None:
        # NOTE: load_learner unpickles the file; only ship a trusted model.pkl.
        _learner = load_learner('model.pkl')
    # predict returns three values; only the probabilities are needed here.
    _, _, probs = _learner.predict(filename)
    return dict(zip(categories, map(float, probs)))

# Output component for the {category: probability} dict returned by the
# classifier. Uses the top-level component namespace, matching gr.File() below,
# instead of the deprecated gr.outputs namespace.
label_output = gr.Label()
# The audio is uploaded through a generic File component and handed to the
# classifier function.
intf = gr.Interface(fn=catvsdogsoundclassification, inputs=gr.File(), outputs=label_output)
intf.launch(inline=False, debug=True)