"""Gradio app that classifies drum/percussion audio samples.

An uploaded audio file is rendered as a log-frequency spectrogram image,
which is then fed to a pretrained fastai image classifier.
"""

import io
import os

import gradio as gr
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from fastai.vision.all import *
from PIL import Image


def fig2img(fig):
    """Convert a Matplotlib figure to a PIL Image and return it."""
    buf = io.BytesIO()
    fig.savefig(buf)
    buf.seek(0)
    img = Image.open(buf)
    # Force a full decode so the BytesIO buffer can be closed safely
    # (PIL otherwise reads lazily from the still-open buffer).
    img.load()
    buf.close()
    return img


def audio_to_spectogram(audio_path, save_path=None):
    """Compute a log-frequency spectrogram for the audio at *audio_path*.

    If *save_path* is given, the spectrogram figure is saved there as an
    image and ``None`` is returned; otherwise a PIL Image of the figure
    is returned.
    """
    y, sr = librosa.load(audio_path, sr=None)
    # Magnitude spectrogram in decibels, referenced to the peak value.
    D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Spectrogram')
    if save_path is not None:
        plt.savefig(save_path)
        plt.close()
        return None
    fig = plt.gcf()
    img = fig2img(fig)
    # Close the figure explicitly: the original leaked one open figure
    # per call on this branch, which accumulates across Gradio requests.
    plt.close(fig)
    return img


def label_fn(x):
    """Labeling function used at training time (label = parent dir name).

    The pickled learner references this function by name, so it must be
    importable here for ``load_learner`` to succeed.
    """
    return x.parent.name


categories = ('claps', 'click', 'cymbals', 'hats_closed', 'hats_open',
              'kicks', 'percussion', 'rides', 'rimshot', 'shakers',
              'snaps', 'snares', 'tambourines', 'toms')

learn = load_learner('sample-classifier-model-01.pkl')


def classify_image(audio):
    """Classify an audio file; returns a {category: probability} dict."""
    audio_to_spectogram(audio, save_path="spect.png")
    pred, idx, probs = learn.predict(PILImage.create("spect.png"))
    return dict(zip(categories, map(float, probs)))


audio = gr.components.Audio(type='filepath')
# Use the gr.components namespace consistently with the Audio input;
# the legacy gr.outputs namespace is deprecated.
label = gr.components.Label()
iface = gr.Interface(fn=classify_image, inputs=audio, outputs=label)
iface.launch(inline=False)