| import gradio as gr | |
| from fastai.vision.all import * | |
| import librosa | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| from pydub import AudioSegment | |
| import tempfile | |
| import PIL | |
# Load the exported fastai learner once at import time so every request
# reuses the same model instance.
# NOTE: load_learner unpickles the file; only point it at a trusted model.
learn = load_learner("model.pkl")

# Class names, in the order used to pair them with the probabilities
# returned by learn.predict.
labels = learn.dls.vocab
def audio_to_spectrogram(audio_file):
    """Render a mel spectrogram of an audio clip and save it as a PNG.

    String paths ending in ``.mp3`` (any letter case) are first converted to
    a temporary WAV file with pydub; every other input is passed straight to
    ``librosa.load``. Audio is loaded at its native sample rate (``sr=None``).

    Args:
        audio_file: Path to the audio file, or any other object that
            ``librosa.load`` accepts.

    Returns:
        The path of the written image, ``"spectrogram.png"`` (relative to the
        current working directory).
    """
    # Function-scope imports keep this block self-contained. A bare
    # ``import librosa`` is not guaranteed to load the ``display`` submodule
    # on every librosa release, so import it explicitly before use.
    import os
    import librosa.display

    is_mp3 = isinstance(audio_file, str) and audio_file.lower().endswith(".mp3")
    if is_mp3:
        # Convert inside a temporary directory rather than writing into a
        # still-open NamedTemporaryFile, which cannot be reopened by name on
        # Windows.
        with tempfile.TemporaryDirectory() as tmp_dir:
            wav_path = os.path.join(tmp_dir, "converted.wav")
            # export() returns the open output file; close it so the data is
            # released before loading and the directory can be removed.
            AudioSegment.from_mp3(audio_file).export(wav_path, format="wav").close()
            y, sr = librosa.load(wav_path, sr=None)
    else:
        y, sr = librosa.load(audio_file, sr=None)

    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
    S_dB = librosa.power_to_db(S, ref=np.max)

    # NOTE(review): the output name is fixed, so concurrent calls overwrite
    # each other's image. Kept as-is because callers may rely on this path.
    spectrogram_file = "spectrogram.png"

    fig, ax = plt.subplots()
    try:
        img = librosa.display.specshow(
            S_dB, x_axis="time", y_axis="mel", sr=sr, fmax=8000, ax=ax
        )
        fig.colorbar(img, ax=ax, format="%+2.0f dB")
        ax.set(title="Mel-frequency spectrogram")
        # Save and close this specific figure instead of whatever pyplot
        # currently considers the "current" one.
        fig.savefig(spectrogram_file)
    finally:
        # Always release the figure, even if rendering or saving fails.
        plt.close(fig)
    return spectrogram_file
def predict(audio):
    """Classify an audio clip and return per-class probabilities.

    The clip is rendered to a spectrogram image with
    ``audio_to_spectrogram``, resized to 512x512 and passed to the loaded
    learner.

    Args:
        audio: Path to the audio file supplied by the UI, or ``None`` when
            no audio is available.

    Returns:
        A dict mapping each label in ``labels`` to its probability as a
        ``float``, or ``None`` when ``audio`` is ``None``.
    """
    # The interface runs in live mode, so this callback can fire with no
    # audio (e.g. after the input is cleared). Return an empty result
    # instead of failing inside the audio loader.
    if audio is None:
        return None

    spectrogram_file = audio_to_spectrogram(audio)
    img = PILImage.create(spectrogram_file).resize((512, 512))
    # Only the probability vector is needed; the decoded class and its index
    # are ignored.
    _, _, probs = learn.predict(img)
    return {label: float(prob) for label, prob in zip(labels, probs)}
# Input widget: accepts an uploaded file or a microphone recording and hands
# the callback a path on disk.
audio_input = gr.Audio(
    sources=["upload", "microphone"],
    type="filepath",
    label="Upload or Record audio (WAV or MP3)",
)

# Output widget: shows the three most probable classes.
top_classes = gr.components.Label(num_top_classes=3)

# live=True re-runs predict whenever the input changes, without a submit
# button press.
demo = gr.Interface(
    fn=predict,
    inputs=[audio_input],
    outputs=top_classes,
    live=True,
)
demo.launch()