Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from fastai.vision.all import load_learner, PILImage | |
| import librosa | |
| import librosa.display | |
| import matplotlib.pyplot as plt | |
| import numpy as np | |
| import os | |
| import pathlib | |
| if os.name != 'nt': | |
| pathlib.WindowsPath = pathlib.PosixPath | |
| # For Windows | |
| # learn_inf = load_learner('export.pkl') | |
| # else: | |
| # learn_inf = load_learner('export.pkl') | |
| # Load your fastai model | |
| learn_inf = load_learner('export.pkl') | |
| # Function to save mel spectrogram and run inference | |
| def save_mel_spectrogram_and_predict(wav_path): | |
| # Define paths | |
| output_dir = 'temp_spectrograms' | |
| os.makedirs(output_dir, exist_ok=True) # Ensure the directory exists | |
| output_path = os.path.join(output_dir, 'temp_spectrogram.png') | |
| # Load the audio file | |
| y, sr = librosa.load(wav_path, sr=16000) | |
| # Compute the mel spectrogram | |
| S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128) | |
| S_dB = librosa.power_to_db(S, ref=np.max) | |
| # Save the mel spectrogram as an image | |
| plt.figure(figsize=(10, 4)) | |
| librosa.display.specshow(S_dB, sr=sr, x_axis='time', y_axis='mel', cmap='viridis') | |
| # plt.colorbar(format='%+2.0f dB') | |
| # plt.title('Mel spectrogram') | |
| plt.axis('off') | |
| plt.savefig(output_path, bbox_inches='tight', pad_inches=0, format='png') | |
| plt.close() | |
| # Run inference on the saved mel spectrogram image | |
| img = PILImage.create(output_path) | |
| pred_class, pred_idx, probs = learn_inf.predict(img) | |
| return output_path, {learn_inf.dls.vocab[i]: float(probs[i]) for i in range(len(probs))} | |
| # Gradio interface function | |
| def gradio_interface(audio): | |
| spectrogram_path, predictions = save_mel_spectrogram_and_predict(audio) | |
| return spectrogram_path, predictions | |
| # Create the Gradio interface | |
| interface = gr.Interface( | |
| fn=gradio_interface, | |
| inputs=gr.Audio(sources="upload", type="filepath"), | |
| outputs=[gr.Image(type="filepath", label="Mel Spectrogram"), gr.JSON(label="Class Probabilities")], | |
| title="Audio Classification with Mel Spectrogram", | |
| description= | |
| "Upload an audio file to see its mel spectrogram and classification probabilities. Currently supports acoustic guitar, electric guitar, bass, synth lead, and synth pad." | |
| ) | |
| # Launch the interface | |
| interface.launch() | |