# piroplasmata's picture
# Upload folder using huggingface_hub
# 924a4d4 verified
import gradio as gr
from fastai.vision.all import *
import librosa
import numpy as np
import os
import matplotlib.pyplot as plt
import uuid
# Load the trained model once, at import time, from the exported learner file
# 'final_model.pkl' (path is relative to the current working directory).
# The resulting module-level `learn` object is what classify_audio() calls
# `.predict()` on.
# NOTE(review): fastai's load_learner is documented as unpickling the file --
# only point this at a model file you trust; confirm against the fastai docs.
learn = load_learner('final_model.pkl')
def create_mel_spectrogram(audio_path, n_fft=2048, hop_length=512, n_mels=128):
    """Render the mel spectrogram of an audio file to a temporary PNG image.

    The audio is loaded with librosa, converted to a mel-scaled power
    spectrogram, expressed in dB relative to its maximum, and drawn without
    axes on a 2.24 x 2.24 inch figure.

    Args:
        audio_path: Path of the audio file to load.
        n_fft: FFT window size passed to ``librosa.feature.melspectrogram``.
        hop_length: Hop length used for both the spectrogram computation and
            the time axis of the rendered image.
        n_mels: Number of mel bands.

    Returns:
        The file name (relative to the current working directory) of the PNG
        that was written. The caller is responsible for deleting it.

    Raises:
        Whatever librosa or matplotlib raise when the audio cannot be loaded
        or the image cannot be written.
    """
    # Imported explicitly: older librosa releases do not expose the
    # ``display`` submodule through a bare ``import librosa``.
    import librosa.display

    # Load the audio file using librosa
    y, sr = librosa.load(audio_path)

    # Create the mel spectrogram directly from the audio data
    S = librosa.feature.melspectrogram(
        y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels
    )
    S_dB = librosa.power_to_db(S=S, ref=np.max)

    # Save the spectrogram to a unique temporary file
    temp_filename = f'temp_spectrogram_{uuid.uuid4().hex}.png'

    # Draw on an explicit figure/axes pair instead of pyplot's implicit
    # "current figure", so concurrent requests cannot draw on each other's
    # figure.
    fig, ax = plt.subplots(figsize=(2.24, 2.24))
    try:
        librosa.display.specshow(
            S_dB, sr=sr, hop_length=hop_length, x_axis='time', y_axis='mel', ax=ax
        )
        ax.axis('off')
        fig.savefig(temp_filename, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even when rendering or saving fails;
        # otherwise every failed request leaks a matplotlib figure.
        plt.close(fig)
    return temp_filename
def classify_audio(audio):
    """Classify an uploaded audio file and describe the prediction as text.

    A mel-spectrogram image of the audio is generated, fed to the loaded
    learner, and then deleted again.

    Args:
        audio: The value delivered by the Gradio file input. Either an object
            exposing the file path as ``.name`` or a plain path string is
            accepted; ``None`` (no file selected) is handled as well.

    Returns:
        A string with the predicted class and its probability, a short prompt
        when no file was provided, or the text of the exception if anything
        went wrong (errors are shown to the user rather than raised).
    """
    # With live=True the callback also fires when the upload is cleared.
    if audio is None:
        return "Please upload an audio file."

    spectrogram_path = None
    try:
        # Accept both a file-like wrapper (path in ``.name``) and a bare path.
        audio_path = getattr(audio, "name", audio)
        # Create the mel spectrogram
        spectrogram_path = create_mel_spectrogram(audio_path)
        # Load the spectrogram image
        img = PILImage.create(spectrogram_path)
        # Get the prediction
        pred_class, pred_idx, probs = learn.predict(img)
        return f"Predicted Class: {pred_class}, Probability: {probs[pred_idx].item():.4f}"
    except Exception as e:
        # UI boundary: surface the error message in the output box.
        return str(e)
    finally:
        # Clean up the temporary file even when loading or prediction failed.
        if spectrogram_path is not None:
            try:
                os.remove(spectrogram_path)
            except OSError:
                # Best-effort cleanup; a failed delete must not mask the result.
                pass
# Build the Gradio UI: a single file-upload input whose value is passed to
# classify_audio, with the returned string shown as plain text. live=True
# makes the interface re-run the function whenever the input changes.
upload_input = gr.File(label="Upload an audio file")
interface = gr.Interface(
    fn=classify_audio,
    inputs=upload_input,
    outputs="text",
    live=True,
)

# Start the app, requesting a public share link.
interface.launch(share=True)