Spaces:

piroplasmata
/

Laugh_Detection

Sleeping

File size: 1,700 Bytes

924a4d4

import gradio as gr
from fastai.vision.all import *
import librosa
import numpy as np
import os
import matplotlib.pyplot as plt
import uuid

# Load the trained fastai learner once, at import time, so every request
# served by `classify_audio` reuses the same model object.
# The path is relative, so 'final_model.pkl' is resolved against the current
# working directory of the process.
# NOTE(review): fastai documents `load_learner` as pickle-based; only ship
# model files from a trusted source — confirm provenance of this file.
learn = load_learner('final_model.pkl')

def create_mel_spectrogram(audio_path):
    """Render the mel spectrogram of an audio file to a PNG image.

    The audio is loaded with librosa's default load settings, converted to a
    128-band mel power spectrogram (n_fft=2048, hop_length=512), scaled to dB
    relative to its maximum, and drawn without axes on a 2.24 x 2.24 inch
    figure.

    Args:
        audio_path: Path of the audio file to load.

    Returns:
        The filename of the PNG that was written. The name is relative (it is
        created in the current working directory) and contains a random UUID
        so concurrent calls do not collide. The caller owns the file and is
        responsible for deleting it.

    Raises:
        Whatever librosa or matplotlib raise when the audio cannot be loaded
        or the image cannot be written.
    """
    # `librosa.display` is a submodule that is not guaranteed to be loaded by
    # a bare `import librosa` (older releases require the explicit import).
    # This must stay the first statement: a function-scope import binds
    # `librosa` as a local name for the whole function body.
    import librosa.display

    # Load the audio file using librosa
    y, sr = librosa.load(audio_path)

    # Create the mel spectrogram directly from the audio data
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=2048, hop_length=512, n_mels=128)
    S_dB = librosa.power_to_db(S=S, ref=np.max)

    # Unique name so simultaneous requests never write to the same file
    temp_filename = f'temp_spectrogram_{uuid.uuid4().hex}.png'

    fig = plt.figure(figsize=(2.24, 2.24))
    try:
        librosa.display.specshow(S_dB, sr=sr, hop_length=512, x_axis='time', y_axis='mel')
        plt.axis('off')
        plt.savefig(temp_filename, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even when drawing or saving fails;
        # pyplot keeps figures alive until closed, which would otherwise
        # accumulate in a long-running server process.
        plt.close(fig)

    return temp_filename

def classify_audio(audio):
    """Classify an uploaded audio file and describe the prediction as text.

    The audio is turned into a temporary mel-spectrogram image, the image is
    passed to the module-level `learn` model, and the temporary image is
    deleted again regardless of whether prediction succeeded.

    Args:
        audio: The value handed over by the Gradio file input. Either an
            object exposing the file path as `.name`, a plain path string,
            or `None` when no file is currently selected.

    Returns:
        A string of the form
        "Predicted Class: <class>, Probability: <p>" on success,
        "No audio file provided." when `audio` is `None`, or the text of the
        exception if anything fails (errors are reported to the UI rather
        than raised).
    """
    # With `live=True` the callback can fire with no file selected; answer
    # with a readable message instead of leaking an AttributeError text.
    if audio is None:
        return "No audio file provided."

    # Accept both a file-like object carrying `.name` and a bare path string;
    # which one arrives depends on the Gradio version / File component type.
    audio_path = getattr(audio, "name", audio)

    spectrogram_path = None
    try:
        # Create the mel spectrogram
        spectrogram_path = create_mel_spectrogram(audio_path)

        # Load the spectrogram image
        img = PILImage.create(spectrogram_path)

        # Get the prediction
        pred_class, pred_idx, probs = learn.predict(img)

        return f"Predicted Class: {pred_class}, Probability: {probs[pred_idx].item():.4f}"
    except Exception as e:
        # Top-level UI boundary: show the failure text in the output box.
        return str(e)
    finally:
        # Clean up the temporary file on every path, including failures in
        # image loading or prediction, so PNGs do not pile up on disk.
        if spectrogram_path is not None:
            try:
                os.remove(spectrogram_path)
            except OSError:
                # Best-effort cleanup; a missing file must not mask the result.
                pass

# Wire the classifier into a Gradio UI: a file upload goes in, plain text
# comes out. `live=True` asks Gradio to re-run the function automatically
# whenever the input changes, without a submit click.
interface = gr.Interface(
    classify_audio,
    gr.File(label="Upload an audio file"),
    "text",
    live=True,
)

# Start serving the app; `share=True` requests a public share link.
interface.launch(share=True)