import gradio as gr
import numpy as np
import librosa
from tensorflow.keras.models import load_model

# Load the emotion prediction model
def load_emotion_model(model_path):
    try:
        model = load_model(model_path)
        return model
    except Exception as e:
        print("Error loading emotion prediction model:", e)
        return None

model_path = 'mymodel_SER_LSTM_RAVDESS.h5'  # pre-trained LSTM weights, expected in the working directory
model = load_emotion_model(model_path)

# Function to extract MFCC features from audio
def extract_mfcc(wav_file_name):
    try:
        y, sr = librosa.load(wav_file_name)
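        # Average the 40 MFCC coefficients over all time frames to get a fixed-length feature vector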
        mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T, axis=0)
        return mfccs
    except Exception as e:
        print("Error extracting MFCC features:", e)
        return None

# Emotion labels keyed by the 1-based class index used in the RAVDESS coding
emotions = {1: 'neutral', 2: 'calm', 3: 'happy', 4: 'sad', 5: 'angry', 6: 'fearful', 7: 'disgust', 8: 'surprised'}

# Function to predict emotion from audio
def predict_emotion_from_audio(wav_filepath):
    try:
        if model is None:
            return "Error: Model not loaded"
        test_point = extract_mfcc(wav_filepath)
        if test_point is not None:
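            # Reshape to (batch, timesteps, features) = (1, 40, 1), matching the LSTM's training input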
            test_point = np.reshape(test_point, newshape=(1, 40, 1))
            predictions = model.predict(test_point)
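            # argmax returns a 0-based class index; +1 maps it to the 1-based emotion keys above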
            predicted_emotion_label = np.argmax(predictions[0]) + 1
            return emotions[predicted_emotion_label]
        else:
            return "Error: Unable to extract features"
    except Exception as e:
        print("Error predicting emotion:", e)
        return None

# Gradio callback: return a single emotion prediction for the Label output
def get_predictions(audio_input):
    emotion_prediction = predict_emotion_from_audio(audio_input)
    return emotion_prediction  # Return a single prediction instead of a list

# Create the Gradio interface
with gr.Blocks() as interface:
    gr.Markdown("Emotional Machines test: load or record an audio file for speech emotion analysis")
    with gr.Tabs():
        with gr.Tab("Acoustic and Semantic Predictions"):
            with gr.Row():
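                # type="filepath" passes the uploaded or recorded audio to the callback as a path on disk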
                input_audio = gr.Audio(label="Input Audio", type="filepath")
                submit_button = gr.Button("Submit")
            output_label = gr.Label(label="Prediction")  # single Label component for the predicted emotion

    # Set the function to be called when the button is clicked
    submit_button.click(get_predictions, inputs=input_audio, outputs=output_label)

interface.launch()
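# Optional: interface.launch(share=True) would also expose a temporary public URL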