import gradio as gr
import numpy as np
import librosa
import tensorflow as tf
import random
import warnings
import joblib

warnings.filterwarnings("ignore")

# Load the trained classifier and the label encoder that maps class indices
# back to emotion names. Both artifacts must sit next to this script.
model = tf.keras.models.load_model("final_model.keras")
label_encoder = joblib.load("le.pkl")

# Expected feature-vector length: 40 MFCC + 12 chroma + 7 contrast + 1 ZCR + 1 RMSE.
N_FEATURES = 61


def extract_features(y, sr):
    """Extract a fixed-length feature vector from a mono audio signal.

    Parameters
    ----------
    y : np.ndarray
        Audio time series (as returned by ``librosa.load``).
    sr : int
        Sampling rate of ``y``.

    Returns
    -------
    np.ndarray
        1-D vector of length 61: mean MFCCs (40), mean chroma (12),
        mean spectral contrast (7), mean zero-crossing rate (1) and
        mean RMS energy (1). On any extraction error a zero vector of
        the same length is returned so the caller never crashes —
        note this means a failed extraction still produces a (meaningless)
        prediction downstream.
    """
    try:
        stft = np.abs(librosa.stft(y))
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
        mfcc_mean = np.mean(mfcc.T, axis=0)
        chroma = librosa.feature.chroma_stft(S=stft, sr=sr)
        chroma_mean = np.mean(chroma.T, axis=0)
        contrast = librosa.feature.spectral_contrast(S=stft, sr=sr)
        contrast_mean = np.mean(contrast.T, axis=0)
        zcr = librosa.feature.zero_crossing_rate(y)
        zcr_mean = np.mean(zcr)
        rmse = librosa.feature.rms(y=y)
        rmse_mean = np.mean(rmse)
        return np.hstack([mfcc_mean, chroma_mean, contrast_mean, zcr_mean, rmse_mean])
    except Exception as e:
        # Best-effort fallback: log and return zeros rather than crashing the UI.
        print(f"Error extracting features: {e}")
        return np.zeros(N_FEATURES)


def predict_emotion(audio):
    """Predict the emotion label for an audio file.

    Parameters
    ----------
    audio : str
        Path to an audio file (Gradio passes a filepath because the
        Audio component is configured with ``type="filepath"``).

    Returns
    -------
    str
        The predicted emotion label, decoded via the label encoder.
    """
    # sr=None preserves the file's native sampling rate.
    y, sr = librosa.load(audio, sr=None)
    features = extract_features(y, sr)
    features = features.reshape(1, -1)  # model expects a 2-D batch of shape (1, 61)
    prediction = model.predict(features)
    predicted_label = label_encoder.inverse_transform([np.argmax(prediction)])[0]
    return predicted_label


examples = [["happy.wav"], ["sad.wav"], ["angry.wav"]]

# Gradio Interface
interface = gr.Interface(
    fn=predict_emotion,
    inputs=gr.Audio(type="filepath"),
    outputs="label",
    title="🎙️ Emotion Recognition from Audio",
    description="Upload or record your voice to predict the emotion using a TensorFlow model trained on audio features.",
    examples=examples,
)

# Guard the server launch so importing this module (e.g. for testing or
# reuse of predict_emotion) does not start the web UI as a side effect.
if __name__ == "__main__":
    interface.launch()