Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import numpy as np | |
| import librosa | |
| import tensorflow as tf | |
| import random | |
| import warnings | |
| import joblib | |
| warnings.filterwarnings("ignore") | |
# Load the trained Keras classifier and the label encoder used to turn
# predicted class indices back into labels. Both paths are relative, so they
# are resolved against the current working directory.
# NOTE: joblib.load unpickles the file; le.pkl must come from a trusted source.
model = tf.keras.models.load_model(filepath="final_model.keras")
label_encoder = joblib.load(filename="le.pkl")
| # Your feature extractor | |
def extract_features(y, sr):
    """Summarise an audio signal as a single flat feature vector.

    The vector is the concatenation, in this order, of: the averaged MFCCs
    (40 coefficients), the averaged chromagram, the averaged spectral
    contrast, the mean zero-crossing rate and the mean RMS energy.

    Args:
        y: Audio samples, handed to librosa unchanged.
        sr: Sampling rate of ``y``.

    Returns:
        A 1-D NumPy array of features. If any step raises, the error is
        printed and a vector of 61 zeros is returned instead.
    """

    def _frame_mean(matrix):
        # Collapse a 2-D feature matrix to one value per row of `matrix`.
        return np.mean(matrix.T, axis=0)

    try:
        # Magnitude spectrogram shared by the chroma and contrast features.
        magnitude = np.abs(librosa.stft(y))
        parts = [
            _frame_mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)),
            _frame_mean(librosa.feature.chroma_stft(S=magnitude, sr=sr)),
            _frame_mean(
                librosa.feature.spectral_contrast(S=magnitude, sr=sr)
            ),
            np.mean(librosa.feature.zero_crossing_rate(y)),
            np.mean(librosa.feature.rms(y=y)),
        ]
        return np.hstack(parts)
    except Exception as e:
        # Best-effort fallback: report the problem and return an all-zero
        # vector rather than propagating the error.
        print(f"Error extracting features: {e}")
        return np.zeros(61)
| # Prediction function | |
def predict_emotion(audio):
    """Predict the emotion label for one audio clip.

    Args:
        audio: Path to the audio file to classify, or ``None`` when the
            request was submitted without any audio.

    Returns:
        The predicted class label as a plain ``str``.

    Raises:
        gr.Error: If no audio was supplied, or if no features could be
            extracted from the clip.
    """
    # Submitting without audio yields None; fail with a readable message
    # instead of letting librosa.load(None) raise an opaque error.
    if audio is None:
        raise gr.Error("Please upload or record an audio clip first.")

    # sr=None keeps the file's native sampling rate.
    y, sr = librosa.load(audio, sr=None)
    features = extract_features(y, sr)

    # extract_features returns an all-zero vector when extraction fails;
    # classifying that would present a meaningless emotion as a real result.
    if not np.any(features):
        raise gr.Error("Could not extract features from this audio clip.")

    features = features.reshape(1, -1)  # single-sample batch for the model
    prediction = model.predict(features)
    predicted_index = int(np.argmax(prediction))

    # inverse_transform yields a NumPy scalar; convert it so the output
    # component always receives a plain Python string.
    return str(label_encoder.inverse_transform([predicted_index])[0])
# Example clips offered in the UI; each inner list is one set of inputs.
examples = [["happy.wav"], ["sad.wav"], ["angry.wav"]]

# Gradio interface: a single audio input, delivered to the callback as a
# file path, and a single label output.
interface = gr.Interface(
    fn=predict_emotion,
    inputs=gr.Audio(type="filepath"),
    outputs="label",
    title="🎙️ Emotion Recognition from Audio",
    description="Upload or record your voice to predict the emotion using a TensorFlow model trained on audio features.",
    examples=examples,
)

# Start the server only when run as a script, so importing this module
# (for tests or tooling) does not launch the app as a side effect.
if __name__ == "__main__":
    interface.launch()