# NOTE: the lines "Spaces: / Sleeping / Sleeping" were HuggingFace Spaces
# page-scrape residue (Space status), not code; kept here as a comment so
# the file parses as valid Python.
# Third-party dependencies.
import gradio as gr
import numpy as np
import librosa
import tensorflow as tf

# Pre-trained Keras classifiers, one per speaker gender.
model_female = tf.keras.models.load_model("emotion_recognition_female.h5")
model_male = tf.keras.models.load_model("emotion_recognition_male.h5")

# Framing parameters shared by all feature extractors.
FRAME_LENGTH = 2048
HOP_LENGTH = 512

# Emotion label <-> class-index mappings for the classifier head.
emotion_to_code = {
    'neutral': 0, 'happy': 1, 'sad': 2,
    'angry': 3, 'fear': 4, 'disgust': 5
}
code_to_emotion = {v: k for k, v in emotion_to_code.items()}
def preprocess_audio(path, target_length=180000, sample_rate=22050):
    """Load an audio file and normalize it to a fixed-length signal.

    Parameters
    ----------
    path : str
        Path to the audio file (any format librosa can decode).
    target_length : int, optional
        Exact number of samples of the returned signal; the trimmed audio
        is cropped or zero-padded to this length. Defaults to 180000
        (~8.16 s at 22050 Hz), matching the models' expected input.
    sample_rate : int, optional
        Sampling rate used when decoding the file.

    Returns
    -------
    tuple[np.ndarray, int]
        The fixed-length signal and its sampling rate. On any decoding
        failure a low-amplitude random-noise placeholder of the same
        length is returned so the demo never crashes (predictions on the
        placeholder are meaningless).
    """
    try:
        y, sr = librosa.load(path, sr=sample_rate)
        # Strip leading/trailing silence quieter than 25 dB below peak.
        y, _ = librosa.effects.trim(y, top_db=25)
        # Crop or zero-pad to exactly target_length samples.
        if len(y) > target_length:
            y = y[:target_length]
        else:
            y = np.pad(y, (0, target_length - len(y)))
        return y, sr
    except Exception:
        # Was a bare `except:` — narrowed so KeyboardInterrupt/SystemExit
        # still propagate, and made noisy so failures are visible instead
        # of silently predicting on random noise.
        import warnings
        warnings.warn(f"Failed to load {path!r}; substituting a noise placeholder.")
        return np.random.randn(target_length) * 0.1, sample_rate
def extract_features(y, sr):
    """Build the per-frame feature matrix fed to the emotion models.

    Stacks zero-crossing rate (1 row), RMS energy (1 row) and 13 MFCCs
    into a time-major array of shape (n_frames, 15) — (352, 15) for a
    180000-sample input with the module's framing constants.
    """
    feature_rows = [
        librosa.feature.zero_crossing_rate(y, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH),
        librosa.feature.rms(y=y, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH),
        librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, hop_length=HOP_LENGTH),
    ]
    # vstack yields (15, n_frames); transpose so rows are frames.
    return np.vstack(feature_rows).T
def predict(audio_file, gender):
    """Run the gender-specific emotion model on an uploaded audio file.

    Parameters
    ----------
    audio_file : str
        Filepath delivered by the `gr.Audio(type="filepath")` input.
    gender : str
        "female" selects the female model; any other value (incl. None)
        falls back to the male model, preserving the original behavior.

    Returns
    -------
    dict[str, float]
        Every emotion label mapped to its confidence. `gr.Label` sorts
        this and shows the top entries.
    """
    y, sr = preprocess_audio(audio_file)
    features = extract_features(y, sr)
    features = np.expand_dims(features, axis=0)  # add batch dimension

    model = model_female if gender == "female" else model_male
    probs = model.predict(features)[0]  # first (only) item of the batch

    # BUG FIX: previously only {argmax_label: confidence} was returned, so
    # the Label widget's num_top_classes=3 could never show runners-up.
    # Returning the full distribution keeps the same top-1 entry while
    # letting the UI display the top-3 as configured.
    return {code_to_emotion[i]: float(p) for i, p in enumerate(probs)}
# Gradio interface: upload audio + pick gender -> top-3 emotion labels.
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.Audio(type="filepath", label="Upload Audio"),
        gr.Radio(choices=["female", "male"], label="Select Gender"),
    ],
    outputs=gr.Label(num_top_classes=3, label="Predicted Emotion"),
    # BUG FIX: the title's emoji was mojibake ("๐๏ธ" — UTF-8 decoded with
    # the wrong codec); restored to the intended microphone emoji.
    title="🎙️ Speech Emotion Recognition Demo",
    description="Upload an audio file and select gender to predict emotion.",
)
demo.launch()