# emotion / app.py
# Hugging Face Space by notuser77 — commit 743ff29 ("Update app.py", verified).
# NOTE(review): the lines above were website page metadata pasted into the
# file (a SyntaxError as bare text); preserved here as comments.
import os

# Must be set BEFORE TensorFlow is imported: disable oneDNN custom ops so
# numeric results are reproducible across machines.
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'

import warnings

import gradio as gr
import librosa
import numpy as np
import tensorflow as tf
from tensorflow import keras

warnings.filterwarnings('ignore')

# --- 1. MODEL LOADING ---
MODEL_PATH = 'best_model1_weights.h5'
try:
    model = keras.models.load_model(MODEL_PATH)
    print("✅ Model loaded!")
except Exception as e:
    # Keep the app alive so the UI can still render; downstream code treats
    # model=None as "not loaded".
    print(f"❌ Load error: {e}")
    model = None

# --- 2. THE CORRECT ALPHABETICAL ORDER ---
# This is the order LabelEncoder uses by default
EMOTIONS = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']
def extract_features(data, sr):
    """Build a 1-D feature vector for one audio clip.

    Each feature (zero-crossing rate, RMS energy, 40 MFCCs, mel-spectrogram
    bands) is averaged over time frames, then all are concatenated.
    """
    feature_parts = (
        np.mean(librosa.feature.zero_crossing_rate(y=data).T, axis=0),
        np.mean(librosa.feature.rms(y=data).T, axis=0),
        np.mean(librosa.feature.mfcc(y=data, sr=sr, n_mfcc=40).T, axis=0),
        np.mean(librosa.feature.melspectrogram(y=data, sr=sr).T, axis=0),
    )
    return np.hstack(feature_parts)
def preprocess_audio(audio_path):
    """Load an audio file and shape it into the tensor the model expects.

    Returns:
        np.ndarray of shape (1, 2376, 1) on success,
        the string "SILENT" when the clip is below the RMS silence floor,
        or None when loading / feature extraction fails.
    """
    TARGET_LEN = 2376  # fixed input length the model was trained on

    try:
        # 2.5 s window starting 0.6 s in — presumably matches the training
        # pipeline's windowing; TODO confirm against the training notebook.
        data, sr = librosa.load(audio_path, duration=2.5, offset=0.6,
                                res_type='kaiser_fast')

        # Silence check: overall RMS below a small floor → treat as silence.
        if np.sqrt(np.mean(data ** 2)) < 0.002:
            return "SILENT"

        # Stack the base features 3x, then pad/trim to exactly TARGET_LEN.
        base = extract_features(data, sr)
        features = np.tile(base, 3)
        if len(features) < TARGET_LEN:
            features = np.pad(features, (0, TARGET_LEN - len(features)),
                              'constant')
        else:
            features = features[:TARGET_LEN]

        # Standardize; guard std > 0 to avoid dividing by zero on constant
        # input.
        std = np.std(features)
        if std > 0:
            features = (features - np.mean(features)) / std

        return features.reshape(1, TARGET_LEN, 1)
    except Exception as e:
        # FIX: the original swallowed the exception silently; log it (same
        # print-on-error style as model loading) and keep the None contract.
        print(f"❌ Preprocess error: {e}")
        return None
def predict_emotion(audio_filepath):
    """Classify the emotion in an audio file.

    Returns:
        (label, confidence_string, probability_dict) — one value per Gradio
        output component. Error/edge cases return placeholder values instead
        of raising, so the UI never crashes.
    """
    if audio_filepath is None:
        return "No audio", "0%", {}
    if model is None:
        # FIX: model loading can fail at startup (model=None); report it
        # instead of crashing with AttributeError on model.predict.
        return "Model not loaded", "0%", {}

    feat = preprocess_audio(audio_filepath)
    # FIX: the original used `feat is "SILENT"` — an identity comparison with
    # a string literal, which is not guaranteed to be True even when the
    # sentinel was returned. Compare by value instead.
    if isinstance(feat, str) and feat == "SILENT":
        return "NEUTRAL (Silence)", "100%", {"neutral": 1.0}
    if feat is None:
        return "Error", "0%", {}

    preds = model.predict(feat, verbose=0)[0]
    idx = int(np.argmax(preds))
    # Map probabilities onto the alphabetical label order (LabelEncoder's
    # default) declared in EMOTIONS.
    prob_dict = {EMOTIONS[i]: float(preds[i]) for i in range(len(EMOTIONS))}
    return EMOTIONS[idx].upper(), f"{preds[idx] * 100:.2f}%", prob_dict
# --- 3. INTERFACE ---
# --- 3. INTERFACE ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🎙️ Speech Emotion Recognition (Fixed Labels)")

    # Input on the left, the three result widgets stacked on the right.
    with gr.Row():
        audio_input = gr.Audio(sources=["microphone", "upload"],
                               type="filepath")
        with gr.Column():
            emotion_out = gr.Textbox(label="Detected Emotion")
            conf_out = gr.Textbox(label="Confidence")
            label_chart = gr.Label(label="All Probabilities",
                                   num_top_classes=7)

    btn = gr.Button("Analyze", variant="primary")
    btn.click(
        predict_emotion,
        inputs=audio_input,
        outputs=[emotion_out, conf_out, label_chart],
    )

if __name__ == "__main__":
    demo.launch(ssr_mode=False)