|
|
# Environment / import preamble.
# TF_ENABLE_ONEDNN_OPTS must be set BEFORE TensorFlow is imported, which is
# why `os` is imported and the variable set ahead of the other imports.
import os

# Disable oneDNN custom ops so TF numeric results are reproducible and the
# startup banner about floating-point round-off differences is suppressed.
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'

import gradio as gr
import tensorflow as tf
from tensorflow import keras
import librosa
import numpy as np
import warnings

# Silence librosa/TensorFlow deprecation warnings in the console output.
warnings.filterwarnings('ignore')
|
|
|
|
|
|
|
|
# Path to the trained Keras model. NOTE(review): despite "weights" in the
# filename, it is loaded with load_model(), so it is presumably a full saved
# model (architecture + weights) — confirm against the training script.
MODEL_PATH = 'best_model1_weights.h5'

try:
    model = keras.models.load_model(MODEL_PATH)
    print("✅ Model loaded!")
except Exception as e:
    # Keep the app alive even if loading fails; downstream code must cope
    # with `model` being None.
    print(f"❌ Load error: {e}")
    model = None

# Class labels in the model's output-index order (7 classes, alphabetical).
# NOTE(review): assumes the model was trained with labels in exactly this
# order — verify against the label encoder used during training.
EMOTIONS = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']
|
|
|
|
|
def extract_features(data, sr):
    """Build a 1-D acoustic feature vector from a mono audio signal.

    Concatenates four frame-level features, each averaged over time:
    zero-crossing rate (1), RMS energy (1), 40 MFCCs, and the mel
    spectrogram (default 128 bands).

    Args:
        data: 1-D audio samples as returned by librosa.load.
        sr: sample rate of `data`.

    Returns:
        1-D numpy array of per-feature time averages.
    """
    frame_features = [
        librosa.feature.zero_crossing_rate(y=data),
        librosa.feature.rms(y=data),
        librosa.feature.mfcc(y=data, sr=sr, n_mfcc=40),
        librosa.feature.melspectrogram(y=data, sr=sr),
    ]
    # Each matrix is (n_features, n_frames); averaging over axis 1 collapses
    # the time dimension, then the pieces are stacked end to end.
    return np.hstack([matrix.mean(axis=1) for matrix in frame_features])
|
|
|
|
|
def preprocess_audio(audio_path):
    """Load an audio file and convert it into the model's input tensor.

    Args:
        audio_path: filesystem path to the recorded/uploaded audio.

    Returns:
        np.ndarray of shape (1, 2376, 1) on success,
        the sentinel string "SILENT" if the clip is near-silent,
        or None if loading/feature extraction fails.
    """
    try:
        # 2.5 s window starting 0.6 s in.
        # NOTE(review): presumably matches the crop used at training time
        # (RAVDESS-style) — confirm against the training pipeline.
        data, sr = librosa.load(audio_path, duration=2.5, offset=0.6, res_type='kaiser_fast')

        # RMS-energy gate: treat near-silent recordings as a special case so
        # the model is never asked to classify background noise.
        if np.sqrt(np.mean(data**2)) < 0.002:
            return "SILENT"

        # Base vector is ~170 dims (1 ZCR + 1 RMS + 40 MFCC + 128 mel);
        # stacked three times to approximate the trained input layout.
        base = extract_features(data, sr)
        features = np.concatenate((base, base, base))

        # Pad with zeros or truncate to the model's fixed length of 2376.
        if len(features) < 2376:
            features = np.pad(features, (0, 2376 - len(features)), 'constant')
        else:
            features = features[:2376]

        # Z-score normalize; guard against a zero-variance vector.
        std = np.std(features)
        if std > 0:
            features = (features - np.mean(features)) / std

        return features.reshape(1, 2376, 1)
    except Exception as e:
        # Surface the failure instead of swallowing it silently (the caller
        # still receives None and shows a generic error in the UI).
        print(f"❌ Preprocessing error: {e}")
        return None
|
|
|
|
|
def predict_emotion(audio_filepath):
    """Gradio handler: classify the emotion in a recorded/uploaded clip.

    Args:
        audio_filepath: path from the gr.Audio component, or None.

    Returns:
        A 3-tuple matching the Gradio outputs:
        (emotion label str, confidence str, {emotion: probability} dict).
    """
    if audio_filepath is None:
        return "No audio", "0%", {}
    if model is None:
        # Model failed to load at startup — fail gracefully instead of
        # raising AttributeError on model.predict below.
        return "Model not loaded", "0%", {}

    feat = preprocess_audio(audio_filepath)
    # BUGFIX: was `feat is "SILENT"` — identity comparison with a string
    # literal is interning-dependent (SyntaxWarning on CPython >= 3.8).
    # An isinstance check also avoids elementwise ndarray == str comparison.
    if isinstance(feat, str) and feat == "SILENT":
        return "NEUTRAL (Silence)", "100%", {"neutral": 1.0}
    if feat is None:
        return "Error", "0%", {}

    preds = model.predict(feat, verbose=0)[0]
    idx = int(np.argmax(preds))

    # Full probability distribution for the gr.Label bar chart.
    prob_dict = {EMOTIONS[i]: float(preds[i]) for i in range(len(EMOTIONS))}

    return EMOTIONS[idx].upper(), f"{preds[idx]*100:.2f}%", prob_dict
|
|
|
|
|
|
|
|
# --- Gradio UI layout ----------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🎙️ Speech Emotion Recognition (Fixed Labels)")
    with gr.Row():
        # Input: microphone recording or file upload, delivered to the
        # handler as a temp-file path (type="filepath").
        audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath")
        with gr.Column():
            # Outputs: top emotion, its confidence, and the full
            # 7-class probability distribution as a bar chart.
            emotion_out = gr.Textbox(label="Detected Emotion")
            conf_out = gr.Textbox(label="Confidence")
            label_chart = gr.Label(label="All Probabilities", num_top_classes=7)

    btn = gr.Button("Analyze", variant="primary")
    # Wire the button to the classifier; output order must match the
    # 3-tuple returned by predict_emotion.
    btn.click(predict_emotion, inputs=audio_input, outputs=[emotion_out, conf_out, label_chart])
|
|
|
|
|
if __name__ == "__main__":
    # NOTE(review): ssr_mode=False presumably works around Gradio
    # server-side-rendering issues in the deployment environment — confirm.
    demo.launch(ssr_mode=False)