Update app.py

app.py CHANGED

@@ -1,5 +1,4 @@
 import os
-# Fix for Hugging Face/Gradio threading issues
 os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
 
 import gradio as gr
@@ -12,23 +11,19 @@ import warnings
 warnings.filterwarnings('ignore')
 
 # --- 1. MODEL LOADING ---
-# Ensure your file is named 'best_model1_weights.h5' in your repository
 MODEL_PATH = 'best_model1_weights.h5'
-
 try:
-    # Loading the complete model to avoid architecture mismatch (9 vs 13 layers)
     model = keras.models.load_model(MODEL_PATH)
+    print("✅ Model loaded!")
 except Exception as e:
+    print(f"❌ Load error: {e}")
     model = None
 
+# --- 2. THE CORRECT ALPHABETICAL ORDER ---
+# This is the order LabelEncoder uses by default
 EMOTIONS = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']
 
 def extract_features(data, sr):
-    """Extracts features in the exact order and mean-aggregation used in Kaggle."""
     zcr = np.mean(librosa.feature.zero_crossing_rate(y=data).T, axis=0)
     rms = np.mean(librosa.feature.rms(y=data).T, axis=0)
     mfcc = np.mean(librosa.feature.mfcc(y=data, sr=sr, n_mfcc=40).T, axis=0)
@@ -37,86 +32,57 @@ def extract_features(data, sr):
 
 def preprocess_audio(audio_path):
     try:
-        # Load exactly 2.5s with an offset (Kaggle notebook standard)
-        # res_type='kaiser_fast' ensures speed and consistency
         data, sr = librosa.load(audio_path, duration=2.5, offset=0.6, res_type='kaiser_fast')
 
-        if rms_val < 0.005:
+        # Silence check
+        if np.sqrt(np.mean(data**2)) < 0.002:
             return "SILENT"
 
-        features = np.concatenate((base_feat, base_feat, base_feat))
+        # Stacking features to reach 2376
+        base = extract_features(data, sr)
+        features = np.concatenate((base, base, base))
 
-        # Pad or Truncate to exactly 2376
         if len(features) < 2376:
             features = np.pad(features, (0, 2376 - len(features)), 'constant')
         else:
             features = features[:2376]
 
-        # This is the most important step to prevent "Angry" bias
+        # Standardize
         std = np.std(features)
         if std > 0:
            features = (features - np.mean(features)) / std
 
        return features.reshape(1, 2376, 1)
     except Exception as e:
-        print(f"Preprocessing Error: {e}")
         return None
 
-# --- 3. PREDICTION FUNCTION ---
 def predict_emotion(audio_filepath):
-    if model is None:
-        return "Model not found", "0%", {}
-    if audio_filepath is None:
-        return "Please record audio", "0%", {}
-
-    processed_data = preprocess_audio(audio_filepath)
-
-    if processed_data is "SILENT":
-        return "Silence Detected", "100%", {"neutral": 1.0}
-
-    preds = model.predict(processed_data, verbose=0)[0]
-
-    # Identify top prediction
+    if audio_filepath is None: return "No audio", "0%", {}
+
+    feat = preprocess_audio(audio_filepath)
+    if feat == "SILENT": return "NEUTRAL (Silence)", "100%", {"neutral": 1.0}
+    if feat is None: return "Error", "0%", {}
+
+    preds = model.predict(feat, verbose=0)[0]
     idx = np.argmax(preds)
-    confidence = preds[idx]
 
-    # Map
+    # Map probabilities to the ALPHABETICAL list
    prob_dict = {EMOTIONS[i]: float(preds[i]) for i in range(len(EMOTIONS))}
 
-    return EMOTIONS[idx].upper(), f"{confidence*100:.2f}%", prob_dict
+    return EMOTIONS[idx].upper(), f"{preds[idx]*100:.2f}%", prob_dict
 
+# --- 3. INTERFACE ---
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("## 🎙️ Speech Emotion Recognition")
-    gr.Markdown("Speak for at least 3 seconds. The model will analyze the last 2.5 seconds.")
-
+    gr.Markdown("## 🎙️ Speech Emotion Recognition (Fixed Labels)")
     with gr.Row():
+        audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath")
         with gr.Column():
-        with gr.Column():
-            with gr.Row():
-                emotion_out = gr.Textbox(label="Detected Emotion")
-                conf_out = gr.Textbox(label="Confidence Level")
-            label_chart = gr.Label(label="Confidence Distribution", num_top_classes=7)
+            emotion_out = gr.Textbox(label="Detected Emotion")
+            conf_out = gr.Textbox(label="Confidence")
+            label_chart = gr.Label(label="All Probabilities", num_top_classes=7)
 
-        inputs=audio_input,
-        outputs=[emotion_out, conf_out, label_chart]
-    )
+    btn = gr.Button("Analyze", variant="primary")
+    btn.click(predict_emotion, inputs=audio_input, outputs=[emotion_out, conf_out, label_chart])
 
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", ssr_mode=False)
+    demo.launch(ssr_mode=False)
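The core fix in this commit is the label order. The new comment above EMOTIONS says it is the order LabelEncoder uses by default, which is easy to confirm: scikit-learn's LabelEncoder stores classes_ in sorted order, so softmax index i maps to the i-th name alphabetically. A minimal check (the unsorted input list below is illustrative, not taken from the training code):

from sklearn.preprocessing import LabelEncoder

# Fit on the seven emotion names in arbitrary order...
le = LabelEncoder()
le.fit(['sad', 'happy', 'angry', 'surprise', 'neutral', 'fear', 'disgust'])

# ...and classes_ comes back sorted, matching EMOTIONS in app.py.
print(list(le.classes_))  # ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']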
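The diff never shows the return line of extract_features, so the exact feature stack is an assumption; if it simply hstacks the three arrays shown (1 ZCR mean + 1 RMS mean + 40 MFCC means), each clip yields 42 values, tripling gives 126, and the pad step stretches that to the model's fixed 2376-length input. A sketch under that assumption:

import numpy as np
import librosa

# Stand-in for a real recording: 2.5 s of noise at librosa's default rate.
sr = 22050
data = np.random.randn(int(sr * 2.5)).astype(np.float32)

zcr = np.mean(librosa.feature.zero_crossing_rate(y=data).T, axis=0)       # shape (1,)
rms = np.mean(librosa.feature.rms(y=data).T, axis=0)                      # shape (1,)
mfcc = np.mean(librosa.feature.mfcc(y=data, sr=sr, n_mfcc=40).T, axis=0)  # shape (40,)

base = np.hstack((zcr, rms, mfcc))             # 42 values (assumed return of extract_features)
features = np.concatenate((base, base, base))  # 126 values
features = np.pad(features, (0, 2376 - len(features)), 'constant')
print(features.reshape(1, 2376, 1).shape)      # (1, 2376, 1)

Under that assumption the input is mostly padding zeros: only the first 126 positions carry signal, which is worth keeping in mind when judging model behavior.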
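One behavioral note: the commit removes the old "if model is None" guard, so a failed load now surfaces as an exception inside predict_emotion rather than a friendly message. A hypothetical local smoke test (the probe.wav filename and the soundfile dependency are assumptions, not part of the repo):

import numpy as np
import soundfile as sf

# Write a quiet 3 s, 220 Hz tone; predict_emotion reads 2.5 s starting 0.6 s in.
sr = 22050
t = np.linspace(0, 3.0, int(sr * 3.0), endpoint=False)
sf.write('probe.wav', 0.1 * np.sin(2 * np.pi * 220 * t), sr)

label, confidence, probs = predict_emotion('probe.wav')
print(label, confidence, probs)

At 0.1 amplitude the tone's RMS (about 0.07) clears the 0.002 silence threshold, so this exercises the full model path rather than the early "SILENT" return.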