Spaces:

notuser77
/

ravdess

Sleeping

App Files Files Community

notuser77 committed on Dec 20, 2025

Commit

3c7ab19

verified ·

1 Parent(s): 76d8f21

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -51

app.py CHANGED Viewed

@@ -1,103 +1,87 @@
 import os
 import huggingface_hub
 from speechbrain.inference.classifiers import EncoderClassifier
-# 1. Corrected Monkey Patch for SpeechBrain 1.0.0 + huggingface-hub 0.23+
 # Keep a reference to the unpatched function so patched_download can delegate to it.
 orig_download = huggingface_hub.hf_hub_download
 def patched_download(*args, **kwargs):
     # Wrapper around the saved hf_hub_download (orig_download). Forwards all
     # arguments and returns whatever orig_download returns; when a download of
     # "custom.py" fails as not-found, returns the path of a local placeholder
     # file instead. Every other exception is re-raised.
-    # Rename the 'use_auth_token' keyword to 'token' before forwarding
     if 'use_auth_token' in kwargs:
         kwargs['token'] = kwargs.pop('use_auth_token')
-    # Requested filename: the 'filename' keyword, else the second positional argument
     fname = kwargs.get('filename') or (args[1] if len(args) > 1 else None)
     try:
         return orig_download(*args, **kwargs)
     except Exception as e:
-        # If 'custom.py' is missing (404), return a dummy file path instead of None
-        # to prevent the 'NoneType' crash in pathlib.
         # Not-found is detected by substring match on the exception message.
         if fname == "custom.py" and ("404" in str(e) or "Not Found" in str(e)):
             # Placeholder lives in the current working directory; it is created
             # only if it does not already exist.
             dummy_path = os.path.abspath("dummy_custom.py")
             if not os.path.exists(dummy_path):
                 with open(dummy_path, "w") as f:
-                    f.write("# Dummy file for SpeechBrain compatibility\n")
             return dummy_path
         # Any other failure propagates to the caller.
         raise e
 # Install the wrapper on the huggingface_hub module.
 # NOTE(review): code that bound hf_hub_download by name before this line keeps
 # the original function — confirm SpeechBrain resolves it through the module attribute.
 huggingface_hub.hf_hub_download = patched_download
-# 2. Load the model
-print("Loading SpeechBrain ECAPA feature extractor...")
-feature_extractor = EncoderClassifier.from_hparams(
-    source="speechbrain/spkrec-ecapa-voxceleb",
-    savedir="pretrained_models/spkrec-ecapa-voxceleb"
-)
-# Import SpeechBrain AFTER the patch
 # NOTE(review): EncoderClassifier is also imported at the top of the file (from
 # speechbrain.inference.classifiers) and already used above, so this import
 # does not precede its first use.
-from speechbrain.inference.speaker import EncoderClassifier
-# Suppress all warnings (no category filter is given), including scikit-learn version warnings
 warnings.filterwarnings("ignore")
-# --- STEP 2: LOAD MODELS ---
-# Prefer the long-named .pkl; fall back to 'svm_model.joblib' when it is not on disk
 MODEL_PATH = 'ravdess_svm_speechbrain_ecapa_voxceleb_no_processor_cv_8class.pkl'
 if not os.path.exists(MODEL_PATH):
     MODEL_PATH = 'svm_model.joblib'
 print(f"Loading SVM classifier: {MODEL_PATH}")
 # NOTE(review): joblib.load unpickles the file — only load model files from a trusted source.
-model = joblib.load(MODEL_PATH)
 print("Loading SpeechBrain ECAPA feature extractor...")
-# This will now successfully skip the missing custom.py
 feature_extractor = EncoderClassifier.from_hparams(
     source="speechbrain/spkrec-ecapa-voxceleb",
-    savedir="pretrained_models/ecapa"
 )
-# --- STEP 3: PREDICTION LOGIC ---
 def predict_emotion(audio_path):
     # Callback wired into the Gradio interface: classify the audio file at audio_path.
     # Returns a prompt string when audio_path is None; otherwise a
     # {class label: probability} dict, or the predicted label as a string
     # when predict_proba raises.
     if audio_path is None:
         return "Please upload an audio file."
-    # Load audio and resample to 16kHz (ECAPA requirement)
     signal, fs = torchaudio.load(audio_path)
-    if fs != 16000:
-        resampler = torchaudio.transforms.Resample(orig_freq=fs, new_freq=16000)
-        signal = resampler(signal)
-    # Mono conversion: average the channels when there is more than one
-    if signal.shape[0] > 1:
-        signal = torch.mean(signal, dim=0, keepdim=True)
-    # Extract 192-D ECAPA-TDNN Embeddings
     with torch.no_grad():
         embeddings = feature_extractor.encode_batch(signal)
-        embeddings = embeddings.squeeze().cpu().numpy().reshape(1, -1)
-    # Create DataFrame with the specific feature names expected by your SVM
-    feature_names = [f"{i}_speechbrain_embedding" for i in range(192)]
-    X = pd.DataFrame(embeddings, columns=feature_names)
-    # Predict Emotion
-    try:
-        # Returns a dictionary of {Emotion: Confidence}
-        probs = model.predict_proba(X)[0]
-        return {str(model.classes_[i]): float(probs[i]) for i in range(len(model.classes_))}
-    except Exception:
-        # Fallback if probability was not enabled during training; note that any
-        # exception raised in the try block takes this path, not only that case
-        prediction = model.predict(X)[0]
-        return str(prediction)
-# --- STEP 4: GRADIO INTERFACE ---
 # One audio input, handed to predict_emotion as a file path, and one label output.
 demo = gr.Interface(
     fn=predict_emotion,
-    inputs=gr.Audio(type="filepath", label="Upload Audio (WAV/MP3)"),
     outputs=gr.Label(label="Detected Emotion"),
-    title="RAVDESS Emotion Classifier",
-    description="Classifies emotions using ECAPA-TDNN speaker embeddings and a Support Vector Machine.",
-    allow_flagging="never"
 )
 if __name__ == "__main__":

 import os
+import joblib
+import pandas as pd
+import numpy as np
+import torch
+import torchaudio
+import warnings  # <--- This fixes the NameError
+import gradio as gr
 import huggingface_hub
 from speechbrain.inference.classifiers import EncoderClassifier
+# 1. ROBUST MONKEY PATCH
+# This fixes the 'use_auth_token' vs 'token' error and the 'NoneType' crash
 # Keep a reference to the unpatched function so patched_download can delegate to it.
 orig_download = huggingface_hub.hf_hub_download
 def patched_download(*args, **kwargs):
     # Wrapper around the saved hf_hub_download (orig_download). Forwards all
     # arguments and returns whatever orig_download returns; when a download of
     # "custom.py" fails as not-found, returns the path of a local placeholder
     # file instead. Every other exception is re-raised.
     # Rename the 'use_auth_token' keyword to 'token' before forwarding.
     if 'use_auth_token' in kwargs:
         kwargs['token'] = kwargs.pop('use_auth_token')
     # Requested filename: the 'filename' keyword, else the second positional argument.
     fname = kwargs.get('filename') or (args[1] if len(args) > 1 else None)
     try:
         return orig_download(*args, **kwargs)
     except Exception as e:
+        # If SpeechBrain looks for 'custom.py' and it's missing (404),
+        # return a dummy file path instead of None to prevent a crash.
         # Not-found is detected by substring match on the exception message.
         if fname == "custom.py" and ("404" in str(e) or "Not Found" in str(e)):
             # Placeholder lives in the current working directory; it is created
             # only if it does not already exist.
             dummy_path = os.path.abspath("dummy_custom.py")
             if not os.path.exists(dummy_path):
                 with open(dummy_path, "w") as f:
+                    f.write("# Dummy file for compatibility\n")
             return dummy_path
         # Any other failure propagates to the caller.
         raise e
 # Install the wrapper on the huggingface_hub module.
 # NOTE(review): code that bound hf_hub_download by name before this line keeps
 # the original function — confirm SpeechBrain resolves it through the module attribute.
 huggingface_hub.hf_hub_download = patched_download
 # Suppresses all warnings; no category filter is given.
 warnings.filterwarnings("ignore")
+# 2. LOAD MODELS
+# Load the SVM classifier: prefer the long-named .pkl, fall back to 'svm_model.joblib'
 MODEL_PATH = 'ravdess_svm_speechbrain_ecapa_voxceleb_no_processor_cv_8class.pkl'
 if not os.path.exists(MODEL_PATH):
     MODEL_PATH = 'svm_model.joblib'
 print(f"Loading SVM classifier: {MODEL_PATH}")
 # NOTE(review): joblib.load unpickles the file — only load model files from a trusted source.
+svm_model = joblib.load(MODEL_PATH)
+# Load SpeechBrain Feature Extractor
 print("Loading SpeechBrain ECAPA feature extractor...")
 feature_extractor = EncoderClassifier.from_hparams(
     source="speechbrain/spkrec-ecapa-voxceleb",
+    savedir="pretrained_models/spkrec-ecapa-voxceleb"
 )
+# 3. DEFINE INFERENCE
 # Label names that predict_emotion indexes with integer predictions.
 # NOTE(review): the order must match the SVM's class encoding — confirm against training.
+EMOTIONS = ['neutral', 'calm', 'happy', 'sad', 'angry', 'fearful', 'disgust', 'surprised']
 def predict_emotion(audio_path):
     # Callback wired into the Gradio interface: classify the audio file at audio_path.
     # Returns a prompt string when audio_path is None, the EMOTIONS entry at the
     # predicted index when the SVM yields an integer, and the raw prediction otherwise.
     if audio_path is None:
         return "Please upload an audio file."
+    # Load audio (no further preprocessing is applied in this version)
     signal, fs = torchaudio.load(audio_path)
     # NOTE(review): fs is unused and the waveform reaches the encoder at its native
     # sample rate and channel count; the removed version resampled to 16 kHz and
     # averaged channels to mono first. Confirm that dropping those steps is intended.
+    # Extract ECAPA-TDNN Embeddings
     with torch.no_grad():
         embeddings = feature_extractor.encode_batch(signal)
+        # Convert to numpy and reshape into a single row, shape (1, n_features), for the SVM
+        embeddings = embeddings.cpu().numpy().squeeze().reshape(1, -1)
+    # Predict with SVM
+    prediction = svm_model.predict(embeddings)[0]
+    # Return mapped label if the prediction is an integer, otherwise return it unchanged
     # NOTE(review): assumes integer class ids are 0-based positions in EMOTIONS — confirm.
+    if isinstance(prediction, (int, np.integer)):
+        return EMOTIONS[prediction]
+    return prediction
+# 4. GRADIO INTERFACE
 # One audio input, handed to predict_emotion as a file path, and one label output.
 demo = gr.Interface(
     fn=predict_emotion,
+    inputs=gr.Audio(type="filepath", label="Upload Speech (WAV)"),
     outputs=gr.Label(label="Detected Emotion"),
+    title="Speech Emotion Recognition (RAVDESS)",
+    description="This app uses SpeechBrain ECAPA-TDNN embeddings and a pre-trained SVM to classify emotions."
 )
 if __name__ == "__main__":