Spaces:

E-motionAssistant
/

Space5

Running

App Files Community

Raemih committed 14 days ago

Commit

cf64064

verified ·

1 Parent(s): ec48379

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -57

app.py CHANGED Viewed

@@ -1,68 +1,48 @@
 import gradio as gr
 import torch
 import librosa
-from transformers import AutoFeatureExtractor
-from model import MMSForMultilingualSER
-MODEL_ID = "E-motionAssistant/mms-300m-multilingual-ser"
-emotion_labels = [
-    "neutral",
-    "happy",
-    "sad",
-    "anger",
-    "fear"
-]
-device = "cpu"
-print("Loading model...")
-feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_ID)
-emotion_model = MMSForMultilingualSER.from_pretrained(
-    MODEL_ID,
-    ignore_mismatched_sizes=True
-)
-emotion_model.eval()
-print("Model loaded")
-def detect_emotion(audio):
     speech, sr = librosa.load(audio, sr=16000)
-    inputs = feature_extractor(
-        speech,
-        sampling_rate=16000,
-        return_tensors="pt"
-    )
     with torch.no_grad():
-        logits = emotion_model(**inputs)
-    pred = torch.argmax(logits, dim=-1).item()
-    return emotion_labels[pred]
-with gr.Blocks() as demo:
-    gr.Markdown("# Emotion Regulation Assistant")
-    with gr.Tab("Emotion Detection"):
-        audio_input = gr.Audio(type="filepath")
-        output = gr.Textbox(label="Detected Emotion")
-        btn = gr.Button("Detect Emotion")
-        btn.click(
-            fn=detect_emotion,
-            inputs=audio_input,
-            outputs=output
-        )
-demo.launch()

 import gradio as gr
 import torch
 import librosa
+from transformers import Wav2Vec2FeatureExtractor, HubertForSequenceClassification
+# Load model and processor
+model_id = "superb/hubert-base-superb-er"
+feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_id)
+model = HubertForSequenceClassification.from_pretrained(model_id)
+def predict_emotion(audio):
+    if audio is None:
+        return "Please upload an audio file."
+    # Load and resample audio to 16kHz
+    # Gradio provides the path to the temporary file
     speech, sr = librosa.load(audio, sr=16000)
+    # Preprocess
+    inputs = feature_extractor(speech, sampling_rate=16000, return_tensors="pt", padding=True)
+    # Inference
     with torch.no_grad():
+        logits = model(**inputs).logits
+    # Get probabilities via Softmax
+    probs = torch.nn.functional.softmax(logits, dim=-1)
+    # Map to labels
+    # Model labels: 0: neu, 1: hap, 2: ang, 3: sad
+    labels = ["Neutral", "Happy", "Angry", "Sad"]
+    results = {labels[i]: float(probs[0][i]) for i in range(len(labels))}
+    return results
+# Define the Gradio Interface
+demo = gr.Interface(
+    fn=predict_emotion,
+    inputs=gr.Audio(type="filepath", label="Upload Audio or Record"),
+    outputs=gr.Label(label="Detected Emotion"),
+    title="HuBERT Emotion Recognition",
+    description="Upload an audio clip to detect the primary emotion. This model (hubert-base-superb-er) is fine-tuned for Neutral, Happy, Angry, and Sad classifications.",
+    examples=[], # You can add paths to example .wav files here
+    theme="soft"
+)
+if __name__ == "__main__":
+    demo.launch()