Spaces:

codetocare
/

emotion_api

Runtime error

codetocare committed on Oct 22, 2025

Commit

0ae8940

verified ·

1 Parent(s): f80b262

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,40 +1,21 @@
-import gradio as gr
-from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor
-import torch
-import torchaudio
-# Load the pre-trained model and processor
-model_name = "bhadresh-savani/wav2vec2-large-robust-english-emotion"
-processor = Wav2Vec2Processor.from_pretrained(model_name)
-model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)
-# Emotion labels for this specific model
-labels = ['angry', 'calm', 'happy', 'sad']
 def predict_emotion(audio):
-    # audio: tuple -> (sample_rate, numpy array)
     speech, sr = audio
     if sr != 16000:
         resampler = torchaudio.transforms.Resample(sr, 16000)
-        speech = resampler(torch.tensor(speech))
     else:
-        speech = torch.tensor(speech)
     input_values = processor(speech, sampling_rate=16000, return_tensors="pt").input_values
     with torch.no_grad():
         logits = model(input_values).logits
     predicted_id = torch.argmax(logits, dim=-1).item()
-    emotion = labels[predicted_id]
     return f"Predicted Emotion: **{emotion}**"
-# Gradio interface
-interface = gr.Interface(
-    fn=predict_emotion,
-    inputs=gr.Audio(source="microphone", type="numpy", label="Record or Upload Speech"),
-    outputs=gr.Markdown(label="Emotion"),
-    title="Voice Emotion Recognition",
-    description="Speak or upload a WAV file to detect the emotion using a fine-tuned Wav2Vec2 model."
-)
-interface.launch()

+model_name = "Dpngtm/wav2vec2-emotion-recognition"
+processor  = Wav2Vec2Processor.from_pretrained(model_name)
+model      = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)
+labels = ["angry", "calm", "disgust", "fearful", "happy", "neutral", "sad", "surprised"]
 def predict_emotion(audio):
     speech, sr = audio
     if sr != 16000:
         resampler = torchaudio.transforms.Resample(sr, 16000)
+        speech    = resampler(torch.tensor(speech))
     else:
+        speech    = torch.tensor(speech)
     input_values = processor(speech, sampling_rate=16000, return_tensors="pt").input_values
     with torch.no_grad():
         logits = model(input_values).logits
     predicted_id = torch.argmax(logits, dim=-1).item()
+    emotion      = labels[predicted_id]
     return f"Predicted Emotion: **{emotion}**"