Spaces:

codetocare
/

emotion_api

Runtime error

File size: 1,417 Bytes

16dbaa3
 
 
 
 
 
0ae8940
16dbaa3
 
2051424
16dbaa3
0ae8940
2051424
16dbaa3
2051424
 
 
 
16dbaa3
2051424
16dbaa3
2051424
 
16dbaa3
2051424
 
 
 
16dbaa3
2051424
16dbaa3

import gradio as gr
import torch
import torchaudio
from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor

# Load the pretrained emotion classifier and its input processor from the
# Hugging Face Hub. Both are created once at import time and reused by
# predict_emotion for every request.
# NOTE(review): Wav2Vec2Processor normally wraps a feature extractor plus a
# tokenizer; if this checkpoint ships no tokenizer files the load may fail --
# confirm against the repository contents.
model_name = "Dpngtm/wav2vec2-emotion-recognition"
processor = Wav2Vec2Processor.from_pretrained(model_name)
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)

# Emotion labels from the model card.
# predict_emotion indexes this list with the argmax of the model's logits.
# NOTE(review): the order presumably has to match the checkpoint's class ids --
# verify against model.config.id2label.
labels = ["angry", "calm", "disgust", "fearful", "happy", "neutral", "sad", "surprised"]

# Emotion prediction function
def predict_emotion(audio):
    """Classify the emotion in one audio clip and return it as Markdown.

    Args:
        audio: The value Gradio passes for an ``Audio`` input configured with
            ``type="numpy"``: a ``(sample_rate, samples)`` tuple, or ``None``
            when nothing was recorded. ``samples`` is a NumPy array, either
            1-D (mono) or 2-D shaped ``(samples, channels)``.

    Returns:
        A Markdown string naming the predicted emotion, or a short prompt
        asking for audio when the input is missing or empty.
    """
    target_sr = 16000  # sampling rate handed to the processor below

    if audio is None:
        return "No audio received. Please record or upload a clip."

    # Gradio delivers (sample_rate, data) -- in that order.
    sr, samples = audio
    waveform = torch.as_tensor(samples)
    if waveform.numel() == 0:
        return "No audio received. Please record or upload a clip."

    # Integer PCM must become float before resampling / feature extraction;
    # scale by the dtype's max so the signal lands in roughly [-1, 1].
    if waveform.is_floating_point():
        waveform = waveform.to(torch.float32)
    else:
        full_scale = torch.iinfo(waveform.dtype).max
        waveform = waveform.to(torch.float32) / full_scale

    # Multi-channel recordings arrive as (samples, channels); average the
    # channels so the model receives a single mono signal.
    if waveform.ndim > 1:
        waveform = waveform.mean(dim=-1)

    if sr != target_sr:
        resampler = torchaudio.transforms.Resample(int(sr), target_sr)
        waveform = resampler(waveform)

    input_values = processor(
        waveform.numpy(), sampling_rate=target_sr, return_tensors="pt"
    ).input_values

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(input_values).logits

    predicted_id = torch.argmax(logits, dim=-1).item()
    emotion = labels[predicted_id]
    return f"Predicted Emotion: **{emotion}**"

# Gradio interface
# Gradio 4 replaced gr.Audio's `source` string with a `sources` list, and the
# old keyword is rejected there. Pick whichever keyword the installed major
# version understands. On Gradio 4+ both microphone and upload are enabled so
# the widget matches its "Speak or Upload Audio" label; Gradio 3 only allows
# a single source, so it keeps the microphone.
if int(gr.__version__.split(".")[0]) >= 4:
    _audio_source_kwargs = {"sources": ["microphone", "upload"]}
else:
    _audio_source_kwargs = {"source": "microphone"}

interface = gr.Interface(
    fn=predict_emotion,
    inputs=gr.Audio(
        type="numpy",
        label="Speak or Upload Audio",
        **_audio_source_kwargs,
    ),
    outputs=gr.Markdown(label="Detected Emotion"),
    title="Voice Emotion Recognition",
    description="This app detects the emotional tone of your speech using a fine-tuned Wav2Vec2 model."
)

# Start the web server; this runs at import time, as in the original script.
interface.launch()