Spaces:
Runtime error
Runtime error
File size: 1,417 Bytes
16dbaa3 0ae8940 16dbaa3 2051424 16dbaa3 0ae8940 2051424 16dbaa3 2051424 16dbaa3 2051424 16dbaa3 2051424 16dbaa3 2051424 16dbaa3 2051424 16dbaa3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
import gradio as gr
import torch
import torchaudio
from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor
# Fetch the feature processor and the classification head for the same
# Hugging Face Hub checkpoint.
model_name = "Dpngtm/wav2vec2-emotion-recognition"
processor = Wav2Vec2Processor.from_pretrained(model_name)
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)

# Emotion labels from the model card; predict_emotion indexes this list with
# the argmax of the model's logits, so the order matters.
labels = [
    "angry",
    "calm",
    "disgust",
    "fearful",
    "happy",
    "neutral",
    "sad",
    "surprised",
]
# Emotion prediction function
def predict_emotion(audio):
    """Classify the emotion in a recorded clip and return it as Markdown.

    Args:
        audio: The value produced by ``gr.Audio(type="numpy")``: a
            ``(sample_rate, samples)`` pair, or ``None`` when nothing was
            recorded. The legacy ``(samples, sample_rate)`` order is still
            accepted; whichever element is a scalar is taken as the rate.

    Returns:
        A Markdown string naming the predicted emotion, or a short notice
        when no usable audio was supplied.
    """
    no_audio_message = "No audio received. Please record a clip and try again."
    target_sr = 16000  # rate the clip is resampled to before the processor call

    if audio is None:
        return no_audio_message

    # Tell the sample rate and the waveform apart by rank instead of by
    # position, so both tuple orders work.
    first, second = (torch.as_tensor(part) for part in audio)
    if first.ndim == 0:
        sr, speech = int(first), second
    else:
        sr, speech = int(second), first

    if speech.numel() == 0:
        return no_audio_message

    # Microphone input arrives as integer PCM; Resample and the model need
    # floating-point samples, so rescale integers into [-1, 1].
    if speech.is_floating_point():
        speech = speech.to(torch.float32)
    else:
        full_scale = float(torch.iinfo(speech.dtype).max)
        speech = speech.to(torch.float32) / full_scale

    # Mix multi-channel audio down to mono.
    # NOTE(review): assumes Gradio's (samples, channels) layout - confirm.
    if speech.ndim > 1:
        speech = speech.mean(dim=-1)

    if sr != target_sr:
        speech = torchaudio.transforms.Resample(sr, target_sr)(speech)

    inputs = processor(speech.numpy(), sampling_rate=target_sr, return_tensors="pt")
    with torch.no_grad():
        logits = model(inputs.input_values).logits

    predicted_id = torch.argmax(logits, dim=-1).item()
    emotion = labels[predicted_id]
    return f"Predicted Emotion: **{emotion}**"
# Gradio interface
# Gradio 4 replaced the Audio component's `source=` keyword with `sources=`
# (a list), and passing the old keyword raises TypeError at startup. Try the
# original spelling first and fall back, so the app starts on either version.
try:
    audio_input = gr.Audio(source="microphone", type="numpy", label="Speak or Upload Audio")
except TypeError:
    audio_input = gr.Audio(sources=["microphone"], type="numpy", label="Speak or Upload Audio")

interface = gr.Interface(
    fn=predict_emotion,
    inputs=audio_input,
    outputs=gr.Markdown(label="Detected Emotion"),
    title="Voice Emotion Recognition",
    description="This app detects the emotional tone of your speech using a fine-tuned Wav2Vec2 model.",
)
interface.launch()
|