Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import torch | |
| import torchaudio | |
| from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor | |
# Hugging Face Hub checkpoint used for both the input processor and the classifier.
model_name = "Dpngtm/wav2vec2-emotion-recognition"

# The processor prepares raw audio for the network; the model produces one
# logit per emotion class. Both are fetched once at import time.
processor = Wav2Vec2Processor.from_pretrained(model_name)
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)

# Class names indexed by the model's output id.
# NOTE(review): order is taken from the model card per the original comment —
# confirm it matches the checkpoint's id2label mapping.
labels = [
    "angry",
    "calm",
    "disgust",
    "fearful",
    "happy",
    "neutral",
    "sad",
    "surprised",
]
def predict_emotion(audio):
    """Classify the emotion of a recorded/uploaded clip.

    Args:
        audio: Value produced by ``gr.Audio(type="numpy")``: a
            ``(sample_rate, samples)`` tuple where ``samples`` is a NumPy
            array shaped ``(n,)`` or ``(n, channels)``, or ``None`` when the
            user submitted without providing audio.

    Returns:
        A Markdown string naming the predicted emotion, or a short notice
        when no usable audio was supplied.
    """
    target_rate = 16000  # rate passed to the processor below
    no_audio_message = "No audio received. Please record or upload a clip."

    if audio is None:
        return no_audio_message

    # Gradio's numpy format is (sample_rate, data) -- rate first.
    sr, speech = audio
    waveform = torch.as_tensor(speech)
    if waveform.numel() == 0:
        return no_audio_message

    # Gradio delivers integer PCM (typically int16); scale it to floats in
    # [-1, 1] so the resampler and the processor receive float audio.
    if waveform.is_floating_point():
        waveform = waveform.to(torch.float32)
    else:
        full_scale = float(torch.iinfo(waveform.dtype).max) + 1.0
        waveform = waveform.to(torch.float32) / full_scale

    # Multi-channel clips arrive as (samples, channels); average to mono.
    if waveform.dim() > 1:
        waveform = waveform.mean(dim=-1)

    if sr != target_rate:
        resampler = torchaudio.transforms.Resample(sr, target_rate)
        waveform = resampler(waveform)

    # Hand the processor a NumPy array, its documented input type.
    input_values = processor(
        waveform.numpy(), sampling_rate=target_rate, return_tensors="pt"
    ).input_values

    with torch.no_grad():
        logits = model(input_values).logits

    predicted_id = torch.argmax(logits, dim=-1).item()
    emotion = labels[predicted_id]
    return f"Predicted Emotion: **{emotion}**"
# Gradio interface: one audio input, Markdown output.
# `sources` (a list) is the Gradio 4+ spelling of the old `source` keyword,
# which current Gradio no longer accepts. Both microphone and upload are
# enabled so the widget matches its "Speak or Upload Audio" label.
interface = gr.Interface(
    fn=predict_emotion,
    inputs=gr.Audio(
        sources=["microphone", "upload"],
        type="numpy",  # handler receives a (sample_rate, samples) tuple
        label="Speak or Upload Audio",
    ),
    outputs=gr.Markdown(label="Detected Emotion"),
    title="Voice Emotion Recognition",
    description="This app detects the emotional tone of your speech using a fine-tuned Wav2Vec2 model.",
)

interface.launch()