Finetuned Superb SER Model for Spanish
Speech Emotion Recognition (SER) model for audio recordings in Spanish.
How to use it?
import torch
import librosa
from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2FeatureExtractor
# Load the fine-tuned sequence-classification checkpoint used for emotion prediction.
model = Wav2Vec2ForSequenceClassification.from_pretrained("pollitoconpapass/superb-ser-finetuned-spanish-v3.5")
# The feature extractor is loaded from the base checkpoint ("superb/wav2vec2-base-superb-er"),
# not from the fine-tuned repository.
# NOTE(review): presumably the fine-tuned model expects the same preprocessing as its base — confirm.
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("superb/wav2vec2-base-superb-er")
# Labels of the fine-tuned model: maps each generic "LABEL_N" class name
# to the Spanish emotion name shown to the user.
EMOTIONS_DICT = dict(
    LABEL_0="miedo",
    LABEL_1="triste",
    LABEL_2="neutral",
    LABEL_3="enojo",
    LABEL_4="disgusto",
    LABEL_5="feliz",
)
def load_audio(file_path, sampling_rate=16000):
    """Load an audio file as a mono waveform at the requested sampling rate.

    Args:
        file_path: Path of the audio file, passed straight to ``librosa.load``.
        sampling_rate: Target sampling rate in Hz, forwarded as ``sr``.
            Defaults to 16000, the rate this example also passes to the
            feature extractor; the two values must agree.

    Returns:
        The audio samples returned by ``librosa.load``. The sampling rate
        that ``librosa.load`` returns alongside them is discarded, since
        it was requested explicitly.
    """
    speech, _ = librosa.load(file_path, sr=sampling_rate, mono=True)
    return speech
# Path of the audio clip to classify; replace it with your own file.
audio_path = "/content/your-own-audio.wav"
audio = load_audio(audio_path)

# Convert the waveform into model inputs as PyTorch tensors. The sampling
# rate given here has to match the rate the audio was loaded at (16000 Hz).
inputs = feature_extractor(audio, sampling_rate=16000, padding=True, return_tensors="pt")

# Inference only: disable autograd so no computation graph is built for
# the forward pass.
with torch.no_grad():
    logits = model(**inputs).logits

# A single clip was passed in, so row 0 holds its per-class probabilities.
probs = torch.nn.functional.softmax(logits, dim=-1)[0]
predicted_ids = torch.argmax(logits, dim=-1)
labels = [model.config.id2label[_id] for _id in predicted_ids.tolist()]
label = labels[0]
resultado = EMOTIONS_DICT[label]

print(f"\nEmoción: {resultado}")
print("\nPorcentajes de confianza")

# Report the confidence for every class, not only the predicted one.
for i, prob in enumerate(probs):
    emotion_label = model.config.id2label[i]
    emotion_name = EMOTIONS_DICT[emotion_label]
    print(f"{emotion_name}: {prob.item()*100:.2f}%")
- Downloads last month
- 500
Model tree for pollitoconpapass/superb-ser-finetuned-spanish-v3.5
Base model
superb/wav2vec2-base-superb-er