dair-ai/emotion
Viewer • Updated • 437k • 33.8k • 440
How to use Cesar42/xlm-roberta-emotion-es with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline
pipe = pipeline("text-classification", model="Cesar42/xlm-roberta-emotion-es")
# Load model directly
from transformers import AutoTokenizer, AutoModelForSequenceClassification
tokenizer = AutoTokenizer.from_pretrained("Cesar42/xlm-roberta-emotion-es")
model = AutoModelForSequenceClassification.from_pretrained("Cesar42/xlm-roberta-emotion-es")
AI model Roberta_Base_Cased trained with the emotion dataset
Model Base: bert_base_uncased
dataset: dair-ai/emotion
Config train:
num_train_epochs=8, learning_rate=2e-5, weight_decay=0.01, batch_size=64
{
'test_loss': 0.14830373227596283,
'test_accuracy': 0.9415,
'test_f1': 0.9411005763302622,
'test_runtime': 8.372,
'test_samples_per_second': 238.892,
'test_steps_per_second': 3.822
}
from transformers import pipeline
# Checkpoint used for this pipeline example.
# NOTE(review): this is the daveni checkpoint, not the Cesar42 model named in
# the usage section of this card -- confirm which one is intended.
model_path = "daveni/twitter-xlm-roberta-emotion-es"
# Build a PyTorch ("pt") text-classification pipeline; the same path supplies
# both the model weights and the tokenizer.
emotion_analysis = pipeline("text-classification", framework="pt", model=model_path, tokenizer=model_path)
# Classify a single Spanish sentence.
emotion_analysis("Einstein dijo: Solo hay dos cosas infinitas, el universo y los pinches anuncios de bitcoin en Twitter. Paren ya carajo aaaaaaghhgggghhh me quiero murir")
# Sample result as shown on the card: a list with one {'label', 'score'} dict.
[{'label': 'anger', 'score': 0.48307016491889954}]
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer, AutoConfig
import numpy as np
from scipy.special import softmax
# Preprocess text (username and link placeholders)
def preprocess(text):
    """Replace user mentions and links in *text* with placeholder tokens.

    The text is split on single spaces. Every piece that starts with ``@``
    and is longer than one character becomes ``@user``; every piece that
    starts with ``http`` becomes ``http``. All other pieces, including the
    empty strings produced by consecutive spaces, are kept unchanged, so the
    original spacing survives the round trip.

    Args:
        text: The raw input string.

    Returns:
        The text with mentions and links replaced, re-joined with single
        spaces.
    """
    tokens = []
    for token in text.split(" "):
        if token.startswith("@") and len(token) > 1:
            # A lone "@" is not a mention, hence the length check.
            token = "@user"
        elif token.startswith("http"):
            token = "http"
        tokens.append(token)
    return " ".join(tokens)
# Checkpoint to load; the tokenizer, config and model all come from this path.
# NOTE(review): this repo id differs from the one named in the usage section
# of this card -- confirm which checkpoint is intended.
model_path = "Cesar42/bert-base-uncased-emotion_v2"
tokenizer = AutoTokenizer.from_pretrained(model_path)
config = AutoConfig.from_pretrained(model_path)
# PT
model = AutoModelForSequenceClassification.from_pretrained(model_path)
text = "Se ha quedao bonito día para publicar vídeo, ¿no? Hoy del tema más diferente que hemos tocado en el canal."
# Replace mentions/links with placeholders before tokenizing.
text = preprocess(text)
print(text)
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)
# First element of the model output, first item of the batch, as a NumPy array.
scores = output[0][0].detach().numpy()
# Turn the raw scores into probabilities.
scores = softmax(scores)
# Print labels and scores
ranking = np.argsort(scores)
ranking = ranking[::-1]  # argsort is ascending; reverse for highest first
for position, class_id in enumerate(ranking, start=1):
    label = config.id2label[int(class_id)]
    score = scores[class_id]
    print(f"{position}) {label} {np.round(float(score), 4)}")
Output:
Se ha quedao bonito día para publicar vídeo, ¿no? Hoy del tema más diferente que hemos tocado en el canal.
1) joy 0.7887
2) others 0.1679
3) surprise 0.0152
4) sadness 0.0145
5) anger 0.0077
6) disgust 0.0033
7) fear 0.0027