Spaces:
Sleeping
Sleeping
File size: 1,503 Bytes
15bf0f2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
from transformers import pipeline
# Load the two sentiment-analysis pipelines being compared.
pipe_bert = pipeline(
    "text-classification",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)
pipe_roberta = pipeline(
    "text-classification",
    model="cardiffnlp/twitter-roberta-base-sentiment",
)

# The CardiffNLP RoBERTa model emits generic LABEL_<n> ids; translate them
# into human-readable sentiment names (index order: 0=Negative, 2=Positive).
roberta_label_mapping_dict = {
    f'LABEL_{idx}': name
    for idx, name in enumerate(('Negative', 'Neutral', 'Positive'))
}
# Sample input: evaluate the first `top_n` reviews from the training data.
top_n = 10
reviews = train_data['review'][:top_n]
sentiments = train_data['sentiment'][:top_n]
# Keep (review, sentiment) pairs in a list: the previous dict(zip(...))
# silently dropped duplicate review texts, skewing the accuracy denominator.
data_to_test = list(zip(reviews, sentiments))

# Print header
print(f"{'Original':<10} | {'DistilBERT':<10} | {'RoBERTa':<10}")

# Track per-model correct-prediction counts
bert_correct = 0
roberta_correct = 0
total = len(data_to_test)

for text, true_label in data_to_test:
    # Truncate for BOTH models: reviews longer than the model's max sequence
    # length would otherwise raise (only the RoBERTa call truncated before).
    pred_bert = pipe_bert(text, truncation=True)[0]
    pred_roberta = pipe_roberta(text, truncation=True)[0]

    # Normalize labels to Title case so string comparison is consistent
    # (DistilBERT emits e.g. "POSITIVE", the dataset labels may vary in case).
    original = true_label.strip().capitalize()
    bert = pred_bert["label"].capitalize()
    # RoBERTa emits LABEL_0/1/2 ids; map to a readable sentiment name.
    roberta = roberta_label_mapping_dict.get(pred_roberta["label"], "Unknown")

    # Accuracy check
    if bert == original:
        bert_correct += 1
    if roberta == original:
        roberta_correct += 1

    # Print per-example results
    print(f"{original:<10} | {bert:<10} | {roberta:<10}")

# Calculate and print accuracy; guard against an empty sample (total == 0).
bert_acc = (bert_correct / total) * 100 if total else 0.0
roberta_acc = (roberta_correct / total) * 100 if total else 0.0
print(f"\nAccuracy:")
print(f"DistilBERT: {bert_acc:.2f}%")
print(f"RoBERTa : {roberta_acc:.2f}%")
|