from transformers import pipeline

# Compare two pretrained sentiment classifiers — DistilBERT fine-tuned on
# SST-2 and CardiffNLP's Twitter-RoBERTa — on the first `top_n` labeled
# reviews, printing per-example predictions and overall accuracy for each.
# NOTE(review): `train_data` must be defined earlier in the file/notebook
# and expose 'review' and 'sentiment' columns — confirm against the caller.

# Load pipelines
pipe_bert = pipeline(
    "text-classification",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)
pipe_roberta = pipeline(
    "text-classification",
    model="cardiffnlp/twitter-roberta-base-sentiment",
)

# The CardiffNLP model emits opaque LABEL_* ids; map them to readable names.
roberta_label_mapping_dict = {
    'LABEL_2': 'Positive',
    'LABEL_1': 'Neutral',
    'LABEL_0': 'Negative'
}

# Sample input — first `top_n` reviews (the old comment said "top 100" but
# the code has always taken 10; the code value is kept).
top_n = 10
reviews = train_data['review'][:top_n]
sentiments = train_data['sentiment'][:top_n]
# Keep (review, sentiment) PAIRS in a list rather than a dict keyed by
# review text: a dict would silently drop duplicate reviews (later label
# overwrites earlier) and shrink the accuracy denominator.
data_to_test = list(zip(reviews, sentiments))

# Print header
print(f"{'Original':<10} | {'DistilBERT':<10} | {'RoBERTa':<10}")

# Track accuracy
bert_correct = 0
roberta_correct = 0
total = len(data_to_test)

for text, true_label in data_to_test:
    # truncation=True on BOTH pipelines: reviews longer than the models'
    # 512-token limit would otherwise raise at inference time (the original
    # only truncated for RoBERTa).
    pred_bert = pipe_bert(text, truncation=True)[0]
    pred_roberta = pipe_roberta(text, truncation=True)[0]

    # Normalize every label to "Positive"/"Negative"/"Neutral" so the
    # three sources compare cleanly.
    original = true_label.strip().capitalize()
    bert = pred_bert["label"].capitalize()
    roberta = roberta_label_mapping_dict.get(pred_roberta["label"], "Unknown")

    # Accuracy check
    if bert == original:
        bert_correct += 1
    if roberta == original:
        roberta_correct += 1

    # Print results
    print(f"{original:<10} | {bert:<10} | {roberta:<10}")

# Calculate and print accuracy; guard against an empty sample so an empty
# `train_data` slice reports 0% instead of raising ZeroDivisionError.
bert_acc = (bert_correct / total) * 100 if total else 0.0
roberta_acc = (roberta_correct / total) * 100 if total else 0.0
print(f"\nAccuracy:")
print(f"DistilBERT: {bert_acc:.2f}%")
print(f"RoBERTa : {roberta_acc:.2f}%")