from transformers import pipeline

# Compare two pretrained sentiment classifiers — DistilBERT fine-tuned on
# SST-2 and CardiffNLP's Twitter-RoBERTa — on the first `top_n` labeled
# reviews, printing per-example predictions and overall accuracy for each.
# NOTE(review): `train_data` must be defined earlier in the file/notebook
# and expose 'review' and 'sentiment' columns — confirm against the caller.

# Load pipelines
pipe_bert = pipeline(
    "text-classification",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)
pipe_roberta = pipeline(
    "text-classification",
    model="cardiffnlp/twitter-roberta-base-sentiment",
)

# The CardiffNLP model emits opaque LABEL_* ids; map them to readable names.
roberta_label_mapping_dict = {
    'LABEL_2': 'Positive',
    'LABEL_1': 'Neutral',
    'LABEL_0': 'Negative'
}

# Sample input — first `top_n` reviews (the old comment said "top 100" but
# the code has always taken 10; the code value is kept).
top_n = 10
reviews = train_data['review'][:top_n]
sentiments = train_data['sentiment'][:top_n]
# Keep (review, sentiment) PAIRS in a list rather than a dict keyed by
# review text: a dict would silently drop duplicate reviews (later label
# overwrites earlier) and shrink the accuracy denominator.
data_to_test = list(zip(reviews, sentiments))

# Print header
print(f"{'Original':<10} | {'DistilBERT':<10} | {'RoBERTa':<10}")

# Track accuracy
bert_correct = 0
roberta_correct = 0
total = len(data_to_test)

for text, true_label in data_to_test:
    # truncation=True on BOTH pipelines: reviews longer than the models'
    # 512-token limit would otherwise raise at inference time (the original
    # only truncated for RoBERTa).
    pred_bert = pipe_bert(text, truncation=True)[0]
    pred_roberta = pipe_roberta(text, truncation=True)[0]

    # Normalize every label to "Positive"/"Negative"/"Neutral" so the
    # three sources compare cleanly.
    original = true_label.strip().capitalize()
    bert = pred_bert["label"].capitalize()
    roberta = roberta_label_mapping_dict.get(pred_roberta["label"], "Unknown")

    # Accuracy check
    if bert == original:
        bert_correct += 1
    if roberta == original:
        roberta_correct += 1

    # Print results
    print(f"{original:<10} | {bert:<10} | {roberta:<10}")

# Calculate and print accuracy; guard against an empty sample so an empty
# `train_data` slice reports 0% instead of raising ZeroDivisionError.
bert_acc = (bert_correct / total) * 100 if total else 0.0
roberta_acc = (roberta_correct / total) * 100 if total else 0.0
print(f"\nAccuracy:")
print(f"DistilBERT: {bert_acc:.2f}%")
print(f"RoBERTa : {roberta_acc:.2f}%")