# Review-RAG / src/evaluate.py
# Author: HariHaran9597 — initial commit (1d70196)
import pandas as pd
import torch
import numpy as np
import argparse
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from sklearn.metrics import classification_report, accuracy_score, f1_score
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
import os
class SentimentDataset(Dataset):
    """Pairs each review text with an aspect term and tokenizes them for RoBERTa.

    Each item is encoded as a (text, aspect) sentence pair via
    ``tokenizer.encode_plus``, padded/truncated to ``max_len`` tokens, and
    returned together with its integer sentiment label as a LongTensor.
    """

    def __init__(self, texts, aspects, labels, tokenizer, max_len=128):
        self.texts = texts
        self.aspects = aspects
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        # Coerce to str defensively: pandas columns may hold NaN/non-string values.
        encoded = self.tokenizer.encode_plus(
            str(self.texts[idx]),
            str(self.aspects[idx]),
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        # encode_plus returns shape (1, max_len); flatten to (max_len,) so the
        # DataLoader can stack items into a (batch, max_len) tensor.
        return {
            'input_ids': encoded['input_ids'].flatten(),
            'attention_mask': encoded['attention_mask'].flatten(),
            'labels': torch.tensor(self.labels[idx], dtype=torch.long),
        }
def main():
    """Evaluate a fine-tuned RoBERTa ABSA model on a held-out test CSV.

    Loads the model and tokenizer from ``--model_path``, runs batched
    inference over (text, aspect) pairs from ``--test_file``, and prints
    accuracy, macro F1, a per-class classification report, and per-aspect
    accuracy. Exits early with a message if either path is missing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_path', type=str, default='models/absa-roberta-final',
                        help='Path to your fine-tuned model')
    parser.add_argument('--test_file', type=str, default='data/processed/test.csv',
                        help='Path to test CSV')
    parser.add_argument('--batch_size', type=int, default=32,
                        help='Inference batch size')
    args = parser.parse_args()

    if not os.path.exists(args.test_file):
        print(f"Error: {args.test_file} not found. Ensure you ran data_processing.py and downloaded the data.")
        return
    if not os.path.exists(args.model_path):
        print(f"Error: Model not found at {args.model_path}.")
        print("Please train the model on Colab using train_absa.ipynb, download the 'absa-roberta-final' folder, and place it in 'models/'.")
        return

    print(f"Loading test data from {args.test_file}...")
    df = pd.read_csv(args.test_file)

    label_mapping = {'positive': 0, 'negative': 1, 'neutral': 2, 'conflict': 3}
    df['label'] = df['sentiment'].map(label_mapping)
    # Series.map yields NaN for any sentiment outside the mapping, which would
    # later crash torch.tensor(label, dtype=torch.long). Drop such rows with a
    # warning instead of failing mid-evaluation.
    unmapped = df['label'].isna()
    if unmapped.any():
        print(f"Warning: dropping {int(unmapped.sum())} rows with unrecognized sentiment values.")
        df = df[~unmapped].reset_index(drop=True)
    df['label'] = df['label'].astype(int)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    print(f"Loading model and tokenizer from {args.model_path}...")
    tokenizer = RobertaTokenizer.from_pretrained(args.model_path)
    model = RobertaForSequenceClassification.from_pretrained(args.model_path)
    model = model.to(device)
    model.eval()

    test_dataset = SentimentDataset(
        texts=df['text'].to_numpy(),
        aspects=df['aspect'].to_numpy(),
        labels=df['label'].to_numpy(),
        tokenizer=tokenizer
    )
    # shuffle=False keeps predictions aligned with df rows for the
    # per-aspect breakdown below.
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False)

    all_preds = []
    all_labels = []
    print("Evaluating...")
    with torch.no_grad():
        for batch in tqdm(test_loader):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            # argmax over logits; no softmax needed for class prediction
            _, preds = torch.max(outputs.logits, dim=1)
            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

    target_names = ['positive', 'negative', 'neutral', 'conflict']
    print("\n--- Model Evaluation Results ---")
    print(f"Accuracy: {accuracy_score(all_labels, all_preds):.4f}")
    print(f"Macro F1: {f1_score(all_labels, all_preds, average='macro'):.4f}\n")
    print("Classification Report:")
    print(classification_report(all_labels, all_preds, target_names=target_names, zero_division=0))

    print("--- Per-Aspect Accuracy ---")
    df['prediction'] = all_preds
    for aspect in df['aspect'].unique():
        mask = df['aspect'] == aspect
        aspect_acc = accuracy_score(df[mask]['label'], df[mask]['prediction'])
        print(f"{str(aspect).ljust(15)}: {aspect_acc:.2%}")


if __name__ == "__main__":
    main()