GoodWordsOnly / src /evaluate.py
Jathin Chetty
Vercel-ready API version without model artifacts
f7a8d72
raw
history blame contribute delete
899 Bytes
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from src.dataset import get_dataset
from src.config import Config
dataset = get_dataset()
model = AutoModelForSequenceClassification.from_pretrained("outputs/model")
model.eval()
tokenizer = AutoTokenizer.from_pretrained(Config.MODEL_NAME)
preds = []
labels = []
for i in range(500): # sample evaluation
item = dataset[i]
input_ids = item["input_ids"].unsqueeze(0)
attention_mask = item["attention_mask"].unsqueeze(0)
with torch.no_grad():
outputs = model(input_ids=input_ids, attention_mask=attention_mask)
pred = torch.argmax(outputs.logits, dim=1).item()
preds.append(pred)
labels.append(item["labels"].item())
print("Accuracy:", accuracy_score(labels, preds))
print("F1 Score:", f1_score(labels, preds))