""" Evaluation utilities — metrics computed during and after training. """ import numpy as np from sklearn.metrics import classification_report, confusion_matrix, f1_score, accuracy_score from transformers import EvalPrediction LABEL_NAMES = ["True", "Fake", "Satire", "Bias"] def compute_metrics(eval_pred: EvalPrediction) -> dict: """Called by HuggingFace Trainer after every eval step. Returns accuracy and macro/weighted F1.""" logits, labels = eval_pred preds = np.argmax(logits, axis=-1) return { "accuracy": round(accuracy_score(labels, preds), 4), "f1_macro": round(f1_score(labels, preds, average="macro", zero_division=0), 4), "f1_weighted": round(f1_score(labels, preds, average="weighted", zero_division=0), 4), } def full_report(model, tokenized_test, label_names=LABEL_NAMES) -> dict: """Run full evaluation on the test split. Returns per-class metrics and confusion matrix.""" from transformers import Trainer trainer = Trainer(model=model, compute_metrics=compute_metrics) preds_out = trainer.predict(tokenized_test) preds = np.argmax(preds_out.predictions, axis=-1) labels = preds_out.label_ids report = classification_report( labels, preds, target_names=label_names, output_dict=True, zero_division=0) cm = confusion_matrix(labels, preds) print("\n" + "=" * 60) print("CLASSIFICATION REPORT") print("=" * 60) print(classification_report(labels, preds, target_names=label_names, zero_division=0)) print("Confusion Matrix:") print(cm) print("=" * 60 + "\n") return {"report": report, "confusion_matrix": cm.tolist()}