"""
Evaluation utilities — metrics computed during and after training.
"""

import numpy as np
from sklearn.metrics import classification_report, confusion_matrix, f1_score, accuracy_score
from transformers import EvalPrediction

# Human-readable class names; the default `label_names` for `full_report`,
# where they are passed to sklearn as `target_names`.
# NOTE(review): assumes position i names integer class id i — confirm against
# the label encoding used when the dataset was built.
LABEL_NAMES = ["True", "Fake", "Satire", "Bias"]


def compute_metrics(eval_pred: EvalPrediction) -> dict:
    """Compute accuracy and macro/weighted F1 from raw model outputs.

    Designed to be used as the ``compute_metrics`` callback of a HuggingFace
    ``Trainer``.

    Args:
        eval_pred: A ``(predictions, label_ids)`` pair. ``predictions`` is
            either the logits array itself or a tuple of model outputs whose
            first element is the logits.

    Returns:
        Dict with keys ``"accuracy"``, ``"f1_macro"`` and ``"f1_weighted"``,
        each rounded to 4 decimal places. Classes with no predicted or true
        samples contribute an F1 of 0 (``zero_division=0``).
    """
    logits, labels = eval_pred
    # When a model returns extra outputs (hidden states, attentions, ...) the
    # Trainer passes them through as a tuple; argmax over that tuple would
    # fail or be meaningless, so keep only the leading logits entry.
    if isinstance(logits, tuple):
        logits = logits[0]
    preds = np.argmax(logits, axis=-1)
    return {
        "accuracy": round(accuracy_score(labels, preds), 4),
        "f1_macro": round(
            f1_score(labels, preds, average="macro", zero_division=0), 4
        ),
        "f1_weighted": round(
            f1_score(labels, preds, average="weighted", zero_division=0), 4
        ),
    }


def full_report(model, tokenized_test, label_names=LABEL_NAMES) -> dict:
    """Evaluate ``model`` on the test split and print/return a full report.

    A ``Trainer`` is built only to run ``predict`` on ``tokenized_test``; the
    predicted class is the argmax over the last axis of the returned
    predictions.

    Args:
        model: Model handed to ``transformers.Trainer``.
        tokenized_test: Dataset handed to ``Trainer.predict``; it must carry
            labels, since ``label_ids`` from the prediction output is used.
        label_names: Display name per class. Class ids are taken to be
            ``0 .. len(label_names) - 1``, in that order.

    Returns:
        Dict with ``"report"`` (sklearn's per-class metrics as a nested dict)
        and ``"confusion_matrix"`` (nested list, one row/column per entry in
        ``label_names``).

    Side effects:
        Prints the text classification report and confusion matrix to stdout.
    """
    from transformers import Trainer

    trainer = Trainer(model=model, compute_metrics=compute_metrics)
    preds_out = trainer.predict(tokenized_test)

    raw_predictions = preds_out.predictions
    # Models returning extra outputs yield a tuple; logits come first.
    if isinstance(raw_predictions, tuple):
        raw_predictions = raw_predictions[0]
    preds = np.argmax(raw_predictions, axis=-1)
    labels = preds_out.label_ids

    # Pin the class ids explicitly. Without `labels=`, sklearn infers the
    # classes from the data, so a class missing from both the test labels and
    # the predictions makes `target_names` mismatch (ValueError) and shrinks
    # the confusion matrix. Output is unchanged when every class is present.
    class_ids = list(range(len(label_names)))

    report = classification_report(
        labels,
        preds,
        labels=class_ids,
        target_names=label_names,
        output_dict=True,
        zero_division=0,
    )
    cm = confusion_matrix(labels, preds, labels=class_ids)

    print("\n" + "=" * 60)
    print("CLASSIFICATION REPORT")
    print("=" * 60)
    print(
        classification_report(
            labels,
            preds,
            labels=class_ids,
            target_names=label_names,
            zero_division=0,
        )
    )
    print("Confusion Matrix:")
    print(cm)
    print("=" * 60 + "\n")

    return {"report": report, "confusion_matrix": cm.tolist()}