Spaces:

disiniStutterModel
/

EmpowerHer

Sleeping

File size: 2,826 Bytes

9be21ef

# eval/evaluate_emotion_classifier.py
# Run from project root:
#   python -m eval.evaluate_emotion_classifier

from __future__ import annotations

from dataclasses import dataclass
from typing import List, Dict, Tuple
import os

import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

from models.emotion_classifier import EmotionClassifier


@dataclass
class EvalResult:
    top1_acc: float
    top3_acc: float
    top5_acc: float


def topk_hit(pred_labels: List[str], true_label: str, k: int) -> bool:
    return true_label in pred_labels[:k]


def normalize_label(x: str) -> str:
    return (x or "").strip().lower()


def main():
    # --- 1) Load your evaluation CSV ---
    # Create: eval/emotion_eval.csv with columns: text,true_label
    csv_path = os.path.join("eval", "emotion_eval.csv")
    if not os.path.exists(csv_path):
        print(f"[ERROR] Missing file: {csv_path}")
        print("Create eval/emotion_eval.csv with columns: text,true_label")
        return

    df = pd.read_csv(csv_path)
    if "text" not in df.columns or "true_label" not in df.columns:
        print("[ERROR] CSV must have columns: text,true_label")
        return

    texts = df["text"].astype(str).tolist()
    true_labels = [normalize_label(x) for x in df["true_label"].astype(str).tolist()]

    # --- 2) Load classifier ---
    clf = EmotionClassifier()

    # --- 3) Predict + compute Top-k accuracy ---
    pred_top1 = []
    top3_hits = 0
    top5_hits = 0

    for text, true_lab in zip(texts, true_labels):
        preds = clf.predict_emotions(text, top_k=5)

        # preds looks like: [{'label': 'fear', 'score': 0.88}, ...]
        pred_labels = [normalize_label(p.get("label", "")) for p in preds]

        if pred_labels:
            pred_top1.append(pred_labels[0])
        else:
            pred_top1.append("")

        if topk_hit(pred_labels, true_lab, 3):
            top3_hits += 1
        if topk_hit(pred_labels, true_lab, 5):
            top5_hits += 1

    top1 = accuracy_score(true_labels, pred_top1)
    top3 = top3_hits / len(true_labels)
    top5 = top5_hits / len(true_labels)

    print("\n========== Emotion Classifier Evaluation ==========")
    print(f"Samples: {len(true_labels)}")
    print(f"Top-1 Accuracy: {top1:.3f}")
    print(f"Top-3 Accuracy: {top3:.3f}")
    print(f"Top-5 Accuracy: {top5:.3f}")

    print("\n--- Classification Report (Top-1) ---")
    print(classification_report(true_labels, pred_top1, zero_division=0))

    print("\n--- Confusion Matrix (Top-1) ---")
    labels_sorted = sorted(list(set(true_labels)))
    cm = confusion_matrix(true_labels, pred_top1, labels=labels_sorted)
    cm_df = pd.DataFrame(cm, index=labels_sorted, columns=labels_sorted)
    print(cm_df.to_string())


if __name__ == "__main__":
    main()