Spaces:
Sleeping
Sleeping
File size: 2,826 Bytes
# eval/evaluate_emotion_classifier.py
# Run from project root:
# python -m eval.evaluate_emotion_classifier
from __future__ import annotations
from dataclasses import dataclass
from typing import List, Dict, Tuple
import os
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from models.emotion_classifier import EmotionClassifier
@dataclass
class EvalResult:
    """Container for the three top-k accuracy figures of one evaluation run.

    The class body was not indented in the original, which is a syntax
    error; the fields below are unchanged in name, order and type.

    NOTE(review): nothing in the visible file constructs this class; the
    field names mirror the top-1/top-3/top-5 values printed by ``main()``.
    """

    # Accuracy when only the first prediction is considered.
    top1_acc: float
    # Accuracy when the first three predictions are considered.
    top3_acc: float
    # Accuracy when the first five predictions are considered.
    top5_acc: float
def topk_hit(pred_labels: List[str], true_label: str, k: int) -> bool:
    """Return True when *true_label* is among the first *k* predicted labels.

    Args:
        pred_labels: Predicted labels in ranked order (callers treat index 0
            as the top prediction).
        true_label: The reference label to look for.
        k: Number of leading predictions to consider.

    Returns:
        True if ``true_label`` occurs in ``pred_labels[:k]``. False otherwise,
        including when ``pred_labels`` is empty or ``k`` is zero or negative.
    """
    # A non-positive k means "consider no predictions". Without this guard a
    # negative k would slice from the end (``[:-1]`` keeps all but the last
    # element) and could report a hit that is not a top-k hit at all.
    if k <= 0:
        return False
    return true_label in pred_labels[:k]
def normalize_label(x: str) -> str:
    """Return *x* stripped of surrounding whitespace and lower-cased.

    Args:
        x: The raw label. ``None`` and other falsy values are accepted and
            treated as an empty label. Non-string values (for example a
            float NaN from a missing CSV cell, or a numeric label) are
            tolerated instead of raising ``AttributeError``.

    Returns:
        The normalized label; ``""`` for falsy input or NaN.
    """
    # Falsy input (None, "", 0, ...) maps to the empty label.
    if not x:
        return ""
    if isinstance(x, str):
        return x.strip().lower()
    # NaN is the only value that compares unequal to itself; treat it as a
    # missing label rather than producing the literal string "nan".
    if x != x:
        return ""
    return str(x).strip().lower()
def main() -> None:
    """Evaluate ``EmotionClassifier`` on ``eval/emotion_eval.csv`` and print metrics.

    Reads a CSV with the columns ``text`` and ``true_label``, asks the
    classifier for its top five predictions per text, and prints top-1,
    top-3 and top-5 accuracy, a classification report and a confusion matrix
    for the top-1 prediction.

    Problems with the input (missing file, empty file, missing columns, no
    usable rows) are reported on stdout and the function returns early.

    Returns:
        None. All results are printed.
    """
    # --- 1) Load your evaluation CSV ---
    # Create: eval/emotion_eval.csv with columns: text,true_label
    csv_path = os.path.join("eval", "emotion_eval.csv")
    if not os.path.exists(csv_path):
        print(f"[ERROR] Missing file: {csv_path}")
        print("Create eval/emotion_eval.csv with columns: text,true_label")
        return

    try:
        df = pd.read_csv(csv_path)
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header row for pandas to parse.
        print("[ERROR] CSV must have columns: text,true_label")
        return

    if "text" not in df.columns or "true_label" not in df.columns:
        print("[ERROR] CSV must have columns: text,true_label")
        return

    # Rows with a missing text or label cannot be scored. Left in place,
    # ``astype(str)`` would turn the missing cell into the literal "nan" and
    # count it as a real class.
    total_rows = len(df)
    df = df.dropna(subset=["text", "true_label"])
    skipped = total_rows - len(df)
    if skipped:
        print(f"[WARN] Skipped {skipped} row(s) with missing text or true_label")

    # Without at least one sample the accuracy ratios below would divide by
    # zero, so stop with a clear message instead.
    if df.empty:
        print(f"[ERROR] No usable rows in {csv_path}")
        return

    texts = df["text"].astype(str).tolist()
    true_labels = [normalize_label(x) for x in df["true_label"].astype(str)]

    # --- 2) Load classifier ---
    clf = EmotionClassifier()

    # --- 3) Predict + compute Top-k accuracy ---
    pred_top1 = []
    top3_hits = 0
    top5_hits = 0
    for text, true_lab in zip(texts, true_labels):
        preds = clf.predict_emotions(text, top_k=5)
        # preds looks like: [{'label': 'fear', 'score': 0.88}, ...]
        pred_labels = [normalize_label(p.get("label", "")) for p in preds]

        # An empty prediction list is recorded as "" so that pred_top1 stays
        # aligned with true_labels.
        pred_top1.append(pred_labels[0] if pred_labels else "")

        if topk_hit(pred_labels, true_lab, 3):
            top3_hits += 1
        if topk_hit(pred_labels, true_lab, 5):
            top5_hits += 1

    n_samples = len(true_labels)
    top1 = accuracy_score(true_labels, pred_top1)
    top3 = top3_hits / n_samples
    top5 = top5_hits / n_samples

    print("\n========== Emotion Classifier Evaluation ==========")
    print(f"Samples: {n_samples}")
    print(f"Top-1 Accuracy: {top1:.3f}")
    print(f"Top-3 Accuracy: {top3:.3f}")
    print(f"Top-5 Accuracy: {top5:.3f}")

    print("\n--- Classification Report (Top-1) ---")
    print(classification_report(true_labels, pred_top1, zero_division=0))

    print("\n--- Confusion Matrix (Top-1) ---")
    # Use every label seen in either the references or the predictions.
    # Restricting the matrix to the true labels would silently drop samples
    # whose predicted label never occurs as a true label.
    labels_sorted = sorted(set(true_labels) | set(pred_top1))
    cm = confusion_matrix(true_labels, pred_top1, labels=labels_sorted)
    cm_df = pd.DataFrame(cm, index=labels_sorted, columns=labels_sorted)
    print(cm_df.to_string())
# Script entry point: run the evaluation only when this module is executed
# directly (e.g. ``python -m eval.evaluate_emotion_classifier``), not when it
# is imported. The call must be indented under the guard to be valid Python.
if __name__ == "__main__":
    main()
|