Spaces:

disiniStutterModel
/

EmpowerHer

Sleeping

EmpowerHer / eval / evaluate_emotion_classifier.py

Disini Ruhansa Kodagoda Hettige

Deploy EmpowerHer app

9be21ef about 2 months ago

2.83 kB

	# eval/evaluate_emotion_classifier.py
	# Run from project root:
	# python -m eval.evaluate_emotion_classifier

	from __future__ import annotations

	from dataclasses import dataclass
	from typing import List, Dict, Tuple
	import os

	import pandas as pd
	from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

	from models.emotion_classifier import EmotionClassifier


	@dataclass
	class EvalResult:
	    """Container for the three top-k accuracy figures of an evaluation run.

	    NOTE(review): nothing in the visible part of this file constructs or
	    returns an ``EvalResult``; the fields presumably mirror the Top-1 /
	    Top-3 / Top-5 accuracies that ``main()`` prints -- confirm before use.
	    """

	    # Accuracy when only the first prediction is considered.
	    top1_acc: float
	    # Accuracy when the first three predictions are considered.
	    top3_acc: float
	    # Accuracy when the first five predictions are considered.
	    top5_acc: float


	def topk_hit(pred_labels: List[str], true_label: str, k: int) -> bool:
	    """Return whether ``true_label`` is among the first ``k`` predictions.

	    Args:
	        pred_labels: Predicted labels in ranked order (index 0 is treated as
	            the top-1 prediction by the caller).
	        true_label: The expected label.
	        k: Number of leading predictions to consider.

	    Returns:
	        ``True`` if ``true_label`` occurs in ``pred_labels[:k]``; always
	        ``False`` when ``k`` is zero or negative.
	    """
	    # A non-positive k means "consider no predictions". Without this guard a
	    # negative k would slice from the end (e.g. ``pred_labels[:-1]``) and
	    # could report a hit that is not a top-k hit at all.
	    if k <= 0:
	        return False
	    return true_label in pred_labels[:k]


	def normalize_label(x: str) -> str:
	    """Canonicalise a label for comparison: trimmed and lower-cased.

	    Falsy input (``None`` or ``""``) yields the empty string.
	    """
	    if not x:
	        return ""
	    trimmed = x.strip()
	    return trimmed.lower()


	def _load_eval_frame(csv_path: str) -> "pd.DataFrame | None":
	    """Read and validate the evaluation CSV.

	    Prints an ``[ERROR]`` line and returns ``None`` when the file is missing,
	    has no parsable content, lacks the ``text`` / ``true_label`` columns, or
	    contains no usable rows. Rows with a missing ``text`` or ``true_label``
	    are dropped (with a warning) so they are not later scored as the literal
	    string ``"nan"``.

	    Args:
	        csv_path: Path of the CSV file to load.

	    Returns:
	        A non-empty DataFrame with ``text`` and ``true_label`` columns, or
	        ``None`` if evaluation cannot proceed.
	    """
	    if not os.path.exists(csv_path):
	        print(f"[ERROR] Missing file: {csv_path}")
	        print("Create eval/emotion_eval.csv with columns: text,true_label")
	        return None

	    try:
	        df = pd.read_csv(csv_path)
	    except pd.errors.EmptyDataError:
	        # A zero-byte file has no header row for pandas to parse.
	        print("[ERROR] CSV must have columns: text,true_label")
	        return None

	    if "text" not in df.columns or "true_label" not in df.columns:
	        print("[ERROR] CSV must have columns: text,true_label")
	        return None

	    # Missing cells are NaN; casting them with astype(str) would silently
	    # turn them into the text/label "nan", so remove those rows up front.
	    usable = df.dropna(subset=["text", "true_label"])
	    skipped = len(df) - len(usable)
	    if skipped:
	        print(f"[WARN] Skipped {skipped} row(s) with missing text or true_label")

	    if usable.empty:
	        # Guarding here avoids a division by zero in the accuracy computation.
	        print(f"[ERROR] No evaluation samples found in {csv_path}")
	        return None
	    return usable


	def main():
	    """Evaluate the emotion classifier on ``eval/emotion_eval.csv``.

	    Loads the CSV (columns ``text,true_label``), asks the classifier for its
	    top five predictions per text, and prints Top-1 / Top-3 / Top-5 accuracy,
	    a Top-1 classification report and a Top-1 confusion matrix.

	    Returns ``None`` in every case; problems with the input file are
	    reported on stdout and end the run early, before the classifier is
	    loaded.
	    """
	    # --- 1) Load your evaluation CSV ---
	    # Create: eval/emotion_eval.csv with columns: text,true_label
	    csv_path = os.path.join("eval", "emotion_eval.csv")
	    df = _load_eval_frame(csv_path)
	    if df is None:
	        return

	    texts = df["text"].astype(str).tolist()
	    true_labels = [normalize_label(x) for x in df["true_label"].astype(str).tolist()]

	    # --- 2) Load classifier ---
	    clf = EmotionClassifier()

	    # --- 3) Predict + compute Top-k accuracy ---
	    pred_top1 = []
	    top3_hits = 0
	    top5_hits = 0

	    for text, true_lab in zip(texts, true_labels):
	        preds = clf.predict_emotions(text, top_k=5)

	        # preds looks like: [{'label': 'fear', 'score': 0.88}, ...]
	        pred_labels = [normalize_label(p.get("label", "")) for p in preds]

	        # An empty prediction list is recorded as "" so it counts as a miss.
	        pred_top1.append(pred_labels[0] if pred_labels else "")

	        if topk_hit(pred_labels, true_lab, 3):
	            top3_hits += 1
	        if topk_hit(pred_labels, true_lab, 5):
	            top5_hits += 1

	    total = len(true_labels)  # > 0, guaranteed by _load_eval_frame
	    top1 = accuracy_score(true_labels, pred_top1)
	    top3 = top3_hits / total
	    top5 = top5_hits / total

	    print("\n========== Emotion Classifier Evaluation ==========")
	    print(f"Samples: {total}")
	    print(f"Top-1 Accuracy: {top1:.3f}")
	    print(f"Top-3 Accuracy: {top3:.3f}")
	    print(f"Top-5 Accuracy: {top5:.3f}")

	    print("\n--- Classification Report (Top-1) ---")
	    print(classification_report(true_labels, pred_top1, zero_division=0))

	    print("\n--- Confusion Matrix (Top-1) ---")
	    # NOTE(review): the matrix is indexed by the true labels only, so a
	    # prediction whose label never occurs as a true label gets no column.
	    labels_sorted = sorted(set(true_labels))
	    cm = confusion_matrix(true_labels, pred_top1, labels=labels_sorted)
	    cm_df = pd.DataFrame(cm, index=labels_sorted, columns=labels_sorted)
	    print(cm_df.to_string())


	# Run the evaluation only when executed as a script or via
	# ``python -m eval.evaluate_emotion_classifier``, not on import.
	if __name__ == "__main__":
	    main()