from __future__ import annotations

from dataclasses import dataclass
from typing import Any, Dict, Iterable, List, Set, Tuple
import math

from .alignment import AlignmentEngine
from .models import StrategicObjective, ActionTask


@dataclass
class EvalConfig:
    top_k: int = 5


def precision_recall_at_k(
    pred_ids: List[str], truth_ids: Set[str], k: int
) -> Tuple[float, float]:
    """Return (precision@k, recall@k) for a ranked list of predicted IDs."""
    preds = pred_ids[:k]
    hits = sum(1 for pid in preds if pid in truth_ids)
    precision = hits / max(1, len(preds))
    recall = hits / max(1, len(truth_ids))
    return precision, recall
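
# Worked example (hypothetical IDs):
#   precision_recall_at_k(["A1", "A2", "A3"], {"A1", "A3"}, k=3)
#   -> precision = 2/3, recall = 2/2 = 1.0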


def average_precision(pred_ids: List[str], truth_ids: Set[str]) -> float:
    """Mean of precision at each hit position, averaged over retrieved hits.

    Note: this normalizes by the number of hits actually retrieved rather than
    by len(truth_ids), so relevant items missing from pred_ids are not
    penalized. Returns 0.0 when there are no hits.
    """
    hits = 0
    ap_sum = 0.0
    for i, pid in enumerate(pred_ids, start=1):
        if pid in truth_ids:
            hits += 1
            ap_sum += hits / i
    return ap_sum / max(1, hits)
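
# Worked example (hypothetical IDs): preds ["A1", "A2", "A3"], truth {"A1", "A3"}.
# Hits occur at ranks 1 and 3, so AP = (1/1 + 2/3) / 2 = 0.833...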


def ndcg_at_k(pred_ids: List[str], truth_ids: Set[str], k: int) -> float:
    # Relevance is binary: 1 if in truth, else 0
    dcg = 0.0
    for i, pid in enumerate(pred_ids[:k], start=1):
        rel = 1.0 if pid in truth_ids else 0.0
        dcg += rel / math.log2(i + 1)
    # Ideal DCG assumes all relevant items are ranked first
    ideal_rel_count = min(len(truth_ids), k)
    idcg = sum(1.0 / math.log2(i + 1) for i in range(1, ideal_rel_count + 1))
    return dcg / idcg if idcg > 0 else 0.0
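
# Worked example (hypothetical IDs): preds ["A1", "A2", "A3"], truth {"A1", "A3"},
# k=3. DCG = 1/log2(2) + 0 + 1/log2(4) = 1.5;
# IDCG = 1/log2(2) + 1/log2(3) = 1.631; NDCG = 1.5 / 1.631 = 0.92 (approximately).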


@dataclass
class StrategyEval:
    """Per-strategy retrieval metrics."""

    strategy_id: str
    precision_at_k: float
    recall_at_k: float
    ap: float
    ndcg: float


@dataclass
class EvalSummary:
    """Macro-averaged metrics across all evaluated strategies."""

    top_k: int
    macro_precision: float
    macro_recall: float
    map: float
    mean_ndcg: float
    per_strategy: List[StrategyEval]
    similarity_summary: Dict[str, float] | None = None


def evaluate_alignment(
    engine: AlignmentEngine,
    strategies: Iterable[StrategicObjective],
    actions: Iterable[ActionTask],
    ground_truth: Dict[str, List[str]],
    config: EvalConfig | None = None,
) -> EvalSummary:
    """Run the alignment engine and score its rankings against ground truth."""
    cfg = config or EvalConfig()
    # Run alignment retrieval
    result = engine.align(
        strategies=list(strategies), actions=list(actions), top_k=cfg.top_k
    )
    per_strategy: List[StrategyEval] = []
    p_list: List[float] = []
    r_list: List[float] = []
    ap_list: List[float] = []
    ndcg_list: List[float] = []
    for sres in result["strategy_results"]:
        sid = sres["strategy_id"]
        preds = [m["action_id"] for m in sres.get("top_matches", [])]
        truth = set(ground_truth.get(sid, []))
        p, r = precision_recall_at_k(preds, truth, cfg.top_k)
        ap = average_precision(preds, truth)
        nd = ndcg_at_k(preds, truth, cfg.top_k)
        per_strategy.append(
            StrategyEval(
                strategy_id=sid,
                precision_at_k=p,
                recall_at_k=r,
                ap=ap,
                ndcg=nd,
            )
        )
        p_list.append(p)
        r_list.append(r)
        ap_list.append(ap)
        ndcg_list.append(nd)
    summary = EvalSummary(
        top_k=cfg.top_k,
        macro_precision=sum(p_list) / max(1, len(p_list)),
        macro_recall=sum(r_list) / max(1, len(r_list)),
        map=sum(ap_list) / max(1, len(ap_list)),
        mean_ndcg=sum(ndcg_list) / max(1, len(ndcg_list)),
        per_strategy=per_strategy,
        similarity_summary=None,
    )
    return summary
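
# Usage sketch (assumes AlignmentEngine.align returns the dict shape consumed
# above, i.e. {"strategy_results": [{"strategy_id": ...,
# "top_matches": [{"action_id": ...}, ...]}, ...]}):
#   summary = evaluate_alignment(engine, strategies, actions, {"S1": ["A3", "A9"]})
#   print(summary.macro_precision, summary.map, summary.mean_ndcg)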


def precision_at_k(pred_ids: List[str], truth_ids: Set[str], k: int) -> float:
    p, _ = precision_recall_at_k(pred_ids, truth_ids, k)
    return p


def recall_at_k(pred_ids: List[str], truth_ids: Set[str], k: int) -> float:
    _, r = precision_recall_at_k(pred_ids, truth_ids, k)
    return r


def run_evaluation(
    alignment_result: Dict[str, Any], ground_truth_path: str | None, top_k: int = 5
) -> Dict[str, Any]:
    """Compute Precision@K, Recall@K and similarity summaries given alignment results.

    Ground truth format: {"S1": ["A3", "A9"], "S2": ["A2"], ...}
    """
    import json
    from pathlib import Path

    truth_map: Dict[str, List[str]] = {}
    if ground_truth_path:
        path = Path(ground_truth_path)
        if path.exists():
            with path.open("r", encoding="utf-8") as f:
                data = json.load(f)
            if isinstance(data, dict):
                truth_map = {str(k): list(v or []) for k, v in data.items()}
    per_strategy: List[Dict[str, Any]] = []
    p_list: List[float] = []
    r_list: List[float] = []
    retrieved_sims: List[float] = []
    relevant_sims: List[float] = []
    for sres in alignment_result.get("strategy_results", []):
        sid = sres.get("strategy_id")
        preds = [m.get("action_id") for m in sres.get("top_matches", [])]
        sims = [float(m.get("similarity", 0.0)) for m in sres.get("top_matches", [])]
        truth = set(truth_map.get(str(sid), []))
        p, r = precision_recall_at_k(preds, truth, top_k)
        ap = average_precision(preds, truth)
        nd = ndcg_at_k(preds, truth, top_k)
        per_strategy.append(
            {
                "strategy_id": sid,
                "precision_at_k": p,
                "recall_at_k": r,
                "ap": ap,
                "ndcg": nd,
            }
        )
        p_list.append(p)
        r_list.append(r)
        # Similarity summaries
        retrieved_sims.extend(sims)
        # Relevant sims: similarity of matches that are in ground truth
        for m in sres.get("top_matches", []):
            if m.get("action_id") in truth:
                relevant_sims.append(float(m.get("similarity", 0.0)))
    eval_summary = {
        "top_k": top_k,
        "macro_precision": sum(p_list) / max(1, len(p_list)),
        "macro_recall": sum(r_list) / max(1, len(r_list)),
        "per_strategy": per_strategy,
        "similarity_summary": {
            "retrieved_mean": (sum(retrieved_sims) / max(1, len(retrieved_sims)))
            if retrieved_sims
            else 0.0,
            "relevant_mean": (sum(relevant_sims) / max(1, len(relevant_sims)))
            if relevant_sims
            else 0.0,
        },
    }
    return eval_summary
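

if __name__ == "__main__":
    # Minimal smoke test with a hypothetical alignment_result; the dict shape
    # mirrors what run_evaluation consumes above and is not a real engine output.
    # Because this module uses relative imports, run it with
    # `python -m <package>.<module>` rather than as a standalone script.
    demo_result = {
        "strategy_results": [
            {
                "strategy_id": "S1",
                "top_matches": [
                    {"action_id": "A3", "similarity": 0.91},
                    {"action_id": "A7", "similarity": 0.55},
                ],
            }
        ]
    }
    # With no ground-truth file, precision/recall fall back to 0.0 but the
    # similarity summary is still reported (retrieved_mean = 0.73 here).
    print(run_evaluation(demo_result, ground_truth_path=None, top_k=2))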