from pathlib import Path
import json
import sys
# Directory two levels above this file, i.e. the parent of the directory
# that contains this script.
ROOT = Path(__file__).resolve().parent.parent
# Put ROOT at the front of the module search path so the `src.*` imports
# that follow can be resolved from it.
sys.path.insert(0, str(ROOT))
from src.models import load_strategies, load_actions # noqa: E402
from src.alignment import AlignmentEngine # noqa: E402
from src.evaluation import evaluate_alignment, EvalConfig # noqa: E402
# Input files consumed by main(); all of them live in the "data" directory under ROOT.
STRATEGY_JSON = ROOT.joinpath("data", "strategic.json")
ACTION_JSON = ROOT.joinpath("data", "action.json")
GROUND_TRUTH_JSON = ROOT.joinpath("data", "ground_truth.json")
def main() -> None:
    """Evaluate the alignment engine against the ground truth and print metrics.

    The strategy and action inputs are loaded first. If the ground-truth file
    does not exist, a short hint is printed and the function returns without
    running the evaluation. Otherwise an ``AlignmentEngine`` is evaluated with
    ``EvalConfig(top_k=5)`` and the aggregate and per-strategy metrics are
    written to stdout.
    """
    # Inputs are loaded before the ground-truth check, so a loader failure
    # is reported ahead of a missing ground-truth file.
    strategy_items = load_strategies(STRATEGY_JSON)
    action_items = load_actions(ACTION_JSON)

    # Guard clause: nothing can be scored without the reference labels.
    if not GROUND_TRUTH_JSON.exists():
        print(f"Ground truth not found at {GROUND_TRUTH_JSON}.")
        print("Please create it based on data/ground_truth.example.json")
        return

    expected = json.loads(GROUND_TRUTH_JSON.read_text(encoding="utf-8"))

    # Run the alignment and score it.
    aligner = AlignmentEngine()
    config = EvalConfig(top_k=5)
    report = evaluate_alignment(aligner, strategy_items, action_items, expected, config)

    # Aggregate metrics.
    print("Evaluation Summary:")
    k = report.top_k
    print(f"- Top-K: {k}")
    print(f"- Macro Precision: {report.macro_precision:.3f}")
    print(f"- Macro Recall: {report.macro_recall:.3f}")
    print(f"- MAP: {report.map:.3f}")
    print(f"- Mean NDCG: {report.mean_ndcg:.3f}")

    # One line per strategy; the adjacent f-strings concatenate into a single line.
    print("\nPer-strategy metrics:")
    for row in report.per_strategy:
        print(
            f" {row.strategy_id}: "
            f"P@{k}={row.precision_at_k:.3f} | "
            f"R@{k}={row.recall_at_k:.3f} | "
            f"AP={row.ap:.3f} | "
            f"NDCG={row.ndcg:.3f}"
        )
# Run the evaluation only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|