cache-env / env /task_graders.py
Parv Pareek
done
e75c8ce
raw
history blame contribute delete
972 Bytes
"""
Registered agent graders — one enabled grader per task (easy / medium / hard).
Automated checks count tasks that declare a grader and can run episode scoring.
All three share the same history-based rubric; difficulty is enforced by the
environment dynamics (items + volatility), not by different formulas.
"""
from __future__ import annotations
from typing import Any, Callable, Dict, List
from env.grader import evaluate_episode
# Episode history as accepted by the graders in this module: a list of dicts
# with string keys and arbitrary values. The per-entry schema is not defined
# here.
History = List[Dict[str, Any]]
def easy_agent_grader(history: History) -> float:
    """Grade an episode for the "easy" task.

    The history is handed to ``evaluate_episode`` unchanged and its result is
    returned as-is; this function applies no easy-specific formula.
    """
    episode_score = evaluate_episode(history)
    return episode_score
def medium_agent_grader(history: History) -> float:
    """Grade an episode for the "medium" task.

    The history is handed to ``evaluate_episode`` unchanged and its result is
    returned as-is; this function applies no medium-specific formula.
    """
    episode_score = evaluate_episode(history)
    return episode_score
def hard_agent_grader(history: History) -> float:
    """Grade an episode for the "hard" task.

    The history is handed to ``evaluate_episode`` unchanged and its result is
    returned as-is; this function applies no hard-specific formula.
    """
    episode_score = evaluate_episode(history)
    return episode_score
# Explicit registry (imported by server /tasks and static analysis).
# Maps each task name ("easy", "medium", "hard") to the grader callable for
# that task; every grader takes an episode History and returns a float.
# NOTE(review): presumably kept as a plain dict literal so static analysis can
# read the entries directly — confirm before building this mapping dynamically.
TASK_AGENT_GRADERS: Dict[str, Callable[[History], float]] = {
    "easy": easy_agent_grader,
    "medium": medium_agent_grader,
    "hard": hard_agent_grader,
}