Spaces:
Sleeping
Sleeping
| """ | |
| Registered agent graders — one enabled grader per task (easy / medium / hard). | |
| Automated checks count tasks that declare a grader and can run episode scoring. | |
| All three share the same history-based rubric; difficulty is enforced by the | |
| environment dynamics (items + volatility), not by different formulas. | |
| """ | |
| from __future__ import annotations | |
| from typing import Any, Callable, Dict, List | |
| from env.grader import evaluate_episode | |
# Type alias for the episode history every grader in this module accepts: a list of dicts with string keys.
| History = List[Dict[str, Any]] | |
def easy_agent_grader(history: History) -> float:
    """Grade an episode of the ``easy`` task.

    The history is handed to ``evaluate_episode`` unchanged and that
    function's result is returned as the score.

    Args:
        history: The episode history to score.

    Returns:
        Whatever ``evaluate_episode`` returns for ``history``.
    """
    episode_score = evaluate_episode(history)
    return episode_score
def medium_agent_grader(history: History) -> float:
    """Grade an episode of the ``medium`` task.

    The history is handed to ``evaluate_episode`` unchanged and that
    function's result is returned as the score.

    Args:
        history: The episode history to score.

    Returns:
        Whatever ``evaluate_episode`` returns for ``history``.
    """
    episode_score = evaluate_episode(history)
    return episode_score
def hard_agent_grader(history: History) -> float:
    """Grade an episode of the ``hard`` task.

    The history is handed to ``evaluate_episode`` unchanged and that
    function's result is returned as the score.

    Args:
        history: The episode history to score.

    Returns:
        Whatever ``evaluate_episode`` returns for ``history``.
    """
    episode_score = evaluate_episode(history)
    return episode_score
# Explicit registry (imported by server /tasks and static analysis).
# Keys are the task names; each value is the grader callable for that task.
# Kept as a plain dict literal so the name -> function mapping is visible
# without executing any code.
TASK_AGENT_GRADERS: Dict[str, Callable[[History], float]] = {
    "easy": easy_agent_grader,
    "medium": medium_agent_grader,
    "hard": hard_agent_grader,
}