scheduling_env / graders.py
Akshaykumarbm's picture
Upload folder using huggingface_hub
8e97e82 verified
"""Root-level grader entry points for OpenEnv judge.
Each function receives the episode trajectory and returns a float in [0.0, 1.0].
These are referenced in openenv.yaml and must be importable from the repo root.
"""
from __future__ import annotations
from typing import Any, Dict, List
from server.graders import SchedulingGrader
from server.scheduling_env_environment import SchedulingEnvironment
# Module-level singletons created once at import time and shared by every
# grader entry point below.
# _grader scores a (state, observation) pair via grade_episode().
_grader = SchedulingGrader()
# _env is consulted only as a fallback, when a trajectory does not carry its
# own final state/observation; its `state` attribute is read, never stepped here.
_env = SchedulingEnvironment()
def _grade_task(task_id: str, trajectory: Any = None) -> float:
    """Grade one episode, preferring data carried by ``trajectory``.

    Args:
        task_id: Identifier of the task being graded. It is not used by the
            scoring logic here; it is kept so all graders share one call shape.
        trajectory: Optional episode record. When it is a mapping-like object
            (anything exposing a callable ``get``) with truthy
            ``"final_state"`` and ``"final_observation"`` entries, those are
            graded directly. Any other value (``None``, a list of steps, ...)
            is ignored rather than raising.

    Returns:
        The result of ``_grader.grade_episode`` when gradable data is
        available -- taken from ``trajectory`` or, failing that, from the
        shared environment's state once it is marked completed -- otherwise
        ``0.0``.
    """
    # Only mapping-like trajectories can carry the final state/observation.
    # Calling ``.get`` unconditionally would raise AttributeError for e.g. a
    # list-shaped trajectory, so probe for the method first.
    lookup = getattr(trajectory, "get", None)
    if callable(lookup):
        final_state = lookup("final_state")
        final_obs = lookup("final_observation")
        if final_state and final_obs:
            return _grader.grade_episode(final_state, final_obs)

    # Fallback: use the environment's current state.
    state = _env.state
    if state.completed:
        # Kept as a function-scope import, as in the original code.
        from models import SchedulingObservation

        # Synthesize a terminal observation from the completed state so the
        # grader receives the (state, observation) pair it expects.
        obs = SchedulingObservation(
            success=True,
            done=True,
            reward=state.final_reward,
        )
        return _grader.grade_episode(state, obs)

    # Nothing gradable: no usable trajectory and the episode is not completed.
    return 0.0
def task1_easy_grader(trajectory: Any = None) -> float:
    """Score an episode of ``task1_easy``.

    Scenario: 2 attendees, a free slot exists. Expected score: 0.8-1.0.

    Args:
        trajectory: Optional episode trajectory, forwarded unchanged.

    Returns:
        The score produced by ``_grade_task`` for this task id.
    """
    score = _grade_task("task1_easy", trajectory=trajectory)
    return score
def task2_medium_grader(trajectory: Any = None) -> float:
    """Score an episode of ``task2_medium``.

    Scenario: 4 attendees, requires rescheduling. Expected score: 0.5-0.8.

    Args:
        trajectory: Optional episode trajectory, forwarded unchanged.

    Returns:
        The score produced by ``_grade_task`` for this task id.
    """
    score = _grade_task("task2_medium", trajectory=trajectory)
    return score
def task3_hard_grader(trajectory: Any = None) -> float:
    """Score an episode of ``task3_hard``.

    Scenario: 6 attendees, cascading conflicts. Expected score: 0.2-0.6.

    Args:
        trajectory: Optional episode trajectory, forwarded unchanged.

    Returns:
        The score produced by ``_grade_task`` for this task id.
    """
    score = _grade_task("task3_hard", trajectory=trajectory)
    return score