M ShreeRaj committed on
Implement grader function for task evaluation
Browse files — Added a grader function to evaluate task trajectories.
models.py
CHANGED
|
@@ -62,6 +62,20 @@ def generate_tasks(level: str) -> list[Task]:
|
|
| 62 |
]
|
| 63 |
return []
|
| 64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
def deterministic_grader(tasks: list[Task], time_step: int, final_energy: float) -> float:
|
| 66 |
"""
|
| 67 |
A deterministic grader returning 0.0-1.0 based on:
|
|
|
|
| 62 |
]
|
| 63 |
return []
|
| 64 |
|
| 65 |
+
def grader(trajectory: dict) -> float:
    """Score a trajectory dict by delegating to ``deterministic_grader``.

    Single-argument, OpenEnv-compatible entry point for the openenv-core
    task evaluation framework.

    Args:
        trajectory: Mapping read for the keys ``"tasks"``, ``"time_step"``
            and ``"energy"``. Absent keys fall back to ``[]``, ``50`` and
            ``0.5`` respectively.

    Returns:
        Whatever ``deterministic_grader`` returns for the extracted values.
    """
    entries = trajectory.get("tasks", [])
    step = trajectory.get("time_step", 50)
    energy = trajectory.get("energy", 0.5)

    # Dict entries are expanded into Task objects; anything else is passed
    # through untouched.
    tasks = []
    for entry in entries:
        if isinstance(entry, dict):
            entry = Task(**entry)
        tasks.append(entry)

    return deterministic_grader(tasks, step, energy)
|
| 77 |
+
|
| 78 |
+
|
| 79 |
def deterministic_grader(tasks: list[Task], time_step: int, final_energy: float) -> float:
|
| 80 |
"""
|
| 81 |
A deterministic grader returning 0.0-1.0 based on:
|