Spaces:
Sleeping
Sleeping
File size: 699 Bytes
9bd4a93 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 | from typing import Any, Dict, List
def grade(trajectory: List[Dict[str, Any]], **kwargs) -> float:
    """Return the final-step reward of a trajectory, clamped to [0.01, 0.99].

    The reward is read from the last element of ``trajectory``. The
    top-level ``"reward"`` key is preferred; when it is absent or ``None``,
    the ``"reward"`` key of a nested ``"observation"`` mapping is used.

    Args:
        trajectory: Sequence of step mappings. Only the last step is
            inspected. (Assumed to follow the OpenEnv step layout -- verify
            against the caller.)
        **kwargs: Accepted for interface compatibility; ignored.

    Returns:
        The reward as a float bounded to ``[0.01, 0.99]``. Falls back to
        ``0.01`` when the trajectory is empty, the last step is not a
        mapping, no reward is present, or the reward is not a finite-or-
        infinite number (i.e. it is ``None``, non-numeric, or NaN).
    """
    # Local imports keep the block self-contained; both are standard library.
    import math
    from collections.abc import Mapping

    lower_bound, upper_bound = 0.01, 0.99

    if not trajectory:
        return lower_bound

    last_step = trajectory[-1]
    if not isinstance(last_step, Mapping):
        return lower_bound

    raw_reward = last_step.get("reward")
    if raw_reward is None:
        # A missing *or* null top-level reward falls through to the nested
        # observation, which may itself be missing, None, or not a mapping.
        observation = last_step.get("observation")
        if isinstance(observation, Mapping):
            raw_reward = observation.get("reward")

    try:
        reward = float(raw_reward)
    except (TypeError, ValueError):
        # None or a non-numeric value: use the same default as "no reward".
        return lower_bound

    # NaN compares false against everything, so min/max would pass it
    # through unclamped; treat it as "no usable reward".
    if math.isnan(reward):
        return lower_bound

    return min(max(reward, lower_bound), upper_bound)
|