Spaces:
Sleeping
Sleeping
| from typing import Any, Dict, List | |
def grade(trajectory: List[Dict[str, Any]], **kwargs) -> float:
    """Return the final reward of a trajectory, clamped to [0.01, 0.99].

    The reward is read from the last step of ``trajectory``. The top-level
    ``"reward"`` key is preferred; if it is absent or unusable, the
    ``"reward"`` key of a mapping stored under ``"observation"`` is tried.

    Args:
        trajectory: Sequence of step dictionaries; only the last is inspected.
        **kwargs: Accepted for call-site compatibility and ignored.

    Returns:
        The reward clamped to the closed interval [0.01, 0.99]. The lower
        bound 0.01 is returned when the trajectory is empty or when no
        usable (numeric, non-NaN) reward can be found.
    """
    # Imported locally so the block does not depend on file-level imports.
    import math
    from collections.abc import Mapping

    floor, ceiling = 0.01, 0.99

    if not trajectory:
        return floor

    last_step = trajectory[-1]
    if not isinstance(last_step, Mapping):
        return floor

    # Candidates in priority order: top-level reward, then observation reward.
    candidates = [last_step.get("reward")]
    observation = last_step.get("observation")
    if isinstance(observation, Mapping):
        candidates.append(observation.get("reward"))

    for candidate in candidates:
        if candidate is None:
            continue
        try:
            reward = float(candidate)
        except (TypeError, ValueError, OverflowError):
            # Not interpretable as a number; try the next source.
            continue
        # NaN compares False against everything, so min/max would let it
        # pass through the clamp unchanged; reject it explicitly.
        if math.isnan(reward):
            continue
        return min(max(reward, floor), ceiling)

    return floor