Spaces:
Sleeping
Sleeping
"""Grader for auth_error task: 401 Unauthorized - expired API key."""
def grade(trajectory) -> float:
    """Grade the auth_error task based on agent trajectory.

    The score depends on two signals taken from the episode: whether any
    step's action equals ``"refresh_token"``, and whether any step carries
    a reward greater than zero.

    Args:
        trajectory: Iterable of steps. Each step is either an
            ``(action, observation, reward)`` list/tuple or a dict-like
            object exposing ``.get`` with ``"action"`` and ``"reward"``
            keys. A list/tuple that is too short, or a reward that is
            missing, ``None`` or not comparable with a number, counts as
            "no action" / "no reward" instead of raising.

    Returns:
        Score between 0 and 1: ``0.0`` for a falsy trajectory, ``0.95``
        when ``refresh_token`` was used and a positive reward was seen,
        ``0.6`` for ``refresh_token`` alone, ``0.4`` for a positive reward
        alone, and ``0.05`` otherwise.
    """
    if not trajectory:
        return 0.0

    correct_action_used = False
    resolved = False

    # Single pass: a one-shot iterator would be exhausted by a first scan
    # for actions, leaving nothing for a second scan for rewards.
    for step in trajectory:
        if isinstance(step, (list, tuple)):
            # Tolerate truncated steps such as () or (action, observation).
            action = step[0] if step else ""
            reward = step[2] if len(step) > 2 else 0
        else:
            action = step.get("action", "")
            reward = step.get("reward", 0)

        if action == "refresh_token":
            correct_action_used = True

        if not resolved and reward is not None:
            try:
                resolved = bool(reward > 0)
            except TypeError:
                # Reward is not comparable with a number; treat as no reward.
                resolved = False

    if correct_action_used and resolved:
        return 0.95
    if correct_action_used:
        return 0.6
    if resolved:
        return 0.4
    return 0.05