Spaces:
Sleeping
Sleeping
| """Grader for server_error task: 500 Internal Server Error.""" | |
def grade(trajectory) -> float:
    """Grade the server_error task based on the agent trajectory.

    A step may be either a sequence ``(action, observation, reward)`` or a
    mapping with ``"action"`` and ``"reward"`` keys. Two signals are derived
    from the whole episode:

    * whether any step's action equals ``"escalate"``;
    * whether any step carries a strictly positive reward.

    Args:
        trajectory: Steps from the episode, in either of the shapes above.

    Returns:
        0.0 for an empty (or ``None``) trajectory, otherwise
        0.95 if the agent escalated and some step had a positive reward,
        0.6 if it escalated without any positive reward,
        0.4 if some step had a positive reward without escalating,
        0.05 if neither happened.
    """
    if not trajectory:
        return 0.0

    escalated = any(_step_action(step) == "escalate" for step in trajectory)
    resolved = any(_step_reward(step) > 0 for step in trajectory)

    if escalated and resolved:
        return 0.95
    if escalated:
        return 0.6
    if resolved:
        return 0.4
    return 0.05


def _step_action(step):
    """Return the action of one step, or "" when the step has none."""
    if isinstance(step, (list, tuple)):
        # An empty sequence carries no action; avoid IndexError on step[0].
        return step[0] if step else ""
    return step.get("action", "")


def _step_reward(step):
    """Return the reward of one step, treating a missing/None reward as 0."""
    if isinstance(step, (list, tuple)):
        # Sequences shorter than three elements have no reward slot.
        reward = step[2] if len(step) > 2 else None
    else:
        reward = step.get("reward", 0)
    # A None reward cannot be compared with 0; count it as "no reward".
    return 0 if reward is None else reward