Spaces:
Sleeping
Sleeping
Update tasks/rate_limit/grader.py
#6
by Tarun-sar0ya - opened
- tasks/rate_limit/grader.py +28 -5
tasks/rate_limit/grader.py
CHANGED
|
@@ -1,9 +1,32 @@
|
|
| 1 |
"""Grader for rate_limit task: 429 Too Many Requests."""
|
| 2 |
|
| 3 |
-
from tasks.grading_helper import run_agent_on_incident
|
| 4 |
|
|
|
|
|
|
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""Grader for rate_limit task: 429 Too Many Requests."""
|
| 2 |
|
|
|
|
| 3 |
|
| 4 |
+
def grade(trajectory) -> float:
    """Grade the rate_limit task based on the agent trajectory.

    Each step may be either a list/tuple laid out as
    ``(action, observation, reward)`` or a mapping-like object exposing
    ``.get`` with ``"action"`` and ``"reward"`` keys.

    Args:
        trajectory: Sequence of steps from the episode. An empty or ``None``
            trajectory scores 0.0.

    Returns:
        One of the fixed scores:
        0.0  - empty trajectory,
        0.95 - ``wait_retry`` was used and some step had a positive reward,
        0.6  - ``wait_retry`` was used but no step had a positive reward,
        0.4  - some step had a positive reward without ``wait_retry``,
        0.05 - neither.
    """
    if not trajectory:
        return 0.0

    correct_action_used = False
    resolved = False

    for step in trajectory:
        if isinstance(step, (list, tuple)):
            # Tolerate truncated steps such as (action,) or
            # (action, observation): a missing field counts as "no action" /
            # "no reward" instead of raising IndexError.
            action = step[0] if step else ""
            reward = step[2] if len(step) > 2 else 0
        else:
            action = step.get("action", "")
            reward = step.get("reward", 0)

        # wait_retry is the action expected to clear the 429 error.
        if action == "wait_retry":
            correct_action_used = True

        # A reward explicitly recorded as None cannot be ordered against 0;
        # treat it as not resolved rather than raising TypeError.
        if reward is not None and reward > 0:
            resolved = True

    if correct_action_used and resolved:
        return 0.95
    if correct_action_used:
        return 0.6
    if resolved:
        return 0.4
    return 0.05
|