Spaces:
Sleeping
Sleeping
Commit ·
c750553
1
Parent(s): d3dcd72
Update tasks/auth_error/grader.py (#5)
Browse files- Update tasks/auth_error/grader.py (20941a74e26cbab9a652f027b0c23662811ebfd7)
Co-authored-by: tarun saroya <Tarun-sar0ya@users.noreply.huggingface.co>
- tasks/auth_error/grader.py +28 -5
tasks/auth_error/grader.py
CHANGED
|
@@ -1,9 +1,32 @@
|
|
| 1 |
"""Grader for auth_error task: 401 Unauthorized - expired API key."""
|
| 2 |
|
| 3 |
-
from tasks.grading_helper import run_agent_on_incident
|
| 4 |
|
|
|
|
|
|
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""Grader for auth_error task: 401 Unauthorized - expired API key."""
|
| 2 |
|
|
|
|
| 3 |
|
| 4 |
+
def _step_field(step, index, key, default):
    """Return one field of a trajectory step, or *default* when it is absent.

    Sequence steps are read positionally (``step[index]``); dict-like steps
    are read with ``step.get(key, default)``; anything else falls back to
    attribute access so a malformed step never aborts grading.
    """
    if isinstance(step, (list, tuple)):
        # Short tuples (e.g. an action with no reward yet) yield the default
        # instead of raising IndexError.
        return step[index] if len(step) > index else default
    if hasattr(step, "get"):
        return step.get(key, default)
    return getattr(step, key, default)


def _is_positive(reward) -> bool:
    """Return True when *reward* compares greater than zero.

    A missing (``None``) or otherwise non-comparable reward counts as
    "not positive" rather than raising TypeError.
    """
    try:
        return bool(reward > 0)
    except TypeError:
        return False


def grade(trajectory) -> float:
    """Grade the auth_error task based on agent trajectory.

    Args:
        trajectory: Iterable of steps. Each step is either an
            ``(action, observation, reward)`` sequence or a dict-like object
            with ``"action"`` and ``"reward"`` keys.

    Returns:
        0.0 for an empty/missing trajectory; otherwise 0.95 when
        ``refresh_token`` was used and some step had a positive reward,
        0.6 when only ``refresh_token`` was used, 0.4 when only a positive
        reward was seen, and 0.05 when neither happened.
    """
    if not trajectory:
        return 0.0

    # Materialize once: the trajectory is scanned twice below, which would
    # silently exhaust a generator/iterator after the first pass.
    steps = list(trajectory)

    # Check if agent used refresh_token to fix the auth error
    actions = [_step_field(step, 0, "action", "") for step in steps]
    correct_action_used = "refresh_token" in actions

    # Any step with a positive reward counts as the incident being resolved.
    resolved = any(
        _is_positive(_step_field(step, 2, "reward", 0)) for step in steps
    )

    if correct_action_used and resolved:
        return 0.95
    if correct_action_used:
        return 0.6
    if resolved:
        return 0.4
    return 0.05
|