Kavya988 Tarun-sar0ya committed on
Commit
c750553
·
1 Parent(s): d3dcd72

Update tasks/auth_error/grader.py (#5)

Browse files

- Update tasks/auth_error/grader.py (20941a74e26cbab9a652f027b0c23662811ebfd7)


Co-authored-by: tarun saroya <Tarun-sar0ya@users.noreply.huggingface.co>

Files changed (1) hide show
  1. tasks/auth_error/grader.py +28 -5
tasks/auth_error/grader.py CHANGED
@@ -1,9 +1,32 @@
1
  """Grader for auth_error task: 401 Unauthorized - expired API key."""
2
 
3
- from tasks.grading_helper import run_agent_on_incident
4
 
 
 
5
 
6
- def grade() -> float:
7
- """Grade the auth_error task. Returns score between 0 and 1."""
8
- score = run_agent_on_incident("auth_error")
9
- return max(0.001, min(0.999, score))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """Grader for auth_error task: 401 Unauthorized - expired API key."""
2
 
 
3
 
4
# Action name the grader looks for in the trajectory.
_TARGET_ACTION = "refresh_token"


def _step_field(step, index, key, default):
    """Return one field of a trajectory step, or *default* if it is absent.

    Tuple/list steps are read positionally (``step[index]``); any other
    object exposing a callable ``get`` (e.g. a dict) is read by *key*.
    Steps that are too short, or of some other type, yield *default*
    instead of raising.
    """
    if isinstance(step, (list, tuple)):
        return step[index] if len(step) > index else default
    getter = getattr(step, "get", None)
    if callable(getter):
        return getter(key, default)
    return default


def _is_positive(reward):
    """Return whether *reward* compares greater than zero.

    Rewards that cannot be ordered against an int (``None``, strings, ...)
    count as not positive rather than aborting the whole grading run.
    """
    try:
        return reward > 0
    except TypeError:
        return False


def grade(trajectory) -> float:
    """Grade the auth_error task based on agent trajectory.

    Args:
        trajectory: Iterable of steps from the episode. Each step is either
            an ``(action, observation, reward)`` tuple/list or a mapping
            with ``"action"`` and ``"reward"`` keys. Missing fields are
            treated as an empty action and a reward of 0.

    Returns:
        Score between 0 and 1:
            0.0  - empty (or missing) trajectory
            0.95 - ``refresh_token`` was used and some step had reward > 0
            0.6  - ``refresh_token`` was used, no positive reward
            0.4  - some step had reward > 0, ``refresh_token`` not used
            0.05 - neither
    """
    # Materialise once so one-shot iterators can be scanned twice below.
    steps = list(trajectory) if trajectory else []
    if not steps:
        # NOTE(review): every non-empty outcome below lies strictly inside
        # (0, 1); confirm that callers accept an exact 0.0 here.
        return 0.0

    # Check if agent used refresh_token to fix the auth error.
    actions = [_step_field(step, 0, "action", "") for step in steps]
    correct_action_used = _TARGET_ACTION in actions

    # Any positive reward anywhere in the episode counts as "resolved".
    resolved = any(
        _is_positive(_step_field(step, 2, "reward", 0)) for step in steps
    )

    if correct_action_used:
        return 0.95 if resolved else 0.6
    return 0.4 if resolved else 0.05