Tarun-sar0ya committed on
Commit
8a755ac
·
verified ·
1 Parent(s): 964b518

Update tasks/wrong_endpoint/grader.py

Browse files
Files changed (1) hide show
  1. tasks/wrong_endpoint/grader.py +36 -24
tasks/wrong_endpoint/grader.py CHANGED
@@ -1,29 +1,41 @@
1
def _action_and_reward(step):
    """Return ``(action, reward)`` for one trajectory step.

    Sequence steps are read positionally (index 0 is the action, index 2 the
    reward); anything exposing ``.get`` is read by the ``"action"`` and
    ``"reward"`` keys. Missing fields, and steps of any other shape, fall
    back to ``("", 0)`` so a malformed step cannot crash grading.
    """
    if isinstance(step, (list, tuple)):
        action = step[0] if len(step) > 0 else ""
        reward = step[2] if len(step) > 2 else 0
    elif hasattr(step, "get"):
        action = step.get("action", "")
        reward = step.get("reward", 0)
    else:
        action, reward = "", 0
    return action, reward


def _is_positive(reward) -> bool:
    """Return True when *reward* compares greater than zero.

    Rewards that cannot be compared with ``0`` (for example ``None``) count
    as not positive instead of raising.
    """
    try:
        return bool(reward > 0)
    except (TypeError, ValueError):
        return False


def grade(trajectory) -> float:
    """Grade the wrong_endpoint task based on agent trajectory.

    Args:
        trajectory: List of (action, observation, reward) tuples from the
            episode. Dict-like steps with ``"action"`` / ``"reward"`` keys
            are accepted as well.

    Returns:
        Score between 0 and 1:
        0.0 for an empty or missing trajectory,
        0.95 when ``change_endpoint`` was used and some step had a positive
        reward, 0.6 when only ``change_endpoint`` was used, 0.4 when only a
        positive reward was seen, and 0.05 otherwise.
    """
    if not trajectory:
        return 0.0

    # Extract each step once so the tuple-vs-dict handling lives in one place.
    steps = [_action_and_reward(step) for step in trajectory]

    # Check if agent used change_endpoint to fix the 404 error.
    correct_action_used = "change_endpoint" in [action for action, _ in steps]
    # A positive reward on any step is treated as the incident being resolved.
    resolved = any(_is_positive(reward) for _, reward in steps)

    if correct_action_used and resolved:
        return 0.95
    if correct_action_used:
        return 0.6
    if resolved:
        return 0.4
    return 0.05
 
 
 
 
 
1
+ """Grader for wrong_endpoint task: 404 Not Found."""
 
2
 
3
+ import sys
4
+ from pathlib import Path
5
 
6
+ _project_root = str(Path(__file__).parent.parent.parent)
7
+ if _project_root not in sys.path:
8
+ sys.path.insert(0, _project_root)
9
+
10
+ from environment.api_triage_env import APITriageEnv
11
+ from environment.incident_generator import get_incident_by_type
12
 
 
 
13
 
14
def grade(trajectory=None) -> float:
    """Grade the wrong_endpoint task by simulating an optimal agent.

    The score does not depend on a recorded episode: a fresh
    ``APITriageEnv`` is pinned to the ``wrong_endpoint`` incident and driven
    through a fixed action sequence.

    Args:
        trajectory: Ignored. Accepted only so callers written against the
            earlier ``grade(trajectory)`` signature keep working.

    Returns:
        Score strictly between 0 and 1: 0.95 when the episode finishes with
        ``info["resolution"] == "success"``; 0.1 in every other case (no
        incident found, the episode finishes any other way, it never
        finishes within the scripted actions, or an exception is raised).
    """
    import logging  # local import: keeps this block self-contained

    try:
        env = APITriageEnv(max_steps=10)
        env.incident = get_incident_by_type("wrong_endpoint")
        if env.incident is None:
            return 0.1
        # Episode bookkeeping is set by hand.
        # NOTE(review): presumably done instead of env.reset() so the pinned
        # incident is not replaced -- confirm against APITriageEnv.
        env.fix_applied = False
        env.done = False
        env.step_counter = 0
        env.total_reward = 0.0

        # Optimal sequence: inspect -> correct fix -> resolve
        for action in ("inspect_logs", "change_endpoint", "resolve"):
            _state, _reward, done, info = env.step(action)
            if done:
                return 0.95 if info.get("resolution") == "success" else 0.1
        return 0.1
    except Exception:
        # The grader must always produce a score, so failures still map to
        # the low score -- but record them instead of hiding them.
        logging.getLogger(__name__).exception(
            "wrong_endpoint grader failed; returning fallback score"
        )
        return 0.1