savetrees committed on
Commit
cd482eb
·
verified ·
1 Parent(s): 7876e2f

Upload folder using huggingface_hub

Browse files
bug_triage_env/graders/task1_grader.py CHANGED
@@ -19,13 +19,13 @@ def grade(episode_log: List[Dict[str, Any]], ground_truth: Dict[str, Any]) -> fl
19
  Score in [0.0, 1.0].
20
  """
21
  if not episode_log:
22
- return 0.0
23
 
24
  last_action = episode_log[-1]
25
  predicted = (last_action.get("bug_type") or "").strip().lower()
26
  expected = (ground_truth.get("bug_type") or "").strip().lower()
27
 
28
  if not predicted:
29
- return 0.0
30
 
31
- return 1.0 if predicted == expected else 0.01
 
19
  Score in [0.0, 1.0].
20
  """
21
  if not episode_log:
22
+ return 0.01
23
 
24
  last_action = episode_log[-1]
25
  predicted = (last_action.get("bug_type") or "").strip().lower()
26
  expected = (ground_truth.get("bug_type") or "").strip().lower()
27
 
28
  if not predicted:
29
+ return 0.01
30
 
31
+ return 0.99 if predicted == expected else 0.01
bug_triage_env/graders/task2_grader.py CHANGED
@@ -27,17 +27,17 @@ def grade(episode_log: List[Dict[str, Any]], ground_truth: Dict[str, Any]) -> fl
27
  float score in [0.0, 1.0].
28
  """
29
  if not episode_log:
30
- return 0.0
31
 
32
  last_action = episode_log[-1]
33
  predicted = (last_action.get("priority") or "").strip().lower()
34
  expected = (ground_truth.get("priority") or "").strip().lower()
35
 
36
  if not predicted or predicted not in PRIORITY_ORDER:
37
- return 0.0
38
  if expected not in PRIORITY_ORDER:
39
- return 0.0
40
 
41
  diff = abs(PRIORITY_ORDER[predicted] - PRIORITY_ORDER[expected])
42
  score = 1.0 - diff * (1.0 / 3.0)
43
- return max(0.0, min(1.0, score))
 
27
  float score in [0.0, 1.0].
28
  """
29
  if not episode_log:
30
+ return 0.01
31
 
32
  last_action = episode_log[-1]
33
  predicted = (last_action.get("priority") or "").strip().lower()
34
  expected = (ground_truth.get("priority") or "").strip().lower()
35
 
36
  if not predicted or predicted not in PRIORITY_ORDER:
37
+ return 0.01
38
  if expected not in PRIORITY_ORDER:
39
+ return 0.01
40
 
41
  diff = abs(PRIORITY_ORDER[predicted] - PRIORITY_ORDER[expected])
42
  score = 1.0 - diff * (1.0 / 3.0)
43
+ return max(0.01, min(0.99, score))
bug_triage_env/graders/task3_grader.py CHANGED
@@ -85,7 +85,7 @@ def grade(episode_log: List[Dict[str, Any]], ground_truth: Dict[str, Any]) -> fl
85
  float score in [0.0, 1.0].
86
  """
87
  if not episode_log:
88
- return 0.0
89
 
90
  action = episode_log[-1]
91
 
@@ -111,4 +111,4 @@ def grade(episode_log: List[Dict[str, Any]], ground_truth: Dict[str, Any]) -> fl
111
  + WEIGHTS["action"] * s_act
112
  )
113
 
114
- return max(0.0, min(1.0, round(score, 4)))
 
85
  float score in [0.0, 1.0].
86
  """
87
  if not episode_log:
88
+ return 0.01
89
 
90
  action = episode_log[-1]
91
 
 
111
  + WEIGHTS["action"] * s_act
112
  )
113
 
114
+ return max(0.01, min(0.99, round(score, 4)))
bug_triage_env/server/environment.py CHANGED
@@ -117,12 +117,15 @@ class BugTriageEnvironment(_OpenEnvBase):
117
  grader_fn = GRADERS.get(ep["task_id"], GRADERS["task_1"])
118
  grader_score = grader_fn(ep["actions"], ep["ground_truth"])
119
 
120
- # Bound the reward strictly to [0.0, 1.0] to pass Hackathon Phase 2
 
 
 
121
  calibration_bonus = self._compute_calibration_bonus(
122
  action.confidence, grader_score
123
  )
124
- reward = grader_score + calibration_bonus
125
- reward = max(0.0, min(1.0, reward))
126
 
127
  # Build feedback string
128
  feedback = self._build_feedback(
 
117
  grader_fn = GRADERS.get(ep["task_id"], GRADERS["task_1"])
118
  grader_score = grader_fn(ep["actions"], ep["ground_truth"])
119
 
120
+ # Shaped reward: map [0, 1] to [-0.5, 1.0] for GRPO training
121
+ reward = (grader_score * 1.5) - 0.5
122
+
123
+ # Confidence calibration bonus/penalty
124
  calibration_bonus = self._compute_calibration_bonus(
125
  action.confidence, grader_score
126
  )
127
+ reward += calibration_bonus
128
+ reward = max(-0.65, min(1.1, reward))
129
 
130
  # Build feedback string
131
  feedback = self._build_feedback(