Krooz committed on
Commit
8b57fde
·
verified ·
1 Parent(s): 976988f

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. reward/grader.py +11 -1
reward/grader.py CHANGED
@@ -19,6 +19,11 @@ def _clamp(score: float) -> float:
19
  return min(max(round(score, 4), 0.10), 0.90)
20
 
21
 
 
 
 
 
 
22
  # ── Task 1: Access Decision ───────────────────────────────────────────────────
23
 
24
  def grade_access_decision(world_state: Dict[str, Any]) -> Dict[str, Any]:
@@ -52,8 +57,8 @@ def grade_access_decision(world_state: Dict[str, Any]) -> Dict[str, Any]:
52
 
53
  if not decided_req:
54
  # No decision submitted — partial credit for viewing (policy_compliance baseline)
55
- # Score: 0.0 across all — clamp guarantees (0, 1) → 0.01
56
  details["error"] = "No decision was submitted"
 
57
  total = _clamp(sum(scores[k] * weights[k] for k in weights))
58
  return {"score": total, "breakdown": scores,
59
  "weights": weights, "details": details}
@@ -64,6 +69,7 @@ def grade_access_decision(world_state: Dict[str, Any]) -> Dict[str, Any]:
64
 
65
  if not correct:
66
  details["error"] = "No correct decision found for this request"
 
67
  total = _clamp(sum(scores[k] * weights[k] for k in weights))
68
  return {"score": total, "breakdown": scores,
69
  "weights": weights, "details": details}
@@ -103,6 +109,7 @@ def grade_access_decision(world_state: Dict[str, Any]) -> Dict[str, Any]:
103
  scores["correct_justification"] = 1.0
104
  details["justification"] = {"agent": agent_just, "correct": correct_just}
105
 
 
106
  total = _clamp(sum(scores[k] * weights[k] for k in weights))
107
  return {"score": total, "breakdown": scores, "weights": weights, "details": details}
108
 
@@ -136,6 +143,7 @@ def grade_jit_escalation(world_state: Dict[str, Any]) -> Dict[str, Any]:
136
 
137
  if not req:
138
  details["error"] = "No request found"
 
139
  return {"score": _clamp(0.0), "breakdown": scores,
140
  "weights": weights, "details": details}
141
 
@@ -199,6 +207,7 @@ def grade_jit_escalation(world_state: Dict[str, Any]) -> Dict[str, Any]:
199
  details["final_decision"] = {
200
  "grant_activated": grant_activated, "should_approve": should_approve}
201
 
 
202
  total = _clamp(sum(scores[k] * weights[k] for k in weights))
203
  return {"score": total, "breakdown": scores, "weights": weights, "details": details}
204
 
@@ -295,6 +304,7 @@ def grade_access_review(world_state: Dict[str, Any]) -> Dict[str, Any]:
295
  scores["review_submitted"] = 1.0
296
  details["review_submitted"] = review_submitted
297
 
 
298
  total = _clamp(sum(scores[k] * weights[k] for k in weights))
299
  return {"score": total, "breakdown": scores, "weights": weights, "details": details}
300
 
 
19
  return min(max(round(score, 4), 0.10), 0.90)
20
 
21
 
22
+ def _clamp_breakdown(scores: dict) -> dict:
23
+ """Clamp every individual sub-score to strictly (0, 1)."""
24
+ return {k: _clamp(v) for k, v in scores.items()}
25
+
26
+
27
  # ── Task 1: Access Decision ───────────────────────────────────────────────────
28
 
29
  def grade_access_decision(world_state: Dict[str, Any]) -> Dict[str, Any]:
 
57
 
58
  if not decided_req:
59
  # No decision submitted — partial credit for viewing (policy_compliance baseline)
 
60
  details["error"] = "No decision was submitted"
61
+ scores = _clamp_breakdown(scores)
62
  total = _clamp(sum(scores[k] * weights[k] for k in weights))
63
  return {"score": total, "breakdown": scores,
64
  "weights": weights, "details": details}
 
69
 
70
  if not correct:
71
  details["error"] = "No correct decision found for this request"
72
+ scores = _clamp_breakdown(scores)
73
  total = _clamp(sum(scores[k] * weights[k] for k in weights))
74
  return {"score": total, "breakdown": scores,
75
  "weights": weights, "details": details}
 
109
  scores["correct_justification"] = 1.0
110
  details["justification"] = {"agent": agent_just, "correct": correct_just}
111
 
112
+ scores = _clamp_breakdown(scores)
113
  total = _clamp(sum(scores[k] * weights[k] for k in weights))
114
  return {"score": total, "breakdown": scores, "weights": weights, "details": details}
115
 
 
143
 
144
  if not req:
145
  details["error"] = "No request found"
146
+ scores = _clamp_breakdown(scores)
147
  return {"score": _clamp(0.0), "breakdown": scores,
148
  "weights": weights, "details": details}
149
 
 
207
  details["final_decision"] = {
208
  "grant_activated": grant_activated, "should_approve": should_approve}
209
 
210
+ scores = _clamp_breakdown(scores)
211
  total = _clamp(sum(scores[k] * weights[k] for k in weights))
212
  return {"score": total, "breakdown": scores, "weights": weights, "details": details}
213
 
 
304
  scores["review_submitted"] = 1.0
305
  details["review_submitted"] = review_submitted
306
 
307
+ scores = _clamp_breakdown(scores)
308
  total = _clamp(sum(scores[k] * weights[k] for k in weights))
309
  return {"score": total, "breakdown": scores, "weights": weights, "details": details}
310