Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- reward/grader.py +11 -1
reward/grader.py
CHANGED
|
@@ -19,6 +19,11 @@ def _clamp(score: float) -> float:
|
|
| 19 |
return min(max(round(score, 4), 0.10), 0.90)
|
| 20 |
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
# ── Task 1: Access Decision ───────────────────────────────────────────────────
|
| 23 |
|
| 24 |
def grade_access_decision(world_state: Dict[str, Any]) -> Dict[str, Any]:
|
|
@@ -52,8 +57,8 @@ def grade_access_decision(world_state: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 52 |
|
| 53 |
if not decided_req:
|
| 54 |
# No decision submitted — partial credit for viewing (policy_compliance baseline)
|
| 55 |
-
# Score: 0.0 across all — clamp guarantees (0, 1) → 0.01
|
| 56 |
details["error"] = "No decision was submitted"
|
|
|
|
| 57 |
total = _clamp(sum(scores[k] * weights[k] for k in weights))
|
| 58 |
return {"score": total, "breakdown": scores,
|
| 59 |
"weights": weights, "details": details}
|
|
@@ -64,6 +69,7 @@ def grade_access_decision(world_state: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 64 |
|
| 65 |
if not correct:
|
| 66 |
details["error"] = "No correct decision found for this request"
|
|
|
|
| 67 |
total = _clamp(sum(scores[k] * weights[k] for k in weights))
|
| 68 |
return {"score": total, "breakdown": scores,
|
| 69 |
"weights": weights, "details": details}
|
|
@@ -103,6 +109,7 @@ def grade_access_decision(world_state: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 103 |
scores["correct_justification"] = 1.0
|
| 104 |
details["justification"] = {"agent": agent_just, "correct": correct_just}
|
| 105 |
|
|
|
|
| 106 |
total = _clamp(sum(scores[k] * weights[k] for k in weights))
|
| 107 |
return {"score": total, "breakdown": scores, "weights": weights, "details": details}
|
| 108 |
|
|
@@ -136,6 +143,7 @@ def grade_jit_escalation(world_state: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 136 |
|
| 137 |
if not req:
|
| 138 |
details["error"] = "No request found"
|
|
|
|
| 139 |
return {"score": _clamp(0.0), "breakdown": scores,
|
| 140 |
"weights": weights, "details": details}
|
| 141 |
|
|
@@ -199,6 +207,7 @@ def grade_jit_escalation(world_state: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 199 |
details["final_decision"] = {
|
| 200 |
"grant_activated": grant_activated, "should_approve": should_approve}
|
| 201 |
|
|
|
|
| 202 |
total = _clamp(sum(scores[k] * weights[k] for k in weights))
|
| 203 |
return {"score": total, "breakdown": scores, "weights": weights, "details": details}
|
| 204 |
|
|
@@ -295,6 +304,7 @@ def grade_access_review(world_state: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 295 |
scores["review_submitted"] = 1.0
|
| 296 |
details["review_submitted"] = review_submitted
|
| 297 |
|
|
|
|
| 298 |
total = _clamp(sum(scores[k] * weights[k] for k in weights))
|
| 299 |
return {"score": total, "breakdown": scores, "weights": weights, "details": details}
|
| 300 |
|
|
|
|
| 19 |
return min(max(round(score, 4), 0.10), 0.90)
|
| 20 |
|
| 21 |
|
| 22 |
+
def _clamp_breakdown(scores: dict) -> dict:
|
| 23 |
+
"""Clamp every individual sub-score to strictly (0, 1)."""
|
| 24 |
+
return {k: _clamp(v) for k, v in scores.items()}
|
| 25 |
+
|
| 26 |
+
|
| 27 |
# ── Task 1: Access Decision ───────────────────────────────────────────────────
|
| 28 |
|
| 29 |
def grade_access_decision(world_state: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
|
| 57 |
|
| 58 |
if not decided_req:
|
| 59 |
# No decision submitted — partial credit for viewing (policy_compliance baseline)
|
|
|
|
| 60 |
details["error"] = "No decision was submitted"
|
| 61 |
+
scores = _clamp_breakdown(scores)
|
| 62 |
total = _clamp(sum(scores[k] * weights[k] for k in weights))
|
| 63 |
return {"score": total, "breakdown": scores,
|
| 64 |
"weights": weights, "details": details}
|
|
|
|
| 69 |
|
| 70 |
if not correct:
|
| 71 |
details["error"] = "No correct decision found for this request"
|
| 72 |
+
scores = _clamp_breakdown(scores)
|
| 73 |
total = _clamp(sum(scores[k] * weights[k] for k in weights))
|
| 74 |
return {"score": total, "breakdown": scores,
|
| 75 |
"weights": weights, "details": details}
|
|
|
|
| 109 |
scores["correct_justification"] = 1.0
|
| 110 |
details["justification"] = {"agent": agent_just, "correct": correct_just}
|
| 111 |
|
| 112 |
+
scores = _clamp_breakdown(scores)
|
| 113 |
total = _clamp(sum(scores[k] * weights[k] for k in weights))
|
| 114 |
return {"score": total, "breakdown": scores, "weights": weights, "details": details}
|
| 115 |
|
|
|
|
| 143 |
|
| 144 |
if not req:
|
| 145 |
details["error"] = "No request found"
|
| 146 |
+
scores = _clamp_breakdown(scores)
|
| 147 |
return {"score": _clamp(0.0), "breakdown": scores,
|
| 148 |
"weights": weights, "details": details}
|
| 149 |
|
|
|
|
| 207 |
details["final_decision"] = {
|
| 208 |
"grant_activated": grant_activated, "should_approve": should_approve}
|
| 209 |
|
| 210 |
+
scores = _clamp_breakdown(scores)
|
| 211 |
total = _clamp(sum(scores[k] * weights[k] for k in weights))
|
| 212 |
return {"score": total, "breakdown": scores, "weights": weights, "details": details}
|
| 213 |
|
|
|
|
| 304 |
scores["review_submitted"] = 1.0
|
| 305 |
details["review_submitted"] = review_submitted
|
| 306 |
|
| 307 |
+
scores = _clamp_breakdown(scores)
|
| 308 |
total = _clamp(sum(scores[k] * weights[k] for k in weights))
|
| 309 |
return {"score": total, "breakdown": scores, "weights": weights, "details": details}
|
| 310 |
|