Spaces:
Sleeping
Sleeping
Commit ·
8a428a8
1
Parent(s): 4c2a495
Clamp grader scores to strict (0,1) range for validator compliance
Browse files - src/graders.py +13 -11
src/graders.py
CHANGED
|
@@ -30,19 +30,21 @@ def grade_task_easy(history: List[Dict[str, Any]], attack_start_step: int) -> fl
|
|
| 30 |
break
|
| 31 |
|
| 32 |
if first_correct_detection_step is None:
|
| 33 |
-
return 0.
|
| 34 |
|
| 35 |
delay = first_correct_detection_step - attack_start_step
|
| 36 |
|
| 37 |
if delay <= 20:
|
| 38 |
-
|
| 39 |
elif delay <= 100:
|
| 40 |
-
# Linear decay from
|
| 41 |
-
|
| 42 |
elif delay <= 420:
|
| 43 |
-
|
| 44 |
else:
|
| 45 |
-
|
|
|
|
|
|
|
| 46 |
|
| 47 |
|
| 48 |
def grade_task_medium(history: List[Dict[str, Any]], attack_start_step: int) -> float:
|
|
@@ -72,7 +74,7 @@ def grade_task_medium(history: List[Dict[str, Any]], attack_start_step: int) ->
|
|
| 72 |
first_correct_classification_step = step
|
| 73 |
|
| 74 |
if steps_after_attack == 0:
|
| 75 |
-
return 0.
|
| 76 |
|
| 77 |
base_score = correct_classifications / steps_after_attack
|
| 78 |
|
|
@@ -81,8 +83,8 @@ def grade_task_medium(history: List[Dict[str, Any]], attack_start_step: int) ->
|
|
| 81 |
else:
|
| 82 |
early_bonus = 0.0
|
| 83 |
|
| 84 |
-
score = min(
|
| 85 |
-
return max(0.
|
| 86 |
|
| 87 |
|
| 88 |
def grade_task_hard(
|
|
@@ -130,6 +132,6 @@ def grade_task_hard(
|
|
| 130 |
|
| 131 |
# Applying false alarm penalty
|
| 132 |
penalty = 0.2 * false_alarm_count
|
| 133 |
-
score = max(0.
|
| 134 |
|
| 135 |
-
return
|
|
|
|
| 30 |
break
|
| 31 |
|
| 32 |
if first_correct_detection_step is None:
|
| 33 |
+
return 0.01
|
| 34 |
|
| 35 |
delay = first_correct_detection_step - attack_start_step
|
| 36 |
|
| 37 |
if delay <= 20:
|
| 38 |
+
score = 0.99
|
| 39 |
elif delay <= 100:
|
| 40 |
+
# Linear decay from 0.99 at delay=20 to 0.5 at delay=100
|
| 41 |
+
score = 0.99 - 0.49 * (delay - 20) / 80.0
|
| 42 |
elif delay <= 420:
|
| 43 |
+
score = 0.2
|
| 44 |
else:
|
| 45 |
+
score = 0.01
|
| 46 |
+
|
| 47 |
+
return max(0.01, min(0.99, score))
|
| 48 |
|
| 49 |
|
| 50 |
def grade_task_medium(history: List[Dict[str, Any]], attack_start_step: int) -> float:
|
|
|
|
| 74 |
first_correct_classification_step = step
|
| 75 |
|
| 76 |
if steps_after_attack == 0:
|
| 77 |
+
return 0.01
|
| 78 |
|
| 79 |
base_score = correct_classifications / steps_after_attack
|
| 80 |
|
|
|
|
| 83 |
else:
|
| 84 |
early_bonus = 0.0
|
| 85 |
|
| 86 |
+
score = min(0.99, base_score * 0.6 + early_bonus)
|
| 87 |
+
return max(0.01, score)
|
| 88 |
|
| 89 |
|
| 90 |
def grade_task_hard(
|
|
|
|
| 132 |
|
| 133 |
# Applying false alarm penalty
|
| 134 |
penalty = 0.2 * false_alarm_count
|
| 135 |
+
score = max(0.01, score - penalty)
|
| 136 |
|
| 137 |
+
return max(0.01, min(0.99, score))
|