Commit ·
cca44f4
1
Parent(s): 4aa36f3
fix: clamp grader scores strictly between 0 and 1 for validator compliance
Browse files
- inference.py +10 -5
- task_graders.py +19 -8
inference.py
CHANGED
|
@@ -54,7 +54,8 @@ def task_1_basic_ram_reduction_grader(observation: EnergyOptimizationObservation
|
|
| 54 |
step_efficiency = max(0.0, 1.0 - (observation.steps_taken - max_steps) * 0.1)
|
| 55 |
|
| 56 |
composite_score = (ram_score * 0.4) + (energy_score * 0.4) + (step_efficiency * 0.2)
|
| 57 |
-
|
|
|
|
| 58 |
|
| 59 |
|
| 60 |
def task_2_energy_optimization_grader(observation: EnergyOptimizationObservation) -> float:
|
|
@@ -78,7 +79,8 @@ def task_2_energy_optimization_grader(observation: EnergyOptimizationObservation
|
|
| 78 |
step_efficiency = max(0.0, 1.0 - (observation.steps_taken - max_steps) * 0.08)
|
| 79 |
|
| 80 |
composite_score = (energy_score * 0.5) + (ram_constraint_score * 0.25) + (step_efficiency * 0.25)
|
| 81 |
-
|
|
|
|
| 82 |
|
| 83 |
|
| 84 |
def task_3_balanced_optimization_grader(observation: EnergyOptimizationObservation) -> float:
|
|
@@ -101,7 +103,8 @@ def task_3_balanced_optimization_grader(observation: EnergyOptimizationObservati
|
|
| 101 |
step_bonus = max(-0.2, -(observation.steps_taken - max_steps) * 0.05)
|
| 102 |
|
| 103 |
composite_score = max(0.0, min(1.0, (balance_score * 0.9) + step_bonus))
|
| 104 |
-
|
|
|
|
| 105 |
|
| 106 |
|
| 107 |
def task_4_advanced_efficiency_grader(observation: EnergyOptimizationObservation) -> float:
|
|
@@ -124,7 +127,8 @@ def task_4_advanced_efficiency_grader(observation: EnergyOptimizationObservation
|
|
| 124 |
step_bonus = max(-0.2, -(observation.steps_taken - max_steps) * 0.05)
|
| 125 |
|
| 126 |
composite_score = max(0.0, min(1.0, (balance_score * 0.9) + step_bonus))
|
| 127 |
-
|
|
|
|
| 128 |
|
| 129 |
|
| 130 |
def task_5_expert_optimization_grader(observation: EnergyOptimizationObservation) -> float:
|
|
@@ -147,7 +151,8 @@ def task_5_expert_optimization_grader(observation: EnergyOptimizationObservation
|
|
| 147 |
step_bonus = max(-0.3, -(observation.steps_taken - max_steps) * 0.05)
|
| 148 |
|
| 149 |
composite_score = max(0.0, min(1.0, (balance_score * 0.9) + step_bonus))
|
| 150 |
-
|
|
|
|
| 151 |
|
| 152 |
|
| 153 |
# Explicit task grader mapping for validator tool detection
|
|
|
|
| 54 |
step_efficiency = max(0.0, 1.0 - (observation.steps_taken - max_steps) * 0.1)
|
| 55 |
|
| 56 |
composite_score = (ram_score * 0.4) + (energy_score * 0.4) + (step_efficiency * 0.2)
|
| 57 |
+
clamped_score = max(0.001, min(0.999, composite_score))
|
| 58 |
+
return round(clamped_score, 3)
|
| 59 |
|
| 60 |
|
| 61 |
def task_2_energy_optimization_grader(observation: EnergyOptimizationObservation) -> float:
|
|
|
|
| 79 |
step_efficiency = max(0.0, 1.0 - (observation.steps_taken - max_steps) * 0.08)
|
| 80 |
|
| 81 |
composite_score = (energy_score * 0.5) + (ram_constraint_score * 0.25) + (step_efficiency * 0.25)
|
| 82 |
+
clamped_score = max(0.001, min(0.999, composite_score))
|
| 83 |
+
return round(clamped_score, 3)
|
| 84 |
|
| 85 |
|
| 86 |
def task_3_balanced_optimization_grader(observation: EnergyOptimizationObservation) -> float:
|
|
|
|
| 103 |
step_bonus = max(-0.2, -(observation.steps_taken - max_steps) * 0.05)
|
| 104 |
|
| 105 |
composite_score = max(0.0, min(1.0, (balance_score * 0.9) + step_bonus))
|
| 106 |
+
clamped_score = max(0.001, min(0.999, composite_score))
|
| 107 |
+
return round(clamped_score, 3)
|
| 108 |
|
| 109 |
|
| 110 |
def task_4_advanced_efficiency_grader(observation: EnergyOptimizationObservation) -> float:
|
|
|
|
| 127 |
step_bonus = max(-0.2, -(observation.steps_taken - max_steps) * 0.05)
|
| 128 |
|
| 129 |
composite_score = max(0.0, min(1.0, (balance_score * 0.9) + step_bonus))
|
| 130 |
+
clamped_score = max(0.001, min(0.999, composite_score))
|
| 131 |
+
return round(clamped_score, 3)
|
| 132 |
|
| 133 |
|
| 134 |
def task_5_expert_optimization_grader(observation: EnergyOptimizationObservation) -> float:
|
|
|
|
| 151 |
step_bonus = max(-0.3, -(observation.steps_taken - max_steps) * 0.05)
|
| 152 |
|
| 153 |
composite_score = max(0.0, min(1.0, (balance_score * 0.9) + step_bonus))
|
| 154 |
+
clamped_score = max(0.001, min(0.999, composite_score))
|
| 155 |
+
return round(clamped_score, 3)
|
| 156 |
|
| 157 |
|
| 158 |
# Explicit task grader mapping for validator tool detection
|
task_graders.py
CHANGED
|
@@ -72,7 +72,10 @@ def task_1_basic_ram_reduction_grader(observation: EnergyOptimizationObservation
|
|
| 72 |
# Combined score: 40% RAM, 40% Energy, 20% Step Efficiency
|
| 73 |
composite_score = (ram_score * 0.4) + (energy_score * 0.4) + (step_efficiency * 0.2)
|
| 74 |
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
| 76 |
|
| 77 |
|
| 78 |
# ============================================================================
|
|
@@ -129,7 +132,9 @@ def task_2_energy_optimization_grader(observation: EnergyOptimizationObservation
|
|
| 129 |
# Combined: Energy (50%), RAM Constraint (25%), Step Efficiency (25%)
|
| 130 |
composite_score = (energy_score * 0.5) + (ram_constraint_score * 0.25) + (step_efficiency * 0.25)
|
| 131 |
|
| 132 |
-
|
|
|
|
|
|
|
| 133 |
|
| 134 |
|
| 135 |
# ============================================================================
|
|
@@ -181,9 +186,11 @@ def task_3_balanced_optimization_grader(observation: EnergyOptimizationObservati
|
|
| 181 |
step_bonus = max(-0.2, -(observation.steps_taken - max_steps) * 0.05) # Up to -20% penalty
|
| 182 |
|
| 183 |
# Combined: Balance (90%) + Step Bonus (10%)
|
| 184 |
-
composite_score =
|
| 185 |
|
| 186 |
-
|
|
|
|
|
|
|
| 187 |
|
| 188 |
|
| 189 |
# ============================================================================
|
|
@@ -213,9 +220,11 @@ def task_4_advanced_efficiency_grader(observation: EnergyOptimizationObservation
|
|
| 213 |
else:
|
| 214 |
step_bonus = max(-0.2, -(observation.steps_taken - max_steps) * 0.05)
|
| 215 |
|
| 216 |
-
composite_score =
|
| 217 |
|
| 218 |
-
|
|
|
|
|
|
|
| 219 |
|
| 220 |
|
| 221 |
# ============================================================================
|
|
@@ -245,9 +254,11 @@ def task_5_expert_optimization_grader(observation: EnergyOptimizationObservation
|
|
| 245 |
else:
|
| 246 |
step_bonus = max(-0.3, -(observation.steps_taken - max_steps) * 0.05)
|
| 247 |
|
| 248 |
-
composite_score =
|
| 249 |
|
| 250 |
-
|
|
|
|
|
|
|
| 251 |
|
| 252 |
|
| 253 |
# ============================================================================
|
|
|
|
| 72 |
# Combined score: 40% RAM, 40% Energy, 20% Step Efficiency
|
| 73 |
composite_score = (ram_score * 0.4) + (energy_score * 0.4) + (step_efficiency * 0.2)
|
| 74 |
|
| 75 |
+
# Clamp strictly between 0 and 1 (not including endpoints)
|
| 76 |
+
# Validator requires 0 < score < 1
|
| 77 |
+
clamped_score = max(0.001, min(0.999, composite_score))
|
| 78 |
+
return round(clamped_score, 3)
|
| 79 |
|
| 80 |
|
| 81 |
# ============================================================================
|
|
|
|
| 132 |
# Combined: Energy (50%), RAM Constraint (25%), Step Efficiency (25%)
|
| 133 |
composite_score = (energy_score * 0.5) + (ram_constraint_score * 0.25) + (step_efficiency * 0.25)
|
| 134 |
|
| 135 |
+
# Clamp strictly between 0 and 1 (not including endpoints)
|
| 136 |
+
clamped_score = max(0.001, min(0.999, composite_score))
|
| 137 |
+
return round(clamped_score, 3)
|
| 138 |
|
| 139 |
|
| 140 |
# ============================================================================
|
|
|
|
| 186 |
step_bonus = max(-0.2, -(observation.steps_taken - max_steps) * 0.05) # Up to -20% penalty
|
| 187 |
|
| 188 |
# Combined: Balance (90%) + Step Bonus (10%)
|
| 189 |
+
composite_score = (balance_score * 0.9) + step_bonus
|
| 190 |
|
| 191 |
+
# Clamp strictly between 0 and 1 (not including endpoints)
|
| 192 |
+
clamped_score = max(0.001, min(0.999, composite_score))
|
| 193 |
+
return round(clamped_score, 3)
|
| 194 |
|
| 195 |
|
| 196 |
# ============================================================================
|
|
|
|
| 220 |
else:
|
| 221 |
step_bonus = max(-0.2, -(observation.steps_taken - max_steps) * 0.05)
|
| 222 |
|
| 223 |
+
composite_score = (balance_score * 0.9) + step_bonus
|
| 224 |
|
| 225 |
+
# Clamp strictly between 0 and 1 (not including endpoints)
|
| 226 |
+
clamped_score = max(0.001, min(0.999, composite_score))
|
| 227 |
+
return round(clamped_score, 3)
|
| 228 |
|
| 229 |
|
| 230 |
# ============================================================================
|
|
|
|
| 254 |
else:
|
| 255 |
step_bonus = max(-0.3, -(observation.steps_taken - max_steps) * 0.05)
|
| 256 |
|
| 257 |
+
composite_score = (balance_score * 0.9) + step_bonus
|
| 258 |
|
| 259 |
+
# Clamp strictly between 0 and 1 (not including endpoints)
|
| 260 |
+
clamped_score = max(0.001, min(0.999, composite_score))
|
| 261 |
+
return round(clamped_score, 3)
|
| 262 |
|
| 263 |
|
| 264 |
# ============================================================================
|