Sushruth21 committed on
Commit
cca44f4
·
1 Parent(s): 4aa36f3

fix: clamp grader scores strictly between 0 and 1 for validator compliance

Browse files
Files changed (2) hide show
  1. inference.py +10 -5
  2. task_graders.py +19 -8
inference.py CHANGED
@@ -54,7 +54,8 @@ def task_1_basic_ram_reduction_grader(observation: EnergyOptimizationObservation
54
  step_efficiency = max(0.0, 1.0 - (observation.steps_taken - max_steps) * 0.1)
55
 
56
  composite_score = (ram_score * 0.4) + (energy_score * 0.4) + (step_efficiency * 0.2)
57
- return round(composite_score, 3)
 
58
 
59
 
60
  def task_2_energy_optimization_grader(observation: EnergyOptimizationObservation) -> float:
@@ -78,7 +79,8 @@ def task_2_energy_optimization_grader(observation: EnergyOptimizationObservation
78
  step_efficiency = max(0.0, 1.0 - (observation.steps_taken - max_steps) * 0.08)
79
 
80
  composite_score = (energy_score * 0.5) + (ram_constraint_score * 0.25) + (step_efficiency * 0.25)
81
- return round(composite_score, 3)
 
82
 
83
 
84
  def task_3_balanced_optimization_grader(observation: EnergyOptimizationObservation) -> float:
@@ -101,7 +103,8 @@ def task_3_balanced_optimization_grader(observation: EnergyOptimizationObservati
101
  step_bonus = max(-0.2, -(observation.steps_taken - max_steps) * 0.05)
102
 
103
  composite_score = max(0.0, min(1.0, (balance_score * 0.9) + step_bonus))
104
- return round(composite_score, 3)
 
105
 
106
 
107
  def task_4_advanced_efficiency_grader(observation: EnergyOptimizationObservation) -> float:
@@ -124,7 +127,8 @@ def task_4_advanced_efficiency_grader(observation: EnergyOptimizationObservation
124
  step_bonus = max(-0.2, -(observation.steps_taken - max_steps) * 0.05)
125
 
126
  composite_score = max(0.0, min(1.0, (balance_score * 0.9) + step_bonus))
127
- return round(composite_score, 3)
 
128
 
129
 
130
  def task_5_expert_optimization_grader(observation: EnergyOptimizationObservation) -> float:
@@ -147,7 +151,8 @@ def task_5_expert_optimization_grader(observation: EnergyOptimizationObservation
147
  step_bonus = max(-0.3, -(observation.steps_taken - max_steps) * 0.05)
148
 
149
  composite_score = max(0.0, min(1.0, (balance_score * 0.9) + step_bonus))
150
- return round(composite_score, 3)
 
151
 
152
 
153
  # Explicit task grader mapping for validator tool detection
 
54
  step_efficiency = max(0.0, 1.0 - (observation.steps_taken - max_steps) * 0.1)
55
 
56
  composite_score = (ram_score * 0.4) + (energy_score * 0.4) + (step_efficiency * 0.2)
57
+ clamped_score = max(0.001, min(0.999, composite_score))
58
+ return round(clamped_score, 3)
59
 
60
 
61
  def task_2_energy_optimization_grader(observation: EnergyOptimizationObservation) -> float:
 
79
  step_efficiency = max(0.0, 1.0 - (observation.steps_taken - max_steps) * 0.08)
80
 
81
  composite_score = (energy_score * 0.5) + (ram_constraint_score * 0.25) + (step_efficiency * 0.25)
82
+ clamped_score = max(0.001, min(0.999, composite_score))
83
+ return round(clamped_score, 3)
84
 
85
 
86
  def task_3_balanced_optimization_grader(observation: EnergyOptimizationObservation) -> float:
 
103
  step_bonus = max(-0.2, -(observation.steps_taken - max_steps) * 0.05)
104
 
105
  composite_score = max(0.0, min(1.0, (balance_score * 0.9) + step_bonus))
106
+ clamped_score = max(0.001, min(0.999, composite_score))
107
+ return round(clamped_score, 3)
108
 
109
 
110
  def task_4_advanced_efficiency_grader(observation: EnergyOptimizationObservation) -> float:
 
127
  step_bonus = max(-0.2, -(observation.steps_taken - max_steps) * 0.05)
128
 
129
  composite_score = max(0.0, min(1.0, (balance_score * 0.9) + step_bonus))
130
+ clamped_score = max(0.001, min(0.999, composite_score))
131
+ return round(clamped_score, 3)
132
 
133
 
134
  def task_5_expert_optimization_grader(observation: EnergyOptimizationObservation) -> float:
 
151
  step_bonus = max(-0.3, -(observation.steps_taken - max_steps) * 0.05)
152
 
153
  composite_score = max(0.0, min(1.0, (balance_score * 0.9) + step_bonus))
154
+ clamped_score = max(0.001, min(0.999, composite_score))
155
+ return round(clamped_score, 3)
156
 
157
 
158
  # Explicit task grader mapping for validator tool detection
task_graders.py CHANGED
@@ -72,7 +72,10 @@ def task_1_basic_ram_reduction_grader(observation: EnergyOptimizationObservation
72
  # Combined score: 40% RAM, 40% Energy, 20% Step Efficiency
73
  composite_score = (ram_score * 0.4) + (energy_score * 0.4) + (step_efficiency * 0.2)
74
 
75
- return round(composite_score, 3)
 
 
 
76
 
77
 
78
  # ============================================================================
@@ -129,7 +132,9 @@ def task_2_energy_optimization_grader(observation: EnergyOptimizationObservation
129
  # Combined: Energy (50%), RAM Constraint (25%), Step Efficiency (25%)
130
  composite_score = (energy_score * 0.5) + (ram_constraint_score * 0.25) + (step_efficiency * 0.25)
131
 
132
- return round(composite_score, 3)
 
 
133
 
134
 
135
  # ============================================================================
@@ -181,9 +186,11 @@ def task_3_balanced_optimization_grader(observation: EnergyOptimizationObservati
181
  step_bonus = max(-0.2, -(observation.steps_taken - max_steps) * 0.05) # Up to -20% penalty
182
 
183
  # Combined: Balance (90%) + Step Bonus (10%)
184
- composite_score = max(0.0, min(1.0, (balance_score * 0.9) + step_bonus))
185
 
186
- return round(composite_score, 3)
 
 
187
 
188
 
189
  # ============================================================================
@@ -213,9 +220,11 @@ def task_4_advanced_efficiency_grader(observation: EnergyOptimizationObservation
213
  else:
214
  step_bonus = max(-0.2, -(observation.steps_taken - max_steps) * 0.05)
215
 
216
- composite_score = max(0.0, min(1.0, (balance_score * 0.9) + step_bonus))
217
 
218
- return round(composite_score, 3)
 
 
219
 
220
 
221
  # ============================================================================
@@ -245,9 +254,11 @@ def task_5_expert_optimization_grader(observation: EnergyOptimizationObservation
245
  else:
246
  step_bonus = max(-0.3, -(observation.steps_taken - max_steps) * 0.05)
247
 
248
- composite_score = max(0.0, min(1.0, (balance_score * 0.9) + step_bonus))
249
 
250
- return round(composite_score, 3)
 
 
251
 
252
 
253
  # ============================================================================
 
72
  # Combined score: 40% RAM, 40% Energy, 20% Step Efficiency
73
  composite_score = (ram_score * 0.4) + (energy_score * 0.4) + (step_efficiency * 0.2)
74
 
75
+ # Clamp strictly between 0 and 1 (not including endpoints)
76
+ # Validator requires 0 < score < 1
77
+ clamped_score = max(0.001, min(0.999, composite_score))
78
+ return round(clamped_score, 3)
79
 
80
 
81
  # ============================================================================
 
132
  # Combined: Energy (50%), RAM Constraint (25%), Step Efficiency (25%)
133
  composite_score = (energy_score * 0.5) + (ram_constraint_score * 0.25) + (step_efficiency * 0.25)
134
 
135
+ # Clamp strictly between 0 and 1 (not including endpoints)
136
+ clamped_score = max(0.001, min(0.999, composite_score))
137
+ return round(clamped_score, 3)
138
 
139
 
140
  # ============================================================================
 
186
  step_bonus = max(-0.2, -(observation.steps_taken - max_steps) * 0.05) # Up to -20% penalty
187
 
188
  # Combined: Balance (90%) + Step Bonus (10%)
189
+ composite_score = (balance_score * 0.9) + step_bonus
190
 
191
+ # Clamp strictly between 0 and 1 (not including endpoints)
192
+ clamped_score = max(0.001, min(0.999, composite_score))
193
+ return round(clamped_score, 3)
194
 
195
 
196
  # ============================================================================
 
220
  else:
221
  step_bonus = max(-0.2, -(observation.steps_taken - max_steps) * 0.05)
222
 
223
+ composite_score = (balance_score * 0.9) + step_bonus
224
 
225
+ # Clamp strictly between 0 and 1 (not including endpoints)
226
+ clamped_score = max(0.001, min(0.999, composite_score))
227
+ return round(clamped_score, 3)
228
 
229
 
230
  # ============================================================================
 
254
  else:
255
  step_bonus = max(-0.3, -(observation.steps_taken - max_steps) * 0.05)
256
 
257
+ composite_score = (balance_score * 0.9) + step_bonus
258
 
259
+ # Clamp strictly between 0 and 1 (not including endpoints)
260
+ clamped_score = max(0.001, min(0.999, composite_score))
261
+ return round(clamped_score, 3)
262
 
263
 
264
  # ============================================================================