ShreeshantXD committed on
Commit
e58b5ec
·
1 Parent(s): ef0556b

fix: clamp scores after rounding and ensure all sub-scores are clamped

Browse files
Files changed (1) hide show
  1. env/tasks.go +22 -10
env/tasks.go CHANGED
@@ -97,7 +97,13 @@ func GradeEpisode(inp GradeEpisodeInput) EpisodeGrade {
97
  grade.Score = math.Max(0, grade.Score-grade.PenaltyApplied)
98
  }
99
 
 
100
  grade.Score = clampOpenInterval(math.Round(grade.Score*10000) / 10000) // 4 decimal places
 
 
 
 
 
101
  return grade
102
  }
103
 
@@ -119,7 +125,9 @@ func gradeTask1(inp GradeEpisodeInput, grade EpisodeGrade) EpisodeGrade {
119
  costScore = math.Max(0, 1.0-ratio)
120
  }
121
 
122
- grade.SubScores["cost"] = clampOpenInterval(math.Min(1.0, costScore))
 
 
123
  grade.Score = grade.SubScores["cost"]
124
  grade.Details["agent_cost"] = agentCost
125
  grade.Details["baseline_cost"] = baselineCost
@@ -151,9 +159,11 @@ func gradeTask2(inp GradeEpisodeInput, grade EpisodeGrade) EpisodeGrade {
151
  constraintScore = float64(withinBounds) / float64(totalSteps)
152
  }
153
 
154
- grade.SubScores["cost"] = costScore
155
- grade.SubScores["temperature"] = clampOpenInterval(constraintScore)
156
- grade.Score = clampOpenInterval(costScore*0.6 + constraintScore*0.4)
 
 
157
  grade.Details["within_bounds_steps"] = withinBounds
158
  grade.Details["total_steps"] = totalSteps
159
  return grade
@@ -212,13 +222,15 @@ func gradeTask3(inp GradeEpisodeInput, grade EpisodeGrade) EpisodeGrade {
212
  carbonScore = math.Max(0, 1.0-agentCarbon/baselineCarbon)
213
  }
214
 
215
- grade.SubScores["cost"] = costScore
216
- grade.SubScores["temperature"] = tempScore
217
- grade.SubScores["grid_response"] = clampOpenInterval(gridScore)
218
- grade.SubScores["batch_deadline"] = clampOpenInterval(batchScore)
219
- grade.SubScores["carbon"] = clampOpenInterval(math.Min(1.0, carbonScore))
 
220
 
221
- grade.Score = clampOpenInterval(costScore*0.28 + tempScore*0.20 + gridScore*0.20 + batchScore*0.12 + carbonScore*0.20)
 
222
 
223
  grade.Details["grid_stress_steps"] = gridStressSteps
224
  grade.Details["grid_response_steps"] = gridResponseSteps
 
97
  grade.Score = math.Max(0, grade.Score-grade.PenaltyApplied)
98
  }
99
 
100
+ // Clamp AFTER rounding to ensure boundary values are handled
101
  grade.Score = clampOpenInterval(math.Round(grade.Score*10000) / 10000) // 4 decimal places
102
+
103
+ // Also ensure all sub-scores are properly clamped after rounding
104
+ for key, val := range grade.SubScores {
105
+ grade.SubScores[key] = clampOpenInterval(math.Round(val*10000) / 10000)
106
+ }
107
  return grade
108
  }
109
 
 
125
  costScore = math.Max(0, 1.0-ratio)
126
  }
127
 
128
+ // Clamp after min operation
129
+ clamped := clampOpenInterval(math.Min(1.0, costScore))
130
+ grade.SubScores["cost"] = clampOpenInterval(math.Round(clamped*10000) / 10000)
131
  grade.Score = grade.SubScores["cost"]
132
  grade.Details["agent_cost"] = agentCost
133
  grade.Details["baseline_cost"] = baselineCost
 
159
  constraintScore = float64(withinBounds) / float64(totalSteps)
160
  }
161
 
162
+ // Clamp sub-scores and final score after rounding
163
+ grade.SubScores["cost"] = clampOpenInterval(math.Round(costScore*10000) / 10000)
164
+ grade.SubScores["temperature"] = clampOpenInterval(math.Round(constraintScore*10000) / 10000)
165
+ finalScore := costScore*0.6 + constraintScore*0.4
166
+ grade.Score = clampOpenInterval(math.Round(finalScore*10000) / 10000)
167
  grade.Details["within_bounds_steps"] = withinBounds
168
  grade.Details["total_steps"] = totalSteps
169
  return grade
 
222
  carbonScore = math.Max(0, 1.0-agentCarbon/baselineCarbon)
223
  }
224
 
225
+ // Clamp all sub-scores after rounding
226
+ grade.SubScores["cost"] = clampOpenInterval(math.Round(costScore*10000) / 10000)
227
+ grade.SubScores["temperature"] = clampOpenInterval(math.Round(tempScore*10000) / 10000)
228
+ grade.SubScores["grid_response"] = clampOpenInterval(math.Round(gridScore*10000) / 10000)
229
+ grade.SubScores["batch_deadline"] = clampOpenInterval(math.Round(batchScore*10000) / 10000)
230
+ grade.SubScores["carbon"] = clampOpenInterval(math.Round(math.Min(1.0, carbonScore)*10000) / 10000)
231
 
232
+ finalScore := costScore*0.28 + tempScore*0.20 + gridScore*0.20 + batchScore*0.12 + carbonScore*0.20
233
+ grade.Score = clampOpenInterval(math.Round(finalScore*10000) / 10000)
234
 
235
  grade.Details["grid_stress_steps"] = gridStressSteps
236
  grade.Details["grid_response_steps"] = gridResponseSteps