Spaces: Sleeping
Commit ·
e58b5ec
1
Parent(s): ef0556b
fix: clamp scores after rounding and ensure all sub-scores are clamped
Browse files
- env/tasks.go +22 -10
env/tasks.go
CHANGED
|
@@ -97,7 +97,13 @@ func GradeEpisode(inp GradeEpisodeInput) EpisodeGrade {
|
|
| 97 |
grade.Score = math.Max(0, grade.Score-grade.PenaltyApplied)
|
| 98 |
}
|
| 99 |
|
|
|
|
| 100 |
grade.Score = clampOpenInterval(math.Round(grade.Score*10000) / 10000) // 4 decimal places
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
return grade
|
| 102 |
}
|
| 103 |
|
|
@@ -119,7 +125,9 @@ func gradeTask1(inp GradeEpisodeInput, grade EpisodeGrade) EpisodeGrade {
|
|
| 119 |
costScore = math.Max(0, 1.0-ratio)
|
| 120 |
}
|
| 121 |
|
| 122 |
-
|
|
|
|
|
|
|
| 123 |
grade.Score = grade.SubScores["cost"]
|
| 124 |
grade.Details["agent_cost"] = agentCost
|
| 125 |
grade.Details["baseline_cost"] = baselineCost
|
|
@@ -151,9 +159,11 @@ func gradeTask2(inp GradeEpisodeInput, grade EpisodeGrade) EpisodeGrade {
|
|
| 151 |
constraintScore = float64(withinBounds) / float64(totalSteps)
|
| 152 |
}
|
| 153 |
|
| 154 |
-
|
| 155 |
-
grade.SubScores["
|
| 156 |
-
grade.
|
|
|
|
|
|
|
| 157 |
grade.Details["within_bounds_steps"] = withinBounds
|
| 158 |
grade.Details["total_steps"] = totalSteps
|
| 159 |
return grade
|
|
@@ -212,13 +222,15 @@ func gradeTask3(inp GradeEpisodeInput, grade EpisodeGrade) EpisodeGrade {
|
|
| 212 |
carbonScore = math.Max(0, 1.0-agentCarbon/baselineCarbon)
|
| 213 |
}
|
| 214 |
|
| 215 |
-
|
| 216 |
-
grade.SubScores["
|
| 217 |
-
grade.SubScores["
|
| 218 |
-
grade.SubScores["
|
| 219 |
-
grade.SubScores["
|
|
|
|
| 220 |
|
| 221 |
-
|
|
|
|
| 222 |
|
| 223 |
grade.Details["grid_stress_steps"] = gridStressSteps
|
| 224 |
grade.Details["grid_response_steps"] = gridResponseSteps
|
|
|
|
| 97 |
grade.Score = math.Max(0, grade.Score-grade.PenaltyApplied)
|
| 98 |
}
|
| 99 |
|
| 100 |
+
// Clamp AFTER rounding to ensure boundary values are handled
|
| 101 |
grade.Score = clampOpenInterval(math.Round(grade.Score*10000) / 10000) // 4 decimal places
|
| 102 |
+
|
| 103 |
+
// Also ensure all sub-scores are properly clamped after rounding
|
| 104 |
+
for key, val := range grade.SubScores {
|
| 105 |
+
grade.SubScores[key] = clampOpenInterval(math.Round(val*10000) / 10000)
|
| 106 |
+
}
|
| 107 |
return grade
|
| 108 |
}
|
| 109 |
|
|
|
|
| 125 |
costScore = math.Max(0, 1.0-ratio)
|
| 126 |
}
|
| 127 |
|
| 128 |
+
// Clamp after min operation
|
| 129 |
+
clamped := clampOpenInterval(math.Min(1.0, costScore))
|
| 130 |
+
grade.SubScores["cost"] = clampOpenInterval(math.Round(clamped*10000) / 10000)
|
| 131 |
grade.Score = grade.SubScores["cost"]
|
| 132 |
grade.Details["agent_cost"] = agentCost
|
| 133 |
grade.Details["baseline_cost"] = baselineCost
|
|
|
|
| 159 |
constraintScore = float64(withinBounds) / float64(totalSteps)
|
| 160 |
}
|
| 161 |
|
| 162 |
+
// Clamp sub-scores and final score after rounding
|
| 163 |
+
grade.SubScores["cost"] = clampOpenInterval(math.Round(costScore*10000) / 10000)
|
| 164 |
+
grade.SubScores["temperature"] = clampOpenInterval(math.Round(constraintScore*10000) / 10000)
|
| 165 |
+
finalScore := costScore*0.6 + constraintScore*0.4
|
| 166 |
+
grade.Score = clampOpenInterval(math.Round(finalScore*10000) / 10000)
|
| 167 |
grade.Details["within_bounds_steps"] = withinBounds
|
| 168 |
grade.Details["total_steps"] = totalSteps
|
| 169 |
return grade
|
|
|
|
| 222 |
carbonScore = math.Max(0, 1.0-agentCarbon/baselineCarbon)
|
| 223 |
}
|
| 224 |
|
| 225 |
+
// Clamp all sub-scores after rounding
|
| 226 |
+
grade.SubScores["cost"] = clampOpenInterval(math.Round(costScore*10000) / 10000)
|
| 227 |
+
grade.SubScores["temperature"] = clampOpenInterval(math.Round(tempScore*10000) / 10000)
|
| 228 |
+
grade.SubScores["grid_response"] = clampOpenInterval(math.Round(gridScore*10000) / 10000)
|
| 229 |
+
grade.SubScores["batch_deadline"] = clampOpenInterval(math.Round(batchScore*10000) / 10000)
|
| 230 |
+
grade.SubScores["carbon"] = clampOpenInterval(math.Round(math.Min(1.0, carbonScore)*10000) / 10000)
|
| 231 |
|
| 232 |
+
finalScore := costScore*0.28 + tempScore*0.20 + gridScore*0.20 + batchScore*0.12 + carbonScore*0.20
|
| 233 |
+
grade.Score = clampOpenInterval(math.Round(finalScore*10000) / 10000)
|
| 234 |
|
| 235 |
grade.Details["grid_stress_steps"] = gridStressSteps
|
| 236 |
grade.Details["grid_response_steps"] = gridResponseSteps
|