Spaces:
Sleeping
Sleeping
Commit ·
ef0556b
1
Parent(s): db76306
fix: clamp all scores to open interval (0, 1) to meet validator requirements
Browse files
- env/tasks.go +31 -18
env/tasks.go
CHANGED
|
@@ -3,12 +3,25 @@ package env
|
|
| 3 |
|
| 4 |
import "math"
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
// TaskConfig describes a single task.
|
| 7 |
type TaskConfig struct {
|
| 8 |
-
ID int
|
| 9 |
-
Name string
|
| 10 |
-
Description string
|
| 11 |
-
Difficulty string
|
| 12 |
Weights map[string]float64 `json:"weights"`
|
| 13 |
}
|
| 14 |
|
|
@@ -41,12 +54,12 @@ func AllTasks() []TaskConfig {
|
|
| 41 |
|
| 42 |
// GradeEpisodeInput collects all data needed to score a completed episode.
|
| 43 |
type GradeEpisodeInput struct {
|
| 44 |
-
TaskID
|
| 45 |
-
Buildings
|
| 46 |
-
Replay
|
| 47 |
-
TempHistory
|
| 48 |
-
TMin
|
| 49 |
-
TMax
|
| 50 |
ExploitPenalties []float64
|
| 51 |
}
|
| 52 |
|
|
@@ -84,7 +97,7 @@ func GradeEpisode(inp GradeEpisodeInput) EpisodeGrade {
|
|
| 84 |
grade.Score = math.Max(0, grade.Score-grade.PenaltyApplied)
|
| 85 |
}
|
| 86 |
|
| 87 |
-
grade.Score = math.Round(grade.Score*10000) / 10000 // 4 decimal places
|
| 88 |
return grade
|
| 89 |
}
|
| 90 |
|
|
@@ -106,7 +119,7 @@ func gradeTask1(inp GradeEpisodeInput, grade EpisodeGrade) EpisodeGrade {
|
|
| 106 |
costScore = math.Max(0, 1.0-ratio)
|
| 107 |
}
|
| 108 |
|
| 109 |
-
grade.SubScores["cost"] = math.Min(1.0, costScore)
|
| 110 |
grade.Score = grade.SubScores["cost"]
|
| 111 |
grade.Details["agent_cost"] = agentCost
|
| 112 |
grade.Details["baseline_cost"] = baselineCost
|
|
@@ -139,8 +152,8 @@ func gradeTask2(inp GradeEpisodeInput, grade EpisodeGrade) EpisodeGrade {
|
|
| 139 |
}
|
| 140 |
|
| 141 |
grade.SubScores["cost"] = costScore
|
| 142 |
-
grade.SubScores["temperature"] = constraintScore
|
| 143 |
-
grade.Score = costScore*0.6 + constraintScore*0.4
|
| 144 |
grade.Details["within_bounds_steps"] = withinBounds
|
| 145 |
grade.Details["total_steps"] = totalSteps
|
| 146 |
return grade
|
|
@@ -201,11 +214,11 @@ func gradeTask3(inp GradeEpisodeInput, grade EpisodeGrade) EpisodeGrade {
|
|
| 201 |
|
| 202 |
grade.SubScores["cost"] = costScore
|
| 203 |
grade.SubScores["temperature"] = tempScore
|
| 204 |
-
grade.SubScores["grid_response"] = gridScore
|
| 205 |
-
grade.SubScores["batch_deadline"] = batchScore
|
| 206 |
-
grade.SubScores["carbon"] = math.Min(1.0, carbonScore)
|
| 207 |
|
| 208 |
-
grade.Score = costScore*0.28 + tempScore*0.20 + gridScore*0.20 + batchScore*0.12 + carbonScore*0.20
|
| 209 |
|
| 210 |
grade.Details["grid_stress_steps"] = gridStressSteps
|
| 211 |
grade.Details["grid_response_steps"] = gridResponseSteps
|
|
|
|
| 3 |
|
| 4 |
import "math"
|
| 5 |
|
| 6 |
+
// clampOpenInterval maps score into the open interval (0, 1), strictly
// excluding both 0.0 and 1.0, so every reported score satisfies
// 0 < score < 1.
//
// Scores at or below 0 (including -Inf) become epsilon, and scores at or
// above 1 (including +Inf) become 1 - epsilon. Scores already strictly
// between 0 and 1 are returned unchanged.
//
// NaN is mapped to epsilon, the lowest valid score. NaN compares false
// against every bound, so without the explicit check it would fall through
// both range tests and be returned as-is, violating the guarantee above
// (and encoding/json cannot marshal a NaN float).
func clampOpenInterval(score float64) float64 {
	const epsilon = 1e-6
	switch {
	case math.IsNaN(score), score <= 0.0:
		return epsilon
	case score >= 1.0:
		return 1.0 - epsilon
	default:
		return score
	}
}
|
| 18 |
+
|
| 19 |
// TaskConfig describes a single task.
//
// Every field carries a JSON struct tag, so values marshal with the
// lower-case keys "id", "name", "description", "difficulty" and "weights".
|
| 20 |
type TaskConfig struct {
|
| 21 |
+
ID int `json:"id"` // integer task identifier
|
| 22 |
+
Name string `json:"name"` // task name
|
| 23 |
+
Description string `json:"description"` // task description
|
| 24 |
+
Difficulty string `json:"difficulty"` // difficulty label; the set of valid values is not visible here
|
| 25 |
Weights map[string]float64 `json:"weights"` // presumably sub-score name -> weight in the final score; TODO confirm against the graders
|
| 26 |
}
|
| 27 |
|
|
|
|
| 54 |
|
| 55 |
// GradeEpisodeInput collects all data needed to score a completed episode.
//
// It is passed by value to GradeEpisode and to the per-task graders
// gradeTask1, gradeTask2 and gradeTask3.
|
| 56 |
type GradeEpisodeInput struct {
|
| 57 |
+
TaskID int // presumably selects which per-task grader runs; TODO confirm in GradeEpisode
|
| 58 |
+
Buildings []*BuildingState // pointers to building state; BuildingState is declared elsewhere in the package
|
| 59 |
+
Replay []ReplayEntry // replay entries; ReplayEntry is declared elsewhere in the package
|
| 60 |
+
TempHistory [][]float64 // per building, per step
|
| 61 |
+
TMin float64 // presumably the lower bound of the allowed temperature band; TODO confirm
|
| 62 |
+
TMax float64 // presumably the upper bound of the allowed temperature band; TODO confirm
|
| 63 |
ExploitPenalties []float64 // presumably feeds grade.PenaltyApplied in GradeEpisode; TODO confirm
|
| 64 |
}
|
| 65 |
|
|
|
|
| 97 |
grade.Score = math.Max(0, grade.Score-grade.PenaltyApplied)
|
| 98 |
}
|
| 99 |
|
| 100 |
+
grade.Score = clampOpenInterval(math.Round(grade.Score*10000) / 10000) // 4 decimal places
|
| 101 |
return grade
|
| 102 |
}
|
| 103 |
|
|
|
|
| 119 |
costScore = math.Max(0, 1.0-ratio)
|
| 120 |
}
|
| 121 |
|
| 122 |
+
grade.SubScores["cost"] = clampOpenInterval(math.Min(1.0, costScore))
|
| 123 |
grade.Score = grade.SubScores["cost"]
|
| 124 |
grade.Details["agent_cost"] = agentCost
|
| 125 |
grade.Details["baseline_cost"] = baselineCost
|
|
|
|
| 152 |
}
|
| 153 |
|
| 154 |
grade.SubScores["cost"] = costScore
|
| 155 |
+
grade.SubScores["temperature"] = clampOpenInterval(constraintScore)
|
| 156 |
+
grade.Score = clampOpenInterval(costScore*0.6 + constraintScore*0.4)
|
| 157 |
grade.Details["within_bounds_steps"] = withinBounds
|
| 158 |
grade.Details["total_steps"] = totalSteps
|
| 159 |
return grade
|
|
|
|
| 214 |
|
| 215 |
grade.SubScores["cost"] = costScore
|
| 216 |
grade.SubScores["temperature"] = tempScore
|
| 217 |
+
grade.SubScores["grid_response"] = clampOpenInterval(gridScore)
|
| 218 |
+
grade.SubScores["batch_deadline"] = clampOpenInterval(batchScore)
|
| 219 |
+
grade.SubScores["carbon"] = clampOpenInterval(math.Min(1.0, carbonScore))
|
| 220 |
|
| 221 |
+
grade.Score = clampOpenInterval(costScore*0.28 + tempScore*0.20 + gridScore*0.20 + batchScore*0.12 + carbonScore*0.20)
|
| 222 |
|
| 223 |
grade.Details["grid_stress_steps"] = gridStressSteps
|
| 224 |
grade.Details["grid_response_steps"] = gridResponseSteps
|