ShreeshantXD committed on
Commit
ef0556b
·
1 Parent(s): db76306

fix: clamp all scores to open interval (0, 1) to meet validator requirements

Browse files
Files changed (1) hide show
  1. env/tasks.go +31 -18
env/tasks.go CHANGED
@@ -3,12 +3,25 @@ package env
3
 
4
  import "math"
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  // TaskConfig describes a single task.
7
  type TaskConfig struct {
8
- ID int `json:"id"`
9
- Name string `json:"name"`
10
- Description string `json:"description"`
11
- Difficulty string `json:"difficulty"`
12
  Weights map[string]float64 `json:"weights"`
13
  }
14
 
@@ -41,12 +54,12 @@ func AllTasks() []TaskConfig {
41
 
42
  // GradeEpisodeInput collects all data needed to score a completed episode.
43
  type GradeEpisodeInput struct {
44
- TaskID int
45
- Buildings []*BuildingState
46
- Replay []ReplayEntry
47
- TempHistory [][]float64 // per building, per step
48
- TMin float64
49
- TMax float64
50
  ExploitPenalties []float64
51
  }
52
 
@@ -84,7 +97,7 @@ func GradeEpisode(inp GradeEpisodeInput) EpisodeGrade {
84
  grade.Score = math.Max(0, grade.Score-grade.PenaltyApplied)
85
  }
86
 
87
- grade.Score = math.Round(grade.Score*10000) / 10000 // 4 decimal places
88
  return grade
89
  }
90
 
@@ -106,7 +119,7 @@ func gradeTask1(inp GradeEpisodeInput, grade EpisodeGrade) EpisodeGrade {
106
  costScore = math.Max(0, 1.0-ratio)
107
  }
108
 
109
- grade.SubScores["cost"] = math.Min(1.0, costScore)
110
  grade.Score = grade.SubScores["cost"]
111
  grade.Details["agent_cost"] = agentCost
112
  grade.Details["baseline_cost"] = baselineCost
@@ -139,8 +152,8 @@ func gradeTask2(inp GradeEpisodeInput, grade EpisodeGrade) EpisodeGrade {
139
  }
140
 
141
  grade.SubScores["cost"] = costScore
142
- grade.SubScores["temperature"] = constraintScore
143
- grade.Score = costScore*0.6 + constraintScore*0.4
144
  grade.Details["within_bounds_steps"] = withinBounds
145
  grade.Details["total_steps"] = totalSteps
146
  return grade
@@ -201,11 +214,11 @@ func gradeTask3(inp GradeEpisodeInput, grade EpisodeGrade) EpisodeGrade {
201
 
202
  grade.SubScores["cost"] = costScore
203
  grade.SubScores["temperature"] = tempScore
204
- grade.SubScores["grid_response"] = gridScore
205
- grade.SubScores["batch_deadline"] = batchScore
206
- grade.SubScores["carbon"] = math.Min(1.0, carbonScore)
207
 
208
- grade.Score = costScore*0.28 + tempScore*0.20 + gridScore*0.20 + batchScore*0.12 + carbonScore*0.20
209
 
210
  grade.Details["grid_stress_steps"] = gridStressSteps
211
  grade.Details["grid_response_steps"] = gridResponseSteps
 
3
 
4
  import "math"
5
 
6
+ // clampOpenInterval maps score into the open interval (0, 1): any value <= 0 becomes epsilon (1e-6) and any value >= 1 becomes 1 - epsilon.
7
+ // Values already strictly between 0 and 1 are returned unchanged; a NaN input fails both comparisons and is also returned unchanged.
8
+ func clampOpenInterval(score float64) float64 {
9
+ const epsilon = 1e-6 // offset used to keep results strictly away from 0 and 1
10
+ if score <= 0.0 {
11
+ return epsilon // lower bound is exclusive, so substitute the smallest accepted value
12
+ }
13
+ if score >= 1.0 {
14
+ return 1.0 - epsilon // upper bound is exclusive, so substitute the largest accepted value
15
+ }
16
+ return score // neither bound was hit; NaN also reaches this line
17
+ }
18
+
19
  // TaskConfig describes a single task.
20
  type TaskConfig struct {
21
+ ID int `json:"id"`
22
+ Name string `json:"name"`
23
+ Description string `json:"description"`
24
+ Difficulty string `json:"difficulty"`
25
  Weights map[string]float64 `json:"weights"`
26
  }
27
 
 
54
 
55
  // GradeEpisodeInput collects all data needed to score a completed episode.
56
  type GradeEpisodeInput struct {
57
+ TaskID int
58
+ Buildings []*BuildingState
59
+ Replay []ReplayEntry
60
+ TempHistory [][]float64 // per building, per step
61
+ TMin float64
62
+ TMax float64
63
  ExploitPenalties []float64
64
  }
65
 
 
97
  grade.Score = math.Max(0, grade.Score-grade.PenaltyApplied)
98
  }
99
 
100
+ grade.Score = clampOpenInterval(math.Round(grade.Score*10000) / 10000) // 4 decimal places
101
  return grade
102
  }
103
 
 
119
  costScore = math.Max(0, 1.0-ratio)
120
  }
121
 
122
+ grade.SubScores["cost"] = clampOpenInterval(math.Min(1.0, costScore))
123
  grade.Score = grade.SubScores["cost"]
124
  grade.Details["agent_cost"] = agentCost
125
  grade.Details["baseline_cost"] = baselineCost
 
152
  }
153
 
154
  grade.SubScores["cost"] = costScore
155
+ grade.SubScores["temperature"] = clampOpenInterval(constraintScore)
156
+ grade.Score = clampOpenInterval(costScore*0.6 + constraintScore*0.4)
157
  grade.Details["within_bounds_steps"] = withinBounds
158
  grade.Details["total_steps"] = totalSteps
159
  return grade
 
214
 
215
  grade.SubScores["cost"] = costScore
216
  grade.SubScores["temperature"] = tempScore
217
+ grade.SubScores["grid_response"] = clampOpenInterval(gridScore)
218
+ grade.SubScores["batch_deadline"] = clampOpenInterval(batchScore)
219
+ grade.SubScores["carbon"] = clampOpenInterval(math.Min(1.0, carbonScore))
220
 
221
+ grade.Score = clampOpenInterval(costScore*0.28 + tempScore*0.20 + gridScore*0.20 + batchScore*0.12 + carbonScore*0.20)
222
 
223
  grade.Details["grid_stress_steps"] = gridStressSteps
224
  grade.Details["grid_response_steps"] = gridResponseSteps