File size: 6,146 Bytes
1875b13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6d74982
 
 
1875b13
 
6d74982
 
1875b13
 
 
 
 
 
6d74982
1875b13
 
 
6d74982
1875b13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6d74982
1875b13
 
 
 
 
 
 
 
6d74982
 
1875b13
6d74982
 
 
 
 
1875b13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
// tests/test_environment.go — unit tests for GridMind-RL environment
package tests

import (
	"testing"
	"math"

	"gridmind-rl/env"
)

// TestResetProducesValidObservation resets a single-building environment with
// a fixed seed and checks that the one returned observation carries sane
// initial values: indoor temperature in [10, 40], thermal storage level in
// [0, 1], a strictly positive price, hour of day in [0, 23] and grid stress
// signal in [0, 1].
func TestResetProducesValidObservation(t *testing.T) {
	e := env.NewEnvironment()
	var seed int64 = 42
	resp := e.Reset(env.ResetRequest{Seed: &seed, TaskID: 1, NumBuildings: 1})

	if len(resp.Observations) != 1 {
		t.Fatalf("expected 1 observation, got %d", len(resp.Observations))
	}
	obs := resp.Observations[0]

	// The range checks below are written as "not (inside the range)" on
	// purpose: every ordered comparison involving NaN is false, so the more
	// common "x < lo || x > hi" form would silently accept a NaN value.
	if !(obs.IndoorTemperature >= 10 && obs.IndoorTemperature <= 40) {
		t.Errorf("indoor_temperature out of range: %.2f", obs.IndoorTemperature)
	}
	if !(obs.ThermalStorageLevel >= 0 && obs.ThermalStorageLevel <= 1) {
		t.Errorf("thermal_storage_level out of [0,1]: %.3f", obs.ThermalStorageLevel)
	}
	if !(obs.CurrentPrice > 0) {
		t.Errorf("current_price must be positive, got %.4f", obs.CurrentPrice)
	}
	if obs.HourOfDay < 0 || obs.HourOfDay > 23 {
		t.Errorf("hour_of_day out of [0,23]: %d", obs.HourOfDay)
	}
	if !(obs.GridStressSignal >= 0 && obs.GridStressSignal <= 1) {
		t.Errorf("grid_stress_signal out of [0,1]: %.3f", obs.GridStressSignal)
	}
}

// TestStepAdvancesState performs exactly one Step on a freshly reset
// single-building environment and checks that the episode is not finished,
// that one response is returned, that the state reports Step == 1, and that
// the observation in the response reports Step == 0.
func TestStepAdvancesState(t *testing.T) {
	seed := int64(1)
	sim := env.NewEnvironment()
	sim.Reset(env.ResetRequest{Seed: &seed, TaskID: 1, NumBuildings: 1})

	results, finished := sim.Step([]env.ActionModel{
		{HVACPowerLevel: 0.5, ThermalChargeRate: 0.0, BatchJobSlot: 0},
	})
	if finished {
		t.Error("episode should not be done after first step")
	}
	// Fatal here: the observation check below indexes results[0].
	if got := len(results); got != 1 {
		t.Fatalf("expected 1 step response, got %d", got)
	}

	if got := sim.GetState().Step; got != 1 {
		t.Errorf("expected step=1 after one step, got %d", got)
	}
	if got := results[0].Observation.Step; got != 0 {
		t.Errorf("expected observation.step=0 after first transition, got %d", got)
	}
}

// TestEpisodeLengthIs96 verifies that the episode terminates on exactly the
// env.EpisodeSteps-th step: the done flag must stay false for every earlier
// step and must be true on the final one.
func TestEpisodeLengthIs96(t *testing.T) {
	e := env.NewEnvironment()
	var seed int64 = 99
	e.Reset(env.ResetRequest{Seed: &seed, TaskID: 1, NumBuildings: 1})

	action := []env.ActionModel{{HVACPowerLevel: 0.5}}
	var lastDone bool
	for i := 0; i < env.EpisodeSteps; i++ {
		_, lastDone = e.Step(action)
		// Checking only the final flag would let an episode that ends too
		// early (and keeps reporting done) pass unnoticed.
		if lastDone && i < env.EpisodeSteps-1 {
			t.Fatalf("episode ended early after %d of %d steps", i+1, env.EpisodeSteps)
		}
	}
	if !lastDone {
		t.Errorf("episode should be done after %d steps", env.EpisodeSteps)
	}
}

// TestDeterministicWithSeed verifies that two independent environments reset
// with the same seed and stepped once with the same action yield the same
// first-step reward (within 1e-9).
func TestDeterministicWithSeed(t *testing.T) {
	action := []env.ActionModel{{HVACPowerLevel: 0.4, ThermalChargeRate: 0.1, BatchJobSlot: 1}}
	var seed int64 = 1337

	// run builds a fresh environment, takes one step and returns the reward
	// of the single building.
	run := func() float64 {
		e := env.NewEnvironment()
		e.Reset(env.ResetRequest{Seed: &seed, TaskID: 2, NumBuildings: 1})
		resps, _ := e.Step(action)
		// Fail with a clear message instead of panicking on resps[0] when
		// the environment returns an unexpected number of responses.
		if len(resps) != 1 {
			t.Fatalf("expected 1 step response, got %d", len(resps))
		}
		return resps[0].Reward
	}

	r1 := run()
	r2 := run()
	if math.Abs(r1-r2) > 1e-9 {
		t.Errorf("non-deterministic rewards with same seed: %.6f vs %.6f", r1, r2)
	}
}

// TestActionClamping steps the environment once with an action meant to be
// over-range (HVACPowerLevel 2.0, ThermalChargeRate -5.0) and checks that the
// first building's thermal storage level in the resulting state is still
// within [0, 1].
func TestActionClamping(t *testing.T) {
	seed := int64(7)
	sim := env.NewEnvironment()
	sim.Reset(env.ResetRequest{Seed: &seed, TaskID: 1})

	// Intentionally over-range inputs.
	extreme := []env.ActionModel{{HVACPowerLevel: 2.0, ThermalChargeRate: -5.0, LoadShedFraction: 0.9}}
	if results, _ := sim.Step(extreme); len(results) == 0 {
		t.Fatal("no responses returned")
	}

	// The state must remain valid after the step.
	buildings := sim.GetState().Buildings
	if len(buildings) == 0 {
		t.Fatal("no buildings in state")
	}
	if level := buildings[0].ThermalStorageLevel; level < 0 || level > 1 {
		t.Errorf("thermal storage out of bounds: %.3f", level)
	}
}

// TestMultiBuildingFederation resets the environment with three buildings and
// confirms that Reset yields three observations and that a Step with one
// action per building yields three responses.
func TestMultiBuildingFederation(t *testing.T) {
	seed := int64(5)
	sim := env.NewEnvironment()

	initial := sim.Reset(env.ResetRequest{Seed: &seed, TaskID: 3, NumBuildings: 3})
	if got := len(initial.Observations); got != 3 {
		t.Fatalf("expected 3 observations for 3 buildings, got %d", got)
	}

	// One action per building, addressed by BuildingID.
	results, _ := sim.Step([]env.ActionModel{
		{HVACPowerLevel: 0.3, BuildingID: 0},
		{HVACPowerLevel: 0.5, BuildingID: 1},
		{HVACPowerLevel: 0.7, BuildingID: 2},
	})
	if got := len(results); got != 3 {
		t.Fatalf("expected 3 step responses, got %d", got)
	}
}

// TestRewardComponentsAreFinite takes one step and verifies that every reward
// component reported in the step info, including the total, is neither NaN
// nor infinite.
func TestRewardComponentsAreFinite(t *testing.T) {
	e := env.NewEnvironment()
	var seed int64 = 42
	e.Reset(env.ResetRequest{Seed: &seed, TaskID: 3})

	action := []env.ActionModel{{HVACPowerLevel: 0.5, ThermalChargeRate: 0.2, BatchJobSlot: 2, LoadShedFraction: 0.3}}
	resps, _ := e.Step(action)
	// Guard the resps[0] access below so an empty result is reported as a
	// test failure rather than an index-out-of-range panic.
	if len(resps) == 0 {
		t.Fatal("no responses returned")
	}

	rc := resps[0].Info.RewardComponents
	vals := []float64{rc.CostSavings, rc.TempConstraint, rc.GridResponse,
		rc.DeadlinePenalty, rc.EfficiencyBonus, rc.StabilityPenalty, rc.CarbonReward, rc.Total}
	// The index in the message follows the order of the slice literal above.
	for i, v := range vals {
		if math.IsNaN(v) || math.IsInf(v, 0) {
			t.Errorf("reward component %d is not finite: %v", i, v)
		}
	}
}

// TestGraderTask1ScoreRange runs a full episode of env.EpisodeSteps steps with
// a constant action, grades it as Task 1 and verifies the resulting score lies
// in [0, 1]. A NaN score is treated as out of range.
func TestGraderTask1ScoreRange(t *testing.T) {
	e := env.NewEnvironment()
	var seed int64 = 101
	e.Reset(env.ResetRequest{Seed: &seed, TaskID: 1})

	// Only the final state and replay are graded, so per-step results are
	// not inspected here.
	action := []env.ActionModel{{HVACPowerLevel: 0.3}}
	for i := 0; i < env.EpisodeSteps; i++ {
		e.Step(action)
	}

	state := e.GetState()
	replay := e.GetReplay()

	// Build the grader's BuildingState inputs from the state returned by
	// GetState, carrying over the cost, carbon and job fields set below.
	buildings := make([]*env.BuildingState, len(state.Buildings))
	for i, pub := range state.Buildings {
		// Copy the jobs so the grader input does not alias the state's slice.
		jobsCopy := make([]env.BatchJob, len(pub.Jobs))
		copy(jobsCopy, pub.Jobs)
		buildings[i] = &env.BuildingState{
			CumulativeCost:   pub.CumulativeCost,
			BaselineCost:     pub.BaselineCost,
			CumulativeCarbon: pub.CumulativeCarbon,
			BaselineCarbon:   pub.BaselineCarbon,
			Jobs:             jobsCopy,
		}
	}

	grade := env.GradeEpisode(env.GradeEpisodeInput{
		TaskID:    1,
		Buildings: buildings,
		Replay:    replay,
		TMin:      env.TMinDefault,
		TMax:      env.TMaxDefault,
	})

	// Written as "not (in range)" so that NaN fails: both "NaN < 0" and
	// "NaN > 1" are false, which would let a NaN score slip through the
	// usual "s < 0 || s > 1" check.
	if !(grade.Score >= 0 && grade.Score <= 1) {
		t.Errorf("Task 1 score out of [0,1]: %.4f", grade.Score)
	}
}