feat: add baseline scores JSON, inference script, and update Dockerfile for improved project structure
6d74982 | // tests/test_environment.go — unit tests for GridMind-RL environment | |
| package tests | |
| import ( | |
| "testing" | |
| "math" | |
| "gridmind-rl/env" | |
| ) | |
| // TestResetProducesValidObservation checks that reset returns sane initial observations. | |
| func TestResetProducesValidObservation(t *testing.T) { | |
| e := env.NewEnvironment() | |
| var seed int64 = 42 | |
| resp := e.Reset(env.ResetRequest{Seed: &seed, TaskID: 1, NumBuildings: 1}) | |
| if len(resp.Observations) != 1 { | |
| t.Fatalf("expected 1 observation, got %d", len(resp.Observations)) | |
| } | |
| obs := resp.Observations[0] | |
| if obs.IndoorTemperature < 10 || obs.IndoorTemperature > 40 { | |
| t.Errorf("indoor_temperature out of range: %.2f", obs.IndoorTemperature) | |
| } | |
| if obs.ThermalStorageLevel < 0 || obs.ThermalStorageLevel > 1 { | |
| t.Errorf("thermal_storage_level out of [0,1]: %.3f", obs.ThermalStorageLevel) | |
| } | |
| if obs.CurrentPrice <= 0 { | |
| t.Errorf("current_price must be positive, got %.4f", obs.CurrentPrice) | |
| } | |
| if obs.HourOfDay < 0 || obs.HourOfDay > 23 { | |
| t.Errorf("hour_of_day out of [0,23]: %d", obs.HourOfDay) | |
| } | |
| if obs.GridStressSignal < 0 || obs.GridStressSignal > 1 { | |
| t.Errorf("grid_stress_signal out of [0,1]: %.3f", obs.GridStressSignal) | |
| } | |
| } | |
| // TestStepAdvancesState verifies that step increments the step counter. | |
| func TestStepAdvancesState(t *testing.T) { | |
| e := env.NewEnvironment() | |
| var seed int64 = 1 | |
| e.Reset(env.ResetRequest{Seed: &seed, TaskID: 1, NumBuildings: 1}) | |
| action := []env.ActionModel{{HVACPowerLevel: 0.5, ThermalChargeRate: 0.0, BatchJobSlot: 0}} | |
| resps, done := e.Step(action) | |
| if done { | |
| t.Error("episode should not be done after first step") | |
| } | |
| if len(resps) != 1 { | |
| t.Fatalf("expected 1 step response, got %d", len(resps)) | |
| } | |
| state := e.GetState() | |
| if state.Step != 1 { | |
| t.Errorf("expected step=1 after one step, got %d", state.Step) | |
| } | |
| if resps[0].Observation.Step != 0 { | |
| t.Errorf("expected observation.step=0 after first transition, got %d", resps[0].Observation.Step) | |
| } | |
| } | |
| // TestEpisodeLengthIs96 verifies the episode terminates after 96 steps (24h). | |
| func TestEpisodeLengthIs96(t *testing.T) { | |
| e := env.NewEnvironment() | |
| var seed int64 = 99 | |
| e.Reset(env.ResetRequest{Seed: &seed, TaskID: 1, NumBuildings: 1}) | |
| action := []env.ActionModel{{HVACPowerLevel: 0.5}} | |
| var lastDone bool | |
| for i := 0; i < env.EpisodeSteps; i++ { | |
| _, lastDone = e.Step(action) | |
| } | |
| if !lastDone { | |
| t.Errorf("episode should be done after %d steps", env.EpisodeSteps) | |
| } | |
| } | |
| // TestDeterministicWithSeed verifies that two runs with the same seed produce identical rewards. | |
| func TestDeterministicWithSeed(t *testing.T) { | |
| action := []env.ActionModel{{HVACPowerLevel: 0.4, ThermalChargeRate: 0.1, BatchJobSlot: 1}} | |
| var seed int64 = 1337 | |
| run := func() float64 { | |
| e := env.NewEnvironment() | |
| e.Reset(env.ResetRequest{Seed: &seed, TaskID: 2, NumBuildings: 1}) | |
| resps, _ := e.Step(action) | |
| return resps[0].Reward | |
| } | |
| r1 := run() | |
| r2 := run() | |
| if math.Abs(r1-r2) > 1e-9 { | |
| t.Errorf("non-deterministic rewards with same seed: %.6f vs %.6f", r1, r2) | |
| } | |
| } | |
| // TestActionClamping verifies out-of-range actions are clamped. | |
| func TestActionClamping(t *testing.T) { | |
| e := env.NewEnvironment() | |
| var seed int64 = 7 | |
| e.Reset(env.ResetRequest{Seed: &seed, TaskID: 1}) | |
| // Over-range action | |
| action := []env.ActionModel{{HVACPowerLevel: 2.0, ThermalChargeRate: -5.0, LoadShedFraction: 0.9}} | |
| resps, _ := e.Step(action) | |
| if len(resps) == 0 { | |
| t.Fatal("no responses returned") | |
| } | |
| // After step, state should still be valid | |
| state := e.GetState() | |
| if len(state.Buildings) == 0 { | |
| t.Fatal("no buildings in state") | |
| } | |
| b := state.Buildings[0] | |
| if b.ThermalStorageLevel < 0 || b.ThermalStorageLevel > 1 { | |
| t.Errorf("thermal storage out of bounds: %.3f", b.ThermalStorageLevel) | |
| } | |
| } | |
| // TestMultiBuildingFederation checks that 3-building reset + step works. | |
| func TestMultiBuildingFederation(t *testing.T) { | |
| e := env.NewEnvironment() | |
| var seed int64 = 5 | |
| resp := e.Reset(env.ResetRequest{Seed: &seed, TaskID: 3, NumBuildings: 3}) | |
| if len(resp.Observations) != 3 { | |
| t.Fatalf("expected 3 observations for 3 buildings, got %d", len(resp.Observations)) | |
| } | |
| actions := []env.ActionModel{ | |
| {HVACPowerLevel: 0.3, BuildingID: 0}, | |
| {HVACPowerLevel: 0.5, BuildingID: 1}, | |
| {HVACPowerLevel: 0.7, BuildingID: 2}, | |
| } | |
| resps, _ := e.Step(actions) | |
| if len(resps) != 3 { | |
| t.Fatalf("expected 3 step responses, got %d", len(resps)) | |
| } | |
| } | |
| // TestRewardComponentsAreFinite verifies no NaN/Inf in rewards. | |
| func TestRewardComponentsAreFinite(t *testing.T) { | |
| e := env.NewEnvironment() | |
| var seed int64 = 42 | |
| e.Reset(env.ResetRequest{Seed: &seed, TaskID: 3}) | |
| action := []env.ActionModel{{HVACPowerLevel: 0.5, ThermalChargeRate: 0.2, BatchJobSlot: 2, LoadShedFraction: 0.3}} | |
| resps, _ := e.Step(action) | |
| rc := resps[0].Info.RewardComponents | |
| vals := []float64{rc.CostSavings, rc.TempConstraint, rc.GridResponse, | |
| rc.DeadlinePenalty, rc.EfficiencyBonus, rc.StabilityPenalty, rc.CarbonReward, rc.Total} | |
| for i, v := range vals { | |
| if math.IsNaN(v) || math.IsInf(v, 0) { | |
| t.Errorf("reward component %d is not finite: %v", i, v) | |
| } | |
| } | |
| } | |
| // TestGraderTask1ScoreRange verifies Task 1 score is always in [0, 1]. | |
| func TestGraderTask1ScoreRange(t *testing.T) { | |
| e := env.NewEnvironment() | |
| var seed int64 = 101 | |
| e.Reset(env.ResetRequest{Seed: &seed, TaskID: 1}) | |
| action := []env.ActionModel{{HVACPowerLevel: 0.3}} | |
| for i := 0; i < env.EpisodeSteps; i++ { | |
| e.Step(action) | |
| } | |
| state := e.GetState() | |
| replay := e.GetReplay() | |
| buildings := make([]*env.BuildingState, len(state.Buildings)) | |
| for i, pub := range state.Buildings { | |
| jobsCopy := make([]env.BatchJob, len(pub.Jobs)) | |
| copy(jobsCopy, pub.Jobs) | |
| buildings[i] = &env.BuildingState{ | |
| CumulativeCost: pub.CumulativeCost, | |
| BaselineCost: pub.BaselineCost, | |
| CumulativeCarbon: pub.CumulativeCarbon, | |
| BaselineCarbon: pub.BaselineCarbon, | |
| Jobs: jobsCopy, | |
| } | |
| } | |
| grade := env.GradeEpisode(env.GradeEpisodeInput{ | |
| TaskID: 1, | |
| Buildings: buildings, | |
| Replay: replay, | |
| TMin: env.TMinDefault, | |
| TMax: env.TMaxDefault, | |
| }) | |
| if grade.Score < 0 || grade.Score > 1 { | |
| t.Errorf("Task 1 score out of [0,1]: %.4f", grade.Score) | |
| } | |
| } | |