// gridmind/tests/environment_test.go
// Commit 6d74982 by ShreeshantXD — "feat: add baseline scores JSON, inference
// script, and update Dockerfile for improved project structure"
// tests/environment_test.go — unit tests for GridMind-RL environment
package tests
import (
"testing"
"math"
"gridmind-rl/env"
)
// TestResetProducesValidObservation checks that reset returns sane initial observations.
// TestResetProducesValidObservation checks that a seeded reset for a single
// building yields exactly one observation whose fields fall in sane ranges.
func TestResetProducesValidObservation(t *testing.T) {
	environment := env.NewEnvironment()
	seed := int64(42)
	reply := environment.Reset(env.ResetRequest{Seed: &seed, TaskID: 1, NumBuildings: 1})
	if n := len(reply.Observations); n != 1 {
		t.Fatalf("expected 1 observation, got %d", n)
	}
	first := reply.Observations[0]
	if temp := first.IndoorTemperature; temp < 10 || temp > 40 {
		t.Errorf("indoor_temperature out of range: %.2f", temp)
	}
	if level := first.ThermalStorageLevel; level < 0 || level > 1 {
		t.Errorf("thermal_storage_level out of [0,1]: %.3f", level)
	}
	if price := first.CurrentPrice; price <= 0 {
		t.Errorf("current_price must be positive, got %.4f", price)
	}
	if hour := first.HourOfDay; hour < 0 || hour > 23 {
		t.Errorf("hour_of_day out of [0,23]: %d", hour)
	}
	if stress := first.GridStressSignal; stress < 0 || stress > 1 {
		t.Errorf("grid_stress_signal out of [0,1]: %.3f", stress)
	}
}
// TestStepAdvancesState verifies that step increments the step counter.
// TestStepAdvancesState verifies that a single step returns one response,
// does not end the episode, and bumps the environment's step counter to 1
// while the returned observation still carries step index 0.
func TestStepAdvancesState(t *testing.T) {
	seed := int64(1)
	environment := env.NewEnvironment()
	environment.Reset(env.ResetRequest{Seed: &seed, TaskID: 1, NumBuildings: 1})
	actions := []env.ActionModel{{HVACPowerLevel: 0.5, ThermalChargeRate: 0.0, BatchJobSlot: 0}}
	responses, finished := environment.Step(actions)
	if finished {
		t.Error("episode should not be done after first step")
	}
	if n := len(responses); n != 1 {
		t.Fatalf("expected 1 step response, got %d", n)
	}
	if st := environment.GetState(); st.Step != 1 {
		t.Errorf("expected step=1 after one step, got %d", st.Step)
	}
	if obsStep := responses[0].Observation.Step; obsStep != 0 {
		t.Errorf("expected observation.step=0 after first transition, got %d", obsStep)
	}
}
// TestEpisodeLengthIs96 verifies the episode terminates after 96 steps (24h).
// TestEpisodeLengthIs96 verifies the episode terminates after exactly
// env.EpisodeSteps steps (24h) — and not before. The original version only
// inspected the done flag after the final step, so a premature termination
// at any earlier step would have gone undetected as long as Step kept
// reporting done afterwards.
func TestEpisodeLengthIs96(t *testing.T) {
	e := env.NewEnvironment()
	var seed int64 = 99
	e.Reset(env.ResetRequest{Seed: &seed, TaskID: 1, NumBuildings: 1})
	action := []env.ActionModel{{HVACPowerLevel: 0.5}}
	for i := 0; i < env.EpisodeSteps; i++ {
		_, done := e.Step(action)
		if done && i < env.EpisodeSteps-1 {
			// Terminating before the horizon is a hard failure.
			t.Fatalf("episode terminated early at step %d of %d", i+1, env.EpisodeSteps)
		}
		if i == env.EpisodeSteps-1 && !done {
			t.Errorf("episode should be done after %d steps", env.EpisodeSteps)
		}
	}
}
// TestDeterministicWithSeed verifies that two runs with the same seed produce identical rewards.
// TestDeterministicWithSeed verifies that two independent environments reset
// with the same seed produce identical first-step rewards.
func TestDeterministicWithSeed(t *testing.T) {
	seed := int64(1337)
	actions := []env.ActionModel{{HVACPowerLevel: 0.4, ThermalChargeRate: 0.1, BatchJobSlot: 1}}
	// rollout builds a fresh environment, resets it with the shared seed,
	// takes one step, and reports the resulting reward.
	rollout := func() float64 {
		environment := env.NewEnvironment()
		environment.Reset(env.ResetRequest{Seed: &seed, TaskID: 2, NumBuildings: 1})
		responses, _ := environment.Step(actions)
		return responses[0].Reward
	}
	first := rollout()
	second := rollout()
	if math.Abs(first-second) > 1e-9 {
		t.Errorf("non-deterministic rewards with same seed: %.6f vs %.6f", first, second)
	}
}
// TestActionClamping verifies out-of-range actions are clamped.
// TestActionClamping verifies that actions outside their legal ranges are
// clamped: stepping with wildly over/under-range values must still leave the
// environment in a valid state.
func TestActionClamping(t *testing.T) {
	environment := env.NewEnvironment()
	seed := int64(7)
	environment.Reset(env.ResetRequest{Seed: &seed, TaskID: 1})
	// Deliberately out-of-range HVAC and charge values.
	overdriven := []env.ActionModel{{HVACPowerLevel: 2.0, ThermalChargeRate: -5.0, LoadShedFraction: 0.9}}
	responses, _ := environment.Step(overdriven)
	if len(responses) == 0 {
		t.Fatal("no responses returned")
	}
	// The post-step state must remain within physical bounds.
	st := environment.GetState()
	if len(st.Buildings) == 0 {
		t.Fatal("no buildings in state")
	}
	if level := st.Buildings[0].ThermalStorageLevel; level < 0 || level > 1 {
		t.Errorf("thermal storage out of bounds: %.3f", level)
	}
}
// TestMultiBuildingFederation checks that 3-building reset + step works.
// TestMultiBuildingFederation checks that resetting with three buildings
// yields three observations and that a three-action step yields three
// responses.
func TestMultiBuildingFederation(t *testing.T) {
	const buildingCount = 3
	environment := env.NewEnvironment()
	seed := int64(5)
	reply := environment.Reset(env.ResetRequest{Seed: &seed, TaskID: 3, NumBuildings: buildingCount})
	if n := len(reply.Observations); n != buildingCount {
		t.Fatalf("expected 3 observations for 3 buildings, got %d", n)
	}
	// One action per building, with distinct HVAC power levels.
	actions := make([]env.ActionModel, 0, buildingCount)
	for id, power := range []float64{0.3, 0.5, 0.7} {
		actions = append(actions, env.ActionModel{HVACPowerLevel: power, BuildingID: id})
	}
	responses, _ := environment.Step(actions)
	if n := len(responses); n != buildingCount {
		t.Fatalf("expected 3 step responses, got %d", n)
	}
}
// TestRewardComponentsAreFinite verifies no NaN/Inf in rewards.
// TestRewardComponentsAreFinite verifies that no reward component comes back
// as NaN or ±Inf after a step that exercises every action field.
func TestRewardComponentsAreFinite(t *testing.T) {
	environment := env.NewEnvironment()
	seed := int64(42)
	environment.Reset(env.ResetRequest{Seed: &seed, TaskID: 3})
	actions := []env.ActionModel{{HVACPowerLevel: 0.5, ThermalChargeRate: 0.2, BatchJobSlot: 2, LoadShedFraction: 0.3}}
	responses, _ := environment.Step(actions)
	components := responses[0].Info.RewardComponents
	checks := []float64{
		components.CostSavings,
		components.TempConstraint,
		components.GridResponse,
		components.DeadlinePenalty,
		components.EfficiencyBonus,
		components.StabilityPenalty,
		components.CarbonReward,
		components.Total,
	}
	for idx, value := range checks {
		if math.IsNaN(value) || math.IsInf(value, 0) {
			t.Errorf("reward component %d is not finite: %v", idx, value)
		}
	}
}
// TestGraderTask1ScoreRange verifies Task 1 score is always in [0, 1].
// TestGraderTask1ScoreRange runs a full fixed-policy episode, grades it for
// Task 1, and verifies the resulting score lands in [0, 1].
func TestGraderTask1ScoreRange(t *testing.T) {
	environment := env.NewEnvironment()
	seed := int64(101)
	environment.Reset(env.ResetRequest{Seed: &seed, TaskID: 1})
	actions := []env.ActionModel{{HVACPowerLevel: 0.3}}
	for step := 0; step < env.EpisodeSteps; step++ {
		environment.Step(actions)
	}
	st := environment.GetState()
	// Snapshot each public building into the grader's BuildingState form;
	// jobs are deep-copied so grading cannot alias the live environment.
	snapshots := make([]*env.BuildingState, len(st.Buildings))
	for idx, public := range st.Buildings {
		jobs := make([]env.BatchJob, len(public.Jobs))
		copy(jobs, public.Jobs)
		snapshots[idx] = &env.BuildingState{
			CumulativeCost:   public.CumulativeCost,
			BaselineCost:     public.BaselineCost,
			CumulativeCarbon: public.CumulativeCarbon,
			BaselineCarbon:   public.BaselineCarbon,
			Jobs:             jobs,
		}
	}
	grade := env.GradeEpisode(env.GradeEpisodeInput{
		TaskID:    1,
		Buildings: snapshots,
		Replay:    environment.GetReplay(),
		TMin:      env.TMinDefault,
		TMax:      env.TMaxDefault,
	})
	if grade.Score < 0 || grade.Score > 1 {
		t.Errorf("Task 1 score out of [0,1]: %.4f", grade.Score)
	}
}