// gridmind/env/environment.go
// ShreeshantXD: "fix: problems with parameters" (f316664)

// Package env implements the GridMind-RL simulation core.
// It models a multi-building industrial/commercial energy management system
// with stochastic electricity prices, thermal dynamics, and batch job scheduling.
package env
import (
"math"
"math/rand"
"sync"
"time"
)
// Simulation-wide constants: episode timing, the default comfort band, and
// the per-building equipment limits copied into each BuildingState.
const (
EpisodeSteps = 96 // 24 hours × 15-min intervals (96 × 0.25h = 24h)
StepDurationHrs = 0.25 // each step = 15 minutes = 0.25 h
MaxBuildings = 3 // upper clamp applied to ResetRequest.NumBuildings
DefaultSetpoint = 21.0 // °C comfortable indoor temp
TMinDefault = 19.0 // °C lower bound
TMaxDefault = 23.0 // °C upper bound
MaxHVACPowerKW = 50.0 // kW per building
MaxStorageKWh = 100.0 // kWh thermal storage capacity
StorageLossRate = 0.005 // fraction lost per step (thermal dissipation)
MaxBatchJobs = 5 // max concurrent batch jobs per building
)
// Environment is the thread-safe top-level simulation manager.
// The exported methods visible in this file take mu themselves (Reset and
// Step the write lock; GetState, GetReplay and ExploitDetected the read
// lock); the unexported helpers are called with the lock already held.
type Environment struct {
// mu guards every field below.
mu sync.RWMutex
// rng is re-created from seed on every Reset.
rng *rand.Rand
seed int64
// episode is incremented by each Reset.
episode int
// step is the index of the next timestep to simulate; done is set once it
// reaches EpisodeSteps.
step int
done bool
// taskID is forced into 1..3 by Reset; difficulty is matched against
// "medium" and "hard" by the generators, anything else uses the easy values.
taskID int
difficulty string
// numBuildings is clamped to [1, MaxBuildings] by Reset.
numBuildings int
Buildings []*BuildingState
PriceCurve [EpisodeSteps]float64 // $/kWh for each step
CarbonCurve [EpisodeSteps]float64 // gCO2/kWh for each step
// Replay receives one entry per Step, built from the first building's response.
Replay []ReplayEntry
// LastActions holds one slot per building, written by Step.
LastActions []ActionModel
// History for dashboard rendering (per building)
TempHistory [][]float64
CostHistory [][]float64
HVACHistory [][]float64
LoadShedHistory [][]float64
RewardHistory [][]RewardComponents
// Exploit detection counters
totalShedSteps []int // steps where load_shed > 0.4
thermalCycleCounts []int // rapid thermal storage reversals
prevChargeRates []float64 // ThermalChargeRate applied on the previous step, per building
}
// NewEnvironment returns an Environment seeded from the wall clock and set to
// the defaults: task 1, "easy" difficulty, one building. Buildings, curves and
// history slices are only populated by Reset.
func NewEnvironment() *Environment {
	env := new(Environment)
	env.seed = time.Now().UnixNano()
	env.rng = rand.New(rand.NewSource(env.seed))
	env.taskID = 1
	env.difficulty = "easy"
	env.numBuildings = 1
	return env
}
// Reset starts a fresh episode and returns the initial observation of every
// building. The write lock is held for the whole call.
//
// Request handling:
//   - Seed: used as given when non-nil, otherwise taken from the wall clock;
//     the RNG is re-created from it in both cases.
//   - TaskID: values outside 1..3 fall back to 1.
//   - Difficulty: when empty it is derived from the task (1 easy, 2 medium,
//     3 hard); a non-empty value is stored unchanged.
//   - NumBuildings: clamped to [1, MaxBuildings].
func (e *Environment) Reset(req ResetRequest) ResetResponse {
	e.mu.Lock()
	defer e.mu.Unlock()

	// Seed and random source.
	if req.Seed == nil {
		e.seed = time.Now().UnixNano()
	} else {
		e.seed = *req.Seed
	}
	e.rng = rand.New(rand.NewSource(e.seed))

	// Task, with the per-task default difficulty.
	e.taskID = 1
	if req.TaskID >= 1 && req.TaskID <= 3 {
		e.taskID = req.TaskID
	}
	e.difficulty = req.Difficulty
	if e.difficulty == "" {
		e.difficulty = [...]string{"easy", "medium", "hard"}[e.taskID-1]
	}

	// Federation size.
	n := req.NumBuildings
	switch {
	case n < 1:
		n = 1
	case n > MaxBuildings:
		n = MaxBuildings
	}
	e.numBuildings = n

	// Episode bookkeeping.
	e.episode++
	e.step = 0
	e.done = false
	e.Replay = make([]ReplayEntry, 0, EpisodeSteps)
	e.LastActions = make([]ActionModel, n)

	// The curves must exist before any building is created: each building
	// reads step 0 of both, and the RNG draw order (price, carbon, buildings)
	// determines what a given seed reproduces.
	e.generatePriceCurve()
	e.generateCarbonCurve()

	e.Buildings = make([]*BuildingState, n)
	e.TempHistory = make([][]float64, n)
	e.CostHistory = make([][]float64, n)
	e.HVACHistory = make([][]float64, n)
	e.LoadShedHistory = make([][]float64, n)
	e.RewardHistory = make([][]RewardComponents, n)
	e.totalShedSteps = make([]int, n)
	e.thermalCycleCounts = make([]int, n)
	e.prevChargeRates = make([]float64, n)

	obs := make([]ObservationModel, n)
	for i := range e.Buildings {
		building := e.newBuildingState(i)
		e.Buildings[i] = building
		e.TempHistory[i] = make([]float64, 0, EpisodeSteps)
		e.CostHistory[i] = make([]float64, 0, EpisodeSteps)
		e.HVACHistory[i] = make([]float64, 0, EpisodeSteps)
		e.LoadShedHistory[i] = make([]float64, 0, EpisodeSteps)
		e.RewardHistory[i] = make([]RewardComponents, 0, EpisodeSteps)
		obs[i] = e.buildObservation(building)
	}

	return ResetResponse{
		Observations: obs,
		Episode:      e.episode,
		TaskID:       e.taskID,
		Seed:         e.seed,
	}
}
// Step advances the simulation by one timestep for all buildings. Thread-safe.
//
// actions is clamped in place (the caller's slice is modified). Each building
// gets the action chosen by findAction: a BuildingID match first, then the
// positional entry, then a default. That applied action is what is stored in
// LastActions and in the replay entry.
//
// It returns one response per building plus the episode-done flag. When the
// episode is already done, or Reset has never been called (no buildings
// exist), it returns (nil, true) without touching any state.
func (e *Environment) Step(actions []ActionModel) ([]StepResponse, bool) {
	e.mu.Lock()
	defer e.mu.Unlock()

	// Before the first Reset there are no buildings and no per-building
	// slices; report "done" rather than indexing into nil slices.
	if e.done || len(e.Buildings) == 0 {
		return nil, true
	}

	// Validate and clamp actions
	for i := range actions {
		e.clampAction(&actions[i])
	}

	responses := make([]StepResponse, len(e.Buildings))
	for i, b := range e.Buildings {
		// Find action for this building (by building_id or by index) and
		// remember exactly what was applied, so LastActions and the replay
		// never show a different action than the one simulated.
		act := e.findAction(actions, i)
		e.LastActions[i] = act
		responses[i] = e.stepBuilding(b, act, i)
	}

	e.step++
	if e.step >= EpisodeSteps {
		e.done = true
	}

	// Record replay entry (first building is the primary one).
	e.Replay = append(e.Replay, ReplayEntry{
		Step:        e.step - 1,
		Observation: responses[0].Observation,
		Action:      e.LastActions[0],
		Reward:      responses[0].Reward,
		Components:  responses[0].Info.RewardComponents,
		Done:        e.done,
	})
	return responses, e.done
}
// GetState returns a full snapshot of environment state. Thread-safe (read lock).
//
// Job lists and history slices are copied while the lock is held, so the
// returned value shares no backing storage with the environment and can be
// read (e.g. serialised) while Step keeps mutating the live state. The price
// and carbon curves are down-sampled to one value per hour (every 4th step).
func (e *Environment) GetState() StateResponse {
	e.mu.RLock()
	defer e.mu.RUnlock()

	// cloneFloats detaches a history slice from its backing array while
	// keeping its nil-ness.
	cloneFloats := func(src []float64) []float64 {
		return append(src[:0:0], src...)
	}

	buildings := make([]BuildingStatePublic, e.numBuildings)
	for i, b := range e.Buildings {
		pub := BuildingStatePublic{
			ObservationModel:    e.buildObservation(b),
			OutdoorTemperature:  b.OutdoorTemperature,
			SetpointTemperature: b.SetpointTemperature,
			BaselineCost:        b.BaselineCost,
			BaselineCarbon:      b.BaselineCarbon,
			CumulativeCarbon:    b.CumulativeCarbon,
			// updateBatchJobs edits job entries in place, so hand out a copy.
			Jobs: append(b.Jobs[:0:0], b.Jobs...),
		}
		if i < len(e.TempHistory) {
			pub.TempHistory = cloneFloats(e.TempHistory[i])
			pub.CostHistory = cloneFloats(e.CostHistory[i])
			pub.HVACHistory = cloneFloats(e.HVACHistory[i])
			pub.LoadShedHistory = cloneFloats(e.LoadShedHistory[i])
			pub.RewardHistory = append(e.RewardHistory[i][:0:0], e.RewardHistory[i]...)
		}
		buildings[i] = pub
	}

	// One sample per hour: take the first 15-minute step of each hour.
	const stepsPerHour = 4
	hours := EpisodeSteps / stepsPerHour
	priceCurve := make([]float64, hours)
	carbonCurve := make([]float64, hours)
	for h := 0; h < hours; h++ {
		priceCurve[h] = e.PriceCurve[h*stepsPerHour]
		carbonCurve[h] = e.CarbonCurve[h*stepsPerHour]
	}

	return StateResponse{
		Buildings:   buildings,
		PriceCurve:  priceCurve,
		CarbonCurve: carbonCurve,
		Episode:     e.episode,
		Step:        e.step,
		TaskID:      e.taskID,
		Done:        e.done,
		Seed:        e.seed,
	}
}
// GetReplay returns a copy of the replay entries recorded so far in the
// current episode. It takes the read lock; the returned slice does not alias
// the environment's own Replay slice.
func (e *Environment) GetReplay() []ReplayEntry {
	e.mu.RLock()
	defer e.mu.RUnlock()

	snapshot := make([]ReplayEntry, 0, len(e.Replay))
	return append(snapshot, e.Replay...)
}
// ──────────────────────────────────────────────
// Internal helpers
// ──────────────────────────────────────────────
// newBuildingState builds the initial state of building id for a new episode.
// Indoor temperature, storage level, outdoor temperature and process demand
// are drawn from e.rng (in that order), then the batch jobs are generated.
// Price and carbon are read from step 0 of the episode curves, so the curves
// must already have been generated.
func (e *Environment) newBuildingState(id int) *BuildingState {
	// Random draws; the order is part of what a seed reproduces.
	indoor := DefaultSetpoint + (e.rng.Float64()-0.5)*2.0 // setpoint ± 1 °C
	storage := 0.3 + e.rng.Float64()*0.4                  // 30–70% full
	outdoor := 15.0 + e.rng.Float64()*15.0                // 15–30 °C
	demand := 10.0 + e.rng.Float64()*20.0

	// Fields not assigned here (grid stress, hour, step, cumulative and
	// baseline totals) start at their zero values.
	st := new(BuildingState)
	st.BuildingID = id
	st.IndoorTemperature = indoor
	st.ThermalStorageLevel = storage
	st.ProcessDemand = demand
	st.OutdoorTemperature = outdoor
	st.CurrentPrice = e.PriceCurve[0]
	st.CarbonIntensity = e.CarbonCurve[0]
	st.PrevHVACLevel = 0.5
	st.SetpointTemperature = DefaultSetpoint
	st.MaxHVACPower = MaxHVACPowerKW
	st.MaxStorageCapacity = MaxStorageKWh
	st.ThermalLossRate = StorageLossRate

	// Spawn batch jobs based on difficulty and expose their deadlines.
	st.Jobs = e.generateBatchJobs()
	st.BatchQueue = pendingDeadlines(st.Jobs)
	return st
}
// generateBatchJobs creates the batch jobs for one building: 3 by default,
// 4 on "medium" and 5 on "hard". Each job gets a random deadline slot, a
// duration of 1–3 steps and a power draw of 5–20 kW, and starts unscheduled
// (ScheduledAt = -1).
func (e *Environment) generateBatchJobs() []BatchJob {
	count := 3
	if e.difficulty == "medium" {
		count = 4
	} else if e.difficulty == "hard" {
		count = 5
	}

	// Deadlines are spread across the episode, leaving slack at the end for
	// the job duration.
	deadlineSpan := EpisodeSteps - 12
	if deadlineSpan < 8 {
		deadlineSpan = 8
	}

	batch := make([]BatchJob, 0, count)
	for id := 1; id <= count; id++ {
		// Draw order (deadline, duration, power) is part of seed reproducibility.
		deadline := 4 + e.rng.Intn(deadlineSpan)
		duration := 1 + e.rng.Intn(3)
		power := 5.0 + e.rng.Float64()*15.0
		batch = append(batch, BatchJob{
			ID:           id,
			DeadlineSlot: deadline,
			Duration:     duration,
			PowerDraw:    power,
			ScheduledAt:  -1,
		})
	}
	return batch
}
// generatePriceCurve fills e.PriceCurve with a noisy time-of-use price for
// every step of the episode. Both peak windows are shifted by a random whole
// number of hours in [-2, 2]; each step then gets multiplicative noise of up
// to ±10% (easy), ±20% (medium) or ±35% (hard) of the base price, with a
// floor of 0.02 $/kWh.
func (e *Environment) generatePriceCurve() {
	var volatility float64
	switch e.difficulty {
	case "hard":
		volatility = 0.35
	case "medium":
		volatility = 0.2
	default:
		volatility = 0.1
	}

	// Morning shift is drawn before the evening shift.
	morningShift := e.rng.Intn(5) - 2
	eveningShift := e.rng.Intn(5) - 2

	for s := range e.PriceCurve {
		base := touPrice(s/4, morningShift, eveningShift) // 4 steps per hour
		noise := (e.rng.Float64()*2 - 1) * volatility * base
		e.PriceCurve[s] = math.Max(0.02, base+noise)
	}
}
// touPrice returns the base time-of-use price ($/kWh) for a given hour.
// The first matching tier wins:
//
//	0.18  morning peak, [8+morningShift, 12+morningShift)
//	0.22  evening peak, [17+eveningShift, 21+eveningShift] (end inclusive)
//	0.10  shoulder: from hour 9 up to the morning peak, or between the
//	      morning and evening peaks
//	0.04  overnight, hour >= 23 or hour < 6
//	0.08  everything else
func touPrice(hour, morningShift, eveningShift int) float64 {
	morningStart, morningEnd := 8+morningShift, 12+morningShift
	eveningStart, eveningEnd := 17+eveningShift, 21+eveningShift

	if morningStart <= hour && hour < morningEnd {
		return 0.18
	}
	if eveningStart <= hour && hour <= eveningEnd {
		return 0.22
	}

	beforeMorningPeak := 9 <= hour && hour < morningStart
	betweenPeaks := morningEnd <= hour && hour < eveningStart
	if beforeMorningPeak || betweenPeaks {
		return 0.10
	}

	if hour < 6 || hour >= 23 {
		return 0.04
	}
	return 0.08
}
// generateCarbonCurve fills e.CarbonCurve (gCO2/kWh) from the price curve,
// which must already be generated. Each step's price is mapped linearly so
// that 0.04 $/kWh gives 150 and 0.32 $/kWh gives 600, uniform noise of ±30
// is added, and the result is floored at 100.
func (e *Environment) generateCarbonCurve() {
	const (
		priceLow, priceHigh   = 0.04, 0.32
		carbonLow, carbonHigh = 150.0, 600.0
		noiseAmplitude        = 30.0
		carbonFloor           = 100.0
	)
	for s := range e.CarbonCurve {
		carbon := carbonLow + (e.PriceCurve[s]-priceLow)/(priceHigh-priceLow)*(carbonHigh-carbonLow)
		noise := (e.rng.Float64()*2 - 1) * noiseAmplitude
		e.CarbonCurve[s] = math.Max(carbonFloor, carbon+noise)
	}
}
// stepBuilding advances a single building by one timestep.
//
// b is the building to mutate, act the (already clamped) action applied to
// it, and idx its position in the per-building slices (histories, exploit
// counters, previous charge rates). The current timestep is read from e.step;
// Step calls this with the write lock held and increments e.step afterwards.
//
// In order, it refreshes the exogenous signals (price, carbon, grid stress,
// outdoor temperature, process demand), applies the action to storage, load
// shedding and batch jobs, integrates the indoor temperature, accounts energy,
// cost and carbon against an always-on baseline, computes the reward, updates
// the exploit counters and histories, and returns the step response.
func (e *Environment) stepBuilding(b *BuildingState, act ActionModel, idx int) StepResponse {
	s := e.step

	// Remember the charge rate from the previous step before it is replaced
	// further down; both the reward input and the cycle detector need the old
	// value, not the one being applied now.
	prevChargeRate := e.prevChargeRates[idx]

	// Update environmental signals from curves
	b.CurrentPrice = e.PriceCurve[s]
	b.CarbonIntensity = e.CarbonCurve[s]
	b.HourOfDay = (s / 4) % 24

	// Stochastic grid stress events (more frequent in hard mode)
	b.GridStressSignal = e.updateGridStress(s)

	// Weather perturbation: outdoor temp drifts sinusoidally + noise
	b.OutdoorTemperature = e.updateOutdoorTemp(s)

	// Process demand fluctuation
	b.ProcessDemand = e.updateProcessDemand(s)

	// ----- Apply actions -----
	// 1. HVAC: heats/cools building toward setpoint
	hvacPower := act.HVACPowerLevel * b.MaxHVACPower // kW

	// 2. Thermal storage: charge or discharge
	chargeKW := act.ThermalChargeRate * b.MaxHVACPower * 0.3 // max 30% of HVAC for storage
	newStorageEnergy := b.ThermalStorageLevel*b.MaxStorageCapacity + chargeKW*StepDurationHrs
	// Apply thermal losses
	newStorageEnergy *= (1.0 - b.ThermalLossRate)
	newStorageEnergy = math.Max(0, math.Min(b.MaxStorageCapacity, newStorageEnergy))
	b.ThermalStorageLevel = newStorageEnergy / b.MaxStorageCapacity

	// 3. Load shedding
	clampedShed := math.Max(0, math.Min(0.5, act.LoadShedFraction))
	shedKW := clampedShed * b.ProcessDemand

	// 4. Batch job scheduling
	batchCompleted, batchMissed := e.updateBatchJobs(b, act.BatchJobSlot, s)

	// ----- Thermal dynamics -----
	// First-order setpoint-driven model:
	// HVAC drives temperature toward setpoint; higher power = stronger effect.
	// At HVACPowerLevel=1.0, HVAC strongly pushes toward setpoint.
	// At HVACPowerLevel=0.0, HVAC is off — temp drifts with environment.
	hvacEffect := (b.SetpointTemperature - b.IndoorTemperature) * act.HVACPowerLevel * 0.15
	// Outdoor infiltration: building slowly equilibrates with outside
	infiltration := (b.OutdoorTemperature - b.IndoorTemperature) * 0.03
	// Thermal storage discharge provides supplemental conditioning toward setpoint
	storageEffect := 0.0
	if act.ThermalChargeRate < 0 {
		storageEffect = (b.SetpointTemperature - b.IndoorTemperature) * math.Abs(act.ThermalChargeRate) * 0.05
	}
	// Process equipment waste heat (always warms the building)
	processHeat := b.ProcessDemand * 0.002 // kW→°C rough factor
	deltaT := hvacEffect + infiltration + storageEffect + processHeat
	b.IndoorTemperature += deltaT
	// Clamp to physically reasonable indoor range
	b.IndoorTemperature = math.Max(10.0, math.Min(40.0, b.IndoorTemperature))

	// ----- Energy & cost accounting -----
	batchPowerDraw := e.batchRunningPower(b)
	// Only charging (positive chargeKW) draws grid power; shedding reduces it.
	totalKW := hvacPower + math.Max(0, chargeKW) + batchPowerDraw - shedKW
	totalKW = math.Max(0, totalKW)
	energyKWh := totalKW * StepDurationHrs
	stepCost := energyKWh * b.CurrentPrice
	stepCarbon := energyKWh * b.CarbonIntensity
	b.CumulativeCost += stepCost
	b.CumulativeCarbon += stepCarbon

	// Baseline (always-on at 70% HVAC, no storage/shedding)
	baselineKW := 0.7*b.MaxHVACPower + b.ProcessDemand
	baselineEnergy := baselineKW * StepDurationHrs
	b.BaselineCost += baselineEnergy * b.CurrentPrice
	b.BaselineCarbon += baselineEnergy * b.CarbonIntensity

	// ----- Reward computation -----
	rc := ComputeReward(ComputeRewardInput{
		B:              b,
		Act:            act,
		StepCost:       stepCost,
		EnergyKWh:      energyKWh,
		TMin:           TMinDefault,
		TMax:           TMaxDefault,
		StepCarbon:     stepCarbon,
		BatchMissed:    len(batchMissed),
		GridStress:     b.GridStressSignal,
		ShedFraction:   clampedShed,
		TaskID:         e.taskID,
		PrevHVACLevel:  b.PrevHVACLevel,
		ChargeRate:     act.ThermalChargeRate,
		PrevChargeRate: prevChargeRate,
		StorageDelta:   act.ThermalChargeRate,
		PriceCurve:     e.PriceCurve[:],
		CurrentStep:    s,
	})
	b.PrevHVACLevel = act.HVACPowerLevel
	e.prevChargeRates[idx] = act.ThermalChargeRate

	// Update batch queue
	b.BatchQueue = pendingDeadlines(b.Jobs)

	// Exploit detection
	if clampedShed > 0.4 {
		e.totalShedSteps[idx]++
	}
	// A thermal cycle is a swing from strong charging to strong discharging
	// (or the reverse) between consecutive steps. Skipped on the first step
	// of an episode, when no replay entry exists yet.
	if idx < len(e.thermalCycleCounts) && len(e.Replay) > 0 {
		cur := act.ThermalChargeRate
		if (prevChargeRate > 0.3 && cur < -0.3) || (prevChargeRate < -0.3 && cur > 0.3) {
			e.thermalCycleCounts[idx]++
		}
	}

	// Per-building step index matches global timestep for this transition (0 .. EpisodeSteps-1)
	b.Step = s

	// Record history
	if idx < len(e.TempHistory) {
		e.TempHistory[idx] = append(e.TempHistory[idx], b.IndoorTemperature)
		e.CostHistory[idx] = append(e.CostHistory[idx], b.CumulativeCost)
		e.HVACHistory[idx] = append(e.HVACHistory[idx], act.HVACPowerLevel)
		e.LoadShedHistory[idx] = append(e.LoadShedHistory[idx], clampedShed)
		e.RewardHistory[idx] = append(e.RewardHistory[idx], rc)
	}

	obs := e.buildObservation(b)
	return StepResponse{
		Observation: obs,
		Reward:      rc.Total,
		// e.step is only incremented after all buildings have stepped, so the
		// last transition is detected from s directly.
		Done: e.done || s+1 >= EpisodeSteps,
		Info: StepInfo{
			RewardComponents: rc,
			EnergyUsed:       energyKWh,
			CarbonEmitted:    stepCarbon,
			PriceSignal:      b.CurrentPrice,
			GridStress:       b.GridStressSignal,
			BatchCompleted:   batchCompleted,
			BatchMissed:      batchMissed,
			Episode:          e.episode,
			Step:             s,
		},
	}
}
// updateGridStress returns the grid stress signal in [0, 1] for step s.
// It combines 60% of the step's price, normalised over 0.04–0.32 $/kWh, with
// an optional random spike of 0.3–0.8. The spike probability per call is 5%
// (easy), 10% (medium) or 20% (hard).
func (e *Environment) updateGridStress(s int) float64 {
	spikeProb := 0.05
	if e.difficulty == "medium" {
		spikeProb = 0.1
	} else if e.difficulty == "hard" {
		spikeProb = 0.2
	}

	priceNorm := (e.PriceCurve[s] - 0.04) / (0.32 - 0.04)

	// The second draw (spike size) only happens when the first one triggers.
	var spike float64
	if roll := e.rng.Float64(); roll < spikeProb {
		spike = 0.3 + e.rng.Float64()*0.5
	}

	return math.Max(0, math.Min(1.0, priceNorm*0.6+spike))
}
// updateOutdoorTemp returns the outdoor temperature (°C) for step s: a daily
// sine wave around 15 °C with 8 °C amplitude, phase-shifted by 6 hours, plus
// uniform noise of ±1.5 °C.
func (e *Environment) updateOutdoorTemp(s int) float64 {
	hourOfDay := float64(s) / 4.0 // fractional hour; 4 steps per hour
	daily := 15.0 + 8.0*math.Sin(2*math.Pi*(hourOfDay-6)/24.0)
	jitter := (e.rng.Float64()*2 - 1) * 1.5
	return daily + jitter
}
// updateProcessDemand returns the process demand for step s. Outside hours
// 8–18 (inclusive) the base level is 10; inside, it follows a half sine from
// 20 up to 30 and back. Uniform noise of ±3 is added and the result is
// floored at 0.
func (e *Environment) updateProcessDemand(s int) float64 {
	hour := s / 4
	level := 10.0
	if 8 <= hour && hour <= 18 {
		level = 20.0 + 10.0*math.Sin(math.Pi*float64(hour-8)/10.0)
	}
	jitter := (e.rng.Float64()*2 - 1) * 3.0
	return math.Max(0, level+jitter)
}
// updateBatchJobs applies the batch-slot action for one step and advances the
// job lifecycle of building b.
//
// First, the earliest job that is neither scheduled, completed nor missed is
// scheduled to start at step+slot (at most one job per call). Then every
// unsettled job is checked: once step reaches its DeadlineSlot it is marked
// missed; otherwise a scheduled job is marked completed on the last step of
// its run (ScheduledAt+Duration-1). The IDs of jobs completed and missed
// during this call are returned; both slices are non-nil.
//
// NOTE(review): batchRunningPower ignores completed jobs and is evaluated
// after this function in stepBuilding, so a job's final step looks like it is
// never billed — confirm whether that is intended.
func (e *Environment) updateBatchJobs(b *BuildingState, slot int, step int) (completed []int, missed []int) {
	finished := []int{}
	late := []int{}

	// Phase 1: schedule the first still-pending job into the chosen slot.
	for i := range b.Jobs {
		j := &b.Jobs[i]
		if j.Scheduled || j.Completed || j.MissedDeadline {
			continue
		}
		j.Scheduled, j.ScheduledAt = true, step+slot
		break // only one job is scheduled per step
	}

	// Phase 2: settle jobs; the deadline check takes precedence over completion.
	for i := range b.Jobs {
		j := &b.Jobs[i]
		switch {
		case j.Completed || j.MissedDeadline:
			// already settled, nothing to do
		case step >= j.DeadlineSlot:
			j.MissedDeadline = true
			late = append(late, j.ID)
		case j.Scheduled && step >= j.ScheduledAt && step >= j.ScheduledAt+j.Duration-1:
			j.Completed = true
			finished = append(finished, j.ID)
		}
	}
	return finished, late
}
// batchRunningPower sums the PowerDraw of every job of b that is scheduled,
// not yet completed or missed, and whose run window
// [ScheduledAt, ScheduledAt+Duration) contains the current step e.step.
func (e *Environment) batchRunningPower(b *BuildingState) float64 {
	now := e.step
	var sum float64
	for i := range b.Jobs {
		j := &b.Jobs[i]
		if !j.Scheduled || j.Completed || j.MissedDeadline {
			continue
		}
		if now < j.ScheduledAt || now >= j.ScheduledAt+j.Duration {
			continue
		}
		sum += j.PowerDraw
	}
	return sum
}
// buildObservation converts the internal state of b into the observation
// exposed to agents. Float fields are rounded to a fixed number of decimals
// (temperature and demand 2, storage and grid stress 3, price and cumulative
// cost 4, carbon intensity 1); the batch queue is recomputed from the jobs.
func (e *Environment) buildObservation(b *BuildingState) ObservationModel {
	// roundTo rounds v to the precision given by scale (100 → two decimals).
	roundTo := func(v, scale float64) float64 {
		return math.Round(v*scale) / scale
	}

	var obs ObservationModel
	obs.IndoorTemperature = roundTo(b.IndoorTemperature, 100)
	obs.ThermalStorageLevel = roundTo(b.ThermalStorageLevel, 1000)
	obs.ProcessDemand = roundTo(b.ProcessDemand, 100)
	obs.CurrentPrice = roundTo(b.CurrentPrice, 10000)
	obs.GridStressSignal = roundTo(b.GridStressSignal, 1000)
	obs.CarbonIntensity = roundTo(b.CarbonIntensity, 10)
	obs.HourOfDay = b.HourOfDay
	obs.BatchQueue = pendingDeadlines(b.Jobs)
	obs.CumulativeCost = roundTo(b.CumulativeCost, 10000)
	obs.Step = b.Step
	obs.BuildingID = b.BuildingID
	return obs
}
// clampAction forces every field of a into its valid range, in place:
// HVACPowerLevel to [0, 1], ThermalChargeRate to [-1, 1], BatchJobSlot to
// [0, 4] and LoadShedFraction to [0, 0.5].
func (e *Environment) clampAction(a *ActionModel) {
	limit := func(v, lo, hi float64) float64 {
		return math.Max(lo, math.Min(hi, v))
	}

	a.HVACPowerLevel = limit(a.HVACPowerLevel, 0, 1.0)
	a.ThermalChargeRate = limit(a.ThermalChargeRate, -1.0, 1.0)
	a.LoadShedFraction = limit(a.LoadShedFraction, 0, 0.5)

	switch {
	case a.BatchJobSlot < 0:
		a.BatchJobSlot = 0
	case a.BatchJobSlot > 4:
		a.BatchJobSlot = 4
	}
}
// findAction picks the action to apply to the building at buildingIdx.
// The first action whose BuildingID equals buildingIdx wins; failing that,
// the action at position buildingIdx is used; failing that, a neutral
// default (HVAC at 50%, no storage use, slot 0, no shedding) is returned.
func (e *Environment) findAction(actions []ActionModel, buildingIdx int) ActionModel {
	for i := range actions {
		if actions[i].BuildingID == buildingIdx {
			return actions[i]
		}
	}

	if buildingIdx >= len(actions) {
		// Do-nothing action: every field except the HVAC level stays at zero.
		var idle ActionModel
		idle.HVACPowerLevel = 0.5
		return idle
	}
	return actions[buildingIdx]
}
// pendingDeadlines returns the deadline slots of all jobs that are neither
// completed nor past their deadline, in job order. Jobs that are already
// scheduled are still included. The result is never nil.
func pendingDeadlines(jobs []BatchJob) []int {
	deadlines := make([]int, 0)
	for i := range jobs {
		if jobs[i].Completed || jobs[i].MissedDeadline {
			continue
		}
		deadlines = append(deadlines, jobs[i].DeadlineSlot)
	}
	return deadlines
}
// max returns the larger of two ints.
func max(a, b int) int {
	if b >= a {
		return b
	}
	return a
}
// min returns the smaller of two ints.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
// ExploitDetected returns whether the current episode shows signs of degenerate strategies
// for the building at buildingIdx, together with a penalty.
//
// Two ratios are computed over e.step+1 (the +1 keeps the denominator
// non-zero before the first step): the share of steps with load shedding
// above 0.4, and the share of steps counted as thermal-storage reversals.
// The episode is flagged when the shed ratio exceeds 0.7 or the cycle ratio
// exceeds 0.4; the penalty is 0.5 × the shed excess plus 0.3 × the cycle
// excess, and 0 when not flagged.
//
// An index outside the tracked buildings (including a negative one) yields
// (false, 0). Thread-safe (read lock).
func (e *Environment) ExploitDetected(buildingIdx int) (bool, float64) {
	e.mu.RLock()
	defer e.mu.RUnlock()

	if buildingIdx < 0 ||
		buildingIdx >= len(e.totalShedSteps) ||
		buildingIdx >= len(e.thermalCycleCounts) {
		return false, 0.0
	}

	steps := float64(e.step + 1)
	// Flag if agent always sheds > 40% load (more than 70% of steps)
	shedRatio := float64(e.totalShedSteps[buildingIdx]) / steps
	cycleRatio := float64(e.thermalCycleCounts[buildingIdx]) / steps

	exploited := shedRatio > 0.7 || cycleRatio > 0.4
	if !exploited {
		return false, 0.0
	}
	penalty := math.Max(shedRatio-0.7, 0)*0.5 + math.Max(cycleRatio-0.4, 0)*0.3
	return true, penalty
}