Spaces:
Sleeping
Sleeping
| // Package env implements the GridMind-RL simulation core. | |
| // It models a multi-building industrial/commercial energy management system | |
| // with stochastic electricity prices, thermal dynamics, and batch job scheduling. | |
| package env | |
| import ( | |
| "math" | |
| "math/rand" | |
| "sync" | |
| "time" | |
| ) | |
// Simulation-wide constants shared by the environment and its helpers.
const (
	// Episode timing.
	EpisodeSteps    = 96   // steps per episode: 24 h at 15-minute resolution (96 × 0.25 h)
	StepDurationHrs = 0.25 // duration of a single step in hours (15 minutes)

	// Federation size; Reset clamps the requested building count to this.
	MaxBuildings = 3

	// Indoor comfort parameters, in °C.
	DefaultSetpoint = 21.0 // comfortable indoor target
	TMinDefault     = 19.0 // lower comfort bound
	TMaxDefault     = 23.0 // upper comfort bound

	// Per-building equipment limits.
	MaxHVACPowerKW  = 50.0  // kW of HVAC capacity per building
	MaxStorageKWh   = 100.0 // kWh of thermal storage capacity
	StorageLossRate = 0.005 // fraction of stored energy lost per step (thermal dissipation)

	// Batch workload limit.
	MaxBatchJobs = 5 // max concurrent batch jobs per building
)
| // Environment is the thread-safe top-level simulation manager. | |
| type Environment struct { | |
| mu sync.RWMutex | |
| rng *rand.Rand | |
| seed int64 | |
| episode int | |
| step int | |
| done bool | |
| taskID int | |
| difficulty string | |
| numBuildings int | |
| Buildings []*BuildingState | |
| PriceCurve [EpisodeSteps]float64 | |
| CarbonCurve [EpisodeSteps]float64 | |
| Replay []ReplayEntry | |
| LastActions []ActionModel | |
| InstructionCard *InstructionCard // set for Task 4 episodes | |
| FaultSchedule *FaultSchedule // randomised fault events for this episode | |
| PriceMultipliers []float64 // per-building multipliers set by coordinator (default 1.0) | |
| // History for dashboard rendering (per building) | |
| TempHistory [][]float64 | |
| CostHistory [][]float64 | |
| HVACHistory [][]float64 | |
| LoadShedHistory [][]float64 | |
| RewardHistory [][]RewardComponents | |
| // Exploit detection counters | |
| totalShedSteps []int | |
| thermalCycleCounts []int | |
| prevChargeRates []float64 | |
| } | |
| // NewEnvironment creates an initialised (but not reset) environment. | |
| func NewEnvironment() *Environment { | |
| seed := time.Now().UnixNano() | |
| return &Environment{ | |
| rng: rand.New(rand.NewSource(seed)), | |
| seed: seed, | |
| taskID: 1, | |
| difficulty: "easy", | |
| numBuildings: 1, | |
| } | |
| } | |
| // Reset initializes a new episode. Thread-safe. | |
| func (e *Environment) Reset(req ResetRequest) ResetResponse { | |
| e.mu.Lock() | |
| defer e.mu.Unlock() | |
| // Apply seed | |
| if req.Seed != nil { | |
| e.seed = *req.Seed | |
| } else { | |
| e.seed = time.Now().UnixNano() | |
| } | |
| e.rng = rand.New(rand.NewSource(e.seed)) | |
| // Apply task and difficulty | |
| e.taskID = req.TaskID | |
| if e.taskID < 1 || e.taskID > 4 { | |
| e.taskID = 1 | |
| } | |
| e.difficulty = req.Difficulty | |
| if e.difficulty == "" { | |
| switch e.taskID { | |
| case 1: | |
| e.difficulty = "easy" | |
| case 2: | |
| e.difficulty = "medium" | |
| case 3, 4: | |
| e.difficulty = "hard" | |
| } | |
| } | |
| // Number of buildings (federation) | |
| e.numBuildings = req.NumBuildings | |
| if e.numBuildings < 1 { | |
| e.numBuildings = 1 | |
| } | |
| if e.numBuildings > MaxBuildings { | |
| e.numBuildings = MaxBuildings | |
| } | |
| e.episode++ | |
| e.step = 0 | |
| e.done = false | |
| e.Replay = make([]ReplayEntry, 0, EpisodeSteps) | |
| e.LastActions = make([]ActionModel, e.numBuildings) | |
| // Generate price and carbon curves for this episode | |
| e.generatePriceCurve() | |
| e.generateCarbonCurve() | |
| // Initialise buildings | |
| e.Buildings = make([]*BuildingState, e.numBuildings) | |
| e.TempHistory = make([][]float64, e.numBuildings) | |
| e.CostHistory = make([][]float64, e.numBuildings) | |
| e.HVACHistory = make([][]float64, e.numBuildings) | |
| e.LoadShedHistory = make([][]float64, e.numBuildings) | |
| e.RewardHistory = make([][]RewardComponents, e.numBuildings) | |
| e.totalShedSteps = make([]int, e.numBuildings) | |
| e.thermalCycleCounts = make([]int, e.numBuildings) | |
| e.prevChargeRates = make([]float64, e.numBuildings) | |
| for i := range e.Buildings { | |
| e.Buildings[i] = e.newBuildingState(i) | |
| e.TempHistory[i] = make([]float64, 0, EpisodeSteps) | |
| e.CostHistory[i] = make([]float64, 0, EpisodeSteps) | |
| e.HVACHistory[i] = make([]float64, 0, EpisodeSteps) | |
| e.LoadShedHistory[i] = make([]float64, 0, EpisodeSteps) | |
| e.RewardHistory[i] = make([]RewardComponents, 0, EpisodeSteps) | |
| } | |
| // Initialise coordinator price multipliers to 1.0 | |
| e.PriceMultipliers = make([]float64, e.numBuildings) | |
| for i := range e.PriceMultipliers { | |
| e.PriceMultipliers[i] = 1.0 | |
| } | |
| // Generate instruction card for Task 4 | |
| e.InstructionCard = nil | |
| if e.taskID == 4 { | |
| e.InstructionCard = GenerateInstructionCard(e.rng) | |
| } | |
| // Generate fault schedule for all tasks (probability varies by difficulty) | |
| e.FaultSchedule = GenerateFaultSchedule(e.rng, e.difficulty) | |
| obs := make([]ObservationModel, e.numBuildings) | |
| for i, b := range e.Buildings { | |
| obs[i] = e.buildObservation(b) | |
| } | |
| return ResetResponse{ | |
| Observations: obs, | |
| Episode: e.episode, | |
| TaskID: e.taskID, | |
| Seed: e.seed, | |
| InstructionCard: e.InstructionCard, | |
| } | |
| } | |
| // Step advances the simulation by one timestep for all buildings. Thread-safe. | |
| func (e *Environment) Step(actions []ActionModel) ([]StepResponse, bool) { | |
| e.mu.Lock() | |
| defer e.mu.Unlock() | |
| if e.done { | |
| return nil, true | |
| } | |
| // Validate and clamp actions | |
| for i := range actions { | |
| e.clampAction(&actions[i]) | |
| if i < e.numBuildings { | |
| e.LastActions[i] = actions[i] | |
| } | |
| } | |
| responses := make([]StepResponse, e.numBuildings) | |
| for i, b := range e.Buildings { | |
| var act ActionModel | |
| // Find action for this building (by building_id or by index) | |
| act = e.findAction(actions, i) | |
| responses[i] = e.stepBuilding(b, act, i) | |
| } | |
| e.step++ | |
| if e.step >= EpisodeSteps { | |
| e.done = true | |
| } | |
| // Record replay entry (aggregate of all buildings, first building primary) | |
| if len(responses) > 0 { | |
| entry := ReplayEntry{ | |
| Step: e.step - 1, | |
| Observation: responses[0].Observation, | |
| Action: e.LastActions[0], | |
| Reward: responses[0].Reward, | |
| Components: responses[0].Info.RewardComponents, | |
| Done: e.done, | |
| } | |
| e.Replay = append(e.Replay, entry) | |
| } | |
| return responses, e.done | |
| } | |
| // GetState returns a full snapshot of environment state. Thread-safe (read lock). | |
| func (e *Environment) GetState() StateResponse { | |
| e.mu.RLock() | |
| defer e.mu.RUnlock() | |
| buildings := make([]BuildingStatePublic, e.numBuildings) | |
| for i, b := range e.Buildings { | |
| pub := BuildingStatePublic{ | |
| ObservationModel: e.buildObservation(b), | |
| OutdoorTemperature: b.OutdoorTemperature, | |
| SetpointTemperature: b.SetpointTemperature, | |
| BaselineCost: b.BaselineCost, | |
| BaselineCarbon: b.BaselineCarbon, | |
| CumulativeCarbon: b.CumulativeCarbon, | |
| Jobs: b.Jobs, | |
| } | |
| if i < len(e.TempHistory) { | |
| pub.TempHistory = e.TempHistory[i] | |
| pub.CostHistory = e.CostHistory[i] | |
| pub.HVACHistory = e.HVACHistory[i] | |
| pub.LoadShedHistory = e.LoadShedHistory[i] | |
| pub.RewardHistory = e.RewardHistory[i] | |
| } | |
| buildings[i] = pub | |
| } | |
| priceCurve := make([]float64, EpisodeSteps/4) | |
| carbonCurve := make([]float64, EpisodeSteps/4) | |
| for h := 0; h < EpisodeSteps/4; h++ { | |
| stepIdx := h * 4 | |
| if stepIdx < EpisodeSteps { | |
| priceCurve[h] = e.PriceCurve[stepIdx] | |
| carbonCurve[h] = e.CarbonCurve[stepIdx] | |
| } | |
| } | |
| return StateResponse{ | |
| Buildings: buildings, | |
| PriceCurve: priceCurve, | |
| CarbonCurve: carbonCurve, | |
| Episode: e.episode, | |
| Step: e.step, | |
| TaskID: e.taskID, | |
| Done: e.done, | |
| Seed: e.seed, | |
| InstructionCard: e.InstructionCard, | |
| } | |
| } | |
| // GetReplay returns the full episode replay. Thread-safe. | |
| func (e *Environment) GetReplay() []ReplayEntry { | |
| e.mu.RLock() | |
| defer e.mu.RUnlock() | |
| result := make([]ReplayEntry, len(e.Replay)) | |
| copy(result, e.Replay) | |
| return result | |
| } | |
| // ────────────────────────────────────────────── | |
| // Internal helpers | |
| // ────────────────────────────────────────────── | |
| func (e *Environment) newBuildingState(id int) *BuildingState { | |
| // Randomise initial conditions slightly | |
| initTemp := DefaultSetpoint + (e.rng.Float64()-0.5)*2.0 | |
| storageLevel := 0.3 + e.rng.Float64()*0.4 // start 30–70% full | |
| outdoorTemp := 15.0 + e.rng.Float64()*15.0 // 15–30 °C | |
| b := &BuildingState{ | |
| BuildingID: id, | |
| IndoorTemperature: initTemp, | |
| ThermalStorageLevel: storageLevel, | |
| ProcessDemand: 10.0 + e.rng.Float64()*20.0, | |
| CurrentPrice: e.PriceCurve[0], | |
| GridStressSignal: 0.0, | |
| CarbonIntensity: e.CarbonCurve[0], | |
| HourOfDay: 0, | |
| Step: 0, | |
| BatchQueue: []int{}, | |
| CumulativeCost: 0.0, | |
| CumulativeCarbon: 0.0, | |
| OutdoorTemperature: outdoorTemp, | |
| PrevHVACLevel: 0.5, | |
| BaselineCost: 0.0, | |
| BaselineCarbon: 0.0, | |
| SetpointTemperature: DefaultSetpoint, | |
| MaxHVACPower: MaxHVACPowerKW, | |
| MaxStorageCapacity: MaxStorageKWh, | |
| ThermalLossRate: StorageLossRate, | |
| HVACEfficiency: 1.0, | |
| HVACDegradationRate: 0.0005 + e.rng.Float64()*0.001, // 0.05% to 0.15% per step | |
| } | |
| // Spawn batch jobs based on difficulty | |
| b.Jobs = e.generateBatchJobs() | |
| b.BatchQueue = pendingDeadlines(b.Jobs) | |
| return b | |
| } | |
| func (e *Environment) generateBatchJobs() []BatchJob { | |
| numJobs := 3 | |
| switch e.difficulty { | |
| case "medium": | |
| numJobs = 4 | |
| case "hard": | |
| numJobs = 5 | |
| } | |
| jobs := make([]BatchJob, numJobs) | |
| for i := range jobs { | |
| // Deadline spread across episode (leave slack at end for duration) | |
| span := EpisodeSteps - 12 | |
| if span < 8 { | |
| span = 8 | |
| } | |
| deadline := 4 + e.rng.Intn(span) | |
| jobs[i] = BatchJob{ | |
| ID: i + 1, | |
| DeadlineSlot: deadline, | |
| Duration: 1 + e.rng.Intn(3), | |
| PowerDraw: 5.0 + e.rng.Float64()*15.0, | |
| Scheduled: false, | |
| ScheduledAt: -1, | |
| Completed: false, | |
| MissedDeadline: false, | |
| } | |
| } | |
| return jobs | |
| } | |
| // generatePriceCurve creates a stochastic Time-of-Use price curve for the episode. | |
| func (e *Environment) generatePriceCurve() { | |
| // Base ToU: low overnight, moderate morning, high peak (8-12, 17-21), low night | |
| volatility := 0.1 | |
| switch e.difficulty { | |
| case "medium": | |
| volatility = 0.2 | |
| case "hard": | |
| volatility = 0.35 | |
| } | |
| // Random peak window shift (±2 hours) for stochasticity | |
| morningPeakShift := e.rng.Intn(5) - 2 | |
| eveningPeakShift := e.rng.Intn(5) - 2 | |
| for s := 0; s < EpisodeSteps; s++ { | |
| hour := (s / 4) | |
| base := touPrice(hour, morningPeakShift, eveningPeakShift) | |
| noise := (e.rng.Float64()*2 - 1) * volatility * base | |
| price := math.Max(0.02, base+noise) | |
| e.PriceCurve[s] = price | |
| } | |
| } | |
// touPrice returns the base time-of-use price ($/kWh) for the given hour,
// with the morning and evening peak windows moved by the given shifts.
//
// Unshifted schedule:
//
//	00:00–06:00  0.035  deep off-peak
//	06:00–08:00  0.070  ramp-up
//	08:00–12:00  0.200  morning peak
//	12:00–17:00  0.120  shoulder
//	17:00–21:00  0.310  evening peak
//	21:00–24:00  0.093  declining demand
//
// Windows are tested in priority order: morning peak, evening peak,
// shoulder, ramp-up, declining; anything left over is deep off-peak.
func touPrice(hour, morningShift, eveningShift int) float64 {
	mStart, mEnd := 8+morningShift, 12+morningShift
	eStart, eEnd := 17+eveningShift, 21+eveningShift

	// within reports whether hour lies in the half-open window [lo, hi).
	within := func(lo, hi int) bool { return hour >= lo && hour < hi }

	if within(mStart, mEnd) {
		return 0.20 // morning peak
	}
	if within(eStart, eEnd) {
		return 0.31 // evening peak
	}
	if within(mEnd, eStart) {
		return 0.12 // shoulder between the peaks
	}
	if within(6, mStart) {
		return 0.07 // ramp-up before the morning peak
	}
	if hour >= eEnd {
		return 0.093 // declining demand after the evening peak
	}
	return 0.035 // deep off-peak
}
| // generateCarbonCurve creates a realistic carbon intensity curve (gCO2/kWh). | |
| // Correlates roughly with price: higher price = more peaker plants = higher carbon. | |
| func (e *Environment) generateCarbonCurve() { | |
| for s := 0; s < EpisodeSteps; s++ { | |
| price := e.PriceCurve[s] | |
| // Map price range [0.028, 0.36] → carbon [150, 600] gCO2/kWh | |
| carbon := 150.0 + (price-0.028)/(0.36-0.028)*(600.0-150.0) | |
| noise := (e.rng.Float64()*2 - 1) * 30.0 | |
| e.CarbonCurve[s] = math.Max(100.0, carbon+noise) | |
| } | |
| } | |
| // stepBuilding advances a single building by one timestep. | |
| func (e *Environment) stepBuilding(b *BuildingState, act ActionModel, idx int) StepResponse { | |
| s := e.step | |
| // Update environmental signals from curves | |
| b.CurrentPrice = e.PriceCurve[s] * e.PriceMultipliers[idx] | |
| b.CarbonIntensity = e.CarbonCurve[s] | |
| b.HourOfDay = (s / 4) % 24 | |
| // Restore defaults before applying faults (allows recovery when fault ends) | |
| b.MaxHVACPower = MaxHVACPowerKW | |
| // Apply fault events for this step (modifies price, stress, HVAC capacity) | |
| activeFaultDescs := ApplyFaults(b, e.FaultSchedule, s, e.rng) | |
| _ = activeFaultDescs // stored for use in buildObservation via FaultSchedule.ActiveAt | |
| // Stochastic grid stress events (more frequent in hard mode). | |
| // Note: FaultGridOutage sets GridStressSignal=1.0 inside ApplyFaults. | |
| // We only overwrite it from the stochastic model if no outage is active. | |
| hasGridFault := false | |
| if e.FaultSchedule != nil { | |
| for _, f := range e.FaultSchedule.ActiveAt(s) { | |
| if f.Type == FaultGridOutage { | |
| hasGridFault = true | |
| break | |
| } | |
| } | |
| } | |
| if !hasGridFault { | |
| b.GridStressSignal = e.updateGridStress(s) | |
| } | |
| // Weather perturbation: outdoor temp drifts sinusoidally + noise | |
| b.OutdoorTemperature = e.updateOutdoorTemp(s) | |
| // Process demand fluctuation | |
| b.ProcessDemand = e.updateProcessDemand(s) | |
| // ----- Apply actions ----- | |
| // 0. Degrade HVAC efficiency | |
| b.HVACEfficiency = math.Max(0.5, b.HVACEfficiency-b.HVACDegradationRate) | |
| // 1. HVAC: heats/cools building toward setpoint | |
| hvacPower := act.HVACPowerLevel * b.MaxHVACPower * b.HVACEfficiency // kW | |
| // 2. Thermal storage: charge or discharge | |
| chargeKW := act.ThermalChargeRate * b.MaxHVACPower * 0.3 // max 30% of HVAC for storage | |
| newStorageEnergy := b.ThermalStorageLevel*b.MaxStorageCapacity + chargeKW*StepDurationHrs | |
| // Apply thermal losses | |
| newStorageEnergy *= (1.0 - b.ThermalLossRate) | |
| newStorageEnergy = math.Max(0, math.Min(b.MaxStorageCapacity, newStorageEnergy)) | |
| b.ThermalStorageLevel = newStorageEnergy / b.MaxStorageCapacity | |
| // 3. Load shedding | |
| clampedShed := math.Max(0, math.Min(0.5, act.LoadShedFraction)) | |
| shedKW := clampedShed * b.ProcessDemand | |
| // 4. Batch job scheduling | |
| batchCompleted, batchMissed := e.updateBatchJobs(b, act.BatchJobSlot, s) | |
| // ----- Thermal dynamics ----- | |
| // First-order setpoint-driven model: | |
| // HVAC drives temperature toward setpoint; higher power = stronger effect. | |
| // At HVACPowerLevel=1.0, HVAC strongly pushes toward setpoint. | |
| // At HVACPowerLevel=0.0, HVAC is off — temp drifts with environment. | |
| hvacEffect := (b.SetpointTemperature - b.IndoorTemperature) * act.HVACPowerLevel * 0.15 | |
| // Outdoor infiltration: building slowly equilibrates with outside | |
| infiltration := (b.OutdoorTemperature - b.IndoorTemperature) * 0.03 | |
| // Thermal storage discharge provides supplemental conditioning toward setpoint | |
| storageEffect := 0.0 | |
| if act.ThermalChargeRate < 0 { | |
| storageEffect = (b.SetpointTemperature - b.IndoorTemperature) * math.Abs(act.ThermalChargeRate) * 0.05 | |
| } | |
| // Process equipment waste heat (always warms the building) | |
| processHeat := b.ProcessDemand * 0.002 // kW→°C rough factor | |
| deltaT := hvacEffect + infiltration + storageEffect + processHeat | |
| b.IndoorTemperature += deltaT | |
| // Clamp to physically reasonable indoor range | |
| b.IndoorTemperature = math.Max(10.0, math.Min(40.0, b.IndoorTemperature)) | |
| // ----- Energy & cost accounting ----- | |
| batchPowerDraw := e.batchRunningPower(b) | |
| totalKW := hvacPower + math.Max(0, chargeKW) + batchPowerDraw - shedKW | |
| totalKW = math.Max(0, totalKW) | |
| energyKWh := totalKW * StepDurationHrs | |
| stepCost := energyKWh * b.CurrentPrice | |
| stepCarbon := energyKWh * b.CarbonIntensity | |
| b.CumulativeCost += stepCost | |
| b.CumulativeCarbon += stepCarbon | |
| // Baseline (always-on at 70% HVAC, no storage/shedding) | |
| baselineKW := 0.7*b.MaxHVACPower + b.ProcessDemand | |
| baselineEnergy := baselineKW * StepDurationHrs | |
| b.BaselineCost += baselineEnergy * b.CurrentPrice | |
| b.BaselineCarbon += baselineEnergy * b.CarbonIntensity | |
| // ----- Reward computation ----- | |
| // Get active faults for fault mitigation reward | |
| var activeFaults []FaultEvent | |
| if e.FaultSchedule != nil { | |
| activeFaults = e.FaultSchedule.ActiveAt(s) | |
| } | |
| rc := ComputeReward(ComputeRewardInput{ | |
| B: b, | |
| Act: act, | |
| StepCost: stepCost, | |
| EnergyKWh: energyKWh, | |
| TMin: TMinDefault, | |
| TMax: TMaxDefault, | |
| StepCarbon: stepCarbon, | |
| BatchMissed: len(batchMissed), | |
| GridStress: b.GridStressSignal, | |
| ShedFraction: clampedShed, | |
| TaskID: e.taskID, | |
| PrevHVACLevel: b.PrevHVACLevel, | |
| ChargeRate: act.ThermalChargeRate, | |
| PrevChargeRate: e.prevChargeRates[idx], | |
| StorageDelta: act.ThermalChargeRate, | |
| PriceCurve: e.PriceCurve[:], | |
| CurrentStep: s, | |
| InstructionCard: e.InstructionCard, | |
| ActiveFaults: activeFaults, | |
| }) | |
| b.PrevHVACLevel = act.HVACPowerLevel | |
| e.prevChargeRates[idx] = act.ThermalChargeRate | |
| // Update batch queue | |
| b.BatchQueue = pendingDeadlines(b.Jobs) | |
| // Exploit detection | |
| if clampedShed > 0.4 { | |
| e.totalShedSteps[idx]++ | |
| } | |
| if len(e.thermalCycleCounts) > idx { | |
| if len(e.Replay) > 0 { | |
| prev := e.prevChargeRates[idx] | |
| if prev > 0.3 && act.ThermalChargeRate < -0.3 || prev < -0.3 && act.ThermalChargeRate > 0.3 { | |
| e.thermalCycleCounts[idx]++ | |
| } | |
| } | |
| } | |
| // Per-building step index matches global timestep for this transition (0 .. EpisodeSteps-1) | |
| b.Step = s | |
| // Record history | |
| if idx < len(e.TempHistory) { | |
| e.TempHistory[idx] = append(e.TempHistory[idx], b.IndoorTemperature) | |
| e.CostHistory[idx] = append(e.CostHistory[idx], b.CumulativeCost) | |
| e.HVACHistory[idx] = append(e.HVACHistory[idx], act.HVACPowerLevel) | |
| e.LoadShedHistory[idx] = append(e.LoadShedHistory[idx], clampedShed) | |
| e.RewardHistory[idx] = append(e.RewardHistory[idx], rc) | |
| } | |
| obs := e.buildObservation(b) | |
| return StepResponse{ | |
| Observation: obs, | |
| Reward: rc.Total, | |
| Done: e.done || s+1 >= EpisodeSteps, | |
| Info: StepInfo{ | |
| RewardComponents: rc, | |
| EnergyUsed: energyKWh, | |
| CarbonEmitted: stepCarbon, | |
| PriceSignal: b.CurrentPrice, | |
| GridStress: b.GridStressSignal, | |
| BatchCompleted: batchCompleted, | |
| BatchMissed: batchMissed, | |
| Episode: e.episode, | |
| Step: s, | |
| }, | |
| Rewards: rc, | |
| } | |
| } | |
| func (e *Environment) updateGridStress(s int) float64 { | |
| // Grid stress is elevated during price peaks and stochastic demand spikes | |
| price := e.PriceCurve[s] | |
| priceNorm := (price - 0.028) / (0.36 - 0.028) | |
| // Random stress events | |
| stressProb := 0.05 | |
| switch e.difficulty { | |
| case "medium": | |
| stressProb = 0.1 | |
| case "hard": | |
| stressProb = 0.2 | |
| } | |
| spike := 0.0 | |
| if e.rng.Float64() < stressProb { | |
| spike = 0.3 + e.rng.Float64()*0.5 | |
| } | |
| stress := math.Min(1.0, priceNorm*0.6+spike) | |
| return math.Max(0, stress) | |
| } | |
| func (e *Environment) updateOutdoorTemp(s int) float64 { | |
| // Sinusoidal daily temperature cycle + noise | |
| hour := float64(s) / 4.0 | |
| baseTemp := 15.0 + 8.0*math.Sin(2*math.Pi*(hour-6)/24.0) | |
| noise := (e.rng.Float64()*2 - 1) * 1.5 | |
| return baseTemp + noise | |
| } | |
| func (e *Environment) updateProcessDemand(s int) float64 { | |
| // Process demand shifts with business hours | |
| hour := s / 4 | |
| base := 10.0 | |
| if hour >= 8 && hour <= 18 { | |
| base = 20.0 + 10.0*math.Sin(math.Pi*float64(hour-8)/10.0) | |
| } | |
| noise := (e.rng.Float64()*2 - 1) * 3.0 | |
| return math.Max(0, base+noise) | |
| } | |
| func (e *Environment) updateBatchJobs(b *BuildingState, slot int, step int) (completed []int, missed []int) { | |
| completed = []int{} | |
| missed = []int{} | |
| // Schedule the first pending job into the chosen slot | |
| for i := range b.Jobs { | |
| job := &b.Jobs[i] | |
| if !job.Scheduled && !job.Completed && !job.MissedDeadline { | |
| schedAt := step + slot | |
| job.Scheduled = true | |
| job.ScheduledAt = schedAt | |
| break // only schedule one job per step | |
| } | |
| } | |
| // Advance running or completed jobs | |
| for i := range b.Jobs { | |
| job := &b.Jobs[i] | |
| if job.Completed || job.MissedDeadline { | |
| continue | |
| } | |
| // Check deadline miss | |
| if step >= job.DeadlineSlot && !job.Completed { | |
| job.MissedDeadline = true | |
| missed = append(missed, job.ID) | |
| continue | |
| } | |
| // Mark as completed if scheduled and past its start | |
| if job.Scheduled && step >= job.ScheduledAt { | |
| if step >= job.ScheduledAt+job.Duration-1 { | |
| job.Completed = true | |
| completed = append(completed, job.ID) | |
| } | |
| } | |
| } | |
| return | |
| } | |
| func (e *Environment) batchRunningPower(b *BuildingState) float64 { | |
| total := 0.0 | |
| for _, job := range b.Jobs { | |
| if job.Scheduled && !job.Completed && !job.MissedDeadline { | |
| if e.step >= job.ScheduledAt && e.step < job.ScheduledAt+job.Duration { | |
| total += job.PowerDraw | |
| } | |
| } | |
| } | |
| return total | |
| } | |
| func (e *Environment) buildObservation(b *BuildingState) ObservationModel { | |
| // Collect active fault descriptions for this step | |
| var activeFaults []string | |
| if e.FaultSchedule != nil { | |
| for _, f := range e.FaultSchedule.ActiveAt(b.Step) { | |
| activeFaults = append(activeFaults, f.Description) | |
| } | |
| } | |
| // Apply sensor fault noise to observation (not physics) - if sensor fault is active, agent sees wrong temp | |
| reportedTemp := b.IndoorTemperature + b.TempObservationNoise | |
| taskCardStr := "" | |
| if e.taskID == 4 && e.InstructionCard != nil { | |
| taskCardStr = e.InstructionCard.Text | |
| } else if e.taskID == 1 { | |
| taskCardStr = "Task 1 (Easy - Cost Minimization): Minimize total energy cost over 24 hours. No temperature or batch constraints. Use cheap off-peak periods and thermal storage." | |
| } else if e.taskID == 2 { | |
| taskCardStr = "Task 2 (Medium - Temperature Management): Minimize cost AND keep indoor temperature within 19-23°C at all times. Balance comfort vs cost." | |
| } else if e.taskID == 3 { | |
| taskCardStr = "Task 3 (Hard - Full Demand Response): Minimize cost, maintain temperature, respond to grid stress (shed when grid_stress_signal > 0.7), schedule batch jobs, minimize carbon." | |
| } else { | |
| taskCardStr = "Maintain operations and minimize cost." | |
| } | |
| priceForecast := make([]float64, 4) | |
| for i := 0; i < 4; i++ { | |
| idx := b.Step + i | |
| if idx < EpisodeSteps { | |
| priceForecast[i] = math.Round(e.PriceCurve[idx]*10000) / 10000 | |
| } else { | |
| priceForecast[i] = math.Round(e.PriceCurve[EpisodeSteps-1]*10000) / 10000 | |
| } | |
| } | |
| return ObservationModel{ | |
| IndoorTemperature: math.Round(reportedTemp*100) / 100, | |
| ThermalStorageLevel: math.Round(b.ThermalStorageLevel*1000) / 1000, | |
| ProcessDemand: math.Round(b.ProcessDemand*100) / 100, | |
| CurrentPrice: math.Round(b.CurrentPrice*10000) / 10000, | |
| GridStressSignal: math.Round(b.GridStressSignal*1000) / 1000, | |
| CarbonIntensity: math.Round(b.CarbonIntensity*10) / 10, | |
| HourOfDay: b.HourOfDay, | |
| BatchQueue: pendingDeadlines(b.Jobs), | |
| CumulativeCost: math.Round(b.CumulativeCost*10000) / 10000, | |
| Step: b.Step, | |
| BuildingID: b.BuildingID, | |
| HVACEfficiency: math.Round(b.HVACEfficiency*1000) / 1000, | |
| InstructionCard: e.InstructionCard, | |
| ActiveFaults: activeFaults, | |
| TaskCard: taskCardStr, | |
| NLSummary: "GridMind simulation state.", | |
| MarketType: "tou", | |
| Season: "summer", | |
| PriceVolatility: 0.2, | |
| PriceForecast: priceForecast, | |
| DemandChargeActive: false, | |
| } | |
| } | |
| func (e *Environment) clampAction(a *ActionModel) { | |
| a.HVACPowerLevel = math.Max(0, math.Min(1.0, a.HVACPowerLevel)) | |
| a.ThermalChargeRate = math.Max(-1.0, math.Min(1.0, a.ThermalChargeRate)) | |
| a.BatchJobSlot = max(0, min(4, a.BatchJobSlot)) | |
| a.LoadShedFraction = math.Max(0, math.Min(0.5, a.LoadShedFraction)) | |
| } | |
| func (e *Environment) findAction(actions []ActionModel, buildingIdx int) ActionModel { | |
| // Try to find an action with matching building_id, else use positional | |
| for _, a := range actions { | |
| if a.BuildingID == buildingIdx { | |
| return a | |
| } | |
| } | |
| if buildingIdx < len(actions) { | |
| return actions[buildingIdx] | |
| } | |
| // Default: do-nothing action | |
| return ActionModel{HVACPowerLevel: 0.5, ThermalChargeRate: 0.0, BatchJobSlot: 0, LoadShedFraction: 0.0} | |
| } | |
| // pendingDeadlines returns a slice of deadline slots for all incomplete, unscheduled jobs. | |
| func pendingDeadlines(jobs []BatchJob) []int { | |
| result := []int{} | |
| for _, j := range jobs { | |
| if !j.Completed && !j.MissedDeadline { | |
| result = append(result, j.DeadlineSlot) | |
| } | |
| } | |
| return result | |
| } | |
// max returns the larger of a and b.
func max(a, b int) int {
	if b >= a {
		return b
	}
	return a
}
// min returns the smaller of a and b.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
| // ExploitDetected returns whether the current episode shows signs of degenerate strategies. | |
| func (e *Environment) ExploitDetected(buildingIdx int) (bool, float64) { | |
| e.mu.RLock() | |
| defer e.mu.RUnlock() | |
| if buildingIdx >= len(e.totalShedSteps) { | |
| return false, 0.0 | |
| } | |
| // Flag if agent always sheds > 40% load (more than 70% of steps) | |
| shedRatio := float64(e.totalShedSteps[buildingIdx]) / float64(e.step+1) | |
| cycleRatio := float64(e.thermalCycleCounts[buildingIdx]) / float64(e.step+1) | |
| exploited := shedRatio > 0.7 || cycleRatio > 0.4 | |
| penalty := 0.0 | |
| if exploited { | |
| penalty = math.Max(shedRatio-0.7, 0)*0.5 + math.Max(cycleRatio-0.4, 0)*0.3 | |
| } | |
| return exploited, penalty | |
| } | |
| // GetFeederState returns the aggregate fleet view for the coordinator. | |
| func (e *Environment) GetFeederState() FeederState { | |
| e.mu.RLock() | |
| defer e.mu.RUnlock() | |
| var totalDemand float64 | |
| buildings := make([]BuildingSummary, len(e.Buildings)) | |
| for i, b := range e.Buildings { | |
| demand := b.ProcessDemand + b.MaxHVACPower*b.PrevHVACLevel | |
| totalDemand += demand | |
| buildings[i] = BuildingSummary{ | |
| BuildingID: b.BuildingID, | |
| CurrentDemandKW: math.Round(demand*100) / 100, | |
| IndoorTemperature: math.Round(b.IndoorTemperature*100) / 100, | |
| ThermalStorageLevel: math.Round(b.ThermalStorageLevel*1000) / 1000, | |
| CumulativeCost: math.Round(b.CumulativeCost*100) / 100, | |
| GridStressSignal: math.Round(b.GridStressSignal*100) / 100, | |
| PriceMultiplier: e.PriceMultipliers[i], | |
| } | |
| } | |
| limit := float64(120 * len(e.Buildings)) // Simplistic soft cap | |
| // Downsample price curve to 24 hourly points | |
| hourlyCurve := make([]float64, 24) | |
| for h := 0; h < 24; h++ { | |
| hourlyCurve[h] = e.PriceCurve[h*4] | |
| } | |
| return FeederState{ | |
| TotalDemandKW: math.Round(totalDemand*100) / 100, | |
| FeederLimitKW: limit, | |
| FeederOverload: totalDemand > limit, | |
| UtilizationPct: math.Round((totalDemand/limit)*1000) / 10, | |
| Buildings: buildings, | |
| PriceCurveHourly: hourlyCurve, | |
| Step: e.step, | |
| Episode: e.episode, | |
| } | |
| } | |
| // SetCoordinatorSignals applies per-building price multipliers. | |
| func (e *Environment) SetCoordinatorSignals(multipliers []float64) { | |
| e.mu.Lock() | |
| defer e.mu.Unlock() | |
| for i, val := range multipliers { | |
| if i < len(e.PriceMultipliers) { | |
| e.PriceMultipliers[i] = math.Max(0.1, math.Min(10.0, val)) // Clamp safety | |
| } | |
| } | |
| } | |
| // cloneBuilding creates a deep copy of a BuildingState | |
| func cloneBuilding(b *BuildingState) *BuildingState { | |
| c := *b | |
| c.BatchQueue = make([]int, len(b.BatchQueue)) | |
| copy(c.BatchQueue, b.BatchQueue) | |
| c.Jobs = make([]BatchJob, len(b.Jobs)) | |
| copy(c.Jobs, b.Jobs) | |
| return &c | |
| } | |
| // SimulateStep predicts the next state and reward without modifying the actual environment. | |
| // It performs a deep copy of the required state, applies the actions, and returns the expected result. | |
| func (e *Environment) SimulateStep(actions []ActionModel) ([]StepResponse, bool) { | |
| e.mu.RLock() | |
| defer e.mu.RUnlock() | |
| if e.done { | |
| return nil, true | |
| } | |
| // Create a temporary mock environment for a single step | |
| mock := &Environment{ | |
| rng: rand.New(rand.NewSource(e.rng.Int63())), // local PRNG to not desync main | |
| episode: e.episode, | |
| step: e.step, | |
| taskID: e.taskID, | |
| seed: e.seed, | |
| difficulty: e.difficulty, | |
| numBuildings: e.numBuildings, | |
| Buildings: make([]*BuildingState, e.numBuildings), | |
| PriceCurve: e.PriceCurve, | |
| CarbonCurve: e.CarbonCurve, | |
| InstructionCard: e.InstructionCard, | |
| FaultSchedule: e.FaultSchedule, | |
| PriceMultipliers: e.PriceMultipliers, | |
| prevChargeRates: make([]float64, len(e.prevChargeRates)), | |
| } | |
| copy(mock.prevChargeRates, e.prevChargeRates) | |
| for i, b := range e.Buildings { | |
| mock.Buildings[i] = cloneBuilding(b) | |
| } | |
| // Clamp and apply actions | |
| mockActions := make([]ActionModel, len(actions)) | |
| copy(mockActions, actions) | |
| for i := range mockActions { | |
| mock.clampAction(&mockActions[i]) | |
| } | |
| responses := make([]StepResponse, mock.numBuildings) | |
| for i, b := range mock.Buildings { | |
| act := mock.findAction(mockActions, i) | |
| responses[i] = mock.stepBuilding(b, act, i) | |
| } | |
| mockDone := (mock.step + 1) >= EpisodeSteps | |
| return responses, mockDone | |
| } | |