// Spaces: Prajwal782007 / Gridmind (Sleeping)
// App | Files | Community
// Gridmind / env /environment.go
// adityss: "refactor: replace heuristic log generation with Go-based environment simulation and update API schema"
// 3b977fc, 23 days ago
// raw | history | blame | contribute | delete
// 28.9 kB

	// Package env implements the GridMind-RL simulation core.
	// It models a multi-building industrial/commercial energy management system
	// with stochastic electricity prices, thermal dynamics, and batch job scheduling.
	package env

	import (
	"math"
	"math/rand"
	"sync"
	"time"
	)

	// Simulation-wide tuning constants.
	const (
		// Episode timing: 96 steps of a quarter hour each cover one 24-hour day.
		EpisodeSteps    = 96
		StepDurationHrs = 0.25

		// Upper bound on the number of buildings simulated together.
		MaxBuildings = 3

		// Indoor temperature targets, in °C.
		DefaultSetpoint = 21.0 // comfortable indoor temperature
		TMinDefault     = 19.0 // lower comfort bound
		TMaxDefault     = 23.0 // upper comfort bound

		// Per-building equipment limits.
		MaxHVACPowerKW  = 50.0  // HVAC power ceiling, kW
		MaxStorageKWh   = 100.0 // thermal storage capacity, kWh
		StorageLossRate = 0.005 // fraction of stored energy dissipated each step

		// Maximum number of concurrent batch jobs per building.
		MaxBatchJobs = 5
	)

	// Environment is the thread-safe top-level simulation manager.
	// It owns the per-episode random source, the price and carbon curves,
	// every building's state, and the replay/history buffers. The exported
	// methods that take mu (Reset, Step, GetState, ...) are the intended entry
	// points; the unexported helpers assume the caller already holds the lock.
	type Environment struct {
	mu sync.RWMutex // guards every field below
	rng *rand.Rand // episode random source, re-created from seed on Reset
	seed int64 // seed used to build rng for the current episode
	episode int // incremented on every Reset
	step int // index of the next timestep to simulate (0 .. EpisodeSteps)
	done bool // set once step reaches EpisodeSteps
	taskID int // active task; Reset coerces values outside 1..4 to 1
	difficulty string // "easy", "medium" or "hard"; other values take the default branch of each helper's difficulty switch
	numBuildings int // number of simulated buildings; Reset clamps it to 1..MaxBuildings

	Buildings []*BuildingState // one state per building, rebuilt on Reset
	PriceCurve [EpisodeSteps]float64 // per-step electricity price for this episode
	CarbonCurve [EpisodeSteps]float64 // per-step carbon intensity for this episode
	Replay []ReplayEntry // one entry per completed step (first building only)
	LastActions []ActionModel // most recent clamped action per building position
	InstructionCard *InstructionCard // set for Task 4 episodes
	FaultSchedule *FaultSchedule // randomised fault events for this episode
	PriceMultipliers []float64 // per-building multipliers set by coordinator (default 1.0)

	// History for dashboard rendering (per building); one value appended per step.
	TempHistory [][]float64
	CostHistory [][]float64
	HVACHistory [][]float64
	LoadShedHistory [][]float64
	RewardHistory [][]RewardComponents

	// Exploit detection counters (per building)
	totalShedSteps []int // steps on which the clamped shed fraction exceeded 0.4
	thermalCycleCounts []int // counted charge/discharge reversals of the thermal store
	prevChargeRates []float64 // ThermalChargeRate applied on the previous step
	}

	// NewEnvironment returns an Environment seeded from the current wall-clock
	// time and preset to task 1, "easy" difficulty and a single building.
	// No buildings or curves exist until Reset is called.
	func NewEnvironment() *Environment {
		now := time.Now().UnixNano()
		env := &Environment{
			seed:         now,
			taskID:       1,
			difficulty:   "easy",
			numBuildings: 1,
		}
		env.rng = rand.New(rand.NewSource(now))
		return env
	}

	// Reset initializes a new episode. Thread-safe.
	func (e *Environment) Reset(req ResetRequest) ResetResponse {
	e.mu.Lock()
	defer e.mu.Unlock()

	// Apply seed
	if req.Seed != nil {
	e.seed = *req.Seed
	} else {
	e.seed = time.Now().UnixNano()
	}
	e.rng = rand.New(rand.NewSource(e.seed))

	// Apply task and difficulty
	e.taskID = req.TaskID
	if e.taskID < 1 \|\| e.taskID > 4 {
	e.taskID = 1
	}
	e.difficulty = req.Difficulty
	if e.difficulty == "" {
	switch e.taskID {
	case 1:
	e.difficulty = "easy"
	case 2:
	e.difficulty = "medium"
	case 3, 4:
	e.difficulty = "hard"
	}
	}

	// Number of buildings (federation)
	e.numBuildings = req.NumBuildings
	if e.numBuildings < 1 {
	e.numBuildings = 1
	}
	if e.numBuildings > MaxBuildings {
	e.numBuildings = MaxBuildings
	}

	e.episode++
	e.step = 0
	e.done = false
	e.Replay = make([]ReplayEntry, 0, EpisodeSteps)
	e.LastActions = make([]ActionModel, e.numBuildings)

	// Generate price and carbon curves for this episode
	e.generatePriceCurve()
	e.generateCarbonCurve()

	// Initialise buildings
	e.Buildings = make([]*BuildingState, e.numBuildings)
	e.TempHistory = make([][]float64, e.numBuildings)
	e.CostHistory = make([][]float64, e.numBuildings)
	e.HVACHistory = make([][]float64, e.numBuildings)
	e.LoadShedHistory = make([][]float64, e.numBuildings)
	e.RewardHistory = make([][]RewardComponents, e.numBuildings)
	e.totalShedSteps = make([]int, e.numBuildings)
	e.thermalCycleCounts = make([]int, e.numBuildings)
	e.prevChargeRates = make([]float64, e.numBuildings)

	for i := range e.Buildings {
	e.Buildings[i] = e.newBuildingState(i)
	e.TempHistory[i] = make([]float64, 0, EpisodeSteps)
	e.CostHistory[i] = make([]float64, 0, EpisodeSteps)
	e.HVACHistory[i] = make([]float64, 0, EpisodeSteps)
	e.LoadShedHistory[i] = make([]float64, 0, EpisodeSteps)
	e.RewardHistory[i] = make([]RewardComponents, 0, EpisodeSteps)
	}

	// Initialise coordinator price multipliers to 1.0
	e.PriceMultipliers = make([]float64, e.numBuildings)
	for i := range e.PriceMultipliers {
	e.PriceMultipliers[i] = 1.0
	}

	// Generate instruction card for Task 4
	e.InstructionCard = nil
	if e.taskID == 4 {
	e.InstructionCard = GenerateInstructionCard(e.rng)
	}

	// Generate fault schedule for all tasks (probability varies by difficulty)
	e.FaultSchedule = GenerateFaultSchedule(e.rng, e.difficulty)

	obs := make([]ObservationModel, e.numBuildings)
	for i, b := range e.Buildings {
	obs[i] = e.buildObservation(b)
	}

	return ResetResponse{
	Observations: obs,
	Episode: e.episode,
	TaskID: e.taskID,
	Seed: e.seed,
	InstructionCard: e.InstructionCard,
	}
	}

	// Step advances the simulation by one timestep for all buildings and
	// returns one StepResponse per building plus the episode-done flag.
	// It is safe for concurrent use (takes the write lock).
	//
	// The supplied actions are clamped in place. Once the episode is done it
	// returns (nil, true) without changing state. Calling Step before the
	// first Reset is tolerated: no building exists yet, so an empty response
	// slice is returned and no replay entry is recorded.
	func (e *Environment) Step(actions []ActionModel) ([]StepResponse, bool) {
		e.mu.Lock()
		defer e.mu.Unlock()

		if e.done {
			return nil, true
		}

		// Validate and clamp actions; remember them by position. The bound is
		// the real slice length so a call before Reset cannot index a nil slice.
		for i := range actions {
			e.clampAction(&actions[i])
			if i < len(e.LastActions) {
				e.LastActions[i] = actions[i]
			}
		}

		responses := make([]StepResponse, len(e.Buildings))
		for i, b := range e.Buildings {
			// findAction matches by building_id first, then by position.
			act := e.findAction(actions, i)
			responses[i] = e.stepBuilding(b, act, i)
		}

		e.step++
		if e.step >= EpisodeSteps {
			e.done = true
		}

		// Record replay entry (first building is the primary one).
		if len(responses) > 0 && len(e.LastActions) > 0 {
			e.Replay = append(e.Replay, ReplayEntry{
				Step:        e.step - 1,
				Observation: responses[0].Observation,
				Action:      e.LastActions[0],
				Reward:      responses[0].Reward,
				Components:  responses[0].Info.RewardComponents,
				Done:        e.done,
			})
		}

		return responses, e.done
	}

	// GetState returns a snapshot of the environment: one public record per
	// building plus price and carbon curves downsampled to one point per hour
	// (every 4th step). It is safe for concurrent use (takes the read lock).
	//
	// Each building's job list is copied, because updateBatchJobs mutates job
	// entries in place on later steps and the caller reads the snapshot after
	// the lock has been released. History slices are returned as views of the
	// internal buffers; their visible elements are only ever appended past.
	func (e *Environment) GetState() StateResponse {
		e.mu.RLock()
		defer e.mu.RUnlock()

		buildings := make([]BuildingStatePublic, e.numBuildings)
		for i, b := range e.Buildings {
			// Detach the job list from live state, preserving nil-ness.
			var jobs []BatchJob
			if b.Jobs != nil {
				jobs = make([]BatchJob, len(b.Jobs))
				copy(jobs, b.Jobs)
			}

			pub := BuildingStatePublic{
				ObservationModel:    e.buildObservation(b),
				OutdoorTemperature:  b.OutdoorTemperature,
				SetpointTemperature: b.SetpointTemperature,
				BaselineCost:        b.BaselineCost,
				BaselineCarbon:      b.BaselineCarbon,
				CumulativeCarbon:    b.CumulativeCarbon,
				Jobs:                jobs,
			}
			if i < len(e.TempHistory) {
				pub.TempHistory = e.TempHistory[i]
				pub.CostHistory = e.CostHistory[i]
				pub.HVACHistory = e.HVACHistory[i]
				pub.LoadShedHistory = e.LoadShedHistory[i]
				pub.RewardHistory = e.RewardHistory[i]
			}
			buildings[i] = pub
		}

		// One sample per hour: h*4 is always below EpisodeSteps for h < hours.
		const hours = EpisodeSteps / 4
		priceCurve := make([]float64, hours)
		carbonCurve := make([]float64, hours)
		for h := 0; h < hours; h++ {
			priceCurve[h] = e.PriceCurve[h*4]
			carbonCurve[h] = e.CarbonCurve[h*4]
		}

		return StateResponse{
			Buildings:       buildings,
			PriceCurve:      priceCurve,
			CarbonCurve:     carbonCurve,
			Episode:         e.episode,
			Step:            e.step,
			TaskID:          e.taskID,
			Done:            e.done,
			Seed:            e.seed,
			InstructionCard: e.InstructionCard,
		}
	}

	// GetReplay hands back a copy of the replay entries recorded so far in the
	// current episode. It takes the read lock; the returned slice is never nil
	// and does not alias the environment's own buffer.
	func (e *Environment) GetReplay() []ReplayEntry {
		e.mu.RLock()
		defer e.mu.RUnlock()

		snapshot := make([]ReplayEntry, 0, len(e.Replay))
		snapshot = append(snapshot, e.Replay...)
		return snapshot
	}

	// ──────────────────────────────────────────────
	// Internal helpers
	// ──────────────────────────────────────────────

	// newBuildingState builds the initial state of building id for a fresh
	// episode. It must run after the price and carbon curves are generated,
	// because the state is initialised from PriceCurve[0] and CarbonCurve[0].
	//
	// Random draws happen in a fixed order (indoor temperature, storage level,
	// outdoor temperature, process demand, HVAC degradation rate, batch jobs)
	// so that a given seed reproduces the same building.
	func (e *Environment) newBuildingState(id int) *BuildingState {
		// Randomise initial conditions slightly.
		initTemp := DefaultSetpoint + (e.rng.Float64()-0.5)*2.0 // setpoint ± 1 °C
		storageLevel := 0.3 + e.rng.Float64()*0.4               // start 30–70% full
		outdoorTemp := 15.0 + e.rng.Float64()*15.0              // 15–30 °C

		b := &BuildingState{
			BuildingID:          id,
			IndoorTemperature:   initTemp,
			ThermalStorageLevel: storageLevel,
			ProcessDemand:       10.0 + e.rng.Float64()*20.0,
			CurrentPrice:        e.PriceCurve[0],
			GridStressSignal:    0.0,
			CarbonIntensity:     e.CarbonCurve[0],
			HourOfDay:           0,
			Step:                0,
			BatchQueue:          []int{},
			CumulativeCost:      0.0,
			CumulativeCarbon:    0.0,
			OutdoorTemperature:  outdoorTemp,
			PrevHVACLevel:       0.5,
			BaselineCost:        0.0,
			BaselineCarbon:      0.0,
			SetpointTemperature: DefaultSetpoint,
			MaxHVACPower:        MaxHVACPowerKW,
			MaxStorageCapacity:  MaxStorageKWh,
			ThermalLossRate:     StorageLossRate,
			HVACEfficiency:      1.0,
			HVACDegradationRate: 0.0005 + e.rng.Float64()*0.001, // 0.05% to 0.15% per step
		}

		// Spawn batch jobs (count depends on difficulty) and expose their deadlines.
		b.Jobs = e.generateBatchJobs()
		b.BatchQueue = pendingDeadlines(b.Jobs)
		return b
	}

	// generateBatchJobs creates the batch jobs for one building: 3 by default,
	// 4 on "medium" and 5 on "hard". Each job draws, in this order, a deadline
	// slot, a duration of 1–3 steps and a power draw of 5–20 kW from the
	// episode's random source. Jobs start unscheduled (ScheduledAt == -1).
	func (e *Environment) generateBatchJobs() []BatchJob {
		count := 3
		if e.difficulty == "medium" {
			count = 4
		} else if e.difficulty == "hard" {
			count = 5
		}

		// Deadlines are spread over the episode, leaving slack at the end for
		// the job's duration; the window is never narrower than 8 slots.
		window := EpisodeSteps - 12
		if window < 8 {
			window = 8
		}

		out := make([]BatchJob, 0, count)
		for id := 1; id <= count; id++ {
			deadlineSlot := 4 + e.rng.Intn(window)
			duration := 1 + e.rng.Intn(3)
			powerDraw := 5.0 + e.rng.Float64()*15.0
			out = append(out, BatchJob{
				ID:           id,
				DeadlineSlot: deadlineSlot,
				Duration:     duration,
				PowerDraw:    powerDraw,
				ScheduledAt:  -1,
			})
		}
		return out
	}

	// generatePriceCurve creates a stochastic Time-of-Use price curve for the episode.
	func (e *Environment) generatePriceCurve() {
	// Base ToU: low overnight, moderate morning, high peak (8-12, 17-21), low night
	volatility := 0.1
	switch e.difficulty {
	case "medium":
	volatility = 0.2
	case "hard":
	volatility = 0.35
	}

	// Random peak window shift (±2 hours) for stochasticity
	morningPeakShift := e.rng.Intn(5) - 2
	eveningPeakShift := e.rng.Intn(5) - 2

	for s := 0; s < EpisodeSteps; s++ {
	hour := (s / 4)
	base := touPrice(hour, morningPeakShift, eveningPeakShift)
	noise := (e.rng.Float64()2 - 1) volatility * base
	price := math.Max(0.02, base+noise)
	e.PriceCurve[s] = price
	}
	}

	// touPrice maps an hour of the day to the base time-of-use price in $/kWh.
	// morningShift and eveningShift move the two peak windows by whole hours.
	//
	// With both shifts at zero the schedule is:
	//
	//	00:00–06:00  0.035  deep off-peak
	//	06:00–08:00  0.070  ramp-up
	//	08:00–12:00  0.200  morning peak
	//	12:00–17:00  0.120  shoulder between the peaks
	//	17:00–21:00  0.310  evening peak
	//	21:00–24:00  0.093  declining demand
	//
	// The checks run in the order below, so an earlier window wins wherever
	// shifted windows would overlap.
	func touPrice(hour, morningShift, eveningShift int) float64 {
		morningFrom, morningTo := 8+morningShift, 12+morningShift
		eveningFrom, eveningTo := 17+eveningShift, 21+eveningShift

		if morningFrom <= hour && hour < morningTo {
			return 0.20 // morning peak
		}
		if eveningFrom <= hour && hour < eveningTo {
			return 0.31 // evening peak
		}
		if morningTo <= hour && hour < eveningFrom {
			return 0.12 // shoulder between the peaks
		}
		if 6 <= hour && hour < morningFrom {
			return 0.07 // ramp-up before the morning peak
		}
		if hour >= eveningTo {
			return 0.093 // declining demand after the evening peak
		}
		return 0.035 // deep off-peak (before 06:00)
	}

	// generateCarbonCurve creates a realistic carbon intensity curve (gCO2/kWh).
	// Correlates roughly with price: higher price = more peaker plants = higher carbon.
	func (e *Environment) generateCarbonCurve() {
	for s := 0; s < EpisodeSteps; s++ {
	price := e.PriceCurve[s]
	// Map price range [0.028, 0.36] → carbon [150, 600] gCO2/kWh
	carbon := 150.0 + (price-0.028)/(0.36-0.028)*(600.0-150.0)
	noise := (e.rng.Float64()2 - 1) 30.0
	e.CarbonCurve[s] = math.Max(100.0, carbon+noise)
	}
	}

	// stepBuilding advances a single building by one timestep.
	func (e Environment) stepBuilding(b BuildingState, act ActionModel, idx int) StepResponse {
	s := e.step

	// Update environmental signals from curves
	b.CurrentPrice = e.PriceCurve[s] * e.PriceMultipliers[idx]
	b.CarbonIntensity = e.CarbonCurve[s]
	b.HourOfDay = (s / 4) % 24

	// Restore defaults before applying faults (allows recovery when fault ends)
	b.MaxHVACPower = MaxHVACPowerKW

	// Apply fault events for this step (modifies price, stress, HVAC capacity)
	activeFaultDescs := ApplyFaults(b, e.FaultSchedule, s, e.rng)
	_ = activeFaultDescs // stored for use in buildObservation via FaultSchedule.ActiveAt

	// Stochastic grid stress events (more frequent in hard mode).
	// Note: FaultGridOutage sets GridStressSignal=1.0 inside ApplyFaults.
	// We only overwrite it from the stochastic model if no outage is active.
	hasGridFault := false
	if e.FaultSchedule != nil {
	for _, f := range e.FaultSchedule.ActiveAt(s) {
	if f.Type == FaultGridOutage {
	hasGridFault = true
	break
	}
	}
	}
	if !hasGridFault {
	b.GridStressSignal = e.updateGridStress(s)
	}

	// Weather perturbation: outdoor temp drifts sinusoidally + noise
	b.OutdoorTemperature = e.updateOutdoorTemp(s)

	// Process demand fluctuation
	b.ProcessDemand = e.updateProcessDemand(s)

	// ----- Apply actions -----

	// 0. Degrade HVAC efficiency
	b.HVACEfficiency = math.Max(0.5, b.HVACEfficiency-b.HVACDegradationRate)

	// 1. HVAC: heats/cools building toward setpoint
	hvacPower := act.HVACPowerLevel * b.MaxHVACPower * b.HVACEfficiency // kW

	// 2. Thermal storage: charge or discharge
	chargeKW := act.ThermalChargeRate * b.MaxHVACPower * 0.3 // max 30% of HVAC for storage
	newStorageEnergy := b.ThermalStorageLevelb.MaxStorageCapacity + chargeKWStepDurationHrs
	// Apply thermal losses
	newStorageEnergy *= (1.0 - b.ThermalLossRate)
	newStorageEnergy = math.Max(0, math.Min(b.MaxStorageCapacity, newStorageEnergy))
	b.ThermalStorageLevel = newStorageEnergy / b.MaxStorageCapacity

	// 3. Load shedding
	clampedShed := math.Max(0, math.Min(0.5, act.LoadShedFraction))
	shedKW := clampedShed * b.ProcessDemand

	// 4. Batch job scheduling
	batchCompleted, batchMissed := e.updateBatchJobs(b, act.BatchJobSlot, s)

	// ----- Thermal dynamics -----
	// First-order setpoint-driven model:
	// HVAC drives temperature toward setpoint; higher power = stronger effect.
	// At HVACPowerLevel=1.0, HVAC strongly pushes toward setpoint.
	// At HVACPowerLevel=0.0, HVAC is off — temp drifts with environment.
	hvacEffect := (b.SetpointTemperature - b.IndoorTemperature) * act.HVACPowerLevel * 0.15

	// Outdoor infiltration: building slowly equilibrates with outside
	infiltration := (b.OutdoorTemperature - b.IndoorTemperature) * 0.03

	// Thermal storage discharge provides supplemental conditioning toward setpoint
	storageEffect := 0.0
	if act.ThermalChargeRate < 0 {
	storageEffect = (b.SetpointTemperature - b.IndoorTemperature) * math.Abs(act.ThermalChargeRate) * 0.05
	}

	// Process equipment waste heat (always warms the building)
	processHeat := b.ProcessDemand * 0.002 // kW→°C rough factor

	deltaT := hvacEffect + infiltration + storageEffect + processHeat
	b.IndoorTemperature += deltaT

	// Clamp to physically reasonable indoor range
	b.IndoorTemperature = math.Max(10.0, math.Min(40.0, b.IndoorTemperature))

	// ----- Energy & cost accounting -----
	batchPowerDraw := e.batchRunningPower(b)
	totalKW := hvacPower + math.Max(0, chargeKW) + batchPowerDraw - shedKW
	totalKW = math.Max(0, totalKW)
	energyKWh := totalKW * StepDurationHrs
	stepCost := energyKWh * b.CurrentPrice
	stepCarbon := energyKWh * b.CarbonIntensity

	b.CumulativeCost += stepCost
	b.CumulativeCarbon += stepCarbon

	// Baseline (always-on at 70% HVAC, no storage/shedding)
	baselineKW := 0.7*b.MaxHVACPower + b.ProcessDemand
	baselineEnergy := baselineKW * StepDurationHrs
	b.BaselineCost += baselineEnergy * b.CurrentPrice
	b.BaselineCarbon += baselineEnergy * b.CarbonIntensity

	// ----- Reward computation -----
	// Get active faults for fault mitigation reward
	var activeFaults []FaultEvent
	if e.FaultSchedule != nil {
	activeFaults = e.FaultSchedule.ActiveAt(s)
	}
	rc := ComputeReward(ComputeRewardInput{
	B: b,
	Act: act,
	StepCost: stepCost,
	EnergyKWh: energyKWh,
	TMin: TMinDefault,
	TMax: TMaxDefault,
	StepCarbon: stepCarbon,
	BatchMissed: len(batchMissed),
	GridStress: b.GridStressSignal,
	ShedFraction: clampedShed,
	TaskID: e.taskID,
	PrevHVACLevel: b.PrevHVACLevel,
	ChargeRate: act.ThermalChargeRate,
	PrevChargeRate: e.prevChargeRates[idx],
	StorageDelta: act.ThermalChargeRate,
	PriceCurve: e.PriceCurve[:],
	CurrentStep: s,
	InstructionCard: e.InstructionCard,
	ActiveFaults: activeFaults,
	})
	b.PrevHVACLevel = act.HVACPowerLevel
	e.prevChargeRates[idx] = act.ThermalChargeRate

	// Update batch queue
	b.BatchQueue = pendingDeadlines(b.Jobs)

	// Exploit detection
	if clampedShed > 0.4 {
	e.totalShedSteps[idx]++
	}
	if len(e.thermalCycleCounts) > idx {
	if len(e.Replay) > 0 {
	prev := e.prevChargeRates[idx]
	if prev > 0.3 && act.ThermalChargeRate < -0.3 \|\| prev < -0.3 && act.ThermalChargeRate > 0.3 {
	e.thermalCycleCounts[idx]++
	}
	}
	}

	// Per-building step index matches global timestep for this transition (0 .. EpisodeSteps-1)
	b.Step = s

	// Record history
	if idx < len(e.TempHistory) {
	e.TempHistory[idx] = append(e.TempHistory[idx], b.IndoorTemperature)
	e.CostHistory[idx] = append(e.CostHistory[idx], b.CumulativeCost)
	e.HVACHistory[idx] = append(e.HVACHistory[idx], act.HVACPowerLevel)
	e.LoadShedHistory[idx] = append(e.LoadShedHistory[idx], clampedShed)
	e.RewardHistory[idx] = append(e.RewardHistory[idx], rc)
	}

	obs := e.buildObservation(b)

	return StepResponse{
	Observation: obs,
	Reward: rc.Total,
	Done: e.done \|\| s+1 >= EpisodeSteps,
	Info: StepInfo{
	RewardComponents: rc,
	EnergyUsed: energyKWh,
	CarbonEmitted: stepCarbon,
	PriceSignal: b.CurrentPrice,
	GridStress: b.GridStressSignal,
	BatchCompleted: batchCompleted,
	BatchMissed: batchMissed,
	Episode: e.episode,
	Step: s,
	},
	Rewards: rc,
	}
	}

	// updateGridStress computes the grid stress signal for step s, in [0, 1].
	// It combines 60% of the step's price, normalised over [0.028, 0.36], with
	// an occasional random spike of 0.3–0.8. The spike probability is 0.05 by
	// default, 0.1 on "medium" and 0.2 on "hard".
	func (e *Environment) updateGridStress(s int) float64 {
		var spikeChance float64
		switch e.difficulty {
		case "hard":
			spikeChance = 0.2
		case "medium":
			spikeChance = 0.1
		default:
			spikeChance = 0.05
		}

		priceNorm := (e.PriceCurve[s] - 0.028) / (0.36 - 0.028)

		// A second draw is consumed only when a spike actually fires.
		var spike float64
		if e.rng.Float64() < spikeChance {
			spike = 0.3 + e.rng.Float64()*0.5
		}

		capped := math.Min(1.0, priceNorm*0.6+spike)
		return math.Max(0, capped)
	}

	func (e *Environment) updateOutdoorTemp(s int) float64 {
	// Sinusoidal daily temperature cycle + noise
	hour := float64(s) / 4.0
	baseTemp := 15.0 + 8.0math.Sin(2math.Pi*(hour-6)/24.0)
	noise := (e.rng.Float64()2 - 1) 1.5
	return baseTemp + noise
	}

	func (e *Environment) updateProcessDemand(s int) float64 {
	// Process demand shifts with business hours
	hour := s / 4
	base := 10.0
	if hour >= 8 && hour <= 18 {
	base = 20.0 + 10.0math.Sin(math.Pifloat64(hour-8)/10.0)
	}
	noise := (e.rng.Float64()2 - 1) 3.0
	return math.Max(0, base+noise)
	}

	func (e Environment) updateBatchJobs(b BuildingState, slot int, step int) (completed []int, missed []int) {
	completed = []int{}
	missed = []int{}

	// Schedule the first pending job into the chosen slot
	for i := range b.Jobs {
	job := &b.Jobs[i]
	if !job.Scheduled && !job.Completed && !job.MissedDeadline {
	schedAt := step + slot
	job.Scheduled = true
	job.ScheduledAt = schedAt
	break // only schedule one job per step
	}
	}

	// Advance running or completed jobs
	for i := range b.Jobs {
	job := &b.Jobs[i]
	if job.Completed \|\| job.MissedDeadline {
	continue
	}
	// Check deadline miss
	if step >= job.DeadlineSlot && !job.Completed {
	job.MissedDeadline = true
	missed = append(missed, job.ID)
	continue
	}
	// Mark as completed if scheduled and past its start
	if job.Scheduled && step >= job.ScheduledAt {
	if step >= job.ScheduledAt+job.Duration-1 {
	job.Completed = true
	completed = append(completed, job.ID)
	}
	}
	}
	return
	}

	// batchRunningPower sums the power draw of building b's batch jobs that
	// are running at the current step: scheduled, not yet completed or missed,
	// and with e.step inside [ScheduledAt, ScheduledAt+Duration).
	func (e *Environment) batchRunningPower(b *BuildingState) float64 {
		total := 0.0
		for i := range b.Jobs {
			job := &b.Jobs[i]
			if !job.Scheduled || job.Completed || job.MissedDeadline {
				continue
			}
			if e.step >= job.ScheduledAt && e.step < job.ScheduledAt+job.Duration {
				total += job.PowerDraw
			}
		}
		return total
	}

	// buildObservation renders building b's state as the agent-facing
	// ObservationModel. It does not modify b.
	//
	// The reported indoor temperature includes b.TempObservationNoise, so a
	// sensor fault distorts only what the agent sees, not the physics.
	// Numeric fields are rounded for presentation. PriceForecast holds the
	// unscaled curve price for the building's current step and the next three,
	// repeating the final step's price past the end of the episode.
	func (e *Environment) buildObservation(b *BuildingState) ObservationModel {
		// Descriptions of faults active at the building's current step.
		// Left nil when there are none.
		var activeFaults []string
		if e.FaultSchedule != nil {
			for _, f := range e.FaultSchedule.ActiveAt(b.Step) {
				activeFaults = append(activeFaults, f.Description)
			}
		}

		// Sensor noise affects the observation only.
		reportedTemp := b.IndoorTemperature + b.TempObservationNoise

		// Task 4 uses the generated card text; a Task 4 episode without a card
		// falls through to the generic default.
		var taskCardStr string
		switch {
		case e.taskID == 4 && e.InstructionCard != nil:
			taskCardStr = e.InstructionCard.Text
		case e.taskID == 1:
			taskCardStr = "Task 1 (Easy - Cost Minimization): Minimize total energy cost over 24 hours. No temperature or batch constraints. Use cheap off-peak periods and thermal storage."
		case e.taskID == 2:
			taskCardStr = "Task 2 (Medium - Temperature Management): Minimize cost AND keep indoor temperature within 19-23°C at all times. Balance comfort vs cost."
		case e.taskID == 3:
			taskCardStr = "Task 3 (Hard - Full Demand Response): Minimize cost, maintain temperature, respond to grid stress (shed when grid_stress_signal > 0.7), schedule batch jobs, minimize carbon."
		default:
			taskCardStr = "Maintain operations and minimize cost."
		}

		priceForecast := make([]float64, 4)
		for i := range priceForecast {
			idx := b.Step + i
			if idx >= EpisodeSteps {
				idx = EpisodeSteps - 1 // hold the last known price
			}
			priceForecast[i] = math.Round(e.PriceCurve[idx]*10000) / 10000
		}

		return ObservationModel{
			IndoorTemperature:   math.Round(reportedTemp*100) / 100,
			ThermalStorageLevel: math.Round(b.ThermalStorageLevel*1000) / 1000,
			ProcessDemand:       math.Round(b.ProcessDemand*100) / 100,
			CurrentPrice:        math.Round(b.CurrentPrice*10000) / 10000,
			GridStressSignal:    math.Round(b.GridStressSignal*1000) / 1000,
			CarbonIntensity:     math.Round(b.CarbonIntensity*10) / 10,
			HourOfDay:           b.HourOfDay,
			BatchQueue:          pendingDeadlines(b.Jobs),
			CumulativeCost:      math.Round(b.CumulativeCost*10000) / 10000,
			Step:                b.Step,
			BuildingID:          b.BuildingID,
			HVACEfficiency:      math.Round(b.HVACEfficiency*1000) / 1000,
			InstructionCard:     e.InstructionCard,
			ActiveFaults:        activeFaults,
			TaskCard:            taskCardStr,
			NLSummary:           "GridMind simulation state.",
			MarketType:          "tou",
			Season:              "summer",
			PriceVolatility:     0.2,
			PriceForecast:       priceForecast,
			DemandChargeActive:  false,
		}
	}

	// clampAction forces every field of *a into its valid range, in place:
	// HVACPowerLevel to [0, 1], ThermalChargeRate to [-1, 1], BatchJobSlot to
	// [0, 4] and LoadShedFraction to [0, 0.5]. The action is taken by pointer
	// so the caller's value is actually modified.
	func (e *Environment) clampAction(a *ActionModel) {
		a.HVACPowerLevel = math.Max(0, math.Min(1.0, a.HVACPowerLevel))
		a.ThermalChargeRate = math.Max(-1.0, math.Min(1.0, a.ThermalChargeRate))
		a.BatchJobSlot = max(0, min(4, a.BatchJobSlot))
		a.LoadShedFraction = math.Max(0, math.Min(0.5, a.LoadShedFraction))
	}

	// findAction selects the action to apply to the building at buildingIdx.
	// The first action whose BuildingID equals buildingIdx wins; failing that,
	// the action at the same position is used; if there is none, a neutral
	// action (HVAC at 50%, nothing else) is returned.
	func (e *Environment) findAction(actions []ActionModel, buildingIdx int) ActionModel {
		for i := range actions {
			if actions[i].BuildingID == buildingIdx {
				return actions[i]
			}
		}
		if buildingIdx >= len(actions) {
			// Default: do-nothing action; the omitted fields are zero.
			return ActionModel{HVACPowerLevel: 0.5}
		}
		return actions[buildingIdx]
	}

	// pendingDeadlines lists the deadline slot of every job that is neither
	// completed nor past its deadline, in job order. Jobs that are already
	// scheduled but still open are included. The result is never nil.
	func pendingDeadlines(jobs []BatchJob) []int {
		deadlines := make([]int, 0, len(jobs))
		for i := range jobs {
			if jobs[i].Completed || jobs[i].MissedDeadline {
				continue
			}
			deadlines = append(deadlines, jobs[i].DeadlineSlot)
		}
		return deadlines
	}

	// max returns the larger of two ints.
	func max(a, b int) int {
		if b >= a {
			return b
		}
		return a
	}

	// min returns the smaller of two ints.
	func min(a, b int) int {
		if b <= a {
			return b
		}
		return a
	}

	// ExploitDetected returns whether the current episode shows signs of degenerate strategies.
	func (e *Environment) ExploitDetected(buildingIdx int) (bool, float64) {
	e.mu.RLock()
	defer e.mu.RUnlock()
	if buildingIdx >= len(e.totalShedSteps) {
	return false, 0.0
	}
	// Flag if agent always sheds > 40% load (more than 70% of steps)
	shedRatio := float64(e.totalShedSteps[buildingIdx]) / float64(e.step+1)
	cycleRatio := float64(e.thermalCycleCounts[buildingIdx]) / float64(e.step+1)
	exploited := shedRatio > 0.7 \|\| cycleRatio > 0.4
	penalty := 0.0
	if exploited {
	penalty = math.Max(shedRatio-0.7, 0)0.5 + math.Max(cycleRatio-0.4, 0)0.3
	}
	return exploited, penalty
	}

	// GetFeederState returns the aggregate fleet view for the coordinator:
	// per-building summaries, total demand against a soft feeder limit of
	// 120 kW per building, and the price curve downsampled to 24 hourly
	// points. It is safe for concurrent use (takes the read lock).
	//
	// A building's demand is estimated as its process demand plus its HVAC
	// capacity scaled by the previous HVAC level. When no building exists yet
	// (before the first Reset) the limit is 0; utilisation is then reported as
	// 0 rather than NaN, which could not be JSON-encoded.
	func (e *Environment) GetFeederState() FeederState {
		e.mu.RLock()
		defer e.mu.RUnlock()

		var totalDemand float64
		buildings := make([]BuildingSummary, len(e.Buildings))
		for i, b := range e.Buildings {
			demand := b.ProcessDemand + b.MaxHVACPower*b.PrevHVACLevel
			totalDemand += demand
			buildings[i] = BuildingSummary{
				BuildingID:          b.BuildingID,
				CurrentDemandKW:     math.Round(demand*100) / 100,
				IndoorTemperature:   math.Round(b.IndoorTemperature*100) / 100,
				ThermalStorageLevel: math.Round(b.ThermalStorageLevel*1000) / 1000,
				CumulativeCost:      math.Round(b.CumulativeCost*100) / 100,
				GridStressSignal:    math.Round(b.GridStressSignal*100) / 100,
				PriceMultiplier:     e.PriceMultipliers[i],
			}
		}

		limit := float64(120 * len(e.Buildings)) // simplistic soft cap

		// Utilisation as a percentage with one decimal; guarded against 0/0.
		utilization := 0.0
		if limit > 0 {
			utilization = math.Round((totalDemand/limit)*1000) / 10
		}

		// Downsample price curve to 24 hourly points.
		hourlyCurve := make([]float64, 24)
		for h := range hourlyCurve {
			hourlyCurve[h] = e.PriceCurve[h*4]
		}

		return FeederState{
			TotalDemandKW:    math.Round(totalDemand*100) / 100,
			FeederLimitKW:    limit,
			FeederOverload:   totalDemand > limit,
			UtilizationPct:   utilization,
			Buildings:        buildings,
			PriceCurveHourly: hourlyCurve,
			Step:             e.step,
			Episode:          e.episode,
		}
	}

	// SetCoordinatorSignals stores the coordinator's per-building price
	// multipliers, each clamped to [0.1, 10]. Entries beyond the number of
	// buildings are ignored; buildings without an entry keep their current
	// multiplier. Takes the write lock.
	func (e *Environment) SetCoordinatorSignals(multipliers []float64) {
		e.mu.Lock()
		defer e.mu.Unlock()

		for i := 0; i < len(multipliers) && i < len(e.PriceMultipliers); i++ {
			bounded := math.Min(10.0, multipliers[i])
			e.PriceMultipliers[i] = math.Max(0.1, bounded) // clamp for safety
		}
	}

	// cloneBuilding returns a copy of *b whose BatchQueue and Jobs slices have
	// their own backing arrays, so mutating the clone's jobs or queue leaves
	// the original untouched. All other fields are copied by value.
	// NOTE(review): any further slice, map or pointer fields of BuildingState
	// would still be shared with the original — confirm against the type.
	func cloneBuilding(b *BuildingState) *BuildingState {
		c := *b
		c.BatchQueue = make([]int, len(b.BatchQueue))
		copy(c.BatchQueue, b.BatchQueue)
		c.Jobs = make([]BatchJob, len(b.Jobs))
		copy(c.Jobs, b.Jobs)
		return &c
	}

	// SimulateStep predicts the outcome of applying actions for one timestep
	// without modifying the real environment. It returns one StepResponse per
	// building and whether the episode would be done afterwards; once the
	// episode is already done it returns (nil, true).
	//
	// The step runs on a throwaway Environment holding cloned buildings and
	// copies of the mutable per-building slices. The caller's actions slice is
	// copied before clamping, so it is left untouched.
	//
	// The throwaway environment gets its own random source, seeded from the
	// episode seed, episode number and step. The real source is deliberately
	// not consulted: drawing from it under the read lock would both race with
	// other readers and shift the random stream of the real episode. As a
	// consequence, repeated simulations of the same step see the same noise.
	//
	// NOTE(review): FaultSchedule is shared by pointer with the real
	// environment; this is only safe if ApplyFaults/ActiveAt do not mutate
	// it — confirm.
	func (e *Environment) SimulateStep(actions []ActionModel) ([]StepResponse, bool) {
		e.mu.RLock()
		defer e.mu.RUnlock()

		if e.done {
			return nil, true
		}

		// Deterministic per-(seed, episode, step) seed for the local PRNG.
		mockSeed := e.seed + int64(e.episode)*1000003 + int64(e.step) + 1

		mock := &Environment{
			rng:              rand.New(rand.NewSource(mockSeed)),
			episode:          e.episode,
			step:             e.step,
			taskID:           e.taskID,
			seed:             e.seed,
			difficulty:       e.difficulty,
			numBuildings:     e.numBuildings,
			Buildings:        make([]*BuildingState, len(e.Buildings)),
			PriceCurve:       e.PriceCurve,
			CarbonCurve:      e.CarbonCurve,
			InstructionCard:  e.InstructionCard,
			FaultSchedule:    e.FaultSchedule,
			PriceMultipliers: make([]float64, len(e.PriceMultipliers)),
			prevChargeRates:  make([]float64, len(e.prevChargeRates)),
			// stepBuilding updates these counters; give the mock its own so it
			// neither panics on a nil slice nor touches the real ones.
			totalShedSteps:     make([]int, len(e.totalShedSteps)),
			thermalCycleCounts: make([]int, len(e.thermalCycleCounts)),
		}
		copy(mock.PriceMultipliers, e.PriceMultipliers)
		copy(mock.prevChargeRates, e.prevChargeRates)
		copy(mock.totalShedSteps, e.totalShedSteps)
		copy(mock.thermalCycleCounts, e.thermalCycleCounts)

		for i, b := range e.Buildings {
			mock.Buildings[i] = cloneBuilding(b)
		}

		// Clamp a private copy of the actions.
		mockActions := make([]ActionModel, len(actions))
		copy(mockActions, actions)
		for i := range mockActions {
			mock.clampAction(&mockActions[i])
		}

		responses := make([]StepResponse, len(mock.Buildings))
		for i, b := range mock.Buildings {
			act := mock.findAction(mockActions, i)
			responses[i] = mock.stepBuilding(b, act, i)
		}

		mockDone := mock.step+1 >= EpisodeSteps
		return responses, mockDone
	}