Spaces:

LO-Kyu
/

gridmind

Running

App Files Files Community

gridmind / env /environment.go

ShreeshantXD

fix: problems with parameters

f316664 5 days ago

raw

history blame contribute delete

21 kB

	// Package env implements the GridMind-RL simulation core.
	// It models a multi-building industrial/commercial energy management system
	// with stochastic electricity prices, thermal dynamics, and batch job scheduling.
	package env

	import (
	"math"
	"math/rand"
	"sync"
	"time"
	)

	// Episode timing.
	const (
		// EpisodeSteps is the number of steps in one episode: 24 hours at
		// 15-minute resolution (96 × 0.25 h = 24 h).
		EpisodeSteps = 96
		// StepDurationHrs is the length of a single step in hours (15 minutes).
		StepDurationHrs = 0.25
	)

	// Federation size and indoor comfort band.
	const (
		// MaxBuildings caps the number of buildings simulated together.
		MaxBuildings = 3
		// DefaultSetpoint is the comfortable indoor temperature in °C.
		DefaultSetpoint = 21.0
		// TMinDefault is the lower comfort bound in °C.
		TMinDefault = 19.0
		// TMaxDefault is the upper comfort bound in °C.
		TMaxDefault = 23.0
	)

	// Per-building equipment limits.
	const (
		// MaxHVACPowerKW is the HVAC power ceiling per building, in kW.
		MaxHVACPowerKW = 50.0
		// MaxStorageKWh is the thermal storage capacity, in kWh.
		MaxStorageKWh = 100.0
		// StorageLossRate is the fraction of stored energy lost per step
		// (thermal dissipation).
		StorageLossRate = 0.005
		// MaxBatchJobs is the maximum number of concurrent batch jobs per building.
		MaxBatchJobs = 5
	)

	// Environment is the top-level simulation manager. Its exported methods
	// serialise access through mu, so one instance can be shared between
	// goroutines; it must be used through a pointer because it embeds a lock.
	type Environment struct {
		mu   sync.RWMutex
		rng  *rand.Rand
		seed int64

		// Episode bookkeeping.
		episode, step int
		done          bool
		taskID        int
		difficulty    string
		numBuildings  int

		Buildings []*BuildingState
		// Per-step electricity price ($/kWh) and grid carbon intensity (gCO2/kWh).
		PriceCurve, CarbonCurve [EpisodeSteps]float64
		Replay                  []ReplayEntry
		LastActions             []ActionModel

		// Per-building time series kept for dashboard rendering; the outer
		// index is the building, the inner index is the step.
		TempHistory, CostHistory, HVACHistory, LoadShedHistory [][]float64
		RewardHistory                                          [][]RewardComponents

		// Exploit detection counters, one entry per building:
		// totalShedSteps counts steps where load shedding exceeded 0.4,
		// thermalCycleCounts counts rapid thermal-storage reversals.
		totalShedSteps, thermalCycleCounts []int
		// prevChargeRates holds each building's previous thermal charge rate.
		prevChargeRates []float64
	}

	// NewEnvironment returns an Environment seeded from the wall clock and
	// configured with the defaults: task 1, "easy" difficulty, one building.
	// Buildings, curves and histories stay at their zero values until Reset
	// is called.
	func NewEnvironment() *Environment {
		env := new(Environment)
		env.seed = time.Now().UnixNano()
		env.rng = rand.New(rand.NewSource(env.seed))
		env.taskID = 1
		env.difficulty = "easy"
		env.numBuildings = 1
		return env
	}

	// Reset initializes a new episode. Thread-safe.
	func (e *Environment) Reset(req ResetRequest) ResetResponse {
	e.mu.Lock()
	defer e.mu.Unlock()

	// Apply seed
	if req.Seed != nil {
	e.seed = *req.Seed
	} else {
	e.seed = time.Now().UnixNano()
	}
	e.rng = rand.New(rand.NewSource(e.seed))

	// Apply task and difficulty
	e.taskID = req.TaskID
	if e.taskID < 1 \|\| e.taskID > 3 {
	e.taskID = 1
	}
	e.difficulty = req.Difficulty
	if e.difficulty == "" {
	switch e.taskID {
	case 1:
	e.difficulty = "easy"
	case 2:
	e.difficulty = "medium"
	case 3:
	e.difficulty = "hard"
	}
	}

	// Number of buildings (federation)
	e.numBuildings = req.NumBuildings
	if e.numBuildings < 1 {
	e.numBuildings = 1
	}
	if e.numBuildings > MaxBuildings {
	e.numBuildings = MaxBuildings
	}

	e.episode++
	e.step = 0
	e.done = false
	e.Replay = make([]ReplayEntry, 0, EpisodeSteps)
	e.LastActions = make([]ActionModel, e.numBuildings)

	// Generate price and carbon curves for this episode
	e.generatePriceCurve()
	e.generateCarbonCurve()

	// Initialise buildings
	e.Buildings = make([]*BuildingState, e.numBuildings)
	e.TempHistory = make([][]float64, e.numBuildings)
	e.CostHistory = make([][]float64, e.numBuildings)
	e.HVACHistory = make([][]float64, e.numBuildings)
	e.LoadShedHistory = make([][]float64, e.numBuildings)
	e.RewardHistory = make([][]RewardComponents, e.numBuildings)
	e.totalShedSteps = make([]int, e.numBuildings)
	e.thermalCycleCounts = make([]int, e.numBuildings)
	e.prevChargeRates = make([]float64, e.numBuildings)

	for i := 0; i < e.numBuildings; i++ {
	e.Buildings[i] = e.newBuildingState(i)
	e.TempHistory[i] = make([]float64, 0, EpisodeSteps)
	e.CostHistory[i] = make([]float64, 0, EpisodeSteps)
	e.HVACHistory[i] = make([]float64, 0, EpisodeSteps)
	e.LoadShedHistory[i] = make([]float64, 0, EpisodeSteps)
	e.RewardHistory[i] = make([]RewardComponents, 0, EpisodeSteps)
	}

	obs := make([]ObservationModel, e.numBuildings)
	for i, b := range e.Buildings {
	obs[i] = e.buildObservation(b)
	}

	return ResetResponse{
	Observations: obs,
	Episode: e.episode,
	TaskID: e.taskID,
	Seed: e.seed,
	}
	}

	// Step advances the simulation by one timestep for all buildings and
	// returns one StepResponse per building plus the episode-done flag.
	// Thread-safe: it holds the write lock for its whole duration.
	//
	// It returns (nil, true) when there is nothing to step: either the episode
	// has already finished, or Reset has not been called yet so no buildings
	// exist. In both cases the caller has to Reset before stepping again.
	//
	// Every element of actions is clamped in place to its legal range. The
	// action applied to building i is chosen by findAction (matching
	// building_id first, then position, then a default action).
	func (e *Environment) Step(actions []ActionModel) ([]StepResponse, bool) {
		e.mu.Lock()
		defer e.mu.Unlock()

		if e.done {
			return nil, true
		}
		// Before the first Reset there are no buildings and LastActions is nil;
		// indexing LastActions[0] below would panic.
		if len(e.Buildings) == 0 || len(e.LastActions) < len(e.Buildings) {
			return nil, true
		}

		// Validate and clamp actions.
		for i := range actions {
			e.clampAction(&actions[i])
		}

		responses := make([]StepResponse, e.numBuildings)
		for i, b := range e.Buildings {
			act := e.findAction(actions, i)
			// Record the action that is actually simulated, so the replay
			// stays truthful when actions are matched by building_id or when
			// the default action is used.
			e.LastActions[i] = act
			responses[i] = e.stepBuilding(b, act, i)
		}

		e.step++
		if e.step >= EpisodeSteps {
			e.done = true
		}

		// Record replay entry (first building is the primary one). e.step has
		// already been advanced, hence the -1.
		if len(responses) > 0 {
			e.Replay = append(e.Replay, ReplayEntry{
				Step:        e.step - 1,
				Observation: responses[0].Observation,
				Action:      e.LastActions[0],
				Reward:      responses[0].Reward,
				Components:  responses[0].Info.RewardComponents,
				Done:        e.done,
			})
		}

		return responses, e.done
	}

	// GetState returns a snapshot of the environment state. Thread-safe (read
	// lock).
	//
	// Job lists and history series are copied into the response. Step mutates
	// job flags in place, so handing out the live slices would let a caller
	// read them (for example while serialising) concurrently with a later Step
	// once the lock is released.
	//
	// PriceCurve and CarbonCurve in the response are downsampled to one value
	// per hour: the value at the first step of each hour.
	func (e *Environment) GetState() StateResponse {
		e.mu.RLock()
		defer e.mu.RUnlock()

		buildings := make([]BuildingStatePublic, e.numBuildings)
		for i, b := range e.Buildings {
			pub := BuildingStatePublic{
				ObservationModel:    e.buildObservation(b),
				OutdoorTemperature:  b.OutdoorTemperature,
				SetpointTemperature: b.SetpointTemperature,
				BaselineCost:        b.BaselineCost,
				BaselineCarbon:      b.BaselineCarbon,
				CumulativeCarbon:    b.CumulativeCarbon,
				// append(s[:0:0], s...) clones s and keeps nil as nil.
				Jobs: append(b.Jobs[:0:0], b.Jobs...),
			}
			if i < len(e.TempHistory) {
				pub.TempHistory = append(e.TempHistory[i][:0:0], e.TempHistory[i]...)
				pub.CostHistory = append(e.CostHistory[i][:0:0], e.CostHistory[i]...)
				pub.HVACHistory = append(e.HVACHistory[i][:0:0], e.HVACHistory[i]...)
				pub.LoadShedHistory = append(e.LoadShedHistory[i][:0:0], e.LoadShedHistory[i]...)
				pub.RewardHistory = append(e.RewardHistory[i][:0:0], e.RewardHistory[i]...)
			}
			buildings[i] = pub
		}

		// Four 15-minute steps per hour; h*4 is always below EpisodeSteps.
		hours := EpisodeSteps / 4
		priceCurve := make([]float64, hours)
		carbonCurve := make([]float64, hours)
		for h := 0; h < hours; h++ {
			priceCurve[h] = e.PriceCurve[h*4]
			carbonCurve[h] = e.CarbonCurve[h*4]
		}

		return StateResponse{
			Buildings:   buildings,
			PriceCurve:  priceCurve,
			CarbonCurve: carbonCurve,
			Episode:     e.episode,
			Step:        e.step,
			TaskID:      e.taskID,
			Done:        e.done,
			Seed:        e.seed,
		}
	}

	// GetReplay returns a copy of the replay entries recorded so far in the
	// current episode. The copy is taken under the read lock, so the caller may
	// keep or modify it freely. The result is never nil.
	func (e *Environment) GetReplay() []ReplayEntry {
		e.mu.RLock()
		defer e.mu.RUnlock()

		snapshot := make([]ReplayEntry, 0, len(e.Replay))
		return append(snapshot, e.Replay...)
	}

	// ──────────────────────────────────────────────
	// Internal helpers
	// ──────────────────────────────────────────────

	// newBuildingState creates the initial state of building id for a fresh
	// episode. Initial conditions are drawn from e.rng:
	//   - indoor temperature within ±1 °C of DefaultSetpoint,
	//   - thermal storage 30–70 % full,
	//   - outdoor temperature 15–30 °C,
	//   - process demand 10–30 kW,
	// followed by the batch jobs for the current difficulty.
	//
	// The price and carbon curves must already be generated, because slot 0 of
	// each is read here. The order of the RNG draws determines the result for
	// a given seed and must not be rearranged.
	func (e *Environment) newBuildingState(id int) *BuildingState {
		// Randomise initial conditions slightly.
		initTemp := DefaultSetpoint + (e.rng.Float64()-0.5)*2.0
		storageLevel := 0.3 + e.rng.Float64()*0.4  // start 30–70% full
		outdoorTemp := 15.0 + e.rng.Float64()*15.0 // 15–30 °C
		processDemand := 10.0 + e.rng.Float64()*20.0

		b := &BuildingState{
			BuildingID:          id,
			IndoorTemperature:   initTemp,
			ThermalStorageLevel: storageLevel,
			ProcessDemand:       processDemand,
			CurrentPrice:        e.PriceCurve[0],
			GridStressSignal:    0.0,
			CarbonIntensity:     e.CarbonCurve[0],
			HourOfDay:           0,
			Step:                0,
			CumulativeCost:      0.0,
			CumulativeCarbon:    0.0,
			OutdoorTemperature:  outdoorTemp,
			PrevHVACLevel:       0.5,
			BaselineCost:        0.0,
			BaselineCarbon:      0.0,
			SetpointTemperature: DefaultSetpoint,
			MaxHVACPower:        MaxHVACPowerKW,
			MaxStorageCapacity:  MaxStorageKWh,
			ThermalLossRate:     StorageLossRate,
		}

		// Spawn batch jobs based on difficulty; the queue mirrors their deadlines.
		b.Jobs = e.generateBatchJobs()
		b.BatchQueue = pendingDeadlines(b.Jobs)
		return b
	}

	// generateBatchJobs draws the batch jobs of one building: three by
	// default, four on "medium" and five on "hard". Each job gets a random
	// deadline slot, a duration of 1–3 steps and a power draw of 5–20 kW, and
	// starts out unscheduled (ScheduledAt = -1).
	//
	// Per job the RNG is consulted in the order deadline, duration, power.
	func (e *Environment) generateBatchJobs() []BatchJob {
		var count int
		switch e.difficulty {
		case "hard":
			count = 5
		case "medium":
			count = 4
		default:
			count = 3
		}

		// Deadlines are spread across the episode, leaving slack at the end
		// for the job's duration; the window never shrinks below 8 slots.
		window := EpisodeSteps - 12
		if window < 8 {
			window = 8
		}

		out := make([]BatchJob, 0, count)
		for id := 1; id <= count; id++ {
			deadline := 4 + e.rng.Intn(window)
			duration := 1 + e.rng.Intn(3)
			power := 5.0 + e.rng.Float64()*15.0
			out = append(out, BatchJob{
				ID:           id,
				DeadlineSlot: deadline,
				Duration:     duration,
				PowerDraw:    power,
				ScheduledAt:  -1,
			})
		}
		return out
	}

	// generatePriceCurve creates a stochastic Time-of-Use price curve for the episode.
	func (e *Environment) generatePriceCurve() {
	// Base ToU: low overnight, moderate morning, high peak (8-12, 17-21), low night
	volatility := 0.1
	switch e.difficulty {
	case "medium":
	volatility = 0.2
	case "hard":
	volatility = 0.35
	}

	// Random peak window shift (±2 hours) for stochasticity
	morningPeakShift := e.rng.Intn(5) - 2
	eveningPeakShift := e.rng.Intn(5) - 2

	for s := 0; s < EpisodeSteps; s++ {
	hour := (s / 4)
	base := touPrice(hour, morningPeakShift, eveningPeakShift)
	noise := (e.rng.Float64()2 - 1) volatility * base
	price := math.Max(0.02, base+noise)
	e.PriceCurve[s] = price
	}
	}

	// touPrice returns the base time-of-use price ($/kWh) for the given hour of
	// day. morningShift and eveningShift move the respective peak windows by
	// whole hours.
	//
	// Tiers, checked in this order:
	//   - morning peak [8+morningShift, 12+morningShift):       0.18
	//   - evening peak [17+eveningShift, 21+eveningShift]:      0.22
	//     (the end hour is inclusive)
	//   - shoulder, from hour 9 up to the morning peak start,
	//     or between the two peaks:                             0.10
	//   - night, hour >= 23 or hour < 6:                        0.04
	//   - everything else:                                      0.08
	func touPrice(hour, morningShift, eveningShift int) float64 {
		morningPeakStart := 8 + morningShift
		morningPeakEnd := 12 + morningShift
		eveningPeakStart := 17 + eveningShift
		eveningPeakEnd := 21 + eveningShift

		switch {
		case hour >= morningPeakStart && hour < morningPeakEnd:
			return 0.18
		case hour >= eveningPeakStart && hour <= eveningPeakEnd:
			return 0.22
		case (hour >= 9 && hour < morningPeakStart) || (hour >= morningPeakEnd && hour < eveningPeakStart):
			return 0.10
		case hour >= 23 || hour < 6:
			return 0.04
		default:
			return 0.08
		}
	}

	// generateCarbonCurve creates a realistic carbon intensity curve (gCO2/kWh).
	// Correlates roughly with price: higher price = more peaker plants = higher carbon.
	func (e *Environment) generateCarbonCurve() {
	for s := 0; s < EpisodeSteps; s++ {
	price := e.PriceCurve[s]
	// Map price range [0.04, 0.32] → carbon [150, 600] gCO2/kWh
	carbon := 150.0 + (price-0.04)/(0.32-0.04)*(600.0-150.0)
	noise := (e.rng.Float64()2 - 1) 30.0
	e.CarbonCurve[s] = math.Max(100.0, carbon+noise)
	}
	}

	// stepBuilding advances a single building by one timestep.
	func (e Environment) stepBuilding(b BuildingState, act ActionModel, idx int) StepResponse {
	s := e.step

	// Update environmental signals from curves
	b.CurrentPrice = e.PriceCurve[s]
	b.CarbonIntensity = e.CarbonCurve[s]
	b.HourOfDay = (s / 4) % 24

	// Stochastic grid stress events (more frequent in hard mode)
	b.GridStressSignal = e.updateGridStress(s)

	// Weather perturbation: outdoor temp drifts sinusoidally + noise
	b.OutdoorTemperature = e.updateOutdoorTemp(s)

	// Process demand fluctuation
	b.ProcessDemand = e.updateProcessDemand(s)

	// ----- Apply actions -----

	// 1. HVAC: heats/cools building toward setpoint
	hvacPower := act.HVACPowerLevel * b.MaxHVACPower // kW

	// 2. Thermal storage: charge or discharge
	chargeKW := act.ThermalChargeRate * b.MaxHVACPower * 0.3 // max 30% of HVAC for storage
	newStorageEnergy := b.ThermalStorageLevelb.MaxStorageCapacity + chargeKWStepDurationHrs
	// Apply thermal losses
	newStorageEnergy *= (1.0 - b.ThermalLossRate)
	newStorageEnergy = math.Max(0, math.Min(b.MaxStorageCapacity, newStorageEnergy))
	b.ThermalStorageLevel = newStorageEnergy / b.MaxStorageCapacity

	// 3. Load shedding
	clampedShed := math.Max(0, math.Min(0.5, act.LoadShedFraction))
	shedKW := clampedShed * b.ProcessDemand

	// 4. Batch job scheduling
	batchCompleted, batchMissed := e.updateBatchJobs(b, act.BatchJobSlot, s)

	// ----- Thermal dynamics -----
	// First-order setpoint-driven model:
	// HVAC drives temperature toward setpoint; higher power = stronger effect.
	// At HVACPowerLevel=1.0, HVAC strongly pushes toward setpoint.
	// At HVACPowerLevel=0.0, HVAC is off — temp drifts with environment.
	hvacEffect := (b.SetpointTemperature - b.IndoorTemperature) * act.HVACPowerLevel * 0.15

	// Outdoor infiltration: building slowly equilibrates with outside
	infiltration := (b.OutdoorTemperature - b.IndoorTemperature) * 0.03

	// Thermal storage discharge provides supplemental conditioning toward setpoint
	storageEffect := 0.0
	if act.ThermalChargeRate < 0 {
	storageEffect = (b.SetpointTemperature - b.IndoorTemperature) * math.Abs(act.ThermalChargeRate) * 0.05
	}

	// Process equipment waste heat (always warms the building)
	processHeat := b.ProcessDemand * 0.002 // kW→°C rough factor

	deltaT := hvacEffect + infiltration + storageEffect + processHeat
	b.IndoorTemperature += deltaT

	// Clamp to physically reasonable indoor range
	b.IndoorTemperature = math.Max(10.0, math.Min(40.0, b.IndoorTemperature))

	// ----- Energy & cost accounting -----
	batchPowerDraw := e.batchRunningPower(b)
	totalKW := hvacPower + math.Max(0, chargeKW) + batchPowerDraw - shedKW
	totalKW = math.Max(0, totalKW)
	energyKWh := totalKW * StepDurationHrs
	stepCost := energyKWh * b.CurrentPrice
	stepCarbon := energyKWh * b.CarbonIntensity

	b.CumulativeCost += stepCost
	b.CumulativeCarbon += stepCarbon

	// Baseline (always-on at 70% HVAC, no storage/shedding)
	baselineKW := 0.7*b.MaxHVACPower + b.ProcessDemand
	baselineEnergy := baselineKW * StepDurationHrs
	b.BaselineCost += baselineEnergy * b.CurrentPrice
	b.BaselineCarbon += baselineEnergy * b.CarbonIntensity

	// ----- Reward computation -----
	rc := ComputeReward(ComputeRewardInput{
	B: b,
	Act: act,
	StepCost: stepCost,
	EnergyKWh: energyKWh,
	TMin: TMinDefault,
	TMax: TMaxDefault,
	StepCarbon: stepCarbon,
	BatchMissed: len(batchMissed),
	GridStress: b.GridStressSignal,
	ShedFraction: clampedShed,
	TaskID: e.taskID,
	PrevHVACLevel: b.PrevHVACLevel,
	ChargeRate: act.ThermalChargeRate,
	PrevChargeRate: e.prevChargeRates[idx],
	StorageDelta: act.ThermalChargeRate,
	PriceCurve: e.PriceCurve[:],
	CurrentStep: s,
	})
	b.PrevHVACLevel = act.HVACPowerLevel
	e.prevChargeRates[idx] = act.ThermalChargeRate

	// Update batch queue
	b.BatchQueue = pendingDeadlines(b.Jobs)

	// Exploit detection
	if clampedShed > 0.4 {
	e.totalShedSteps[idx]++
	}
	if len(e.thermalCycleCounts) > idx {
	if len(e.Replay) > 0 {
	prev := e.prevChargeRates[idx]
	if prev > 0.3 && act.ThermalChargeRate < -0.3 \|\| prev < -0.3 && act.ThermalChargeRate > 0.3 {
	e.thermalCycleCounts[idx]++
	}
	}
	}

	// Per-building step index matches global timestep for this transition (0 .. EpisodeSteps-1)
	b.Step = s

	// Record history
	if idx < len(e.TempHistory) {
	e.TempHistory[idx] = append(e.TempHistory[idx], b.IndoorTemperature)
	e.CostHistory[idx] = append(e.CostHistory[idx], b.CumulativeCost)
	e.HVACHistory[idx] = append(e.HVACHistory[idx], act.HVACPowerLevel)
	e.LoadShedHistory[idx] = append(e.LoadShedHistory[idx], clampedShed)
	e.RewardHistory[idx] = append(e.RewardHistory[idx], rc)
	}

	obs := e.buildObservation(b)

	return StepResponse{
	Observation: obs,
	Reward: rc.Total,
	Done: e.done \|\| s+1 >= EpisodeSteps,
	Info: StepInfo{
	RewardComponents: rc,
	EnergyUsed: energyKWh,
	CarbonEmitted: stepCarbon,
	PriceSignal: b.CurrentPrice,
	GridStress: b.GridStressSignal,
	BatchCompleted: batchCompleted,
	BatchMissed: batchMissed,
	Episode: e.episode,
	Step: s,
	},
	}
	}

	// updateGridStress returns the grid stress signal in [0, 1] for step s.
	// It combines a price-driven part (the step's price normalised over
	// [0.04, 0.32], weighted 0.6) with an occasional random spike of 0.3–0.8.
	// The spike probability per call is 0.05 by default, 0.1 on "medium" and
	// 0.2 on "hard".
	func (e *Environment) updateGridStress(s int) float64 {
		var spikeChance float64
		switch e.difficulty {
		case "hard":
			spikeChance = 0.2
		case "medium":
			spikeChance = 0.1
		default:
			spikeChance = 0.05
		}

		// Position of the current price within the nominal tariff range.
		priceNorm := (e.PriceCurve[s] - 0.04) / (0.32 - 0.04)

		// One draw decides whether a spike happens, a second one its size.
		var spike float64
		if e.rng.Float64() < spikeChance {
			spike = 0.3 + e.rng.Float64()*0.5
		}

		capped := math.Min(1.0, priceNorm*0.6+spike)
		return math.Max(0, capped)
	}

	func (e *Environment) updateOutdoorTemp(s int) float64 {
	// Sinusoidal daily temperature cycle + noise
	hour := float64(s) / 4.0
	baseTemp := 15.0 + 8.0math.Sin(2math.Pi*(hour-6)/24.0)
	noise := (e.rng.Float64()2 - 1) 1.5
	return baseTemp + noise
	}

	func (e *Environment) updateProcessDemand(s int) float64 {
	// Process demand shifts with business hours
	hour := s / 4
	base := 10.0
	if hour >= 8 && hour <= 18 {
	base = 20.0 + 10.0math.Sin(math.Pifloat64(hour-8)/10.0)
	}
	noise := (e.rng.Float64()2 - 1) 3.0
	return math.Max(0, base+noise)
	}

	func (e Environment) updateBatchJobs(b BuildingState, slot int, step int) (completed []int, missed []int) {
	completed = []int{}
	missed = []int{}

	// Schedule the first pending job into the chosen slot
	for i := range b.Jobs {
	job := &b.Jobs[i]
	if !job.Scheduled && !job.Completed && !job.MissedDeadline {
	schedAt := step + slot
	job.Scheduled = true
	job.ScheduledAt = schedAt
	break // only schedule one job per step
	}
	}

	// Advance running or completed jobs
	for i := range b.Jobs {
	job := &b.Jobs[i]
	if job.Completed \|\| job.MissedDeadline {
	continue
	}
	// Check deadline miss
	if step >= job.DeadlineSlot && !job.Completed {
	job.MissedDeadline = true
	missed = append(missed, job.ID)
	continue
	}
	// Mark as completed if scheduled and past its start
	if job.Scheduled && step >= job.ScheduledAt {
	if step >= job.ScheduledAt+job.Duration-1 {
	job.Completed = true
	completed = append(completed, job.ID)
	}
	}
	}
	return
	}

	// batchRunningPower returns the combined power draw (kW) of the batch jobs
	// of b that are running at the current global step e.step, i.e. scheduled
	// jobs whose window [ScheduledAt, ScheduledAt+Duration) contains e.step and
	// that have not missed their deadline.
	//
	// The Completed flag is deliberately not consulted: updateBatchJobs sets it
	// on the last step of a job's run, before power is accounted for that same
	// step. Filtering on it would drop the final step of every job, and a
	// one-step job would never draw any power. The window test alone already
	// excludes jobs on every later step.
	func (e *Environment) batchRunningPower(b *BuildingState) float64 {
		total := 0.0
		for i := range b.Jobs {
			job := &b.Jobs[i]
			if !job.Scheduled || job.MissedDeadline {
				continue
			}
			if e.step >= job.ScheduledAt && e.step < job.ScheduledAt+job.Duration {
				total += job.PowerDraw
			}
		}
		return total
	}

	// buildObservation converts the state of b into the observation exposed to
	// the agent. Values are rounded for presentation: indoor temperature and
	// process demand to 2 decimals, storage level and grid stress to 3, price
	// and cumulative cost to 4, carbon intensity to 1. BatchQueue is rebuilt
	// from the job list rather than copied from b.
	func (e *Environment) buildObservation(b *BuildingState) ObservationModel {
		return ObservationModel{
			IndoorTemperature:   math.Round(b.IndoorTemperature*100) / 100,
			ThermalStorageLevel: math.Round(b.ThermalStorageLevel*1000) / 1000,
			ProcessDemand:       math.Round(b.ProcessDemand*100) / 100,
			CurrentPrice:        math.Round(b.CurrentPrice*10000) / 10000,
			GridStressSignal:    math.Round(b.GridStressSignal*1000) / 1000,
			CarbonIntensity:     math.Round(b.CarbonIntensity*10) / 10,
			HourOfDay:           b.HourOfDay,
			BatchQueue:          pendingDeadlines(b.Jobs),
			CumulativeCost:      math.Round(b.CumulativeCost*10000) / 10000,
			Step:                b.Step,
			BuildingID:          b.BuildingID,
		}
	}

	// clampAction forces every field of a into its legal range, in place:
	// HVACPowerLevel to [0, 1], ThermalChargeRate to [-1, 1], BatchJobSlot to
	// [0, 4] and LoadShedFraction to [0, 0.5]. It takes a pointer so that the
	// caller's action is the one modified.
	func (e *Environment) clampAction(a *ActionModel) {
		a.HVACPowerLevel = math.Max(0, math.Min(1.0, a.HVACPowerLevel))
		a.ThermalChargeRate = math.Max(-1.0, math.Min(1.0, a.ThermalChargeRate))
		a.BatchJobSlot = max(0, min(4, a.BatchJobSlot))
		a.LoadShedFraction = math.Max(0, math.Min(0.5, a.LoadShedFraction))
	}

	// findAction picks the action to apply to the building at buildingIdx.
	// Lookup order:
	//  1. the first action whose BuildingID equals buildingIdx;
	//  2. the action at position buildingIdx, if there is one;
	//  3. a default action: HVAC at 0.5, no storage use, slot 0, no shedding.
	func (e *Environment) findAction(actions []ActionModel, buildingIdx int) ActionModel {
		for i := range actions {
			if actions[i].BuildingID == buildingIdx {
				return actions[i]
			}
		}
		if buildingIdx >= len(actions) {
			// Every field other than the HVAC level defaults to its zero value.
			return ActionModel{HVACPowerLevel: 0.5}
		}
		return actions[buildingIdx]
	}

	// pendingDeadlines returns the deadline slots of all jobs that are still
	// open, i.e. neither completed nor past their deadline, in job order.
	// Jobs that are already scheduled but not yet finished are included.
	// The result is never nil.
	func pendingDeadlines(jobs []BatchJob) []int {
		open := make([]int, 0, len(jobs))
		for i := range jobs {
			if jobs[i].Completed || jobs[i].MissedDeadline {
				continue
			}
			open = append(open, jobs[i].DeadlineSlot)
		}
		return open
	}

	// max returns the larger of two ints.
	func max(a, b int) int {
		if b >= a {
			return b
		}
		return a
	}

	// min returns the smaller of two ints.
	func min(a, b int) int {
		if b <= a {
			return b
		}
		return a
	}

	// ExploitDetected returns whether the current episode shows signs of degenerate strategies.
	func (e *Environment) ExploitDetected(buildingIdx int) (bool, float64) {
	e.mu.RLock()
	defer e.mu.RUnlock()
	if buildingIdx >= len(e.totalShedSteps) {
	return false, 0.0
	}
	// Flag if agent always sheds > 40% load (more than 70% of steps)
	shedRatio := float64(e.totalShedSteps[buildingIdx]) / float64(e.step+1)
	cycleRatio := float64(e.thermalCycleCounts[buildingIdx]) / float64(e.step+1)
	exploited := shedRatio > 0.7 \|\| cycleRatio > 0.4
	penalty := 0.0
	if exploited {
	penalty = math.Max(shedRatio-0.7, 0)0.5 + math.Max(cycleRatio-0.4, 0)0.3
	}
	return exploited, penalty
	}