| |
| |
| |
| package env |
|
|
| import ( |
| "math" |
| "math/rand" |
| "sync" |
| "time" |
| ) |
|
|
// Simulation-wide configuration constants.
const (
	// EpisodeSteps is the number of steps in one episode; at
	// StepDurationHrs per step this covers 96 * 0.25 = 24 hours.
	EpisodeSteps = 96
	// StepDurationHrs is the length of one step in hours.
	StepDurationHrs = 0.25
	// MaxBuildings is the upper bound applied to the requested building count.
	MaxBuildings = 3
	// DefaultSetpoint is the setpoint temperature given to every new building
	// (presumably degrees Celsius; the unit is not stated in this file).
	DefaultSetpoint = 21.0
	// TMinDefault and TMaxDefault are the temperature bounds handed to the
	// reward computation.
	TMinDefault = 19.0
	TMaxDefault = 23.0
	// MaxHVACPowerKW is the HVAC power, in kW, drawn at power level 1.0.
	MaxHVACPowerKW = 50.0
	// MaxStorageKWh is the thermal storage capacity, in kWh.
	MaxStorageKWh = 100.0
	// StorageLossRate is the fraction of stored energy lost on every step.
	StorageLossRate = 0.005
	// MaxBatchJobs is not referenced in this file; it equals the job count
	// generated for the "hard" difficulty.
	MaxBatchJobs = 5
)
|
|
| |
| type Environment struct { |
| mu sync.RWMutex |
| rng *rand.Rand |
| seed int64 |
| episode int |
| step int |
| done bool |
| taskID int |
| difficulty string |
| numBuildings int |
|
|
| Buildings []*BuildingState |
| PriceCurve [EpisodeSteps]float64 |
| CarbonCurve [EpisodeSteps]float64 |
| Replay []ReplayEntry |
| LastActions []ActionModel |
|
|
| |
| TempHistory [][]float64 |
| CostHistory [][]float64 |
| HVACHistory [][]float64 |
| LoadShedHistory [][]float64 |
| RewardHistory [][]RewardComponents |
|
|
| |
| totalShedSteps []int |
| thermalCycleCounts []int |
| prevChargeRates []float64 |
| } |
|
|
| |
| func NewEnvironment() *Environment { |
| seed := time.Now().UnixNano() |
| return &Environment{ |
| rng: rand.New(rand.NewSource(seed)), |
| seed: seed, |
| taskID: 1, |
| difficulty: "easy", |
| numBuildings: 1, |
| } |
| } |
|
|
| |
| func (e *Environment) Reset(req ResetRequest) ResetResponse { |
| e.mu.Lock() |
| defer e.mu.Unlock() |
|
|
| |
| if req.Seed != nil { |
| e.seed = *req.Seed |
| } else { |
| e.seed = time.Now().UnixNano() |
| } |
| e.rng = rand.New(rand.NewSource(e.seed)) |
|
|
| |
| e.taskID = req.TaskID |
| if e.taskID < 1 || e.taskID > 3 { |
| e.taskID = 1 |
| } |
| e.difficulty = req.Difficulty |
| if e.difficulty == "" { |
| switch e.taskID { |
| case 1: |
| e.difficulty = "easy" |
| case 2: |
| e.difficulty = "medium" |
| case 3: |
| e.difficulty = "hard" |
| } |
| } |
|
|
| |
| e.numBuildings = req.NumBuildings |
| if e.numBuildings < 1 { |
| e.numBuildings = 1 |
| } |
| if e.numBuildings > MaxBuildings { |
| e.numBuildings = MaxBuildings |
| } |
|
|
| e.episode++ |
| e.step = 0 |
| e.done = false |
| e.Replay = make([]ReplayEntry, 0, EpisodeSteps) |
| e.LastActions = make([]ActionModel, e.numBuildings) |
|
|
| |
| e.generatePriceCurve() |
| e.generateCarbonCurve() |
|
|
| |
| e.Buildings = make([]*BuildingState, e.numBuildings) |
| e.TempHistory = make([][]float64, e.numBuildings) |
| e.CostHistory = make([][]float64, e.numBuildings) |
| e.HVACHistory = make([][]float64, e.numBuildings) |
| e.LoadShedHistory = make([][]float64, e.numBuildings) |
| e.RewardHistory = make([][]RewardComponents, e.numBuildings) |
| e.totalShedSteps = make([]int, e.numBuildings) |
| e.thermalCycleCounts = make([]int, e.numBuildings) |
| e.prevChargeRates = make([]float64, e.numBuildings) |
|
|
| for i := 0; i < e.numBuildings; i++ { |
| e.Buildings[i] = e.newBuildingState(i) |
| e.TempHistory[i] = make([]float64, 0, EpisodeSteps) |
| e.CostHistory[i] = make([]float64, 0, EpisodeSteps) |
| e.HVACHistory[i] = make([]float64, 0, EpisodeSteps) |
| e.LoadShedHistory[i] = make([]float64, 0, EpisodeSteps) |
| e.RewardHistory[i] = make([]RewardComponents, 0, EpisodeSteps) |
| } |
|
|
| obs := make([]ObservationModel, e.numBuildings) |
| for i, b := range e.Buildings { |
| obs[i] = e.buildObservation(b) |
| } |
|
|
| return ResetResponse{ |
| Observations: obs, |
| Episode: e.episode, |
| TaskID: e.taskID, |
| Seed: e.seed, |
| } |
| } |
|
|
| |
| func (e *Environment) Step(actions []ActionModel) ([]StepResponse, bool) { |
| e.mu.Lock() |
| defer e.mu.Unlock() |
|
|
| if e.done { |
| return nil, true |
| } |
|
|
| |
| for i := range actions { |
| e.clampAction(&actions[i]) |
| if i < e.numBuildings { |
| e.LastActions[i] = actions[i] |
| } |
| } |
|
|
| responses := make([]StepResponse, e.numBuildings) |
| for i, b := range e.Buildings { |
| var act ActionModel |
| |
| act = e.findAction(actions, i) |
| responses[i] = e.stepBuilding(b, act, i) |
| } |
|
|
| e.step++ |
| if e.step >= EpisodeSteps { |
| e.done = true |
| } |
|
|
| |
| if len(responses) > 0 { |
| entry := ReplayEntry{ |
| Step: e.step - 1, |
| Observation: responses[0].Observation, |
| Action: e.LastActions[0], |
| Reward: responses[0].Reward, |
| Components: responses[0].Info.RewardComponents, |
| Done: e.done, |
| } |
| e.Replay = append(e.Replay, entry) |
| } |
|
|
| return responses, e.done |
| } |
|
|
| |
| func (e *Environment) GetState() StateResponse { |
| e.mu.RLock() |
| defer e.mu.RUnlock() |
|
|
| buildings := make([]BuildingStatePublic, e.numBuildings) |
| for i, b := range e.Buildings { |
| pub := BuildingStatePublic{ |
| ObservationModel: e.buildObservation(b), |
| OutdoorTemperature: b.OutdoorTemperature, |
| SetpointTemperature: b.SetpointTemperature, |
| BaselineCost: b.BaselineCost, |
| BaselineCarbon: b.BaselineCarbon, |
| CumulativeCarbon: b.CumulativeCarbon, |
| Jobs: b.Jobs, |
| } |
| if i < len(e.TempHistory) { |
| pub.TempHistory = e.TempHistory[i] |
| pub.CostHistory = e.CostHistory[i] |
| pub.HVACHistory = e.HVACHistory[i] |
| pub.LoadShedHistory = e.LoadShedHistory[i] |
| pub.RewardHistory = e.RewardHistory[i] |
| } |
| buildings[i] = pub |
| } |
|
|
| priceCurve := make([]float64, EpisodeSteps/4) |
| carbonCurve := make([]float64, EpisodeSteps/4) |
| for h := 0; h < EpisodeSteps/4; h++ { |
| stepIdx := h * 4 |
| if stepIdx < EpisodeSteps { |
| priceCurve[h] = e.PriceCurve[stepIdx] |
| carbonCurve[h] = e.CarbonCurve[stepIdx] |
| } |
| } |
|
|
| return StateResponse{ |
| Buildings: buildings, |
| PriceCurve: priceCurve, |
| CarbonCurve: carbonCurve, |
| Episode: e.episode, |
| Step: e.step, |
| TaskID: e.taskID, |
| Done: e.done, |
| Seed: e.seed, |
| } |
| } |
|
|
| |
| func (e *Environment) GetReplay() []ReplayEntry { |
| e.mu.RLock() |
| defer e.mu.RUnlock() |
| result := make([]ReplayEntry, len(e.Replay)) |
| copy(result, e.Replay) |
| return result |
| } |
|
|
| |
| |
| |
|
|
| func (e *Environment) newBuildingState(id int) *BuildingState { |
| |
| initTemp := DefaultSetpoint + (e.rng.Float64()-0.5)*2.0 |
| storageLevel := 0.3 + e.rng.Float64()*0.4 |
| outdoorTemp := 15.0 + e.rng.Float64()*15.0 |
|
|
| b := &BuildingState{ |
| BuildingID: id, |
| IndoorTemperature: initTemp, |
| ThermalStorageLevel: storageLevel, |
| ProcessDemand: 10.0 + e.rng.Float64()*20.0, |
| CurrentPrice: e.PriceCurve[0], |
| GridStressSignal: 0.0, |
| CarbonIntensity: e.CarbonCurve[0], |
| HourOfDay: 0, |
| Step: 0, |
| BatchQueue: []int{}, |
| CumulativeCost: 0.0, |
| CumulativeCarbon: 0.0, |
| OutdoorTemperature: outdoorTemp, |
| PrevHVACLevel: 0.5, |
| BaselineCost: 0.0, |
| BaselineCarbon: 0.0, |
| SetpointTemperature: DefaultSetpoint, |
| MaxHVACPower: MaxHVACPowerKW, |
| MaxStorageCapacity: MaxStorageKWh, |
| ThermalLossRate: StorageLossRate, |
| } |
|
|
| |
| b.Jobs = e.generateBatchJobs() |
| b.BatchQueue = pendingDeadlines(b.Jobs) |
| return b |
| } |
|
|
| func (e *Environment) generateBatchJobs() []BatchJob { |
| numJobs := 3 |
| switch e.difficulty { |
| case "medium": |
| numJobs = 4 |
| case "hard": |
| numJobs = 5 |
| } |
|
|
| jobs := make([]BatchJob, numJobs) |
| for i := range jobs { |
| |
| span := EpisodeSteps - 12 |
| if span < 8 { |
| span = 8 |
| } |
| deadline := 4 + e.rng.Intn(span) |
| jobs[i] = BatchJob{ |
| ID: i + 1, |
| DeadlineSlot: deadline, |
| Duration: 1 + e.rng.Intn(3), |
| PowerDraw: 5.0 + e.rng.Float64()*15.0, |
| Scheduled: false, |
| ScheduledAt: -1, |
| Completed: false, |
| MissedDeadline: false, |
| } |
| } |
| return jobs |
| } |
|
|
| |
| func (e *Environment) generatePriceCurve() { |
| |
| volatility := 0.1 |
| switch e.difficulty { |
| case "medium": |
| volatility = 0.2 |
| case "hard": |
| volatility = 0.35 |
| } |
|
|
| |
| morningPeakShift := e.rng.Intn(5) - 2 |
| eveningPeakShift := e.rng.Intn(5) - 2 |
|
|
| for s := 0; s < EpisodeSteps; s++ { |
| hour := (s / 4) |
| base := touPrice(hour, morningPeakShift, eveningPeakShift) |
| noise := (e.rng.Float64()*2 - 1) * volatility * base |
| price := math.Max(0.02, base+noise) |
| e.PriceCurve[s] = price |
| } |
| } |
|
|
| |
// touPrice returns the base time-of-use price for the given hour.
//
// The morning peak spans [8, 12) and the evening peak spans [17, 21], each
// moved by its shift; note that the evening end hour is inclusive. The first
// matching rule wins:
//
//	morning peak                                   0.18
//	evening peak                                   0.22
//	hour 9 up to the morning peak, or between the
//	morning peak's end and the evening peak        0.10
//	hour >= 23 or hour < 6                         0.04
//	anything else                                  0.08
func touPrice(hour, morningShift, eveningShift int) float64 {
	amStart, amEnd := 8+morningShift, 12+morningShift
	pmStart, pmEnd := 17+eveningShift, 21+eveningShift

	inMorningPeak := amStart <= hour && hour < amEnd
	inEveningPeak := pmStart <= hour && hour <= pmEnd
	beforeMorningPeak := 9 <= hour && hour < amStart
	betweenPeaks := amEnd <= hour && hour < pmStart
	overnight := hour >= 23 || hour < 6

	if inMorningPeak {
		return 0.18
	}
	if inEveningPeak {
		return 0.22
	}
	if beforeMorningPeak || betweenPeaks {
		return 0.10
	}
	if overnight {
		return 0.04
	}
	return 0.08
}
|
|
| |
| |
| func (e *Environment) generateCarbonCurve() { |
| for s := 0; s < EpisodeSteps; s++ { |
| price := e.PriceCurve[s] |
| |
| carbon := 150.0 + (price-0.04)/(0.32-0.04)*(600.0-150.0) |
| noise := (e.rng.Float64()*2 - 1) * 30.0 |
| e.CarbonCurve[s] = math.Max(100.0, carbon+noise) |
| } |
| } |
|
|
| |
| func (e *Environment) stepBuilding(b *BuildingState, act ActionModel, idx int) StepResponse { |
| s := e.step |
|
|
| |
| b.CurrentPrice = e.PriceCurve[s] |
| b.CarbonIntensity = e.CarbonCurve[s] |
| b.HourOfDay = (s / 4) % 24 |
|
|
| |
| b.GridStressSignal = e.updateGridStress(s) |
|
|
| |
| b.OutdoorTemperature = e.updateOutdoorTemp(s) |
|
|
| |
| b.ProcessDemand = e.updateProcessDemand(s) |
|
|
| |
|
|
| |
| hvacPower := act.HVACPowerLevel * b.MaxHVACPower |
|
|
| |
| chargeKW := act.ThermalChargeRate * b.MaxHVACPower * 0.3 |
| newStorageEnergy := b.ThermalStorageLevel*b.MaxStorageCapacity + chargeKW*StepDurationHrs |
| |
| newStorageEnergy *= (1.0 - b.ThermalLossRate) |
| newStorageEnergy = math.Max(0, math.Min(b.MaxStorageCapacity, newStorageEnergy)) |
| b.ThermalStorageLevel = newStorageEnergy / b.MaxStorageCapacity |
|
|
| |
| clampedShed := math.Max(0, math.Min(0.5, act.LoadShedFraction)) |
| shedKW := clampedShed * b.ProcessDemand |
|
|
| |
| batchCompleted, batchMissed := e.updateBatchJobs(b, act.BatchJobSlot, s) |
|
|
| |
| |
| |
| |
| |
| hvacEffect := (b.SetpointTemperature - b.IndoorTemperature) * act.HVACPowerLevel * 0.15 |
|
|
| |
| infiltration := (b.OutdoorTemperature - b.IndoorTemperature) * 0.03 |
|
|
| |
| storageEffect := 0.0 |
| if act.ThermalChargeRate < 0 { |
| storageEffect = (b.SetpointTemperature - b.IndoorTemperature) * math.Abs(act.ThermalChargeRate) * 0.05 |
| } |
|
|
| |
| processHeat := b.ProcessDemand * 0.002 |
|
|
| deltaT := hvacEffect + infiltration + storageEffect + processHeat |
| b.IndoorTemperature += deltaT |
|
|
| |
| b.IndoorTemperature = math.Max(10.0, math.Min(40.0, b.IndoorTemperature)) |
|
|
| |
| batchPowerDraw := e.batchRunningPower(b) |
| totalKW := hvacPower + math.Max(0, chargeKW) + batchPowerDraw - shedKW |
| totalKW = math.Max(0, totalKW) |
| energyKWh := totalKW * StepDurationHrs |
| stepCost := energyKWh * b.CurrentPrice |
| stepCarbon := energyKWh * b.CarbonIntensity |
|
|
| b.CumulativeCost += stepCost |
| b.CumulativeCarbon += stepCarbon |
|
|
| |
| baselineKW := 0.7*b.MaxHVACPower + b.ProcessDemand |
| baselineEnergy := baselineKW * StepDurationHrs |
| b.BaselineCost += baselineEnergy * b.CurrentPrice |
| b.BaselineCarbon += baselineEnergy * b.CarbonIntensity |
|
|
| |
| rc := ComputeReward(ComputeRewardInput{ |
| B: b, |
| Act: act, |
| StepCost: stepCost, |
| EnergyKWh: energyKWh, |
| TMin: TMinDefault, |
| TMax: TMaxDefault, |
| StepCarbon: stepCarbon, |
| BatchMissed: len(batchMissed), |
| GridStress: b.GridStressSignal, |
| ShedFraction: clampedShed, |
| TaskID: e.taskID, |
| PrevHVACLevel: b.PrevHVACLevel, |
| ChargeRate: act.ThermalChargeRate, |
| PrevChargeRate: e.prevChargeRates[idx], |
| StorageDelta: act.ThermalChargeRate, |
| PriceCurve: e.PriceCurve[:], |
| CurrentStep: s, |
| }) |
| b.PrevHVACLevel = act.HVACPowerLevel |
| e.prevChargeRates[idx] = act.ThermalChargeRate |
|
|
| |
| b.BatchQueue = pendingDeadlines(b.Jobs) |
|
|
| |
| if clampedShed > 0.4 { |
| e.totalShedSteps[idx]++ |
| } |
| if len(e.thermalCycleCounts) > idx { |
| if len(e.Replay) > 0 { |
| prev := e.prevChargeRates[idx] |
| if prev > 0.3 && act.ThermalChargeRate < -0.3 || prev < -0.3 && act.ThermalChargeRate > 0.3 { |
| e.thermalCycleCounts[idx]++ |
| } |
| } |
| } |
|
|
| |
| b.Step = s |
|
|
| |
| if idx < len(e.TempHistory) { |
| e.TempHistory[idx] = append(e.TempHistory[idx], b.IndoorTemperature) |
| e.CostHistory[idx] = append(e.CostHistory[idx], b.CumulativeCost) |
| e.HVACHistory[idx] = append(e.HVACHistory[idx], act.HVACPowerLevel) |
| e.LoadShedHistory[idx] = append(e.LoadShedHistory[idx], clampedShed) |
| e.RewardHistory[idx] = append(e.RewardHistory[idx], rc) |
| } |
|
|
| obs := e.buildObservation(b) |
|
|
| return StepResponse{ |
| Observation: obs, |
| Reward: rc.Total, |
| Done: e.done || s+1 >= EpisodeSteps, |
| Info: StepInfo{ |
| RewardComponents: rc, |
| EnergyUsed: energyKWh, |
| CarbonEmitted: stepCarbon, |
| PriceSignal: b.CurrentPrice, |
| GridStress: b.GridStressSignal, |
| BatchCompleted: batchCompleted, |
| BatchMissed: batchMissed, |
| Episode: e.episode, |
| Step: s, |
| }, |
| } |
| } |
|
|
| func (e *Environment) updateGridStress(s int) float64 { |
| |
| price := e.PriceCurve[s] |
| priceNorm := (price - 0.04) / (0.32 - 0.04) |
|
|
| |
| stressProb := 0.05 |
| switch e.difficulty { |
| case "medium": |
| stressProb = 0.1 |
| case "hard": |
| stressProb = 0.2 |
| } |
| spike := 0.0 |
| if e.rng.Float64() < stressProb { |
| spike = 0.3 + e.rng.Float64()*0.5 |
| } |
| stress := math.Min(1.0, priceNorm*0.6+spike) |
| return math.Max(0, stress) |
| } |
|
|
| func (e *Environment) updateOutdoorTemp(s int) float64 { |
| |
| hour := float64(s) / 4.0 |
| baseTemp := 15.0 + 8.0*math.Sin(2*math.Pi*(hour-6)/24.0) |
| noise := (e.rng.Float64()*2 - 1) * 1.5 |
| return baseTemp + noise |
| } |
|
|
| func (e *Environment) updateProcessDemand(s int) float64 { |
| |
| hour := s / 4 |
| base := 10.0 |
| if hour >= 8 && hour <= 18 { |
| base = 20.0 + 10.0*math.Sin(math.Pi*float64(hour-8)/10.0) |
| } |
| noise := (e.rng.Float64()*2 - 1) * 3.0 |
| return math.Max(0, base+noise) |
| } |
|
|
| func (e *Environment) updateBatchJobs(b *BuildingState, slot int, step int) (completed []int, missed []int) { |
| completed = []int{} |
| missed = []int{} |
|
|
| |
| for i := range b.Jobs { |
| job := &b.Jobs[i] |
| if !job.Scheduled && !job.Completed && !job.MissedDeadline { |
| schedAt := step + slot |
| job.Scheduled = true |
| job.ScheduledAt = schedAt |
| break |
| } |
| } |
|
|
| |
| for i := range b.Jobs { |
| job := &b.Jobs[i] |
| if job.Completed || job.MissedDeadline { |
| continue |
| } |
| |
| if step >= job.DeadlineSlot && !job.Completed { |
| job.MissedDeadline = true |
| missed = append(missed, job.ID) |
| continue |
| } |
| |
| if job.Scheduled && step >= job.ScheduledAt { |
| if step >= job.ScheduledAt+job.Duration-1 { |
| job.Completed = true |
| completed = append(completed, job.ID) |
| } |
| } |
| } |
| return |
| } |
|
|
| func (e *Environment) batchRunningPower(b *BuildingState) float64 { |
| total := 0.0 |
| for _, job := range b.Jobs { |
| if job.Scheduled && !job.Completed && !job.MissedDeadline { |
| if e.step >= job.ScheduledAt && e.step < job.ScheduledAt+job.Duration { |
| total += job.PowerDraw |
| } |
| } |
| } |
| return total |
| } |
|
|
| func (e *Environment) buildObservation(b *BuildingState) ObservationModel { |
| return ObservationModel{ |
| IndoorTemperature: math.Round(b.IndoorTemperature*100) / 100, |
| ThermalStorageLevel: math.Round(b.ThermalStorageLevel*1000) / 1000, |
| ProcessDemand: math.Round(b.ProcessDemand*100) / 100, |
| CurrentPrice: math.Round(b.CurrentPrice*10000) / 10000, |
| GridStressSignal: math.Round(b.GridStressSignal*1000) / 1000, |
| CarbonIntensity: math.Round(b.CarbonIntensity*10) / 10, |
| HourOfDay: b.HourOfDay, |
| BatchQueue: pendingDeadlines(b.Jobs), |
| CumulativeCost: math.Round(b.CumulativeCost*10000) / 10000, |
| Step: b.Step, |
| BuildingID: b.BuildingID, |
| } |
| } |
|
|
| func (e *Environment) clampAction(a *ActionModel) { |
| a.HVACPowerLevel = math.Max(0, math.Min(1.0, a.HVACPowerLevel)) |
| a.ThermalChargeRate = math.Max(-1.0, math.Min(1.0, a.ThermalChargeRate)) |
| a.BatchJobSlot = max(0, min(4, a.BatchJobSlot)) |
| a.LoadShedFraction = math.Max(0, math.Min(0.5, a.LoadShedFraction)) |
| } |
|
|
| func (e *Environment) findAction(actions []ActionModel, buildingIdx int) ActionModel { |
| |
| for _, a := range actions { |
| if a.BuildingID == buildingIdx { |
| return a |
| } |
| } |
| if buildingIdx < len(actions) { |
| return actions[buildingIdx] |
| } |
| |
| return ActionModel{HVACPowerLevel: 0.5, ThermalChargeRate: 0.0, BatchJobSlot: 0, LoadShedFraction: 0.0} |
| } |
|
|
| |
| func pendingDeadlines(jobs []BatchJob) []int { |
| result := []int{} |
| for _, j := range jobs { |
| if !j.Completed && !j.MissedDeadline { |
| result = append(result, j.DeadlineSlot) |
| } |
| } |
| return result |
| } |
|
|
// max returns the larger of a and b.
func max(a, b int) int {
	if b > a {
		return b
	}
	return a
}
|
|
// min returns the smaller of a and b.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
|
|
| |
| func (e *Environment) ExploitDetected(buildingIdx int) (bool, float64) { |
| e.mu.RLock() |
| defer e.mu.RUnlock() |
| if buildingIdx >= len(e.totalShedSteps) { |
| return false, 0.0 |
| } |
| |
| shedRatio := float64(e.totalShedSteps[buildingIdx]) / float64(e.step+1) |
| cycleRatio := float64(e.thermalCycleCounts[buildingIdx]) / float64(e.step+1) |
| exploited := shedRatio > 0.7 || cycleRatio > 0.4 |
| penalty := 0.0 |
| if exploited { |
| penalty = math.Max(shedRatio-0.7, 0)*0.5 + math.Max(cycleRatio-0.4, 0)*0.3 |
| } |
| return exploited, penalty |
| } |
|
|