datacenter-env / server /simulation.py
Mephisto2412's picture
fresh start
09ecf23
"""
Data Centre Thermal Simulation β€” V2
Physics-based multi-zone server room model.
Responsibilities (pure physics only):
- ZoneState β€” per-zone thermal + sensor state
- FacilityState β€” facility-level state including chiller, carbon, load curves
- step_thermal(), apply_action_with_rate_limiting(), advance_time()
- Sensor drift, chiller fault injection, free-cooling, load/carbon advancement
No RL concepts. No reward logic. Just physics.
"""
import random
import math
from dataclasses import dataclass, field
from typing import Optional, List, Dict
# ── Physical constants ────────────────────────────────────────────────────────
THERMAL_MASS_KJ_PER_K = 850.0
SECONDS_PER_STEP = 300 # 5-minute steps
IT_HEAT_FRACTION = 0.98
# Action rate limits (enforced by apply_action_with_rate_limiting)
MAX_FAN_DELTA_PER_STEP = 20.0 # Β± % per step
MAX_SUPPLY_TEMP_DELTA = 2.0 # Β± Β°C per step
MAX_CHILLER_SETPOINT_DELTA = 1.0 # Β± Β°C per step
# Chiller operating bounds
CHILLER_SETPOINT_MIN = 6.0
CHILLER_SETPOINT_MAX = 15.0
# Fan / supply air bounds
FAN_SPEED_MIN = 0.0
FAN_SPEED_MAX = 100.0
SUPPLY_AIR_TEMP_MIN = 16.0
SUPPLY_AIR_TEMP_MAX = 26.0
# Reference delta-T used in cooling-power scaling (Β°C).
# Must match the natural hot-aisle rise at design conditions:
# heat_in / (max_mass_flow Γ— cp_air) = 441 / (50 Γ— 1.006) β‰ˆ 8.77 Β°C
# At REF=15 effective cooling at equilibrium was only 280 kW < 441 kW heat-in,
# making the zone impossible to cool at any fan speed. REF=9 gives 468 kW.
COOLING_DELTA_T_REF = 9.0
# Mass-flow reference: easy zone (480 kW capacity) at 100% fan moves 50 kg/s.
# All other zones scale proportionally so every zone has the same 6% cooling
# headroom above heat-in: capacity / (mass_flow_max Γ— 1.006 Γ— REF) β‰ˆ 1.06.
MASS_FLOW_REF_CAPACITY_KW = 480.0 # easy zone cooling capacity (calibration reference)
MASS_FLOW_REF_KGS = 50.0 # mass flow at 100% fan for the reference zone
# Cube-law fan power coefficient (kW at 100 %)
FAN_POWER_MAX_KW = 8.0
# Envelope heat-transfer coefficient (kW / Β°C).
# Real data centres are extremely well insulated; at a 10 Β°C indoor-outdoor
# differential a 500 kW zone loses only ~0.5 kW through the fabric β€”
# negligible compared to IT heat. The previous value of 0.5 kW/Β°C was
# 10Γ— too high and dominated zone-level thermal behaviour unrealistically.
ENVELOPE_CONDUCTANCE = 0.05
# Hot-aisle temperature rise coefficient
HOT_AISLE_RISE_COEFF = 5.0
# ── Zone priority labels ──────────────────────────────────────────────────────
PRIORITY_LOW = 0
PRIORITY_MEDIUM = 1
PRIORITY_CRITICAL = 2
# ── Dataclasses ───────────────────────────────────────────────────────────────
@dataclass
class ZoneState:
"""Per-zone thermal and sensor state (V2 β€” extended from V1)."""
# ── Identity ──────────────────────────────────────────────────────────────
zone_id: str
# ── Thermal state ─────────────────────────────────────────────────────────
temp_c: float # cold-aisle temperature (true value)
it_load_kw: float
fan_speed_pct: float
cooling_capacity_kw: float
# ── V1 fields (kept) ──────────────────────────────────────────────────────
setpoint_c: float = 22.0 # legacy alias; use supply_air_temp_setpoint_c
humidity_pct: float = 45.0
sensor_faulty: bool = False
# ── V2 new fields ─────────────────────────────────────────────────────────
hot_aisle_temp_c: float = 0.0 # return-air temperature (computed)
supply_air_temp_c: float = 18.0 # actual delivered supply air temp
supply_air_temp_setpoint_c: float = 22.0 # agent-controlled setpoint
zone_priority: int = PRIORITY_MEDIUM # 0=low, 1=medium, 2=critical
sensor_drift_c: float = 0.0 # cumulative sensor drift (Β°C)
sensor_confidence: float = 1.0 # [0.0–1.0] reliability weight
base_it_load_kw: float = 0.0 # baseline load before diurnal variation
job_load_kw: float = 0.0 # cluster mode: IT load from admitted jobs (kW)
it_load_pct: float = 0.0 # normalised load [0–1]
thermal_mass_kj_per_k: float = 850.0 # room thermal mass (kJ/K); scale per zone size
def __post_init__(self):
if self.base_it_load_kw == 0.0:
self.base_it_load_kw = self.it_load_kw
if self.hot_aisle_temp_c == 0.0:
self.hot_aisle_temp_c = self.temp_c + 8.0
self.supply_air_temp_c = self.supply_air_temp_setpoint_c
# ── Computed properties ───────────────────────────────────────────────────
@property
def reported_temp_c(self) -> float:
"""Sensor reading β€” may be drifted if sensor_faulty is True."""
noise = random.gauss(0, 0.1)
if self.sensor_faulty:
return round(self.temp_c + self.sensor_drift_c + noise, 2)
return round(self.temp_c + noise, 2)
@property
def actual_cooling_kw(self) -> float:
"""Cooling power delivered (before delta-T scaling)."""
return self.cooling_capacity_kw * (self.fan_speed_pct / 100.0)
@property
def fan_power_kw(self) -> float:
"""Fan electrical power (cube law), scaled by zone cooling capacity."""
capacity_ratio = self.cooling_capacity_kw / MASS_FLOW_REF_CAPACITY_KW
return FAN_POWER_MAX_KW * capacity_ratio * (self.fan_speed_pct / 100.0) ** 3
# ── Thermal step ──────────────────────────────────────────────────────────
def step_thermal(self, outside_temp_c: float, supply_air_temp_c: Optional[float] = None):
"""
Advance zone temperature by one time step.
V2 upgrade: cooling power is scaled by the delta-T between the hot aisle
and the delivered supply air temperature (not just fan speed alone).
"""
if supply_air_temp_c is None:
supply_air_temp_c = self.supply_air_temp_c
heat_in_kw = self.it_load_kw * IT_HEAT_FRACTION
# Effective cooling scales with temperature driving force
delta_t = max(self.hot_aisle_temp_c - supply_air_temp_c, 0.0)
scaling = delta_t / COOLING_DELTA_T_REF
effective_cooling_kw = self.actual_cooling_kw * scaling
effective_cooling_kw = min(effective_cooling_kw, heat_in_kw * 3.0)
# Envelope loss / gain (positive = heat flowing out when room is warm)
envelope_kw = ENVELOPE_CONDUCTANCE * (self.temp_c - outside_temp_c)
net_kw = heat_in_kw - effective_cooling_kw - envelope_kw
net_kj = net_kw * SECONDS_PER_STEP
delta_temp = net_kj / self.thermal_mass_kj_per_k # per-zone thermal mass
# delta_temp = max(-5.0, min(5.0, delta_temp))
delta_temp = max(-2.0, min(2.0, delta_temp))
self.temp_c = round(self.temp_c + delta_temp, 3)
# Derived temperatures
# denominator = max(effective_cooling_kw, 0.1)
# self.hot_aisle_temp_c = round(
# self.temp_c + (heat_in_kw / denominator) * HOT_AISLE_RISE_COEFF, 3
# )
# self.hot_aisle_temp_c = max(self.hot_aisle_temp_c, self.temp_c + 1.0)
# Mass flow scales with zone cooling capacity so every zone has the
# same ~6% cooling headroom above heat-in (fixes small-zone uncoolability).
mass_flow = (
(self.cooling_capacity_kw / MASS_FLOW_REF_CAPACITY_KW)
* MASS_FLOW_REF_KGS
* (self.fan_speed_pct / 100.0)
) # kg/s
if self.fan_speed_pct > 0.5:
self.hot_aisle_temp_c = round(
supply_air_temp_c + heat_in_kw / (mass_flow * 1.006), 3
)
else:
self.hot_aisle_temp_c = round(min(supply_air_temp_c + 50.0, 85.0), 3)
self.supply_air_temp_c = supply_air_temp_c
# Cold-aisle temperature floor: the zone cannot be colder than the
# supply air being delivered into it (physically impossible).
self.temp_c = round(max(self.temp_c, supply_air_temp_c), 3)
# Humidity heuristic
if self.temp_c > 26:
self.humidity_pct = min(70.0, self.humidity_pct + 0.5)
elif self.temp_c < 20:
self.humidity_pct = max(30.0, self.humidity_pct - 0.3)
# Normalised load
peak_load = max(self.base_it_load_kw, 1.0)
self.it_load_pct = round(min(self.it_load_kw / peak_load, 1.0), 4)
# ── Facility state ────────────────────────────────────────────────────────────
@dataclass
class FacilityState:
"""Facility-level state (V2 β€” extended from V1)."""
# ── Core ──────────────────────────────────────────────────────────────────
zones: List[ZoneState]
outside_temp_c: float
# ── V1 fields (kept) ──────────────────────────────────────────────────────
chiller_active: bool = True
chiller_cop: float = 3.5
ups_efficiency: float = 0.96
step_number: int = 0
timestamp_hour: float = 14.0
grid_carbon_intensity: str = "medium"
maintenance_notes: List[str] = field(default_factory=list)
upcoming_events: List[str] = field(default_factory=list)
# ── V2 new fields ─────────────────────────────────────────────────────────
chiller_setpoint_c: float = 10.0 # agent-controlled, [6–15]Β°C
chiller_fault_level: float = 0.0 # 0 = healthy, 1 = full failure
chiller_fault_step: int = -1 # step when fault triggers (-1 = never)
wet_bulb_temp_c: float = 18.0 # enables free-cooling logic
grid_carbon_curve: List[float] = field(default_factory=list) # 24-hr normalised [0–1]
load_curve: List[float] = field(default_factory=list) # 24-hr normalised [0–1]
pid_baseline_pue: float = 1.55 # pre-computed PID reference PUE
grid_carbon_intensity_normalized: float = 0.5 # [0–1] numeric companion
minutes_per_step: float = 5.0 # sim minutes per env step; set by environment.py for timeline condensation
cluster_mode: bool = False # when True, bypass diurnal load curve; loads come from admitted jobs
# ── Convenience constants ─────────────────────────────────────────────────
_BASE_CHILLER_COP: float = field(default=3.5, init=False, repr=False)
def __post_init__(self):
self._BASE_CHILLER_COP = self.chiller_cop
if not self.grid_carbon_curve:
self.grid_carbon_curve = _default_carbon_curve()
if not self.load_curve:
self.load_curve = _default_load_curve()
# ── Computed properties ───────────────────────────────────────────────────
@property
def total_it_load_kw(self) -> float:
return sum(z.it_load_kw for z in self.zones)
@property
def total_fan_power_kw(self) -> float:
return sum(z.fan_power_kw for z in self.zones)
@property
def effective_chiller_cop(self) -> float:
"""
COP adjusted for supply-water temperature and outdoor conditions.
Real-world behaviour:
- Higher leaving-water temp (chiller_setpoint_c) β†’ less compression work β†’ higher COP.
- Higher outdoor temp β†’ harder heat rejection β†’ lower COP.
Fault degradation overrides temperature adjustment: once inject_chiller_fault()
has modified self.chiller_cop, that modified value is used as-is.
"""
if self.chiller_fault_level > 0:
return self.chiller_cop # fault path; COP already degraded
cop = self._BASE_CHILLER_COP
cop *= (1.0 + 0.03 * (self.chiller_setpoint_c - 10.0)) # +3 % per Β°C higher setpoint
cop *= (1.0 - 0.02 * max(0.0, self.outside_temp_c - 20.0)) # βˆ’2 % per Β°C outdoor > 20
return max(1.0, min(6.0, cop))
@property
def chiller_power_kw(self) -> float:
if not self.chiller_active:
return 0.0
total_cooling = sum(z.actual_cooling_kw for z in self.zones)
cop = max(self.effective_chiller_cop, 0.01)
return total_cooling / cop
@property
def pue(self) -> float:
it = self.total_it_load_kw / max(self.ups_efficiency, 0.01)
cooling = self.total_fan_power_kw + self.chiller_power_kw
return round((it + cooling) / max(self.total_it_load_kw, 1.0), 4)
# ── Time advancement ──────────────────────────────────────────────────────
def advance_time(self):
"""Tick clock forward by one step (minutes_per_step minutes)."""
self.timestamp_hour = (self.timestamp_hour + self.minutes_per_step / 60.0) % 24.0
self.step_number += 1
# ── Load advancement ──────────────────────────────────────────────────────
def advance_load(self):
"""
Update IT load per zone.
Standard mode: diurnal curve Γ— base_it_load_kw + random batch burst.
Cluster mode: base_it_load_kw (fixed baseline, e.g. inference always on)
+ job_load_kw (set each window by ClusterEnvironment from
EpisodeLedger.active_load_kw). No diurnal curve applied.
"""
if self.cluster_mode:
for zone in self.zones:
zone.it_load_kw = round(zone.base_it_load_kw + zone.job_load_kw, 2)
peak = max(zone.base_it_load_kw + zone.job_load_kw, zone.base_it_load_kw, 1.0)
zone.it_load_pct = round(min(zone.it_load_kw / peak, 1.5), 4)
return
if not self.load_curve:
return
hour_idx = int(self.timestamp_hour) % 24
normalised_load = self.load_curve[hour_idx]
for zone in self.zones:
# Scale load around the zone's base
batch_burst = 0.0
if random.random() < 0.05: # 5 % chance of batch arrival
batch_burst = random.uniform(0.03, 0.08)
zone.it_load_kw = round(
zone.base_it_load_kw * (normalised_load + batch_burst), 2
)
peak = max(zone.base_it_load_kw, 1.0)
zone.it_load_pct = round(min(zone.it_load_kw / peak, 1.0), 4)
# ── Cluster mode: job load injection ─────────────────────────────────────
def set_zone_job_load(self, zone_id: str, job_load_kw: float) -> None:
"""
Set the admitted-job IT load for a single zone.
Called by ClusterEnvironment when a job is admitted to or expires from a zone.
Only meaningful when cluster_mode=True.
"""
for zone in self.zones:
if zone.zone_id == zone_id:
zone.job_load_kw = max(0.0, round(job_load_kw, 2))
return
def set_all_job_loads(self, load_map: Dict[str, float]) -> None:
"""
Set admitted-job IT load for all zones in one call.
load_map: {zone_id: job_load_kw}. Zones absent from the map are set to 0.
Called by ClusterEnvironment at the start of each negotiation window.
Only meaningful when cluster_mode=True.
"""
for zone in self.zones:
zone.job_load_kw = max(0.0, round(load_map.get(zone.zone_id, 0.0), 2))
def total_job_load_kw(self) -> float:
"""Sum of all admitted job loads across zones (cluster mode)."""
return sum(z.job_load_kw for z in self.zones)
# ── Carbon advancement ────────────────────────────────────────────────────
def advance_carbon(self):
"""Update grid carbon intensity from the 24-hr curve."""
if not self.grid_carbon_curve:
return
hour_idx = int(self.timestamp_hour) % 24
intensity = self.grid_carbon_curve[hour_idx]
self.grid_carbon_intensity_normalized = round(intensity, 4)
# Also update the human-readable label
if intensity < 0.25:
self.grid_carbon_intensity = "low"
elif intensity < 0.55:
self.grid_carbon_intensity = "medium"
elif intensity < 0.80:
self.grid_carbon_intensity = "high"
else:
self.grid_carbon_intensity = "critical_high"
# ── Sensor drift ──────────────────────────────────────────────────────────
def apply_sensor_drift(self, step: int):
"""
Gradually drift faulty zone sensors over time.
Confidence decreases proportionally as drift grows.
Uses an effective step scaled by minutes_per_step so that drift
reaches its maximum at the same simulated time regardless of
whether the episode is condensed (larger minutes_per_step).
At 5 min/step the sensor stabilises at ~12 Β°C around step 50.
At 24 min/step (medium condensed) it stabilises around step 10.
"""
for zone in self.zones:
if not zone.sensor_faulty:
continue
# Scale step by how many 5-min periods this step represents
effective_step = int(step * self.minutes_per_step / 5.0)
target_drift = min(3.0 + effective_step * 0.18, 12.0)
zone.sensor_drift_c = round(target_drift, 2)
# Confidence shrinks from 1.0 β†’ ~0.1 as drift grows from 0 β†’ 12
zone.sensor_confidence = round(max(0.1, 1.0 - zone.sensor_drift_c / 13.0), 3)
# ── Chiller fault injection ───────────────────────────────────────────────
def inject_chiller_fault(self, step: int):
"""
Degrade chiller COP based on fault progression.
Fault schedule (hard task):
step 15 β†’ COP drops from 3.5 β†’ 0.8 (partial failure)
step 20 β†’ chiller goes fully offline
"""
if self.chiller_fault_step < 0:
return # no fault configured for this scenario
if step < self.chiller_fault_step:
return # fault not yet triggered
steps_since_fault = step - self.chiller_fault_step
if steps_since_fault < 5:
# Ramp COP down over 5 steps: 3.5 β†’ 0.8
progress = steps_since_fault / 5.0
degraded_cop = self._BASE_CHILLER_COP + progress * (0.8 - self._BASE_CHILLER_COP)
self.chiller_cop = round(max(degraded_cop, 0.8), 3)
self.chiller_fault_level = round(progress, 3)
else:
# Full failure
self.chiller_active = False
self.chiller_cop = 0.0
self.chiller_fault_level = 1.0
# ── Free-cooling potential ────────────────────────────────────────────────
def compute_free_cooling_potential(self) -> float:
"""
Estimate how much cooling can come from outside air alone.
Returns a fraction [0.0–1.0] of the total required cooling that
could be met via economiser / free-air cooling, based on the
wet-bulb temperature vs. the average zone cold-aisle temperature.
Rule of thumb: free cooling is viable when wet-bulb < (zone_temp - 4Β°C).
"""
if not self.zones:
return 0.0
avg_zone_temp = sum(z.temp_c for z in self.zones) / len(self.zones)
headroom = avg_zone_temp - self.wet_bulb_temp_c - 4.0
if headroom <= 0.0:
return 0.0
# Scale linearly: full free cooling when headroom β‰₯ 10 Β°C
return round(min(headroom / 10.0, 1.0), 3)
# ── Chiller setpoint β†’ supply air temperature propagation ─────────────────
def propagate_chiller_setpoint(self):
"""
Translate the chiller setpoint into actual supply air temperature
for each zone, accounting for chiller COP and free-cooling potential.
When the chiller is offline, supply air approaches outdoor wet-bulb.
Free-cooling (economiser) blending is only applied when outdoor air
is actually cooler than what the chiller delivers. If the wet-bulb
temperature is above the chilled-supply setpoint β€” e.g. a hot summer
day with wet-bulb=22 Β°C and supply setpoint=18 Β°C β€” blending outdoor
air would *raise* the supply temperature and reduce cooling effectiveness.
In that case, the chilled supply is used as-is.
"""
free_cooling = self.compute_free_cooling_potential()
free_cooling_air_temp = self.wet_bulb_temp_c + 2.0
for zone in self.zones:
if self.chiller_active:
# Chiller can deliver down to chiller_setpoint + duct loss
# but zone setpoint is the agent's actual control lever
chiller_floor = self.chiller_setpoint_c + 0.5
# Agent setpoint is bounded below by what the chiller can deliver
chilled_supply = max(zone.supply_air_temp_setpoint_c, chiller_floor)
else:
chilled_supply = free_cooling_air_temp
# Only blend outdoor (free-cooling) air when it is colder than the
# chilled supply β€” otherwise free cooling would warm, not cool.
if free_cooling > 0.0 and free_cooling_air_temp < chilled_supply:
effective_supply = (
free_cooling * free_cooling_air_temp
+ (1.0 - free_cooling) * chilled_supply
)
else:
effective_supply = chilled_supply
zone.supply_air_temp_c = round(
max(SUPPLY_AIR_TEMP_MIN, min(effective_supply, SUPPLY_AIR_TEMP_MAX)), 2
)
# ── Action application with rate-limiting ─────────────────────────────────
def apply_action_with_rate_limiting(
self,
action: "DCAction",
last_action: "DCAction",
) -> Dict[str, bool]:
"""
Apply agent action, enforcing per-step delta limits.
Rate limits scale with the step's simulated duration so they remain
physically consistent under timeline condensation. At 5 min/step
(easy task) the baseline limits apply; at 36 min/step (hard task)
limits are multiplied by 7.2, capped at the physical range.
Returns
-------
info : dict
{
"chiller_setpoint_clipped": bool,
"chiller_toggled": bool,
"zones": {zone_id: {"fan_clipped": bool, "supply_temp_clipped": bool}}
}
"""
# Scale limits proportionally to simulated minutes per step.
# minutes_per_step / 5.0 gives 1.0 for the reference (easy) task.
scale = self.minutes_per_step / 5.0
eff_fan_delta = min(MAX_FAN_DELTA_PER_STEP * scale, FAN_SPEED_MAX)
eff_supply_delta = min(MAX_SUPPLY_TEMP_DELTA * scale,
SUPPLY_AIR_TEMP_MAX - SUPPLY_AIR_TEMP_MIN)
eff_chiller_delta = min(MAX_CHILLER_SETPOINT_DELTA * scale,
CHILLER_SETPOINT_MAX - CHILLER_SETPOINT_MIN)
info: Dict = {
"chiller_setpoint_clipped": False,
"chiller_toggled": False,
"zones": {},
}
# ── Facility-level levers ─────────────────────────────────────────────
# Chiller setpoint
raw_delta = action.chiller_setpoint_c - last_action.chiller_setpoint_c
clipped_delta = _clip(raw_delta, eff_chiller_delta)
if abs(clipped_delta - raw_delta) > 1e-6:
info["chiller_setpoint_clipped"] = True
new_setpoint = last_action.chiller_setpoint_c + clipped_delta
self.chiller_setpoint_c = round(
_clip_bounds(new_setpoint, CHILLER_SETPOINT_MIN, CHILLER_SETPOINT_MAX), 2
)
# Chiller on/off β€” allowed freely (nuclear option, but costly).
# Once fully failed (fault_level == 1.0) the toggle is ignored and a
# maintenance_note is appended so the agent gets explicit feedback.
if self.chiller_fault_level < 1.0:
self.chiller_active = action.chiller_active
info["chiller_toggled"] = (action.chiller_active != last_action.chiller_active)
elif action.chiller_active and not self.chiller_active:
# Agent asked to re-enable a fully failed chiller β€” inform it.
note = (
"ACTION IGNORED: chiller_active=true submitted but chiller is "
"offline due to fault. Fans are the only available cooling."
)
if note not in self.maintenance_notes:
self.maintenance_notes.append(note)
# ── Per-zone levers ───────────────────────────────────────────────────
zone_map = {z.zone_id: z for z in self.zones}
last_zone_map = {za.zone_id: za for za in last_action.zone_adjustments}
for adj in action.zone_adjustments:
zone = zone_map.get(adj.zone_id)
if zone is None:
continue
last_adj = last_zone_map.get(adj.zone_id)
last_fan = last_adj.fan_speed_pct if last_adj else zone.fan_speed_pct
last_supply = (
last_adj.supply_air_temp_setpoint_c
if last_adj
else zone.supply_air_temp_setpoint_c
)
zone_info = {"fan_clipped": False, "supply_temp_clipped": False}
# Fan speed
fan_raw_delta = adj.fan_speed_pct - last_fan
fan_clipped_delta = _clip(fan_raw_delta, eff_fan_delta)
if abs(fan_clipped_delta - fan_raw_delta) > 1e-6:
zone_info["fan_clipped"] = True
zone.fan_speed_pct = round(
_clip_bounds(last_fan + fan_clipped_delta, FAN_SPEED_MIN, FAN_SPEED_MAX), 2
)
# Supply air temperature setpoint
supply_raw_delta = adj.supply_air_temp_setpoint_c - last_supply
supply_clipped_delta = _clip(supply_raw_delta, eff_supply_delta)
if abs(supply_clipped_delta - supply_raw_delta) > 1e-6:
zone_info["supply_temp_clipped"] = True
zone.supply_air_temp_setpoint_c = round(
_clip_bounds(
last_supply + supply_clipped_delta,
SUPPLY_AIR_TEMP_MIN,
SUPPLY_AIR_TEMP_MAX,
),
2,
)
info["zones"][adj.zone_id] = zone_info
return info
# ── Full simulation step ──────────────────────────────────────────────────
def step(
self,
action: "DCAction",
last_action: "DCAction",
) -> Dict:
"""
Advance simulation by one step.
Order of operations (per spec Β§4 Transition Dynamics):
1. Rate-limit and clip action
2. Apply supply air temp setpoint (propagate chiller β†’ zones)
3. Update load and carbon from curves
4. Apply chiller fault (if scheduled)
5. Thermal step per zone
6. Sensor drift
7. Advance time
"""
# 1. Apply action (with clipping)
clip_info = self.apply_action_with_rate_limiting(action, last_action)
# 2. Propagate chiller setpoint β†’ zone supply air temps
self.propagate_chiller_setpoint()
# 3. Update load and carbon
self.advance_load()
self.advance_carbon()
# 4. Chiller fault injection
self.inject_chiller_fault(self.step_number)
# 5. Thermal step per zone
for zone in self.zones:
zone.step_thermal(self.outside_temp_c, zone.supply_air_temp_c)
# 6. Sensor drift
self.apply_sensor_drift(self.step_number)
# 7. Advance time
self.advance_time()
return {"action_clipped": clip_info}
# ── Observation serialisation ─────────────────────────────────────────────
def to_observation_dict(self) -> dict:
"""Serialise full V2 observation (scalars + text fields)."""
hour = self.timestamp_hour
return {
"step": self.step_number,
"timestamp_hour": round(hour, 2),
"timestamp_day_sin": round(math.sin(2 * math.pi * hour / 24.0), 6),
"timestamp_day_cos": round(math.cos(2 * math.pi * hour / 24.0), 6),
"outside_temp_c": round(self.outside_temp_c, 1),
"wet_bulb_temp_c": round(self.wet_bulb_temp_c, 1),
"chiller_active": self.chiller_active,
"chiller_setpoint_c": round(self.chiller_setpoint_c, 2),
"chiller_cop": round(self.chiller_cop, 3),
"chiller_fault_level": round(self.chiller_fault_level, 3),
"ups_efficiency": round(self.ups_efficiency, 4),
"current_pue": self.pue,
"grid_carbon_intensity": self.grid_carbon_intensity,
"grid_carbon_intensity_normalized": self.grid_carbon_intensity_normalized,
"free_cooling_potential": self.compute_free_cooling_potential(),
"zones": [
{
"zone_id": z.zone_id,
# For faulty zones the cold-aisle sensor reading is degraded β€” expose the
# reported (potentially drifted) value so agents face a genuine inference
# challenge. The true internal temp_c is reserved for graders only.
"cold_aisle_temp_c": z.reported_temp_c if z.sensor_faulty else round(z.temp_c, 3),
"hot_aisle_temp_c": round(z.hot_aisle_temp_c, 3),
"reported_temp_c": z.reported_temp_c,
"it_load_kw": round(z.it_load_kw, 1),
"it_load_pct": round(z.it_load_pct, 4),
"fan_speed_pct": round(z.fan_speed_pct, 1),
"supply_air_temp_c": round(z.supply_air_temp_c, 2),
"supply_air_temp_setpoint_c": round(z.supply_air_temp_setpoint_c, 2),
"cooling_capacity_kw": z.cooling_capacity_kw,
"humidity_pct": round(z.humidity_pct, 1),
"sensor_confidence": round(z.sensor_confidence, 3),
"sensor_drift_c": round(z.sensor_drift_c, 2),
"zone_priority": z.zone_priority,
}
for z in self.zones
],
"maintenance_notes": self.maintenance_notes,
"upcoming_events": self.upcoming_events,
}
# ── Default curves ────────────────────────────────────────────────────────────
def _default_carbon_curve() -> List[float]:
"""
Realistic 24-hour grid carbon intensity [0–1].
Low at night (wind/nuclear), peaks around midday (grid demand).
Indices correspond to hour-of-day (0–23).
"""
return [
0.20, 0.18, 0.17, 0.16, 0.17, 0.20, # 00–05 (night, renewables)
0.30, 0.45, 0.60, 0.72, 0.80, 0.85, # 06–11 (morning ramp)
0.88, 0.90, 0.87, 0.82, 0.75, 0.65, # 12–17 (midday peak)
0.55, 0.45, 0.38, 0.32, 0.27, 0.22, # 18–23 (evening wind)
]
def _default_load_curve() -> List[float]:
"""
Normalised IT load [0–1] across 24 hours.
Low at night, rises with business hours, peaks around 10–14h.
"""
return [
0.55, 0.52, 0.50, 0.50, 0.52, 0.58, # 00–05 (overnight batch)
0.65, 0.75, 0.88, 0.95, 0.98, 0.99, # 06–11 (morning surge)
1.00, 0.98, 0.96, 0.94, 0.90, 0.85, # 12–17 (business hours)
0.78, 0.72, 0.68, 0.64, 0.60, 0.57, # 18–23 (evening taper)
]
# ── Action stubs (imported by modules that call apply_action_with_rate_limiting)
# Full Pydantic models live in models.py; these stubs are used here only for
# type-hint purposes and to keep simulation.py import-free of pydantic.
class _ZoneAdjustmentStub:
"""Minimal duck-type for ZoneAdjustment used in rate-limiting logic."""
def __init__(self, zone_id: str, fan_speed_pct: float, supply_air_temp_setpoint_c: float):
self.zone_id = zone_id
self.fan_speed_pct = fan_speed_pct
self.supply_air_temp_setpoint_c = supply_air_temp_setpoint_c
class _DCActionStub:
"""Minimal duck-type for DCAction used in rate-limiting logic."""
def __init__(
self,
zone_adjustments: List[_ZoneAdjustmentStub],
chiller_setpoint_c: float,
chiller_active: bool,
reasoning: Optional[str] = None,
):
self.zone_adjustments = zone_adjustments
self.chiller_setpoint_c = chiller_setpoint_c
self.chiller_active = chiller_active
self.reasoning = reasoning
# Re-export under the names used in type hints above
DCAction = _DCActionStub
ZoneAdjustment = _ZoneAdjustmentStub
# ── Internal helpers ──────────────────────────────────────────────────────────
def _clip(value: float, max_abs: float) -> float:
"""Clip value to [-max_abs, +max_abs]."""
return max(-max_abs, min(max_abs, value))
def _clip_bounds(value: float, lo: float, hi: float) -> float:
"""Clip value to [lo, hi]."""
return max(lo, min(hi, value))