# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""Integration tests: full episode playback, config loading, cross-facility.
Validates:
- Known-good action sequences resolve each scenario
- Reward signals are well-behaved across full episodes
- YAML config loading produces valid, runnable environments
- Different facility sizes work correctly
- Episode metrics (PUE, temps, rewards) are in expected ranges
"""
from __future__ import annotations
import time
from pathlib import Path
import pytest
from dc_ops_env.config import (
BUILTIN_CONFIGS,
DatacenterConfig,
load_datacenter_config,
make_default_datacenter_config,
)
from dc_ops_env.models import DcOpsAction, DcOpsObservation
from dc_ops_env.server.dc_ops_env_environment import DcOpsEnvironment
from dc_ops_env.scenarios.registry import registered_scenario_ids
# ===========================================================================
# Config Loading Tests
# ===========================================================================
class TestConfigLoading:
    """Validate YAML config loading and the built-in facility configs."""

    def test_builtin_configs_exist(self) -> None:
        """Every path registered in BUILTIN_CONFIGS should exist on disk."""
        for name, path in BUILTIN_CONFIGS.items():
            assert path.exists(), f"Built-in config '{name}' not found at {path}"

    @pytest.mark.parametrize("config_name", ["default", "small", "large"])
    def test_load_builtin(self, config_name: str) -> None:
        """Each built-in config should load into a non-empty DatacenterConfig."""
        cfg = load_datacenter_config(config_name)
        assert isinstance(cfg, DatacenterConfig)
        assert len(cfg.zones) > 0
        # Every zone needs at least one rack and one CRAC unit.
        for zone in cfg.zones:
            assert len(zone.racks) > 0
            assert len(zone.crac_units) > 0

    def test_load_by_path(self) -> None:
        """Loading by explicit path (instead of by name) should work."""
        path = BUILTIN_CONFIGS["default"]
        cfg = load_datacenter_config(path)
        assert cfg.name == "DC-OPS Default Facility"

    def test_load_nonexistent_raises(self) -> None:
        """Loading a missing file should raise FileNotFoundError."""
        with pytest.raises(FileNotFoundError):
            load_datacenter_config("/nonexistent/path.yaml")

    def test_default_yaml_matches_programmatic(self) -> None:
        """YAML default config should match make_default_datacenter_config().

        Compares the name, zone count, outside temperature, and the total
        number of racks and CRAC units across all zones.
        """
        yaml_cfg = load_datacenter_config("default")
        prog_cfg = make_default_datacenter_config()
        assert yaml_cfg.name == prog_cfg.name
        assert len(yaml_cfg.zones) == len(prog_cfg.zones)
        assert yaml_cfg.outside_temp_c == prog_cfg.outside_temp_c

        # Same number of racks and CRACs
        yaml_racks = sum(len(z.racks) for z in yaml_cfg.zones)
        prog_racks = sum(len(z.racks) for z in prog_cfg.zones)
        assert yaml_racks == prog_racks

        yaml_cracs = sum(len(z.crac_units) for z in yaml_cfg.zones)
        prog_cracs = sum(len(z.crac_units) for z in prog_cfg.zones)
        assert yaml_cracs == prog_cracs

    def test_small_facility_dimensions(self) -> None:
        """Small facility: 1 zone, 10 racks, 80 kW total IT load, 1 UPS."""
        cfg = load_datacenter_config("small")
        assert len(cfg.zones) == 1
        total_racks = sum(len(z.racks) for z in cfg.zones)
        assert total_racks == 10
        total_it = sum(r.it_load_kw for z in cfg.zones for r in z.racks)
        assert total_it == pytest.approx(80.0)
        assert len(cfg.power.ups_units) == 1

    def test_large_facility_dimensions(self) -> None:
        """Large facility: 4 zones, 60 racks, 600 kW total IT load, 4 UPS."""
        cfg = load_datacenter_config("large")
        assert len(cfg.zones) == 4
        total_racks = sum(len(z.racks) for z in cfg.zones)
        assert total_racks == 60
        total_it = sum(r.it_load_kw for z in cfg.zones for r in z.racks)
        assert total_it == pytest.approx(600.0)
        assert len(cfg.power.ups_units) == 4

    def test_large_facility_has_h1_zone(self) -> None:
        """Large facility should include exactly one H1 high-density zone."""
        cfg = load_datacenter_config("large")
        h1_zones = [z for z in cfg.zones if z.ashrae_class == "H1"]
        assert len(h1_zones) == 1
        # Each H1 rack is expected to carry 20 kW. Compare with a tolerance,
        # as the other load checks in this class do, so a value that went
        # through float parsing/arithmetic does not fail on rounding noise.
        for rack in h1_zones[0].racks:
            assert rack.it_load_kw == pytest.approx(20.0)
# ===========================================================================
# Config-to-Environment Tests
# ===========================================================================
class TestConfigToEnvironment:
    """Check that each loaded config can drive a DcOpsEnvironment."""

    @pytest.mark.parametrize("config_name", ["default", "small", "large"])
    def test_env_runs_with_config(self, config_name: str) -> None:
        """Reset with the config, then take one ``check_status`` step."""
        facility = load_datacenter_config(config_name)
        environment = DcOpsEnvironment()

        initial = environment.reset(config=facility)
        assert isinstance(initial, DcOpsObservation)
        assert initial.done is False

        after_step = environment.step(DcOpsAction(command="check_status"))
        assert isinstance(after_step, DcOpsObservation)

    def test_small_facility_pue(self) -> None:
        """PUE reported by reset() for the small facility lies in (1.1, 2.5)."""
        facility = load_datacenter_config("small")
        environment = DcOpsEnvironment()
        pue = environment.reset(config=facility).metadata["pue"]
        assert 1.1 < pue < 2.5, f"PUE {pue} out of realistic range"

    def test_large_facility_total_load(self) -> None:
        """Total IT load reported by reset() is 600 kW within 1 %."""
        facility = load_datacenter_config("large")
        environment = DcOpsEnvironment()
        reported_kw = environment.reset(config=facility).metadata["total_it_load_kw"]
        assert reported_kw == pytest.approx(600.0, rel=0.01)
# ===========================================================================
# Full Episode Playback: Thermal Scenarios
# ===========================================================================
class TestEpisodePlaybackThermal:
    """Full episode playback with known-good action sequences for thermal scenarios."""

    def test_a1_optimal_episode(self) -> None:
        """A1 (Cooling Setpoint Optimization): raise setpoints to reduce PUE.

        Sequence: check_status, raise each CRAC setpoint, then wait.
        The PUE after the sequence must be lower than the PUE at reset.
        """
        env = DcOpsEnvironment()
        obs = env.reset(scenario="A1")
        pue_initial = obs.metadata["pue"]

        # 1. Check status first
        obs = env.step(DcOpsAction(command="check_status"))
        assert not obs.done

        # 2. Command a setpoint of 24 on all four CRACs
        for crac_id in ["CRAC-1", "CRAC-2", "CRAC-3", "CRAC-4"]:
            obs = env.step(DcOpsAction(command=f"adjust_setpoint {crac_id} 24"))

        # 3. Wait up to 5 steps, stopping early if the episode ends
        for _ in range(5):
            obs = env.step(DcOpsAction(command="wait"))
            if obs.done:
                break

        pue_final = obs.metadata["pue"]
        # PUE should have improved (lower is better)
        assert pue_final < pue_initial, (
            f"PUE should improve: {pue_initial:.2f} → {pue_final:.2f}"
        )

    def test_a2_optimal_episode(self) -> None:
        """A2 (Thermal Event Response): diagnose CRAC-3, compensate with remaining units.

        Sequence: diagnose, raise fan speed on the other CRACs, adjust their
        setpoints, then wait. Only a lower bound on cumulative reward is
        asserted; full resolution is not required.
        """
        env = DcOpsEnvironment()
        obs = env.reset(scenario="A2")

        # 1. Diagnose the failed CRAC. A single case-insensitive check covers
        #    both "COMPRESSOR" and "compressor" spellings in the report.
        obs = env.step(DcOpsAction(command="diagnose CRAC-3"))
        assert "compressor" in obs.action_result.lower(), (
            f"Diagnosis should mention the compressor, got: {obs.action_result!r}"
        )

        # 2. Increase fan speed on remaining CRACs
        for crac_id in ["CRAC-1", "CRAC-2", "CRAC-4"]:
            obs = env.step(DcOpsAction(command=f"set_fan_speed {crac_id} 100"))

        # 3. Set setpoints on the surviving units to compensate
        for crac_id in ["CRAC-1", "CRAC-2", "CRAC-4"]:
            obs = env.step(DcOpsAction(command=f"adjust_setpoint {crac_id} 16"))

        # 4. Wait up to 8 steps for stabilization
        for _ in range(8):
            obs = env.step(DcOpsAction(command="wait"))
            if obs.done:
                break

        # Even if not fully resolved, reward should be reasonable
        assert obs.metadata["cumulative_reward"] > -5.0

    def test_a4_episode_with_load_shedding(self) -> None:
        """A4 (CRAC Failure Cascade): diagnose both, compensate, shed load.

        Two CRACs are treated as failed (CRAC-1 and CRAC-3). Only a lower
        bound on cumulative reward is asserted.
        """
        env = DcOpsEnvironment()
        obs = env.reset(scenario="A4")

        # 1. Diagnose both failed units
        obs = env.step(DcOpsAction(command="diagnose CRAC-1"))
        obs = env.step(DcOpsAction(command="diagnose CRAC-3"))

        # 2. Max out surviving CRACs
        obs = env.step(DcOpsAction(command="set_fan_speed CRAC-2 100"))
        obs = env.step(DcOpsAction(command="set_fan_speed CRAC-4 100"))
        obs = env.step(DcOpsAction(command="adjust_setpoint CRAC-2 15"))
        obs = env.step(DcOpsAction(command="adjust_setpoint CRAC-4 15"))

        # 3. Shed load on selected racks
        for rack_id in ["A-01", "A-02", "B-01", "B-02"]:
            obs = env.step(DcOpsAction(command=f"set_rack_load {rack_id} 4"))

        # 4. Wait up to 10 steps and monitor
        for _ in range(10):
            obs = env.step(DcOpsAction(command="wait"))
            if obs.done:
                break

        # Hard scenario — may not fully resolve, but should make progress
        assert obs.metadata["cumulative_reward"] > -10.0
# ===========================================================================
# Full Episode Playback: Power Scenarios
# ===========================================================================
class TestEpisodePlaybackPower:
    """Replay known-good command sequences against the power scenarios."""

    def test_b1_optimal_episode(self) -> None:
        """B1 (UPS Alarm Response): diagnose UPS-1, then acknowledge the alarm.

        The episode must still be open after the diagnosis and finished after
        the acknowledgement, with a final-step reward above 0.5.
        """
        environment = DcOpsEnvironment()
        environment.reset(scenario="B1")

        diagnosis = environment.step(DcOpsAction(command="diagnose UPS-1"))
        assert not diagnosis.done

        acknowledged = environment.step(DcOpsAction(command="acknowledge_alarm"))
        assert acknowledged.done, "B1 should resolve after diagnose + acknowledge"
        # Speed bonus: (10 - 2) / 10 = 0.8
        assert acknowledged.reward > 0.5, "Should have significant speed bonus"

    def test_b3_optimal_episode(self) -> None:
        """B3 (Generator Test Protocol): walk through the test sequence.

        check_status, start_generator, wait, diagnose GEN-1, stop_generator,
        wait, acknowledge_alarm. Only the final command may end the episode.
        """
        environment = DcOpsEnvironment()
        environment.reset(scenario="B3")

        # (command, whether to assert the episode is still open afterwards).
        # The two "wait" steps give the generator time to start / cool down
        # and are not followed by a done-check.
        protocol = (
            ("check_status", True),
            ("start_generator", True),
            ("wait", False),
            ("diagnose GEN-1", True),
            ("stop_generator", True),
            ("wait", False),
        )
        for command, must_stay_open in protocol:
            result = environment.step(DcOpsAction(command=command))
            if must_stay_open:
                assert not result.done

        final = environment.step(DcOpsAction(command="acknowledge_alarm"))
        assert final.done, "B3 should resolve after full protocol"

    def test_b4_episode_with_load_shedding(self) -> None:
        """B4 (Power Failure Cascade): diagnose, shed load, then wait.

        Only a lower bound on cumulative reward is asserted; the scenario is
        not required to resolve.
        """
        environment = DcOpsEnvironment()
        environment.reset(scenario="B4")

        opening_moves = (
            # Understand the situation
            "diagnose UPS-1",
            "diagnose UPS-2",
            # Shed load on two racks
            "set_rack_load A-01 4",
            "set_rack_load B-01 4",
            # Check generator status
            "diagnose GEN-1",
        )
        for command in opening_moves:
            latest = environment.step(DcOpsAction(command=command))

        # Wait up to 14 steps, stopping as soon as the episode finishes.
        for _ in range(14):
            latest = environment.step(DcOpsAction(command="wait"))
            if latest.done:
                break

        assert latest.metadata["cumulative_reward"] > -10.0
# ===========================================================================
# Reward Signal Quality
# ===========================================================================
class TestRewardSignalQuality:
    """Sanity checks on the reward values produced during episodes."""

    def test_rewards_bounded_per_step(self) -> None:
        """Each step reward in an all-``wait`` A2 episode stays in [-2, 2]."""
        environment = DcOpsEnvironment()
        environment.reset(scenario="A2")
        wait = DcOpsAction(command="wait")
        steps_taken = 0
        while steps_taken < 15:
            obs = environment.step(wait)
            steps_taken += 1
            # Base reward is [-1, 1], speed bonus can add up to 1.0
            assert -2.0 <= obs.reward <= 2.0, f"Reward {obs.reward} out of bounds"
            if obs.done:
                break

    def test_good_actions_beat_bad_actions(self) -> None:
        """Diagnose + acknowledge on B1 must out-score an all-``wait`` episode."""
        environment = DcOpsEnvironment()

        # Episode 1: diagnose, then acknowledge.
        environment.reset(scenario="B1")
        environment.step(DcOpsAction(command="diagnose UPS-1"))
        good_final = environment.step(DcOpsAction(command="acknowledge_alarm"))
        r_good = good_final.metadata["cumulative_reward"]

        # Episode 2: up to ten waits, stopping when the episode ends.
        environment.reset(scenario="B1")
        for _ in range(10):
            bad_final = environment.step(DcOpsAction(command="wait"))
            if bad_final.done:
                break
        r_bad = bad_final.metadata["cumulative_reward"]

        assert r_good > r_bad, f"Good ({r_good:.2f}) should beat bad ({r_bad:.2f})"

    def test_procedure_bonus_visible(self) -> None:
        """Opening with ``check_status`` should score higher than opening with ``wait``.

        Both A1 episodes are three steps long and share the last two commands
        (adjust_setpoint CRAC-1 22, wait); they differ only in the first one.
        """
        environment = DcOpsEnvironment()
        shared_tail = ("adjust_setpoint CRAC-1 22", "wait")
        totals = []
        # First opener follows procedure, second one skips it.
        for opener in ("check_status", "wait"):
            environment.reset(scenario="A1")
            for command in (opener, *shared_tail):
                last = environment.step(DcOpsAction(command=command))
            totals.append(last.metadata["cumulative_reward"])
        r_with, r_without = totals

        assert r_with > r_without, (
            f"Procedure bonus not visible: with={r_with:.3f} vs without={r_without:.3f}"
        )

    @pytest.mark.parametrize("scenario_id", registered_scenario_ids())
    def test_no_nan_rewards(self, scenario_id: str) -> None:
        """No registered scenario may emit a NaN or infinite step reward."""
        import math

        environment = DcOpsEnvironment()
        environment.reset(scenario=scenario_id)
        probe = DcOpsAction(command="check_status")
        for _ in range(5):
            obs = environment.step(probe)
            reward = obs.reward
            assert not math.isnan(reward), f"NaN reward in {scenario_id}"
            assert not math.isinf(reward), f"Inf reward in {scenario_id}"
            if obs.done:
                break
# ===========================================================================
# Cross-Facility Scenario Tests
# ===========================================================================
class TestCrossFacility:
    """Exercise the environment with non-default facility configs."""

    def test_scenario_with_small_facility(self) -> None:
        """Basic commands work on the small facility (no scenario selected)."""
        facility = load_datacenter_config("small")
        environment = DcOpsEnvironment()

        # Reset with only a config and a 5-step budget.
        initial = environment.reset(config=facility, step_budget=5)
        assert initial.done is False

        status = environment.step(DcOpsAction(command="check_status"))
        assert "status" in status.action_result.lower()

        report = environment.step(DcOpsAction(command="diagnose CRAC-1"))
        assert "Diagnostic Report" in report.action_result

    def test_large_facility_steady_state(self) -> None:
        """Large-facility reset metrics: PUE in (1.1, 3.0), positive loads."""
        facility = load_datacenter_config("large")
        environment = DcOpsEnvironment()
        metrics = environment.reset(config=facility, step_budget=10).metadata

        pue = metrics["pue"]
        assert 1.1 < pue < 3.0, f"Large facility PUE {pue} unrealistic"

        cooling_kw = metrics["total_cooling_power_kw"]
        it_kw = metrics["total_it_load_kw"]
        assert cooling_kw > 0
        assert it_kw > 0
# ===========================================================================
# Episode Metrics & Physics Consistency
# ===========================================================================
class TestEpisodeMetrics:
    """Physics-consistency checks on the metrics reported in observations."""

    def test_pue_always_above_one(self) -> None:
        """PUE must never drop below 1.0 during an all-``wait`` A1 episode."""
        environment = DcOpsEnvironment()
        environment.reset(scenario="A1")
        wait = DcOpsAction(command="wait")
        for _ in range(10):
            current = environment.step(wait)
            assert current.metadata["pue"] >= 1.0
            if current.done:
                break

    def test_higher_load_raises_temperature(self) -> None:
        """Raising load on three zone-A racks should warm zone A's cold aisle."""
        environment = DcOpsEnvironment()
        baseline = environment.reset()
        t_before = baseline.metadata["zones"]["zone_a"]["cold_aisle_temp_c"]

        # Set three racks to a load of 15.
        for rack_id in ("A-01", "A-02", "A-03"):
            environment.step(DcOpsAction(command=f"set_rack_load {rack_id} 15"))

        # Let the thermal model respond over seven wait steps.
        for _ in range(7):
            latest = environment.step(DcOpsAction(command="wait"))
        t_after = latest.metadata["zones"]["zone_a"]["cold_aisle_temp_c"]

        assert t_after > t_before, (
            f"Temp should rise with more load: {t_before:.1f} → {t_after:.1f}"
        )

    def test_sim_time_monotonically_increases(self) -> None:
        """``sim_time_s`` must strictly increase on each of five wait steps."""
        environment = DcOpsEnvironment()
        clock = environment.reset().metadata["sim_time_s"]
        for _ in range(5):
            now = environment.step(DcOpsAction(command="wait")).metadata["sim_time_s"]
            assert now > clock
            clock = now
# ===========================================================================
# Performance Tests
# ===========================================================================
class TestIntegrationPerformance:
    """Wall-clock budget checks across facility sizes and scenarios."""

    @pytest.mark.parametrize("config_name", ["default", "small", "large"])
    def test_episode_completes_fast(self, config_name: str) -> None:
        """reset() plus ten ``wait`` steps must finish in under 10 seconds."""
        facility = load_datacenter_config(config_name)
        environment = DcOpsEnvironment()
        wait = DcOpsAction(command="wait")

        # Config loading and environment construction are outside the timer.
        began = time.perf_counter()
        environment.reset(config=facility, step_budget=10)
        for _ in range(10):
            environment.step(wait)
        elapsed = time.perf_counter() - began

        assert elapsed < 10.0, (
            f"{config_name} facility 10-step episode took {elapsed:.2f}s, should be <10s"
        )

    def test_all_scenarios_full_episode_under_10s(self) -> None:
        """Run every registered scenario for up to 20 ``wait`` steps.

        NOTE(review): the method name says 10 s but the asserted limit is
        15 s, and the step cap is a fixed 20 rather than each scenario's own
        budget — confirm which values are intended.
        """
        environment = DcOpsEnvironment()
        began = time.perf_counter()
        for scenario_id in registered_scenario_ids():
            environment.reset(scenario=scenario_id)
            remaining = 20  # Max budget across scenarios
            while remaining > 0:
                remaining -= 1
                if environment.step(DcOpsAction(command="wait")).done:
                    break
        total_elapsed = time.perf_counter() - began

        assert total_elapsed < 15.0, (
            f"All {len(registered_scenario_ids())} scenarios took {total_elapsed:.2f}s"
        )
|