# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""Integration tests: full episode playback, config loading, cross-facility.

Validates:
- Known-good action sequences resolve each scenario
- Reward signals are well-behaved across full episodes
- YAML config loading produces valid, runnable environments
- Different facility sizes work correctly
- Episode metrics (PUE, temps, rewards) are in expected ranges
"""

from __future__ import annotations

import math
import time
from pathlib import Path

import pytest

from dc_ops_env.config import (
    BUILTIN_CONFIGS,
    DatacenterConfig,
    load_datacenter_config,
    make_default_datacenter_config,
)
from dc_ops_env.models import DcOpsAction, DcOpsObservation
from dc_ops_env.server.dc_ops_env_environment import DcOpsEnvironment
from dc_ops_env.scenarios.registry import registered_scenario_ids


# Built-in config names exercised by the parametrized tests below.
_BUILTIN_CONFIG_NAMES = ["default", "small", "large"]


def _wait_until_done(env: DcOpsEnvironment, max_steps: int) -> DcOpsObservation:
    """Issue ``wait`` actions until the episode is done or the cap is hit.

    Args:
        env: Environment that has already been reset.
        max_steps: Maximum number of ``wait`` actions to issue (must be >= 1).

    Returns:
        The observation returned by the last ``wait`` step that was issued.

    Raises:
        ValueError: If ``max_steps`` is less than 1 (no observation would
            exist to return).
    """
    if max_steps < 1:
        raise ValueError("max_steps must be >= 1")
    for _ in range(max_steps):
        obs = env.step(DcOpsAction(command="wait"))
        if obs.done:
            break
    return obs


# ===========================================================================
# Config Loading Tests
# ===========================================================================


class TestConfigLoading:
    """Validate YAML config loading and built-in configs."""

    def test_builtin_configs_exist(self) -> None:
        """All built-in config files should exist on disk."""
        for name, path in BUILTIN_CONFIGS.items():
            assert path.exists(), f"Built-in config '{name}' not found at {path}"

    @pytest.mark.parametrize("config_name", _BUILTIN_CONFIG_NAMES)
    def test_load_builtin(self, config_name: str) -> None:
        """Each built-in config should load without error."""
        cfg = load_datacenter_config(config_name)
        assert isinstance(cfg, DatacenterConfig)
        assert len(cfg.zones) > 0
        for zone in cfg.zones:
            assert len(zone.racks) > 0
            assert len(zone.crac_units) > 0

    def test_load_by_path(self) -> None:
        """Loading by explicit path should work."""
        path = BUILTIN_CONFIGS["default"]
        cfg = load_datacenter_config(path)
        assert cfg.name == "DC-OPS Default Facility"

    def test_load_nonexistent_raises(self) -> None:
        """Loading a missing file should raise FileNotFoundError."""
        with pytest.raises(FileNotFoundError):
            load_datacenter_config("/nonexistent/path.yaml")

    def test_default_yaml_matches_programmatic(self) -> None:
        """YAML default config should match make_default_datacenter_config()."""
        yaml_cfg = load_datacenter_config("default")
        prog_cfg = make_default_datacenter_config()

        assert yaml_cfg.name == prog_cfg.name
        assert len(yaml_cfg.zones) == len(prog_cfg.zones)
        assert yaml_cfg.outside_temp_c == prog_cfg.outside_temp_c

        # Same number of racks and CRACs
        yaml_racks = sum(len(z.racks) for z in yaml_cfg.zones)
        prog_racks = sum(len(z.racks) for z in prog_cfg.zones)
        assert yaml_racks == prog_racks

        yaml_cracs = sum(len(z.crac_units) for z in yaml_cfg.zones)
        prog_cracs = sum(len(z.crac_units) for z in prog_cfg.zones)
        assert yaml_cracs == prog_cracs

    def test_small_facility_dimensions(self) -> None:
        """Small facility should have correct dimensions."""
        cfg = load_datacenter_config("small")
        assert len(cfg.zones) == 1

        total_racks = sum(len(z.racks) for z in cfg.zones)
        assert total_racks == 10

        total_it = sum(r.it_load_kw for z in cfg.zones for r in z.racks)
        assert total_it == pytest.approx(80.0)

        assert len(cfg.power.ups_units) == 1

    def test_large_facility_dimensions(self) -> None:
        """Large facility should have correct dimensions."""
        cfg = load_datacenter_config("large")
        assert len(cfg.zones) == 4

        total_racks = sum(len(z.racks) for z in cfg.zones)
        assert total_racks == 60

        total_it = sum(r.it_load_kw for z in cfg.zones for r in z.racks)
        assert total_it == pytest.approx(600.0)

        assert len(cfg.power.ups_units) == 4

    def test_large_facility_has_h1_zone(self) -> None:
        """Large facility should include an H1 high-density zone."""
        cfg = load_datacenter_config("large")
        h1_zones = [z for z in cfg.zones if z.ashrae_class == "H1"]
        assert len(h1_zones) == 1

        # H1 zone should have higher per-rack load.  Compared with a
        # tolerance, like the other load checks in this class, so the test
        # does not hinge on exact float representation.
        for rack in h1_zones[0].racks:
            assert rack.it_load_kw == pytest.approx(20.0)


# ===========================================================================
# Config-to-Environment Tests
# ===========================================================================


class TestConfigToEnvironment:
    """Validate that loaded configs produce runnable environments."""

    @pytest.mark.parametrize("config_name", _BUILTIN_CONFIG_NAMES)
    def test_env_runs_with_config(self, config_name: str) -> None:
        """Environment should initialize and run steps with each config."""
        cfg = load_datacenter_config(config_name)
        env = DcOpsEnvironment()

        obs = env.reset(config=cfg)
        assert isinstance(obs, DcOpsObservation)
        assert obs.done is False

        obs = env.step(DcOpsAction(command="check_status"))
        assert isinstance(obs, DcOpsObservation)

    def test_small_facility_pue(self) -> None:
        """Small facility PUE should be realistic after warmup."""
        cfg = load_datacenter_config("small")
        env = DcOpsEnvironment()
        obs = env.reset(config=cfg)

        pue = obs.metadata["pue"]
        assert 1.1 < pue < 2.5, f"PUE {pue} out of realistic range"

    def test_large_facility_total_load(self) -> None:
        """Large facility total IT load should match config."""
        cfg = load_datacenter_config("large")
        env = DcOpsEnvironment()
        obs = env.reset(config=cfg)

        total_it = obs.metadata["total_it_load_kw"]
        assert total_it == pytest.approx(600.0, rel=0.01)


# ===========================================================================
# Full Episode Playback: Thermal Scenarios
# ===========================================================================


class TestEpisodePlaybackThermal:
    """Full episode playback with known-good action sequences for thermal scenarios."""

    def test_a1_optimal_episode(self) -> None:
        """A1 (Cooling Setpoint Optimization): raise setpoints to reduce PUE.

        Optimal sequence: check_status → raise each CRAC setpoint → wait for
        convergence. PUE should improve significantly from baseline.
        """
        env = DcOpsEnvironment()
        obs = env.reset(scenario="A1")
        pue_initial = obs.metadata["pue"]

        # 1. Check status first (procedure bonus)
        obs = env.step(DcOpsAction(command="check_status"))
        assert not obs.done

        # 2. Raise setpoints on all 4 CRACs from 15°C → 24°C (aggressive)
        for crac_id in ["CRAC-1", "CRAC-2", "CRAC-3", "CRAC-4"]:
            env.step(DcOpsAction(command=f"adjust_setpoint {crac_id} 24"))

        # 3. Wait for temps to converge
        obs = _wait_until_done(env, 5)

        pue_final = obs.metadata["pue"]
        # PUE should have improved (lower is better)
        assert pue_final < pue_initial, (
            f"PUE should improve: {pue_initial:.2f} → {pue_final:.2f}"
        )

    def test_a2_optimal_episode(self) -> None:
        """A2 (Thermal Event Response): diagnose CRAC-3, compensate with remaining units.

        Optimal: diagnose → increase fan speeds on survivors → adjust setpoints.
        """
        env = DcOpsEnvironment()
        env.reset(scenario="A2")

        # 1. Diagnose the failed CRAC.  A single case-insensitive check
        #    covers both "COMPRESSOR" and "compressor" spellings.
        obs = env.step(DcOpsAction(command="diagnose CRAC-3"))
        assert "compressor" in obs.action_result.lower()

        survivors = ["CRAC-1", "CRAC-2", "CRAC-4"]

        # 2. Increase fan speed on remaining CRACs
        for crac_id in survivors:
            env.step(DcOpsAction(command=f"set_fan_speed {crac_id} 100"))

        # 3. Lower setpoints slightly on surviving units to compensate
        for crac_id in survivors:
            env.step(DcOpsAction(command=f"adjust_setpoint {crac_id} 16"))

        # 4. Wait for stabilization
        obs = _wait_until_done(env, 8)

        # Should resolve or be close — temps within recommended for 2+ steps
        # Even if not fully resolved, reward should be reasonable
        assert obs.metadata["cumulative_reward"] > -5.0

    def test_a4_episode_with_load_shedding(self) -> None:
        """A4 (CRAC Failure Cascade): diagnose both, compensate, shed load.

        This is the hardest thermal scenario — two CRACs down.
        """
        env = DcOpsEnvironment()
        env.reset(scenario="A4")

        # 1. Diagnose both failed units
        env.step(DcOpsAction(command="diagnose CRAC-1"))
        env.step(DcOpsAction(command="diagnose CRAC-3"))

        # 2. Max out surviving CRACs
        env.step(DcOpsAction(command="set_fan_speed CRAC-2 100"))
        env.step(DcOpsAction(command="set_fan_speed CRAC-4 100"))
        env.step(DcOpsAction(command="adjust_setpoint CRAC-2 15"))
        env.step(DcOpsAction(command="adjust_setpoint CRAC-4 15"))

        # 3. Shed load on hottest racks
        for rack_id in ["A-01", "A-02", "B-01", "B-02"]:
            env.step(DcOpsAction(command=f"set_rack_load {rack_id} 4"))

        # 4. Wait and monitor
        obs = _wait_until_done(env, 10)

        # Hard scenario — may not fully resolve, but should make progress
        assert obs.metadata["cumulative_reward"] > -10.0


# ===========================================================================
# Full Episode Playback: Power Scenarios
# ===========================================================================


class TestEpisodePlaybackPower:
    """Full episode playback with known-good action sequences for power scenarios."""

    def test_b1_optimal_episode(self) -> None:
        """B1 (UPS Alarm Response): diagnose UPS, acknowledge alarm.

        Simple 2-step resolution.
        """
        env = DcOpsEnvironment()
        env.reset(scenario="B1")

        # 1. Diagnose UPS status
        obs = env.step(DcOpsAction(command="diagnose UPS-1"))
        assert not obs.done

        # 2. Acknowledge the alarm
        obs = env.step(DcOpsAction(command="acknowledge_alarm"))
        assert obs.done, "B1 should resolve after diagnose + acknowledge"

        # Speed bonus: (10 - 2) / 10 = 0.8
        assert obs.reward > 0.5, "Should have significant speed bonus"

    def test_b3_optimal_episode(self) -> None:
        """B3 (Generator Test Protocol): follow the correct test sequence.

        check_status → start_generator → diagnose GEN-1 → stop_generator → acknowledge.
        """
        env = DcOpsEnvironment()
        env.reset(scenario="B3")

        # Follow correct protocol
        obs = env.step(DcOpsAction(command="check_status"))
        assert not obs.done

        obs = env.step(DcOpsAction(command="start_generator"))
        assert not obs.done

        # Wait for generator to start (30s game time per step, gen startup ~17s)
        env.step(DcOpsAction(command="wait"))

        obs = env.step(DcOpsAction(command="diagnose GEN-1"))
        assert not obs.done

        obs = env.step(DcOpsAction(command="stop_generator"))
        assert not obs.done

        # Wait for cooldown
        env.step(DcOpsAction(command="wait"))

        obs = env.step(DcOpsAction(command="acknowledge_alarm"))
        assert obs.done, "B3 should resolve after full protocol"

    def test_b4_episode_with_load_shedding(self) -> None:
        """B4 (Power Failure Cascade): manage battery, wait for generator.

        Generator starts automatically on utility loss. Agent monitors
        and sheds load to extend battery life.
        """
        env = DcOpsEnvironment()
        env.reset(scenario="B4")

        # 1. Diagnose to understand the situation
        env.step(DcOpsAction(command="diagnose UPS-1"))
        env.step(DcOpsAction(command="diagnose UPS-2"))

        # 2. Shed non-critical load to extend battery
        env.step(DcOpsAction(command="set_rack_load A-01 4"))
        env.step(DcOpsAction(command="set_rack_load B-01 4"))

        # 3. Check generator status
        env.step(DcOpsAction(command="diagnose GEN-1"))

        # 4. Wait for generator to come online and stabilize
        obs = _wait_until_done(env, 14)

        # B4 is hard — may or may not resolve, but should make progress
        assert obs.metadata["cumulative_reward"] > -10.0


# ===========================================================================
# Reward Signal Quality
# ===========================================================================


class TestRewardSignalQuality:
    """Validate that reward signals are well-behaved across full episodes."""

    def test_rewards_bounded_per_step(self) -> None:
        """Every per-step reward should be bounded."""
        env = DcOpsEnvironment()
        env.reset(scenario="A2")

        for _ in range(15):
            obs = env.step(DcOpsAction(command="wait"))
            # Base reward is [-1, 1], speed bonus can add up to 1.0
            assert -2.0 <= obs.reward <= 2.0, f"Reward {obs.reward} out of bounds"
            if obs.done:
                break

    def test_good_actions_beat_bad_actions(self) -> None:
        """An optimal sequence should yield higher cumulative reward than a bad one."""
        env = DcOpsEnvironment()

        # Good episode: diagnose then fix
        env.reset(scenario="B1")
        env.step(DcOpsAction(command="diagnose UPS-1"))
        obs_good = env.step(DcOpsAction(command="acknowledge_alarm"))
        r_good = obs_good.metadata["cumulative_reward"]

        # Bad episode: just wait
        env.reset(scenario="B1")
        obs_bad = _wait_until_done(env, 10)
        r_bad = obs_bad.metadata["cumulative_reward"]

        assert r_good > r_bad, f"Good ({r_good:.2f}) should beat bad ({r_bad:.2f})"

    def test_procedure_bonus_visible(self) -> None:
        """Following correct procedure should yield higher cumulative reward.

        Full episode comparison: both episodes do the same actions,
        but one follows procedure (check_status first) and the other doesn't.
        """
        env = DcOpsEnvironment()

        # With procedure: check_status → adjust_setpoint → wait
        env.reset(scenario="A1")
        env.step(DcOpsAction(command="check_status"))
        env.step(DcOpsAction(command="adjust_setpoint CRAC-1 22"))
        obs_proc = env.step(DcOpsAction(command="wait"))
        r_with = obs_proc.metadata["cumulative_reward"]

        # Without procedure: wait → adjust_setpoint → wait (no check_status)
        env.reset(scenario="A1")
        env.step(DcOpsAction(command="wait"))
        env.step(DcOpsAction(command="adjust_setpoint CRAC-1 22"))
        obs_noproc = env.step(DcOpsAction(command="wait"))
        r_without = obs_noproc.metadata["cumulative_reward"]

        assert r_with > r_without, (
            f"Procedure bonus not visible: with={r_with:.3f} vs without={r_without:.3f}"
        )

    @pytest.mark.parametrize("scenario_id", registered_scenario_ids())
    def test_no_nan_rewards(self, scenario_id: str) -> None:
        """No scenario should produce NaN rewards."""
        env = DcOpsEnvironment()
        env.reset(scenario=scenario_id)

        for _ in range(5):
            obs = env.step(DcOpsAction(command="check_status"))
            assert not math.isnan(obs.reward), f"NaN reward in {scenario_id}"
            assert not math.isinf(obs.reward), f"Inf reward in {scenario_id}"
            if obs.done:
                break


# ===========================================================================
# Cross-Facility Scenario Tests
# ===========================================================================


class TestCrossFacility:
    """Validate scenarios work with different facility configs."""

    def test_scenario_with_small_facility(self) -> None:
        """Scenarios should adapt to smaller configs that have compatible CRACs."""
        cfg = load_datacenter_config("small")
        env = DcOpsEnvironment()

        # Run without a scenario, just with small config
        obs = env.reset(config=cfg, step_budget=5)
        assert obs.done is False

        # Basic operations should work
        obs = env.step(DcOpsAction(command="check_status"))
        assert "status" in obs.action_result.lower()

        obs = env.step(DcOpsAction(command="diagnose CRAC-1"))
        assert "Diagnostic Report" in obs.action_result

    def test_large_facility_steady_state(self) -> None:
        """Large facility should reach reasonable steady state."""
        cfg = load_datacenter_config("large")
        env = DcOpsEnvironment()
        obs = env.reset(config=cfg, step_budget=10)

        pue = obs.metadata["pue"]
        assert 1.1 < pue < 3.0, f"Large facility PUE {pue} unrealistic"

        total_cooling = obs.metadata["total_cooling_power_kw"]
        total_it = obs.metadata["total_it_load_kw"]
        assert total_cooling > 0
        assert total_it > 0


# ===========================================================================
# Episode Metrics & Physics Consistency
# ===========================================================================


class TestEpisodeMetrics:
    """Validate physics consistency across episode metrics."""

    def test_pue_always_above_one(self) -> None:
        """PUE should always be >= 1.0 (physically impossible otherwise)."""
        env = DcOpsEnvironment()
        env.reset(scenario="A1")

        for _ in range(10):
            obs = env.step(DcOpsAction(command="wait"))
            assert obs.metadata["pue"] >= 1.0
            if obs.done:
                break

    def test_higher_load_raises_temperature(self) -> None:
        """Adding rack load should cause temperature to rise."""
        env = DcOpsEnvironment()
        obs = env.reset()
        t_before = obs.metadata["zones"]["zone_a"]["cold_aisle_temp_c"]

        # Significantly increase multiple racks' load
        env.step(DcOpsAction(command="set_rack_load A-01 15"))
        env.step(DcOpsAction(command="set_rack_load A-02 15"))
        env.step(DcOpsAction(command="set_rack_load A-03 15"))

        # Wait for thermal response (always the full 7 steps; this loop
        # deliberately does not stop early on `done`)
        for _ in range(7):
            obs = env.step(DcOpsAction(command="wait"))

        t_after = obs.metadata["zones"]["zone_a"]["cold_aisle_temp_c"]
        assert t_after > t_before, (
            f"Temp should rise with more load: {t_before:.1f} → {t_after:.1f}"
        )

    def test_sim_time_monotonically_increases(self) -> None:
        """Simulation time should always advance."""
        env = DcOpsEnvironment()
        obs = env.reset()
        prev_time = obs.metadata["sim_time_s"]

        for _ in range(5):
            obs = env.step(DcOpsAction(command="wait"))
            assert obs.metadata["sim_time_s"] > prev_time
            prev_time = obs.metadata["sim_time_s"]


# ===========================================================================
# Performance Tests
# ===========================================================================


class TestIntegrationPerformance:
    """Validate performance across different facility sizes."""

    @pytest.mark.parametrize("config_name", _BUILTIN_CONFIG_NAMES)
    def test_episode_completes_fast(self, config_name: str) -> None:
        """Full episode should complete quickly for any facility size."""
        cfg = load_datacenter_config(config_name)
        env = DcOpsEnvironment()

        # Timed region covers reset plus ten `wait` steps.
        start = time.perf_counter()
        env.reset(config=cfg, step_budget=10)
        for _ in range(10):
            env.step(DcOpsAction(command="wait"))
        elapsed = time.perf_counter() - start

        assert elapsed < 10.0, (
            f"{config_name} facility 10-step episode took {elapsed:.2f}s, should be <10s"
        )

    def test_all_scenarios_full_episode_under_10s(self) -> None:
        """Running every scenario for its full step budget should be fast.

        NOTE(review): the test name says "under 10s" but the asserted budget
        is 15s; the threshold is left unchanged here — confirm which is
        intended.
        """
        env = DcOpsEnvironment()

        total_start = time.perf_counter()
        scenario_ids = registered_scenario_ids()
        for sid in scenario_ids:
            env.reset(scenario=sid)
            _wait_until_done(env, 20)  # Max budget across scenarios
        total_elapsed = time.perf_counter() - total_start

        assert total_elapsed < 15.0, (
            f"All {len(scenario_ids)} scenarios took {total_elapsed:.2f}s"
        )