dc_ops_env / tests /test_integration.py
Melikshah's picture
Upload folder using huggingface_hub
aedaf74 verified
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""Integration tests: full episode playback, config loading, cross-facility.
Validates:
- Known-good action sequences resolve each scenario
- Reward signals are well-behaved across full episodes
- YAML config loading produces valid, runnable environments
- Different facility sizes work correctly
- Episode metrics (PUE, temps, rewards) are in expected ranges
"""
from __future__ import annotations
import time
from pathlib import Path
import pytest
from dc_ops_env.config import (
BUILTIN_CONFIGS,
DatacenterConfig,
load_datacenter_config,
make_default_datacenter_config,
)
from dc_ops_env.models import DcOpsAction, DcOpsObservation
from dc_ops_env.server.dc_ops_env_environment import DcOpsEnvironment
from dc_ops_env.scenarios.registry import registered_scenario_ids
# ===========================================================================
# Config Loading Tests
# ===========================================================================
class TestConfigLoading:
    """Checks on YAML config loading and on the built-in facility configs."""

    def test_builtin_configs_exist(self) -> None:
        """Every path registered in BUILTIN_CONFIGS must exist on disk."""
        for config_name, config_path in BUILTIN_CONFIGS.items():
            assert config_path.exists(), (
                f"Built-in config '{config_name}' not found at {config_path}"
            )

    @pytest.mark.parametrize("config_name", ["default", "small", "large"])
    def test_load_builtin(self, config_name: str) -> None:
        """A built-in config loads as a DatacenterConfig with populated zones.

        Each zone must contain at least one rack and at least one CRAC unit.
        """
        loaded = load_datacenter_config(config_name)
        assert isinstance(loaded, DatacenterConfig)
        assert len(loaded.zones) > 0
        for zone in loaded.zones:
            assert len(zone.racks) > 0
            assert len(zone.crac_units) > 0

    def test_load_by_path(self) -> None:
        """Passing the registered path of the default config loads that config."""
        loaded = load_datacenter_config(BUILTIN_CONFIGS["default"])
        assert loaded.name == "DC-OPS Default Facility"

    def test_load_nonexistent_raises(self) -> None:
        """A path that does not exist must raise FileNotFoundError."""
        missing_path = "/nonexistent/path.yaml"
        with pytest.raises(FileNotFoundError):
            load_datacenter_config(missing_path)

    def test_default_yaml_matches_programmatic(self) -> None:
        """The YAML default agrees with make_default_datacenter_config().

        Compared: name, zone count, outside temperature, total rack count and
        total CRAC count.
        """
        from_yaml = load_datacenter_config("default")
        from_code = make_default_datacenter_config()

        assert from_yaml.name == from_code.name
        assert len(from_yaml.zones) == len(from_code.zones)
        assert from_yaml.outside_temp_c == from_code.outside_temp_c

        # Totals across all zones must agree, first racks and then CRACs.
        rack_total_yaml = sum(len(zone.racks) for zone in from_yaml.zones)
        rack_total_code = sum(len(zone.racks) for zone in from_code.zones)
        assert rack_total_yaml == rack_total_code

        crac_total_yaml = sum(len(zone.crac_units) for zone in from_yaml.zones)
        crac_total_code = sum(len(zone.crac_units) for zone in from_code.zones)
        assert crac_total_yaml == crac_total_code

    def test_small_facility_dimensions(self) -> None:
        """Small facility: 1 zone, 10 racks, ~80 kW IT load, 1 UPS unit."""
        facility = load_datacenter_config("small")
        assert len(facility.zones) == 1

        rack_count = sum(len(zone.racks) for zone in facility.zones)
        assert rack_count == 10

        it_load_kw = sum(
            rack.it_load_kw for zone in facility.zones for rack in zone.racks
        )
        assert it_load_kw == pytest.approx(80.0)
        assert len(facility.power.ups_units) == 1

    def test_large_facility_dimensions(self) -> None:
        """Large facility: 4 zones, 60 racks, ~600 kW IT load, 4 UPS units."""
        facility = load_datacenter_config("large")
        assert len(facility.zones) == 4

        rack_count = sum(len(zone.racks) for zone in facility.zones)
        assert rack_count == 60

        it_load_kw = sum(
            rack.it_load_kw for zone in facility.zones for rack in zone.racks
        )
        assert it_load_kw == pytest.approx(600.0)
        assert len(facility.power.ups_units) == 4

    def test_large_facility_has_h1_zone(self) -> None:
        """Large facility has exactly one H1 zone whose racks are all 20 kW."""
        facility = load_datacenter_config("large")
        high_density = [
            zone for zone in facility.zones if zone.ashrae_class == "H1"
        ]
        assert len(high_density) == 1

        # Every rack in the single H1 zone carries the same per-rack load.
        for rack in high_density[0].racks:
            assert rack.it_load_kw == 20.0
# ===========================================================================
# Config-to-Environment Tests
# ===========================================================================
class TestConfigToEnvironment:
    """Checks that loaded configs can be used to reset and step an environment."""

    @pytest.mark.parametrize("config_name", ["default", "small", "large"])
    def test_env_runs_with_config(self, config_name: str) -> None:
        """reset() and one check_status step both return a DcOpsObservation."""
        env = DcOpsEnvironment()

        first_obs = env.reset(config=load_datacenter_config(config_name))
        assert isinstance(first_obs, DcOpsObservation)
        assert first_obs.done is False

        next_obs = env.step(DcOpsAction(command="check_status"))
        assert isinstance(next_obs, DcOpsObservation)

    def test_small_facility_pue(self) -> None:
        """PUE reported right after reset() on the small facility is in (1.1, 2.5)."""
        env = DcOpsEnvironment()
        initial_obs = env.reset(config=load_datacenter_config("small"))

        pue = initial_obs.metadata["pue"]
        assert 1.1 < pue < 2.5, f"PUE {pue} out of realistic range"

    def test_large_facility_total_load(self) -> None:
        """Total IT load after reset() on the large facility is 600 kW within 1%."""
        env = DcOpsEnvironment()
        initial_obs = env.reset(config=load_datacenter_config("large"))

        reported_it_kw = initial_obs.metadata["total_it_load_kw"]
        assert reported_it_kw == pytest.approx(600.0, rel=0.01)
# ===========================================================================
# Full Episode Playback: Thermal Scenarios
# ===========================================================================
class TestEpisodePlaybackThermal:
    """Full episode playback with known-good action sequences for thermal scenarios."""

    def test_a1_optimal_episode(self) -> None:
        """A1 (Cooling Setpoint Optimization): raise setpoints to reduce PUE.

        Sequence: check_status, set every CRAC setpoint to 24, then wait up to
        five steps (stopping early if the episode ends). The PUE of the last
        observation must be strictly lower than the PUE reported by reset().
        """
        env = DcOpsEnvironment()
        obs = env.reset(scenario="A1")
        pue_initial = obs.metadata["pue"]

        # 1. Check status first (the original author notes this earns a
        #    procedure bonus; the bonus itself is not asserted here).
        obs = env.step(DcOpsAction(command="check_status"))
        assert not obs.done

        # 2. Raise the setpoint on all four CRACs to 24.
        for crac_id in ["CRAC-1", "CRAC-2", "CRAC-3", "CRAC-4"]:
            obs = env.step(DcOpsAction(command=f"adjust_setpoint {crac_id} 24"))

        # 3. Give temperatures a few steps to settle.
        for _ in range(5):
            obs = env.step(DcOpsAction(command="wait"))
            if obs.done:
                break

        pue_final = obs.metadata["pue"]
        # Lower PUE is better. The separator keeps the two values readable
        # in the failure message instead of fusing them into one number.
        assert pue_final < pue_initial, (
            f"PUE should improve: {pue_initial:.2f} → {pue_final:.2f}"
        )

    def test_a2_optimal_episode(self) -> None:
        """A2 (Thermal Event Response): diagnose CRAC-3, compensate with the rest.

        Sequence: diagnose CRAC-3, set fan speed to 100 and setpoint to 16 on
        CRAC-1/2/4, then wait up to eight steps. The diagnosis output must
        mention a compressor (case-insensitive) and the cumulative reward must
        stay above -5.0.
        """
        env = DcOpsEnvironment()
        obs = env.reset(scenario="A2")
        surviving_cracs = ["CRAC-1", "CRAC-2", "CRAC-4"]

        # 1. Diagnose the failed CRAC. A case-insensitive match already
        #    covers the upper-case spelling, so one check is sufficient.
        obs = env.step(DcOpsAction(command="diagnose CRAC-3"))
        assert "compressor" in obs.action_result.lower()

        # 2. Increase fan speed on the remaining CRACs.
        for crac_id in surviving_cracs:
            obs = env.step(DcOpsAction(command=f"set_fan_speed {crac_id} 100"))

        # 3. Set the surviving units' setpoints to 16 to compensate.
        for crac_id in surviving_cracs:
            obs = env.step(DcOpsAction(command=f"adjust_setpoint {crac_id} 16"))

        # 4. Wait for stabilization.
        for _ in range(8):
            obs = env.step(DcOpsAction(command="wait"))
            if obs.done:
                break

        # Resolution is not required; only a floor on cumulative reward is.
        assert obs.metadata["cumulative_reward"] > -5.0

    def test_a4_episode_with_load_shedding(self) -> None:
        """A4 (CRAC Failure Cascade): diagnose both units, compensate, shed load.

        Sequence: diagnose CRAC-1 and CRAC-3, max out CRAC-2 and CRAC-4, set
        four racks to a load of 4, then wait up to ten steps. Only a floor of
        -10.0 on cumulative reward is asserted.
        """
        env = DcOpsEnvironment()
        obs = env.reset(scenario="A4")

        # 1. Diagnose both failed units.
        obs = env.step(DcOpsAction(command="diagnose CRAC-1"))
        obs = env.step(DcOpsAction(command="diagnose CRAC-3"))

        # 2. Max out the surviving CRACs.
        obs = env.step(DcOpsAction(command="set_fan_speed CRAC-2 100"))
        obs = env.step(DcOpsAction(command="set_fan_speed CRAC-4 100"))
        obs = env.step(DcOpsAction(command="adjust_setpoint CRAC-2 15"))
        obs = env.step(DcOpsAction(command="adjust_setpoint CRAC-4 15"))

        # 3. Shed load on four racks (chosen by the author as the hottest).
        for rack_id in ["A-01", "A-02", "B-01", "B-02"]:
            obs = env.step(DcOpsAction(command=f"set_rack_load {rack_id} 4"))

        # 4. Wait and monitor.
        for _ in range(10):
            obs = env.step(DcOpsAction(command="wait"))
            if obs.done:
                break

        # Full resolution is not required for this scenario.
        assert obs.metadata["cumulative_reward"] > -10.0
# ===========================================================================
# Full Episode Playback: Power Scenarios
# ===========================================================================
class TestEpisodePlaybackPower:
    """Full episode playback with known-good action sequences for power scenarios."""

    def test_b1_optimal_episode(self) -> None:
        """B1 (UPS Alarm Response): diagnose UPS-1, then acknowledge the alarm.

        The episode must still be running after the diagnosis, must be done
        after the acknowledgement, and the final step reward must exceed 0.5.
        """
        env = DcOpsEnvironment()
        env.reset(scenario="B1")

        # Step 1: diagnosing alone must not end the episode.
        after_diagnose = env.step(DcOpsAction(command="diagnose UPS-1"))
        assert not after_diagnose.done

        # Step 2: acknowledging the alarm is expected to resolve the scenario.
        after_ack = env.step(DcOpsAction(command="acknowledge_alarm"))
        assert after_ack.done, "B1 should resolve after diagnose + acknowledge"

        # Original author's note: speed bonus is (10 - 2) / 10 = 0.8.
        # NOTE(review): that formula is not visible in this file; confirm it.
        assert after_ack.reward > 0.5, "Should have significant speed bonus"

    def test_b3_optimal_episode(self) -> None:
        """B3 (Generator Test Protocol): follow the expected test sequence.

        check_status → start_generator → wait → diagnose GEN-1 →
        stop_generator → wait → acknowledge_alarm. The episode must only be
        done after the final command.
        """
        env = DcOpsEnvironment()
        env.reset(scenario="B3")

        # (command, whether the test asserts the episode is still running).
        # The two "wait" steps carry no assertion: the first lets the
        # generator start and the second covers cooldown, per the author.
        protocol = [
            ("check_status", True),
            ("start_generator", True),
            ("wait", False),
            ("diagnose GEN-1", True),
            ("stop_generator", True),
            ("wait", False),
        ]
        for command, must_still_run in protocol:
            step_obs = env.step(DcOpsAction(command=command))
            if must_still_run:
                assert not step_obs.done

        final_obs = env.step(DcOpsAction(command="acknowledge_alarm"))
        assert final_obs.done, "B3 should resolve after full protocol"

    def test_b4_episode_with_load_shedding(self) -> None:
        """B4 (Power Failure Cascade): diagnose, shed load, wait for recovery.

        Five preparatory commands are issued, then up to fourteen waits.
        Only a floor of -10.0 on cumulative reward is asserted.
        """
        env = DcOpsEnvironment()
        last_obs = env.reset(scenario="B4")

        preparation = [
            # 1. Diagnose both UPS units to understand the situation.
            "diagnose UPS-1",
            "diagnose UPS-2",
            # 2. Shed load on two racks to extend battery life.
            "set_rack_load A-01 4",
            "set_rack_load B-01 4",
            # 3. Check generator status.
            "diagnose GEN-1",
        ]
        for command in preparation:
            last_obs = env.step(DcOpsAction(command=command))

        # 4. Wait for the generator to come online and stabilize.
        waits_left = 14
        while waits_left > 0:
            last_obs = env.step(DcOpsAction(command="wait"))
            waits_left -= 1
            if last_obs.done:
                break

        # Resolution is optional here; the reward just must not collapse.
        assert last_obs.metadata["cumulative_reward"] > -10.0
# ===========================================================================
# Reward Signal Quality
# ===========================================================================
class TestRewardSignalQuality:
    """Checks that reward values behave sensibly over whole episodes."""

    def test_rewards_bounded_per_step(self) -> None:
        """Each step reward in an all-wait A2 episode lies within [-2.0, 2.0]."""
        env = DcOpsEnvironment()
        env.reset(scenario="A2")

        steps_taken = 0
        while steps_taken < 15:
            step_obs = env.step(DcOpsAction(command="wait"))
            steps_taken += 1
            # Author's rationale: base reward in [-1, 1] plus a speed bonus
            # of at most 1.0. NOTE(review): ranges not visible here; confirm.
            assert -2.0 <= step_obs.reward <= 2.0, (
                f"Reward {step_obs.reward} out of bounds"
            )
            if step_obs.done:
                break

    def test_good_actions_beat_bad_actions(self) -> None:
        """On B1, diagnose + acknowledge must out-score an all-wait episode."""
        env = DcOpsEnvironment()

        # Good episode: diagnose, then acknowledge.
        env.reset(scenario="B1")
        env.step(DcOpsAction(command="diagnose UPS-1"))
        good_final = env.step(DcOpsAction(command="acknowledge_alarm"))
        r_good = good_final.metadata["cumulative_reward"]

        # Bad episode: up to ten waits and nothing else.
        env.reset(scenario="B1")
        for _ in range(10):
            bad_final = env.step(DcOpsAction(command="wait"))
            if bad_final.done:
                break
        r_bad = bad_final.metadata["cumulative_reward"]

        assert r_good > r_bad, f"Good ({r_good:.2f}) should beat bad ({r_bad:.2f})"

    def test_procedure_bonus_visible(self) -> None:
        """Starting A1 with check_status must yield a higher cumulative reward.

        Two three-step episodes are compared. They differ only in the first
        command: check_status in one, wait in the other.
        """
        env = DcOpsEnvironment()
        shared_tail = ["adjust_setpoint CRAC-1 22", "wait"]

        # With procedure: check_status → adjust_setpoint → wait.
        env.reset(scenario="A1")
        for command in ["check_status", *shared_tail]:
            with_obs = env.step(DcOpsAction(command=command))
        r_with = with_obs.metadata["cumulative_reward"]

        # Without procedure: wait → adjust_setpoint → wait.
        env.reset(scenario="A1")
        for command in ["wait", *shared_tail]:
            without_obs = env.step(DcOpsAction(command=command))
        r_without = without_obs.metadata["cumulative_reward"]

        assert r_with > r_without, (
            f"Procedure bonus not visible: with={r_with:.3f} vs without={r_without:.3f}"
        )

    @pytest.mark.parametrize("scenario_id", registered_scenario_ids())
    def test_no_nan_rewards(self, scenario_id: str) -> None:
        """Up to five check_status steps never produce a NaN or infinite reward."""
        import math

        env = DcOpsEnvironment()
        env.reset(scenario=scenario_id)

        for _ in range(5):
            step_obs = env.step(DcOpsAction(command="check_status"))
            reward = step_obs.reward
            assert not math.isnan(reward), f"NaN reward in {scenario_id}"
            assert not math.isinf(reward), f"Inf reward in {scenario_id}"
            if step_obs.done:
                break
# ===========================================================================
# Cross-Facility Scenario Tests
# ===========================================================================
class TestCrossFacility:
    """Checks that the environment works with non-default facility configs."""

    def test_scenario_with_small_facility(self) -> None:
        """Basic commands work on the small config.

        Despite the test name, no scenario is passed to reset(); only the
        small config and a step budget of 5 are supplied.
        """
        env = DcOpsEnvironment()
        start_obs = env.reset(config=load_datacenter_config("small"), step_budget=5)
        assert start_obs.done is False

        # check_status output must mention "status" (case-insensitive).
        status_obs = env.step(DcOpsAction(command="check_status"))
        assert "status" in status_obs.action_result.lower()

        # Diagnosing CRAC-1 must produce a diagnostic report.
        diagnose_obs = env.step(DcOpsAction(command="diagnose CRAC-1"))
        assert "Diagnostic Report" in diagnose_obs.action_result

    def test_large_facility_steady_state(self) -> None:
        """Large facility reports a plausible PUE and positive loads after reset()."""
        env = DcOpsEnvironment()
        start_obs = env.reset(config=load_datacenter_config("large"), step_budget=10)
        metrics = start_obs.metadata

        pue = metrics["pue"]
        assert 1.1 < pue < 3.0, f"Large facility PUE {pue} unrealistic"

        cooling_kw = metrics["total_cooling_power_kw"]
        it_kw = metrics["total_it_load_kw"]
        assert cooling_kw > 0
        assert it_kw > 0
# ===========================================================================
# Episode Metrics & Physics Consistency
# ===========================================================================
class TestEpisodeMetrics:
    """Validate physics consistency across episode metrics."""

    def test_pue_always_above_one(self) -> None:
        """PUE must be >= 1.0 on every step of an all-wait A1 episode.

        Runs at most ten wait steps and stops early if the episode ends.
        """
        env = DcOpsEnvironment()
        env.reset(scenario="A1")
        for _ in range(10):
            obs = env.step(DcOpsAction(command="wait"))
            assert obs.metadata["pue"] >= 1.0
            if obs.done:
                break

    def test_higher_load_raises_temperature(self) -> None:
        """Raising rack load in zone A must raise its cold-aisle temperature.

        Three racks are set to a load of 15, then seven wait steps are taken.
        The zone_a cold-aisle temperature after the waits must be strictly
        higher than the one reported by reset().
        """
        env = DcOpsEnvironment()
        obs = env.reset()
        t_before = obs.metadata["zones"]["zone_a"]["cold_aisle_temp_c"]

        # Increase the load on several racks at once.
        for rack_id in ["A-01", "A-02", "A-03"]:
            env.step(DcOpsAction(command=f"set_rack_load {rack_id} 15"))

        # Allow time for the thermal response.
        for _ in range(7):
            obs = env.step(DcOpsAction(command="wait"))

        t_after = obs.metadata["zones"]["zone_a"]["cold_aisle_temp_c"]
        # The separator keeps the two readings distinguishable in the failure
        # message instead of fusing them into one number.
        assert t_after > t_before, (
            f"Temp should rise with more load: {t_before:.1f} → {t_after:.1f}"
        )

    def test_sim_time_monotonically_increases(self) -> None:
        """sim_time_s must strictly increase on each of five wait steps."""
        env = DcOpsEnvironment()
        obs = env.reset()
        prev_time = obs.metadata["sim_time_s"]
        for _ in range(5):
            obs = env.step(DcOpsAction(command="wait"))
            current_time = obs.metadata["sim_time_s"]
            assert current_time > prev_time
            prev_time = current_time
# ===========================================================================
# Performance Tests
# ===========================================================================
class TestIntegrationPerformance:
    """Wall-clock checks for episodes on each facility size and scenario."""

    @pytest.mark.parametrize("config_name", ["default", "small", "large"])
    def test_episode_completes_fast(self, config_name: str) -> None:
        """reset() plus ten wait steps must finish in under 10 seconds.

        Config loading and environment construction happen before the timer
        starts and are not counted.
        """
        facility = load_datacenter_config(config_name)
        env = DcOpsEnvironment()

        started_at = time.perf_counter()
        env.reset(config=facility, step_budget=10)
        steps_left = 10
        while steps_left:
            env.step(DcOpsAction(command="wait"))
            steps_left -= 1
        elapsed = time.perf_counter() - started_at

        assert elapsed < 10.0, (
            f"{config_name} facility 10-step episode took {elapsed:.2f}s, should be <10s"
        )

    def test_all_scenarios_full_episode_under_10s(self) -> None:
        """Every registered scenario runs up to 20 wait steps within the limit.

        NOTE(review): the test name says 10s but the asserted limit is 15.0
        seconds. Confirm which one is intended.
        """
        env = DcOpsEnvironment()

        total_start = time.perf_counter()
        for scenario_id in registered_scenario_ids():
            env.reset(scenario=scenario_id)
            # 20 is the author's stated maximum budget across scenarios.
            for _ in range(20):
                step_obs = env.step(DcOpsAction(command="wait"))
                if step_obs.done:
                    break
        total_elapsed = time.perf_counter() - total_start

        assert total_elapsed < 15.0, (
            f"All {len(registered_scenario_ids())} scenarios took {total_elapsed:.2f}s"
        )