File size: 21,381 Bytes
aedaf74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""Integration tests: full episode playback, config loading, cross-facility.

Validates:
  - Known-good action sequences resolve each scenario
  - Reward signals are well-behaved across full episodes
  - YAML config loading produces valid, runnable environments
  - Different facility sizes work correctly
  - Episode metrics (PUE, temps, rewards) are in expected ranges
"""

from __future__ import annotations

import time
from pathlib import Path

import pytest

from dc_ops_env.config import (
    BUILTIN_CONFIGS,
    DatacenterConfig,
    load_datacenter_config,
    make_default_datacenter_config,
)
from dc_ops_env.models import DcOpsAction, DcOpsObservation
from dc_ops_env.server.dc_ops_env_environment import DcOpsEnvironment
from dc_ops_env.scenarios.registry import registered_scenario_ids


# ===========================================================================
# Config Loading Tests
# ===========================================================================
class TestConfigLoading:
    """Checks for the config loader and the built-in YAML facility configs."""

    def test_builtin_configs_exist(self) -> None:
        """Every path registered in BUILTIN_CONFIGS must exist on disk."""
        for config_key, config_path in BUILTIN_CONFIGS.items():
            assert config_path.exists(), (
                f"Built-in config '{config_key}' not found at {config_path}"
            )

    @pytest.mark.parametrize("config_name", ["default", "small", "large"])
    def test_load_builtin(self, config_name: str) -> None:
        """A built-in config loads into a DatacenterConfig with populated zones.

        Each zone must contain at least one rack and at least one CRAC unit.
        """
        loaded = load_datacenter_config(config_name)

        assert isinstance(loaded, DatacenterConfig)
        assert len(loaded.zones) > 0
        for zone in loaded.zones:
            assert len(zone.racks) > 0
            assert len(zone.crac_units) > 0

    def test_load_by_path(self) -> None:
        """Passing the explicit path of the default config loads that facility."""
        loaded = load_datacenter_config(BUILTIN_CONFIGS["default"])
        assert loaded.name == "DC-OPS Default Facility"

    def test_load_nonexistent_raises(self) -> None:
        """A path that does not exist must surface as FileNotFoundError."""
        with pytest.raises(FileNotFoundError):
            load_datacenter_config("/nonexistent/path.yaml")

    def test_default_yaml_matches_programmatic(self) -> None:
        """The "default" YAML agrees with make_default_datacenter_config().

        Compared fields: name, zone count, outside temperature, and the
        facility-wide rack and CRAC counts.
        """
        from_yaml = load_datacenter_config("default")
        from_code = make_default_datacenter_config()

        assert from_yaml.name == from_code.name
        assert len(from_yaml.zones) == len(from_code.zones)
        assert from_yaml.outside_temp_c == from_code.outside_temp_c

        # Facility-wide rack totals must agree
        rack_total_yaml = sum(len(zone.racks) for zone in from_yaml.zones)
        rack_total_code = sum(len(zone.racks) for zone in from_code.zones)
        assert rack_total_yaml == rack_total_code

        # Facility-wide CRAC totals must agree
        crac_total_yaml = sum(len(zone.crac_units) for zone in from_yaml.zones)
        crac_total_code = sum(len(zone.crac_units) for zone in from_code.zones)
        assert crac_total_yaml == crac_total_code

    def test_small_facility_dimensions(self) -> None:
        """Small facility: 1 zone, 10 racks, ~80 kW of IT load, 1 UPS."""
        small = load_datacenter_config("small")

        assert len(small.zones) == 1

        rack_count = sum(len(zone.racks) for zone in small.zones)
        assert rack_count == 10

        it_load_kw = sum(
            rack.it_load_kw for zone in small.zones for rack in zone.racks
        )
        assert it_load_kw == pytest.approx(80.0)

        assert len(small.power.ups_units) == 1

    def test_large_facility_dimensions(self) -> None:
        """Large facility: 4 zones, 60 racks, ~600 kW of IT load, 4 UPS units."""
        large = load_datacenter_config("large")

        assert len(large.zones) == 4

        rack_count = sum(len(zone.racks) for zone in large.zones)
        assert rack_count == 60

        it_load_kw = sum(
            rack.it_load_kw for zone in large.zones for rack in zone.racks
        )
        assert it_load_kw == pytest.approx(600.0)

        assert len(large.power.ups_units) == 4

    def test_large_facility_has_h1_zone(self) -> None:
        """Large facility has exactly one H1 zone whose racks each draw 20 kW."""
        large = load_datacenter_config("large")

        h1_zones = [zone for zone in large.zones if zone.ashrae_class == "H1"]
        assert len(h1_zones) == 1

        # Every rack in the single H1 zone carries the high-density load
        high_density_zone = h1_zones[0]
        for rack in high_density_zone.racks:
            assert rack.it_load_kw == 20.0


# ===========================================================================
# Config-to-Environment Tests
# ===========================================================================
class TestConfigToEnvironment:
    """Checks that a loaded config can be handed to the environment and run."""

    @pytest.mark.parametrize("config_name", ["default", "small", "large"])
    def test_env_runs_with_config(self, config_name: str) -> None:
        """Reset with the config, then take one check_status step.

        Both calls must return a DcOpsObservation, and the episode must not
        be finished straight after reset.
        """
        env = DcOpsEnvironment()

        first_obs = env.reset(config=load_datacenter_config(config_name))
        assert isinstance(first_obs, DcOpsObservation)
        assert first_obs.done is False

        next_obs = env.step(DcOpsAction(command="check_status"))
        assert isinstance(next_obs, DcOpsObservation)

    def test_small_facility_pue(self) -> None:
        """PUE reported at reset for the small facility lies in (1.1, 2.5)."""
        env = DcOpsEnvironment()
        initial_obs = env.reset(config=load_datacenter_config("small"))

        pue = initial_obs.metadata["pue"]
        assert 1.1 < pue < 2.5, f"PUE {pue} out of realistic range"

    def test_large_facility_total_load(self) -> None:
        """Total IT load reported at reset is within 1% of 600 kW."""
        env = DcOpsEnvironment()
        initial_obs = env.reset(config=load_datacenter_config("large"))

        reported_it_kw = initial_obs.metadata["total_it_load_kw"]
        assert reported_it_kw == pytest.approx(600.0, rel=0.01)


# ===========================================================================
# Full Episode Playback: Thermal Scenarios
# ===========================================================================
class TestEpisodePlaybackThermal:
    """Full episode playback with known-good action sequences for thermal scenarios."""

    def test_a1_optimal_episode(self) -> None:
        """A1 (Cooling Setpoint Optimization): raise setpoints to reduce PUE.

        Optimal sequence: check_status → raise each CRAC setpoint → wait for convergence.
        The PUE read after the wait phase must be strictly lower than the PUE
        reported at reset.
        """
        env = DcOpsEnvironment()
        obs = env.reset(scenario="A1")
        pue_initial = obs.metadata["pue"]

        # 1. Check status first (procedure bonus)
        obs = env.step(DcOpsAction(command="check_status"))
        assert not obs.done

        # 2. Raise setpoints on all 4 CRACs from 15°C → 24°C (aggressive)
        for crac_id in ["CRAC-1", "CRAC-2", "CRAC-3", "CRAC-4"]:
            obs = env.step(DcOpsAction(command=f"adjust_setpoint {crac_id} 24"))

        # 3. Wait for temps to converge (stop early if the episode ends)
        for _ in range(5):
            obs = env.step(DcOpsAction(command="wait"))
            if obs.done:
                break

        pue_final = obs.metadata["pue"]
        # PUE should have improved (lower is better)
        assert pue_final < pue_initial, (
            f"PUE should improve: {pue_initial:.2f} → {pue_final:.2f}"
        )

    def test_a2_optimal_episode(self) -> None:
        """A2 (Thermal Event Response): diagnose CRAC-3, compensate with remaining units.

        Optimal: diagnose → increase fan speeds on survivors → adjust setpoints.
        Passes when the cumulative reward at the end stays above -5.0.
        """
        env = DcOpsEnvironment()
        obs = env.reset(scenario="A2")

        # 1. Diagnose the failed CRAC; the report must mention the compressor
        #    (case-insensitive match covers both "COMPRESSOR" and "compressor")
        obs = env.step(DcOpsAction(command="diagnose CRAC-3"))
        assert "compressor" in obs.action_result.lower(), (
            f"Diagnosis should mention the compressor: {obs.action_result!r}"
        )

        # 2. Increase fan speed on remaining CRACs
        for crac_id in ["CRAC-1", "CRAC-2", "CRAC-4"]:
            obs = env.step(DcOpsAction(command=f"set_fan_speed {crac_id} 100"))

        # 3. Lower setpoints slightly on surviving units to compensate
        for crac_id in ["CRAC-1", "CRAC-2", "CRAC-4"]:
            obs = env.step(DcOpsAction(command=f"adjust_setpoint {crac_id} 16"))

        # 4. Wait for stabilization (stop early if the episode ends)
        for _ in range(8):
            obs = env.step(DcOpsAction(command="wait"))
            if obs.done:
                break

        # Should resolve or be close — temps within recommended for 2+ steps
        # Even if not fully resolved, reward should be reasonable
        assert obs.metadata["cumulative_reward"] > -5.0

    def test_a4_episode_with_load_shedding(self) -> None:
        """A4 (CRAC Failure Cascade): diagnose both, compensate, shed load.

        This is the hardest thermal scenario — two CRACs down.
        Passes when the cumulative reward at the end stays above -10.0.
        """
        env = DcOpsEnvironment()
        obs = env.reset(scenario="A4")

        # 1. Diagnose both failed units
        obs = env.step(DcOpsAction(command="diagnose CRAC-1"))
        obs = env.step(DcOpsAction(command="diagnose CRAC-3"))

        # 2. Max out surviving CRACs
        obs = env.step(DcOpsAction(command="set_fan_speed CRAC-2 100"))
        obs = env.step(DcOpsAction(command="set_fan_speed CRAC-4 100"))
        obs = env.step(DcOpsAction(command="adjust_setpoint CRAC-2 15"))
        obs = env.step(DcOpsAction(command="adjust_setpoint CRAC-4 15"))

        # 3. Shed load on hottest racks
        for rack_id in ["A-01", "A-02", "B-01", "B-02"]:
            obs = env.step(DcOpsAction(command=f"set_rack_load {rack_id} 4"))

        # 4. Wait and monitor (stop early if the episode ends)
        for _ in range(10):
            obs = env.step(DcOpsAction(command="wait"))
            if obs.done:
                break

        # Hard scenario — may not fully resolve, but should make progress
        assert obs.metadata["cumulative_reward"] > -10.0


# ===========================================================================
# Full Episode Playback: Power Scenarios
# ===========================================================================
class TestEpisodePlaybackPower:
    """Scripted end-to-end episodes for the power scenarios (B1, B3, B4)."""

    def test_b1_optimal_episode(self) -> None:
        """B1 (UPS Alarm Response): diagnose UPS-1, then acknowledge the alarm.

        The episode must still be open after the diagnosis, finish on the
        acknowledgement, and the final step reward must exceed 0.5.
        """
        env = DcOpsEnvironment()
        env.reset(scenario="B1")

        # Step 1: inspect the UPS; this alone must not end the episode
        after_diagnose = env.step(DcOpsAction(command="diagnose UPS-1"))
        assert not after_diagnose.done

        # Step 2: acknowledging the alarm completes the scenario
        after_ack = env.step(DcOpsAction(command="acknowledge_alarm"))
        assert after_ack.done, "B1 should resolve after diagnose + acknowledge"

        # Speed bonus: (10 - 2) / 10 = 0.8
        assert after_ack.reward > 0.5, "Should have significant speed bonus"

    def test_b3_optimal_episode(self) -> None:
        """B3 (Generator Test Protocol): run the test sequence in order.

        check_status → start_generator → wait → diagnose GEN-1 →
        stop_generator → wait → acknowledge_alarm. The episode must stay
        open after every non-wait step before the final acknowledgement.
        """
        env = DcOpsEnvironment()
        env.reset(scenario="B3")

        # (command, whether to assert the episode is still open afterwards).
        # The two "wait" steps give the generator time to start
        # (30s game time per step, gen startup ~17s) and then to cool down.
        protocol = (
            ("check_status", True),
            ("start_generator", True),
            ("wait", False),
            ("diagnose GEN-1", True),
            ("stop_generator", True),
            ("wait", False),
        )
        for command, must_remain_open in protocol:
            step_obs = env.step(DcOpsAction(command=command))
            if must_remain_open:
                assert not step_obs.done

        final_obs = env.step(DcOpsAction(command="acknowledge_alarm"))
        assert final_obs.done, "B3 should resolve after full protocol"

    def test_b4_episode_with_load_shedding(self) -> None:
        """B4 (Power Failure Cascade): diagnose, shed load, wait for the generator.

        Generator starts automatically on utility loss. The agent diagnoses
        both UPS units, sheds load on two racks, checks the generator, then
        waits. Passes when the cumulative reward ends above -10.0.
        """
        env = DcOpsEnvironment()
        obs = env.reset(scenario="B4")

        opening_moves = (
            # Understand the situation
            "diagnose UPS-1",
            "diagnose UPS-2",
            # Shed non-critical load to extend battery
            "set_rack_load A-01 4",
            "set_rack_load B-01 4",
            # Check generator status
            "diagnose GEN-1",
        )
        for command in opening_moves:
            obs = env.step(DcOpsAction(command=command))

        # Let the generator come online and stabilize; stop once the episode ends
        waits_remaining = 14
        while waits_remaining > 0:
            obs = env.step(DcOpsAction(command="wait"))
            waits_remaining -= 1
            if obs.done:
                break

        # B4 is hard — may or may not resolve, but should make progress
        assert obs.metadata["cumulative_reward"] > -10.0


# ===========================================================================
# Reward Signal Quality
# ===========================================================================
class TestRewardSignalQuality:
    """Sanity checks on the reward signal over whole episodes."""

    def test_rewards_bounded_per_step(self) -> None:
        """Each step reward in a wait-only A2 episode stays within [-2, 2]."""
        env = DcOpsEnvironment()
        env.reset(scenario="A2")

        steps_left = 15
        while steps_left > 0:
            steps_left -= 1
            step_obs = env.step(DcOpsAction(command="wait"))
            # Base reward is [-1, 1], speed bonus can add up to 1.0
            assert -2.0 <= step_obs.reward <= 2.0, (
                f"Reward {step_obs.reward} out of bounds"
            )
            if step_obs.done:
                break

    def test_good_actions_beat_bad_actions(self) -> None:
        """On B1, diagnose + acknowledge must out-score an all-wait episode."""
        env = DcOpsEnvironment()

        # Good episode: diagnose then fix
        env.reset(scenario="B1")
        env.step(DcOpsAction(command="diagnose UPS-1"))
        good_final = env.step(DcOpsAction(command="acknowledge_alarm"))
        r_good = good_final.metadata["cumulative_reward"]

        # Bad episode: do nothing but wait, for up to 10 steps
        env.reset(scenario="B1")
        for _ in range(10):
            bad_final = env.step(DcOpsAction(command="wait"))
            if bad_final.done:
                break
        r_bad = bad_final.metadata["cumulative_reward"]

        assert r_good > r_bad, f"Good ({r_good:.2f}) should beat bad ({r_bad:.2f})"

    def test_procedure_bonus_visible(self) -> None:
        """On A1, opening with check_status must beat opening with wait.

        Both three-step episodes share the last two actions
        (adjust_setpoint CRAC-1 22, then wait); only the first action differs.
        """
        env = DcOpsEnvironment()

        def play(first_command: str) -> float:
            # Run one 3-step A1 episode and return its cumulative reward
            env.reset(scenario="A1")
            env.step(DcOpsAction(command=first_command))
            env.step(DcOpsAction(command="adjust_setpoint CRAC-1 22"))
            last_obs = env.step(DcOpsAction(command="wait"))
            return last_obs.metadata["cumulative_reward"]

        # With procedure: check_status → adjust_setpoint → wait
        r_with = play("check_status")
        # Without procedure: wait → adjust_setpoint → wait (no check_status)
        r_without = play("wait")

        assert r_with > r_without, (
            f"Procedure bonus not visible: with={r_with:.3f} vs without={r_without:.3f}"
        )

    @pytest.mark.parametrize("scenario_id", registered_scenario_ids())
    def test_no_nan_rewards(self, scenario_id: str) -> None:
        """Up to five check_status steps must yield neither NaN nor infinite rewards."""
        from math import isinf, isnan

        env = DcOpsEnvironment()
        env.reset(scenario=scenario_id)

        for _ in range(5):
            step_obs = env.step(DcOpsAction(command="check_status"))
            reward = step_obs.reward
            assert not isnan(reward), f"NaN reward in {scenario_id}"
            assert not isinf(reward), f"Inf reward in {scenario_id}"
            if step_obs.done:
                break


# ===========================================================================
# Cross-Facility Scenario Tests
# ===========================================================================
class TestCrossFacility:
    """Checks that the environment operates on non-default facility configs."""

    def test_scenario_with_small_facility(self) -> None:
        """Basic commands work on the small facility with a 5-step budget.

        No scenario is selected here; the environment is reset with the
        small config only, then check_status and diagnose CRAC-1 are issued.
        """
        env = DcOpsEnvironment()
        small_cfg = load_datacenter_config("small")

        # Run without a scenario, just with small config
        initial_obs = env.reset(config=small_cfg, step_budget=5)
        assert initial_obs.done is False

        # check_status output should mention "status" (case-insensitive)
        status_obs = env.step(DcOpsAction(command="check_status"))
        assert "status" in status_obs.action_result.lower()

        # Diagnosing CRAC-1 should produce a diagnostic report
        diagnose_obs = env.step(DcOpsAction(command="diagnose CRAC-1"))
        assert "Diagnostic Report" in diagnose_obs.action_result

    def test_large_facility_steady_state(self) -> None:
        """Large facility reports a plausible PUE and positive loads at reset."""
        env = DcOpsEnvironment()
        large_cfg = load_datacenter_config("large")
        metrics = env.reset(config=large_cfg, step_budget=10).metadata

        pue = metrics["pue"]
        assert 1.1 < pue < 3.0, f"Large facility PUE {pue} unrealistic"

        cooling_kw = metrics["total_cooling_power_kw"]
        it_kw = metrics["total_it_load_kw"]
        assert cooling_kw > 0
        assert it_kw > 0


# ===========================================================================
# Episode Metrics & Physics Consistency
# ===========================================================================
class TestEpisodeMetrics:
    """Validate physics consistency across episode metrics."""

    def test_pue_always_above_one(self) -> None:
        """PUE should always be >= 1.0 (physically impossible otherwise).

        Checked on every step of a wait-only A1 episode of up to 10 steps.
        """
        env = DcOpsEnvironment()
        env.reset(scenario="A1")

        for _ in range(10):
            obs = env.step(DcOpsAction(command="wait"))
            assert obs.metadata["pue"] >= 1.0
            if obs.done:
                break

    def test_higher_load_raises_temperature(self) -> None:
        """Adding rack load should cause temperature to rise.

        Compares zone_a's cold-aisle temperature at reset with the value
        after raising three racks' load and waiting seven steps.
        """
        env = DcOpsEnvironment()
        obs = env.reset()
        t_before = obs.metadata["zones"]["zone_a"]["cold_aisle_temp_c"]

        # Significantly increase multiple racks' load
        env.step(DcOpsAction(command="set_rack_load A-01 15"))
        env.step(DcOpsAction(command="set_rack_load A-02 15"))
        env.step(DcOpsAction(command="set_rack_load A-03 15"))

        # Wait for thermal response
        for _ in range(7):
            obs = env.step(DcOpsAction(command="wait"))

        t_after = obs.metadata["zones"]["zone_a"]["cold_aisle_temp_c"]
        # Separator between the two readings keeps the failure message legible
        assert t_after > t_before, (
            f"Temp should rise with more load: {t_before:.1f} → {t_after:.1f}"
        )

    def test_sim_time_monotonically_increases(self) -> None:
        """Simulation time should always advance.

        Each of five wait steps must report a sim_time_s strictly greater
        than the previous observation's value.
        """
        env = DcOpsEnvironment()
        obs = env.reset()
        prev_time = obs.metadata["sim_time_s"]

        for _ in range(5):
            obs = env.step(DcOpsAction(command="wait"))
            assert obs.metadata["sim_time_s"] > prev_time
            prev_time = obs.metadata["sim_time_s"]


# ===========================================================================
# Performance Tests
# ===========================================================================
class TestIntegrationPerformance:
    """Wall-clock budget checks for episodes on each facility size and scenario."""

    @pytest.mark.parametrize("config_name", ["default", "small", "large"])
    def test_episode_completes_fast(self, config_name: str) -> None:
        """Reset plus ten wait steps must finish in under 10 seconds.

        Config loading and environment construction happen before the timer
        starts, so they are not counted.
        """
        facility_cfg = load_datacenter_config(config_name)
        env = DcOpsEnvironment()
        wait_action_count = 10

        started_at = time.perf_counter()
        env.reset(config=facility_cfg, step_budget=10)
        steps_done = 0
        while steps_done < wait_action_count:
            env.step(DcOpsAction(command="wait"))
            steps_done += 1
        elapsed = time.perf_counter() - started_at

        assert elapsed < 10.0, (
            f"{config_name} facility 10-step episode took {elapsed:.2f}s, should be <10s"
        )

    def test_all_scenarios_full_episode_under_10s(self) -> None:
        """Wait-only episodes across every registered scenario stay within budget.

        Each scenario is stepped with "wait" until done or 20 steps, and the
        combined wall-clock time must be below 15 seconds.

        NOTE(review): the test name says 10s but the asserted limit is 15.0s —
        confirm which budget is intended.
        """
        env = DcOpsEnvironment()
        per_scenario_step_cap = 20  # Max budget across scenarios

        clock_start = time.perf_counter()
        for scenario in registered_scenario_ids():
            env.reset(scenario=scenario)
            for _ in range(per_scenario_step_cap):
                step_obs = env.step(DcOpsAction(command="wait"))
                if step_obs.done:
                    break
        total_elapsed = time.perf_counter() - clock_start

        assert total_elapsed < 15.0, (
            f"All {len(registered_scenario_ids())} scenarios took {total_elapsed:.2f}s"
        )