Spaces:
Running
Running
| """End-to-end tests for the Advanced and Max runners. | |
| The runners live in ``sre_gym.strategy.runner`` (Advanced) and ``sre_gym.operations.runner`` (Max). | |
| These tests assert that: | |
| - All 3 Advanced reference scenarios can be run with the default | |
| scripted-optimal policy and produce a coherent ``AdvancedResult``. | |
| - Every Max chaos pattern is reachable by ``run_max(...)`` against the | |
| ``ecommerce_vibecoded_saas`` family without raising. | |
| - The runners produce deterministic outputs given the same seed. | |
| """ | |
| from __future__ import annotations | |
| import pytest | |
| from sre_gym.strategy.runner import ( | |
| AdvancedResult, | |
| list_advanced_scenarios, | |
| run_advanced, | |
| ) | |
| from sre_gym.operations.runner import ( | |
| CHAOS_PATTERNS, | |
| MaxResult, | |
| list_max_families, | |
| run_max, | |
| ) | |
| # ---------- Advanced runner ---------- | |
def test_list_advanced_scenarios_has_three_entries() -> None:
    """The three Advanced reference scenarios must all be listed.

    This is a subset check, so extra scenarios returned by
    ``list_advanced_scenarios()`` do not fail the test.
    """
    required = {
        "cascading_release_train",
        "observability_pipeline_outage",
        "supabase_rls_silent_leak",
    }
    available = set(list_advanced_scenarios())
    assert required.issubset(available)
@pytest.mark.parametrize(
    "scenario_id",
    [
        "cascading_release_train",
        "observability_pipeline_outage",
        "supabase_rls_silent_leak",
    ],
)
def test_advanced_scenario_runs_end_to_end(scenario_id: str) -> None:
    """Run one Advanced reference scenario and sanity-check its result.

    ``scenario_id`` is supplied by the parametrization above. Without it
    pytest would try to resolve ``scenario_id`` as a fixture and error out
    during setup instead of running the scenario.
    """
    result = run_advanced(scenario_id, seed=1)
    assert isinstance(result, AdvancedResult)
    assert result.scenario_id == scenario_id
    assert len(result.phases) >= 1
    # Every phase must produce a final score in [0.0, 1.0].
    for phase in result.phases:
        assert 0.0 <= phase.final_score <= 1.0
    # The aggregate decay factor and raw mean must also stay within [0.0, 1.0].
    assert 0.0 <= result.horizon_decay_factor <= 1.0
    assert 0.0 <= result.raw_mean_reward <= 1.0
def test_advanced_runner_is_deterministic_for_same_seed() -> None:
    """Two Advanced runs of one scenario with the same seed must agree.

    Compares the final reward, the raw mean reward and the number of phases.
    """
    first = run_advanced("cascading_release_train", seed=42)
    second = run_advanced("cascading_release_train", seed=42)
    for field in ("final_reward", "raw_mean_reward"):
        assert getattr(first, field) == getattr(second, field)
    assert len(first.phases) == len(second.phases)
def test_advanced_emits_log_lines_when_callback_provided() -> None:
    """Lines passed to ``on_log`` must mention both phases and the resolution.

    For each expected marker, at least one collected line has to contain it.
    """
    log_lines: list[str] = []
    run_advanced("cascading_release_train", seed=1, on_log=log_lines.append)
    for marker in ("phase 1", "phase 2", "declare_resolved"):
        assert any(marker in entry for entry in log_lines)
| # ---------- Max runner ---------- | |
def test_list_max_families_has_ecommerce_family() -> None:
    """``list_max_families()`` must include ``ecommerce_vibecoded_saas``."""
    assert "ecommerce_vibecoded_saas" in list_max_families()
# NOTE(review): assumes iterating CHAOS_PATTERNS yields the pattern names as
# strings (list/tuple/set of names, or a dict keyed by name) — confirm.
# sorted() keeps the generated test IDs in a stable order across runs.
@pytest.mark.parametrize("chaos", sorted(CHAOS_PATTERNS))
def test_max_chaos_pattern_runs_without_error(chaos: str) -> None:
    """Every documented chaos pattern must be reachable from run_max.

    ``chaos`` is supplied by the parametrization above. Without it pytest
    would try to resolve ``chaos`` as a fixture and error out during setup.
    """
    result = run_max("ecommerce_vibecoded_saas", chaos=chaos, seed=1)
    assert isinstance(result, MaxResult)
    assert result.chaos == chaos
    assert 0.0 <= result.final_reward <= 1.0
    assert 0 <= result.tick_count <= 25
def test_max_runner_is_deterministic_for_same_seed() -> None:
    """Two Max runs with the same family, chaos pattern and seed must agree.

    Compares the final reward, the cumulative reward and the tick count.
    """
    first = run_max("ecommerce_vibecoded_saas", chaos="deploy_regression", seed=42)
    second = run_max("ecommerce_vibecoded_saas", chaos="deploy_regression", seed=42)
    for field in ("final_reward", "cumulative_reward", "tick_count"):
        assert getattr(first, field) == getattr(second, field)
def test_max_security_classified_chaos_carries_classification() -> None:
    """Security chaos patterns must report ``classification == 'security'``.

    Covers rls_silent_leak, oauth_supply_chain_pivot and
    cdn_cache_contamination, so the UI can render a security badge from
    the ``MaxResult``.
    """
    security_patterns = (
        "rls_silent_leak",
        "oauth_supply_chain_pivot",
        "cdn_cache_contamination",
    )
    for chaos in security_patterns:
        result = run_max("ecommerce_vibecoded_saas", chaos=chaos, seed=1)
        assert result.classification == "security", (
            f"{chaos} must be classification='security' (got {result.classification!r})"
        )
def test_max_per_step_env_returns_observation_on_reset() -> None:
    """``MaxRunnerEnv.reset`` must return a valid initial observation.

    MaxRunnerEnv is the per-step environment used by
    SREGym(tier=Tier.MAX).reset/step. The observation has to start at tick
    zero, echo the requested family and chaos pattern, and list
    ``api-gateway`` among its services.
    """
    from sre_gym.operations.runner import MaxRunnerEnv

    runner_env = MaxRunnerEnv(family_id="ecommerce_vibecoded_saas")
    observation = runner_env.reset(chaos="deploy_regression", seed=1)
    assert observation.tick_count == 0
    assert observation.family_id == "ecommerce_vibecoded_saas"
    assert observation.chaos == "deploy_regression"
    assert "api-gateway" in observation.services
def test_max_per_step_env_steps() -> None:
    """One ``step`` after ``reset`` must advance the tick and log the action.

    A ``query_logs`` action is sent for ``orders-service``; the returned
    observation has to report ``tick_count == 1`` and a ``last_log``
    containing ``"query"``.
    """
    from sre_gym.operations.runner import MaxRunnerEnv

    runner_env = MaxRunnerEnv()
    runner_env.reset(chaos="deploy_regression", seed=1)
    action = {"action_type": "query_logs", "service": "orders-service"}
    observation = runner_env.step(action)
    assert observation.tick_count == 1
    assert "query" in observation.last_log