"""Phase 2 verification tests for SentinelOpsArena environment.""" from sentinelops_arena.environment import SentinelOpsArena from sentinelops_arena.models import SentinelAction, AgentRole # ------------------------------------------------------------------- # Basic environment tests # ------------------------------------------------------------------- def test_reset(): env = SentinelOpsArena() obs = env.reset(seed=42) assert obs.done is False assert obs.current_agent == AgentRole.ATTACKER assert obs.tick == 0 assert env.state.step_count == 0 print("PASS: test_reset") def test_turn_order(): env = SentinelOpsArena() obs = env.reset(seed=42) assert obs.current_agent == AgentRole.ATTACKER obs = env.step(SentinelAction(agent=AgentRole.ATTACKER, action_type="pass")) assert obs.current_agent == AgentRole.WORKER obs = env.step(SentinelAction( agent=AgentRole.WORKER, action_type="respond", response_text="Hello" )) assert obs.current_agent == AgentRole.OVERSIGHT obs = env.step(SentinelAction( agent=AgentRole.OVERSIGHT, action_type="approve", flag=False )) assert obs.current_agent == AgentRole.ATTACKER assert env.tick == 1 # tick advanced after full rotation print("PASS: test_turn_order") def test_full_episode(): env = SentinelOpsArena() obs = env.reset(seed=42) steps = 0 while not obs.done: agent = obs.current_agent if agent == AgentRole.ATTACKER: action = SentinelAction(agent=AgentRole.ATTACKER, action_type="pass") elif agent == AgentRole.WORKER: action = SentinelAction( agent=AgentRole.WORKER, action_type="respond", response_text="Done", ) else: action = SentinelAction( agent=AgentRole.OVERSIGHT, action_type="approve", flag=False ) obs = env.step(action) steps += 1 assert env.tick == 30, f"Expected tick=30, got {env.tick}" assert steps == 90, f"Expected 90 steps, got {steps}" assert obs.done is True print("PASS: test_full_episode") def test_wrong_turn_rejected(): env = SentinelOpsArena() env.reset(seed=42) # Try worker action when it's attacker's turn obs = env.step(SentinelAction( agent=AgentRole.WORKER, action_type="respond", response_text="Wrong turn" )) assert obs.reward == -1.0 print("PASS: test_wrong_turn_rejected") # ------------------------------------------------------------------- # MCP routing tests # ------------------------------------------------------------------- def test_mcp_list_tools(): from openenv.core.env_server.mcp_types import ListToolsAction env = SentinelOpsArena() env.reset(seed=42) obs = env.step(ListToolsAction()) tool_names = [t.name for t in obs.tools] assert "lookup_customer" in tool_names assert "launch_attack" in tool_names assert "issue_refund" in tool_names assert "flag_action" in tool_names # Reserved names must NOT appear assert "reset" not in tool_names assert "step" not in tool_names assert "state" not in tool_names assert "close" not in tool_names print(f"PASS: test_mcp_list_tools ({len(tool_names)} tools)") def test_mcp_call_tool(): from openenv.core.env_server.mcp_types import CallToolAction env = SentinelOpsArena() env.reset(seed=42) obs = env.step(CallToolAction( tool_name="lookup_customer", arguments={"customer_id": "C000"} )) assert obs.tool_name == "lookup_customer" assert obs.result is not None print("PASS: test_mcp_call_tool") # ------------------------------------------------------------------- # Attack tests # ------------------------------------------------------------------- def test_attacker_launch_attack(): env = SentinelOpsArena() env.reset(seed=42) obs = env.step(SentinelAction( agent=AgentRole.ATTACKER, action_type="launch_attack", parameters={ "attack_type": "schema_drift", "target_system": "crm", "old_field": "name", "new_field": "full_name", }, )) # Attacker turn done, should be worker's turn now assert obs.current_agent == AgentRole.WORKER # Verify schema drift took effect schema = env.crm.get_schema() assert "full_name" in schema["fields"] assert "name" not in schema["fields"] print("PASS: test_attacker_launch_attack") def test_worker_lookup_after_drift(): env = SentinelOpsArena() env.reset(seed=42) # Attacker applies schema drift env.step(SentinelAction( agent=AgentRole.ATTACKER, action_type="launch_attack", parameters={ "attack_type": "schema_drift", "target_system": "crm", "old_field": "name", "new_field": "full_name", }, )) # Worker looks up customer obs = env.step(SentinelAction( agent=AgentRole.WORKER, action_type="lookup_customer", parameters={"customer_id": "C000"}, )) # Should still succeed (field renamed but lookup_customer uses _apply_field_map) assert obs.last_action_result is not None print("PASS: test_worker_lookup_after_drift") # ------------------------------------------------------------------- # State tests # ------------------------------------------------------------------- def test_state_tracking(): env = SentinelOpsArena() env.reset(seed=42) assert env.state.tick == 0 assert env.state.step_count == 0 assert env.state.tasks_total == 30 # Do one full rotation env.step(SentinelAction(agent=AgentRole.ATTACKER, action_type="pass")) env.step(SentinelAction( agent=AgentRole.WORKER, action_type="respond", response_text="ok" )) env.step(SentinelAction( agent=AgentRole.OVERSIGHT, action_type="approve", flag=False )) assert env.state.tick == 1 assert env.state.step_count == 3 print("PASS: test_state_tracking") # ------------------------------------------------------------------- # HTTP server test # ------------------------------------------------------------------- def test_create_app(): from openenv.core.env_server.http_server import create_app from sentinelops_arena.models import SentinelAction, SentinelObservation app = create_app( SentinelOpsArena, SentinelAction, SentinelObservation, env_name="sentinelops_arena", ) assert app is not None print("PASS: test_create_app") # ------------------------------------------------------------------- # Run all # ------------------------------------------------------------------- if __name__ == "__main__": tests = [ test_reset, test_turn_order, test_full_episode, test_wrong_turn_rejected, test_mcp_list_tools, test_mcp_call_tool, test_attacker_launch_attack, test_worker_lookup_after_drift, test_state_tracking, test_create_app, ] passed = 0 failed = 0 for test in tests: try: test() passed += 1 except Exception as e: print(f"FAIL: {test.__name__}: {e}") failed += 1 print(f"\n{'='*50}") print(f"Results: {passed}/{passed + failed} passed") if failed == 0: print("ALL PHASE 2 TESTS PASSED") else: print(f"{failed} test(s) FAILED")