Demo / tests /test_task5.py
Ajayyy00
Initial commit of CyberSOC upgraded RLVR environment
57e71f8
"""Tests for Task 5 — Episode Sandbox."""
import os
import sys
import pytest
# Put the directory one level above this test file's directory on the import
# path (at the front) so that the ``server`` package imported below resolves.
_PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if _PROJECT_ROOT not in sys.path:
    sys.path.insert(0, _PROJECT_ROOT)
from server.episode_sandbox import (
EpisodeSandbox,
EpisodeTimeout,
MAX_STEPS_PER_EPISODE,
)
class _MockEnv:
    """Bare stand-in for the environment object handed to ``EpisodeSandbox``.

    It only carries the four private attributes assigned in ``__init__``.
    The tests in this file tamper with ``_step_count`` and ``_task_def``;
    presumably the sandbox snapshots these fields -- confirm against
    ``server.episode_sandbox``.
    """

    def __init__(self):
        # Fresh containers per instance, so one test's tampering cannot leak
        # into another test's environment.
        self._task_def = dict(difficulty="easy", attack_chain=[])
        self._live_requirements = dict(must_kill=[])
        self._threat_graph = None
        self._step_count = 0
def test_sandbox_enters_and_exits_cleanly():
    """Entering and leaving the sandbox around an empty body must not raise."""
    mock_env = _MockEnv()
    sandbox = EpisodeSandbox(mock_env)
    with sandbox:
        pass  # nothing happens inside; only enter/exit are exercised
def test_elapsed_seconds_positive():
    """``elapsed_seconds()`` must advance while the sandbox is active.

    Spins until the sandbox clock has moved more than 1 ms past its first
    reading, then checks that the reading is strictly positive.  The spin is
    capped by an independent monotonic deadline so that a stuck sandbox clock
    makes this test fail instead of hanging the whole test run.
    """
    # Local import: the file's top-level imports do not include ``time``.
    import time

    env = _MockEnv()
    with EpisodeSandbox(env) as sb:
        first_reading = sb.elapsed_seconds()
        target = first_reading + 1e-3

        # Generous wall-clock budget; a healthy clock leaves the loop after
        # roughly a millisecond.
        deadline = time.monotonic() + 2.0
        while sb.elapsed_seconds() <= target:
            assert time.monotonic() < deadline, (
                "elapsed_seconds() did not advance by 1 ms within 2 s"
            )

        assert sb.elapsed_seconds() > 0.0
def test_step_limit_raises_at_max():
    """``check_step_limit`` raises ``EpisodeTimeout`` at ``MAX_STEPS_PER_EPISODE``."""
    mock_env = _MockEnv()
    sandbox = EpisodeSandbox(mock_env)
    step_at_limit = MAX_STEPS_PER_EPISODE
    with pytest.raises(EpisodeTimeout):
        sandbox.check_step_limit(step_at_limit)
def test_step_limit_ok_below_max():
    """One step short of ``MAX_STEPS_PER_EPISODE`` must pass without raising."""
    mock_env = _MockEnv()
    sandbox = EpisodeSandbox(mock_env)
    last_allowed_step = MAX_STEPS_PER_EPISODE - 1
    # No assertion needed: the test fails if this call raises.
    sandbox.check_step_limit(last_allowed_step)
def test_state_integrity_violation_detected():
    """Changing ``_step_count`` inside the sandbox must be reported as hacking."""
    mock_env = _MockEnv()
    with EpisodeSandbox(mock_env) as sandbox:
        # Tamper with a protected field while the sandbox is active.
        mock_env._step_count = 9999
    hacked = sandbox.was_hacked()
    assert hacked is True
def test_state_rollback_on_violation():
    """A tampered ``_task_def`` must equal its original value after sandbox exit."""
    mock_env = _MockEnv()
    # Shallow snapshot taken before the sandbox is entered.
    pristine_task_def = {**mock_env._task_def}
    with EpisodeSandbox(mock_env):
        mock_env._task_def["difficulty"] = "hacked"
    assert mock_env._task_def == pristine_task_def
def test_no_false_hacking_on_clean_run():
    """A sandbox whose body touches nothing must not report hacking."""
    mock_env = _MockEnv()
    with EpisodeSandbox(mock_env) as sandbox:
        pass  # clean run: no field is modified
    hacked = sandbox.was_hacked()
    assert hacked is False
def test_hacking_report_lists_violated_fields():
    """The hacking report must mention ``_step_count`` after it was changed."""
    mock_env = _MockEnv()
    with EpisodeSandbox(mock_env) as sandbox:
        mock_env._step_count = 42
    violations = sandbox.hacking_report()
    # At least one report entry has to name the tampered attribute.
    assert any("_step_count" in entry for entry in violations)