import sys

# Put the current working directory first on the import path so that the
# ``src`` package imported below resolves relative to where pytest is started.
sys.path.insert(0, ".")

import pytest
import math
from src.pll_sim import SRFPLLSimulator, OMEGA0
from src.env import PLLAttackEnv
from src.models import Action
from src.attacks import AttackGenerator, sample_sinusoidal_params
import numpy as np

# Fixed action submitted on every step by tests that do not care about the
# agent's decisions.
DUMMY_ACTION = Action(
    attack_detected=False,
    attack_type=0,
    confidence=0.5,
    protective_action=0,
)

# Safety cap on the number of steps a test will drive an episode for.  It is
# deliberately far above the expected 500-step episode length so that it only
# triggers when the environment fails to report ``done`` at all; without it a
# broken environment would hang the test run instead of failing it.
MAX_EPISODE_STEPS = 10_000


def _run_episode(env, choose_action, max_steps=MAX_EPISODE_STEPS):
    """Step *env* until it reports ``done``, yielding each step's reward.

    Args:
        env: An environment that has already been reset.
        choose_action: Zero-argument callable invoked immediately before each
            step to obtain the action, so it may inspect the current state of
            the environment.
        max_steps: Upper bound on the number of steps taken.

    Yields:
        The reward object returned by each ``env.step`` call, in order.  The
        reward of the terminating step is yielded before the generator ends.

    The test is failed via ``pytest.fail`` if ``done`` is still false after
    *max_steps* steps.
    """
    for _ in range(max_steps):
        _, reward, done, _ = env.step(choose_action())
        yield reward
        if done:
            return
    pytest.fail(f"Episode did not terminate within {max_steps} steps")


def test_episode_terminates_at_500():
    """Episode must terminate with done=True at step 500."""
    env = PLLAttackEnv()
    env.reset(task_id=0, seed=42)

    # One reward is yielded per step, so counting them counts the steps.
    steps = sum(1 for _ in _run_episode(env, lambda: DUMMY_ACTION))

    assert steps == 500, f"Episode ended at step {steps}, expected 500"


def test_all_tasks_reset():
    """All three tasks must reset without error."""
    env = PLLAttackEnv()
    for task_id in range(3):
        obs = env.reset(task_id=task_id, seed=42)
        assert obs.task_id == task_id
        assert obs.step == 0
        assert len(obs.vq_window) == 20


def test_oracle_agent_nonzero_reward():
    """An oracle agent should accumulate positive reward."""
    env = PLLAttackEnv()
    env.reset(task_id=0, seed=42)

    def oracle_action():
        # Echo the environment's own ground truth back at full confidence:
        # the real attack type while an attack is active, otherwise type 0.
        return Action(
            attack_detected=env.attack_active,
            attack_type=env.true_attack_type if env.attack_active else 0,
            confidence=1.0,
            protective_action=0,
        )

    total_reward = 0.0
    for reward in _run_episode(env, oracle_action):
        total_reward += reward.total

    assert total_reward > 0, f"Oracle agent got non-positive reward: {total_reward}"


def test_reward_bounds():
    """Reward total must stay within [-2.5, 1.5] per step."""
    env = PLLAttackEnv()
    env.reset(task_id=2, seed=42)

    for reward in _run_episode(env, lambda: DUMMY_ACTION):
        assert -2.5 <= reward.total <= 1.5, (
            f"Reward out of bounds: {reward.total}"
        )