"""Tests for the Feature Flag Cleanup environment.""" import sys import os sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from env.environment import FeatureFlagCleanupEnv def test_reset(): env = FeatureFlagCleanupEnv() obs = env.reset("easy") assert "flag_name" in obs assert "code_snippet" in obs assert "previously_removed" in obs assert obs["done"] is False state = env.state() assert state["step_count"] == 0 assert state["task_id"] == "easy" print("PASS: test_reset") def test_step(): env = FeatureFlagCleanupEnv() env.reset("easy") obs, reward, done, info = env.step({"action": "remove"}) assert isinstance(reward, float) assert isinstance(done, bool) assert "correct_action" in info state = env.state() assert state["step_count"] == 1 print("PASS: test_step") def test_investigate(): env = FeatureFlagCleanupEnv() obs = env.reset("easy") assert obs["investigation_notes"] == "" # Hidden before investigate # Investigate obs, reward, done, info = env.step({"action": "investigate"}) assert reward == -0.05 assert done is False assert obs["investigation_notes"] != "" # Revealed after investigate assert info["agent_action"] == "investigate" # Now take real action on same flag obs, reward, done, info = env.step({"action": "remove"}) assert info["correct_action"] == "remove" assert info["investigated"] is True print("PASS: test_investigate") def test_investigate_twice(): env = FeatureFlagCleanupEnv() env.reset("easy") env.step({"action": "investigate"}) obs, reward, done, info = env.step({"action": "investigate"}) assert reward == -0.05 # Still penalized assert "Already investigated" in info.get("message", "") print("PASS: test_investigate_twice") def test_cascade_tracking(): env = FeatureFlagCleanupEnv() env.reset("easy") # Remove first flag env.step({"action": "remove"}) state = env.state() assert "enable_new_checkout_flow" in state["flags_removed"] print("PASS: test_cascade_tracking") def test_full_episode(): env = FeatureFlagCleanupEnv() obs = env.reset("easy") done = False steps = 0 while not done: obs, reward, done, info = env.step({"action": "remove"}) steps += 1 assert done is True assert steps > 0 score = env.grade() assert 0.0 <= score <= 1.0 print(f"PASS: test_full_episode (steps={steps}, score={score:.4f})") def test_all_tasks(): env = FeatureFlagCleanupEnv() for task_id in ["easy", "medium", "hard"]: obs = env.reset(task_id) assert obs["task_id"] == task_id done = False while not done: obs, reward, done, info = env.step({"action": "keep"}) score = env.grade() assert 0.0 <= score <= 1.0 print(f"PASS: test_{task_id} (score={score:.4f})") def test_reward_range(): env = FeatureFlagCleanupEnv() for action in ["remove", "keep", "deprecate", "escalate"]: env.reset("easy") obs, reward, done, info = env.step({"action": action}) assert -1.0 <= reward <= 1.0, f"Reward {reward} out of range for action {action}" print("PASS: test_reward_range") def test_invalid_action(): env = FeatureFlagCleanupEnv() env.reset("easy") try: env.step({"action": "invalid_action"}) assert False, "Should have raised an error" except Exception: pass print("PASS: test_invalid_action") def test_rich_observations(): env = FeatureFlagCleanupEnv() obs = env.reset("easy") assert obs["code_snippet"] != "" assert obs["last_commit_message"] != "" assert obs["pr_context"] != "" assert isinstance(obs["related_incidents"], list) assert isinstance(obs["previously_removed"], list) print("PASS: test_rich_observations") def test_hard_task_has_more_flags(): env = FeatureFlagCleanupEnv() env.reset("hard") state = env.state() assert state["total_flags"] >= 10 # Hard should have more flags print(f"PASS: test_hard_task_has_more_flags (total={state['total_flags']})") if __name__ == "__main__": test_reset() test_step() test_investigate() test_investigate_twice() test_cascade_tracking() test_full_episode() test_all_tasks() test_reward_range() test_invalid_action() test_rich_observations() test_hard_task_has_more_flags() print("\nAll tests passed!")