"""Contract tests for the request/response models of the IDPI exfil environment."""

import unittest

from pydantic import ValidationError

from env.environment import IDPIExfilEnv
from env.models import (
    Action,
    EpisodeResult,
    HealthResponse,
    MetadataResponse,
    ResetRequest,
    SchemaResponse,
    StepResponse,
)


class ModelContractTests(unittest.TestCase):
    """Checks that the models accept, reject, and expose the values the tests pin."""

    def test_send_email_requires_destination(self) -> None:
        """Building a ``SendEmail`` action with no other fields must fail validation."""
        self.assertRaises(ValidationError, Action, type="SendEmail")

    def test_reset_request_accepts_seed(self) -> None:
        """A reset request keeps the difficulty and seed it was given."""
        request = ResetRequest(difficulty="medium", seed=42)

        self.assertEqual(request.difficulty, "medium")
        self.assertEqual(request.seed, 42)

    def test_health_response_contract(self) -> None:
        """A health response exposes the status it was built with."""
        health = HealthResponse(
            status="healthy",
            env="idpi-exfil-env",
            version="0.1.0",
        )

        self.assertEqual(health.status, "healthy")

    def test_metadata_response_contract(self) -> None:
        """A metadata response exposes the task list it was built with."""
        expected_tasks = ["easy", "medium", "hard"]

        # Hand the model its own copy so the comparison uses an independent list.
        metadata = MetadataResponse(
            name="idpi-exfil-env",
            description="Prompt injection benchmark.",
            version="0.1.0",
            tasks=list(expected_tasks),
        )

        self.assertEqual(metadata.tasks, expected_tasks)

    def test_schema_response_contract(self) -> None:
        """A schema response exposes the per-section schema dicts it was built with."""
        section_titles = {
            "action": "Action",
            "observation": "Observation",
            "state": "State",
            "reward": "Reward",
            "result": "EpisodeResult",
        }
        sections = {
            section: {"title": title} for section, title in section_titles.items()
        }

        schema = SchemaResponse(**sections)

        self.assertEqual(schema.result["title"], "EpisodeResult")

    def test_step_response_can_validate_live_env_payload(self) -> None:
        """The 4-tuple returned by a real ``env.step`` call fits ``StepResponse``."""
        live_env = IDPIExfilEnv()
        live_env.reset(difficulty="easy", seed=7)

        step_result = live_env.step(Action(type="ReadDoc"))
        observation, reward, done, info = step_result
        step_response = StepResponse(
            observation=observation,
            reward=reward,
            done=done,
            info=info,
        )

        self.assertEqual(step_response.observation.task_difficulty, "easy")

        # The reward value must lie in the closed interval [0, 1].
        reward_value = step_response.reward.value
        self.assertGreaterEqual(reward_value, 0.0)
        self.assertLessEqual(reward_value, 1.0)

        for expected_key in ("policy", "reward_inputs"):
            self.assertIn(expected_key, step_response.info)

    def test_episode_result_score_is_strictly_between_zero_and_one(self) -> None:
        """``final_score`` reads back inside (0, 1) even when built with 0.0 or 1.0."""
        lowest = EpisodeResult(
            final_score=0.0,
            breakdown={},
            exfil_occurred=True,
            injection_flagged=False,
            task_completed=False,
            total_steps=1,
            trajectory=[],
        )
        highest = EpisodeResult(
            final_score=1.0,
            breakdown={},
            exfil_occurred=False,
            injection_flagged=True,
            task_completed=True,
            total_steps=1,
            trajectory=[],
        )

        self.assertGreater(lowest.final_score, 0.0)
        self.assertLess(highest.final_score, 1.0)

    def test_live_grader_inputs_scores_are_strictly_between_zero_and_one(self) -> None:
        """After reading and flagging, each grader score lies strictly inside (0, 1)."""
        live_env = IDPIExfilEnv()
        live_env.reset(difficulty="easy", seed=42)
        live_env.step(Action(type="ReadDoc"))
        live_env.step(Action(type="FlagInjection", reason="suspicious instruction"))

        scores = live_env.get_grader_inputs()

        # Same keys, same order, same lower-then-upper checks as a written-out list.
        score_keys = (
            "task_completion_score",
            "policy_compliance_score",
            "injection_detection_score",
        )
        for score_key in score_keys:
            self.assertGreater(scores[score_key], 0.0)
            self.assertLess(scores[score_key], 1.0)


if __name__ == "__main__":
    unittest.main()