| import unittest |
|
|
| from pydantic import ValidationError |
|
|
| from env.environment import IDPIExfilEnv |
| from env.models import ( |
| Action, |
| EpisodeResult, |
| HealthResponse, |
| MetadataResponse, |
| ResetRequest, |
| SchemaResponse, |
| StepResponse, |
| ) |
|
|
|
|
class ModelContractTests(unittest.TestCase):
    """Contract checks for the request/response models and live env payloads."""

    def test_send_email_requires_destination(self) -> None:
        # Building a SendEmail action with nothing but its type must be
        # rejected by model validation.
        self.assertRaises(ValidationError, Action, type="SendEmail")

    def test_reset_request_accepts_seed(self) -> None:
        # Both fields should round-trip unchanged through the model.
        request = ResetRequest(difficulty="medium", seed=42)
        self.assertEqual(request.difficulty, "medium")
        self.assertEqual(request.seed, 42)

    def test_health_response_contract(self) -> None:
        # The model accepts status/env/version and exposes the status as given.
        fields = {"status": "healthy", "env": "idpi-exfil-env", "version": "0.1.0"}
        health = HealthResponse(**fields)
        self.assertEqual(health.status, "healthy")

    def test_metadata_response_contract(self) -> None:
        # The task list must come back in the same order it was supplied.
        task_names = ("easy", "medium", "hard")
        metadata = MetadataResponse(
            name="idpi-exfil-env",
            description="Prompt injection benchmark.",
            version="0.1.0",
            tasks=list(task_names),
        )
        self.assertEqual(metadata.tasks, list(task_names))

    def test_schema_response_contract(self) -> None:
        # Each schema section is a plain mapping; only ``result`` is inspected.
        section_titles = {
            "action": "Action",
            "observation": "Observation",
            "state": "State",
            "reward": "Reward",
            "result": "EpisodeResult",
        }
        schema = SchemaResponse(
            **{section: {"title": title} for section, title in section_titles.items()}
        )
        self.assertEqual(schema.result["title"], "EpisodeResult")

    def test_step_response_can_validate_live_env_payload(self) -> None:
        # Feed the tuple returned by a real environment step straight into
        # StepResponse to confirm the two stay compatible.
        env = IDPIExfilEnv()
        env.reset(difficulty="easy", seed=7)
        obs, step_reward, finished, details = env.step(Action(type="ReadDoc"))
        response = StepResponse(
            observation=obs,
            reward=step_reward,
            done=finished,
            info=details,
        )

        self.assertEqual(response.observation.task_difficulty, "easy")
        reward_value = response.reward.value
        self.assertGreaterEqual(reward_value, 0.0)
        self.assertLessEqual(reward_value, 1.0)
        for expected_key in ("policy", "reward_inputs"):
            self.assertIn(expected_key, response.info)

    def test_episode_result_score_is_strictly_between_zero_and_one(self) -> None:
        # Exact boundary scores are passed in, yet the stored score is expected
        # to land strictly inside (0, 1).
        # NOTE(review): presumably EpisodeResult adjusts boundary values on
        # validation -- confirm against env.models.
        def build(score: float, *, clean_run: bool) -> EpisodeResult:
            # ``clean_run`` flips the three outcome flags together: a clean run
            # has no exfiltration, a flagged injection and a completed task.
            return EpisodeResult(
                final_score=score,
                breakdown={},
                exfil_occurred=not clean_run,
                injection_flagged=clean_run,
                task_completed=clean_run,
                total_steps=1,
                trajectory=[],
            )

        floor_case = build(0.0, clean_run=False)
        ceiling_case = build(1.0, clean_run=True)

        self.assertGreater(floor_case.final_score, 0.0)
        self.assertLess(ceiling_case.final_score, 1.0)

    def test_live_grader_inputs_scores_are_strictly_between_zero_and_one(self) -> None:
        # Drive a short episode (read the document, then flag the injection)
        # and check every grader score sits strictly inside (0, 1).
        env = IDPIExfilEnv()
        env.reset(difficulty="easy", seed=42)
        episode_actions = (
            {"type": "ReadDoc"},
            {"type": "FlagInjection", "reason": "suspicious instruction"},
        )
        for action_fields in episode_actions:
            env.step(Action(**action_fields))
        scores = env.get_grader_inputs()

        score_keys = (
            "task_completion_score",
            "policy_compliance_score",
            "injection_detection_score",
        )
        for key in score_keys:
            self.assertGreater(scores[key], 0.0)
            self.assertLess(scores[key], 1.0)
|
|
|
|
# Run the suite when this file is executed directly rather than imported.
if __name__ == "__main__":
    unittest.main()
|
|