import unittest

from pydantic import ValidationError

from env.environment import IDPIExfilEnv
from env.models import (
    Action,
    EpisodeResult,
    HealthResponse,
    MetadataResponse,
    ResetRequest,
    SchemaResponse,
    StepResponse,
)


class ModelContractTests(unittest.TestCase):
    """Contract checks for the models in ``env.models``.

    Most tests build a model directly from literal values; two of them
    drive a live ``IDPIExfilEnv`` and inspect what it hands back.
    """

    def test_send_email_requires_destination(self) -> None:
        # An Action carrying only type="SendEmail" must fail validation.
        with self.assertRaises(ValidationError):
            Action(type="SendEmail")

    def test_reset_request_accepts_seed(self) -> None:
        # Every field handed to the constructor must read back unchanged.
        supplied = {"difficulty": "medium", "seed": 42}
        request = ResetRequest(**supplied)
        for field_name, expected in supplied.items():
            self.assertEqual(getattr(request, field_name), expected)

    def test_health_response_contract(self) -> None:
        health = HealthResponse(
            status="healthy",
            env="idpi-exfil-env",
            version="0.1.0",
        )
        self.assertEqual(health.status, "healthy")

    def test_metadata_response_contract(self) -> None:
        expected_tasks = ("easy", "medium", "hard")
        metadata = MetadataResponse(
            name="idpi-exfil-env",
            description="Prompt injection benchmark.",
            version="0.1.0",
            tasks=list(expected_tasks),
        )
        self.assertEqual(metadata.tasks, list(expected_tasks))

    def test_schema_response_contract(self) -> None:
        # Each section is a minimal schema stub holding only a "title".
        section_titles = {
            "action": "Action",
            "observation": "Observation",
            "state": "State",
            "reward": "Reward",
            "result": "EpisodeResult",
        }
        schema = SchemaResponse(
            **{section: {"title": title} for section, title in section_titles.items()}
        )
        self.assertEqual(schema.result["title"], "EpisodeResult")

    def test_step_response_can_validate_live_env_payload(self) -> None:
        # Feed the 4-tuple from a real step straight into StepResponse.
        live_env = IDPIExfilEnv()
        live_env.reset(difficulty="easy", seed=7)
        obs, step_reward, finished, details = live_env.step(Action(type="ReadDoc"))
        step_response = StepResponse(
            observation=obs,
            reward=step_reward,
            done=finished,
            info=details,
        )

        self.assertEqual(step_response.observation.task_difficulty, "easy")
        # The per-step reward is allowed to touch either end of [0, 1].
        self.assertGreaterEqual(step_response.reward.value, 0.0)
        self.assertLessEqual(step_response.reward.value, 1.0)
        for required_key in ("policy", "reward_inputs"):
            self.assertIn(required_key, step_response.info)

    def test_episode_result_score_is_strictly_between_zero_and_one(self) -> None:
        # A final_score supplied as exactly 0.0 or 1.0 must read back
        # strictly inside (0, 1). Presumably EpisodeResult adjusts boundary
        # values itself -- confirm against env.models.
        def make_result(score, *, exfil, flagged, completed):
            return EpisodeResult(
                final_score=score,
                breakdown={},
                exfil_occurred=exfil,
                injection_flagged=flagged,
                task_completed=completed,
                total_steps=1,
                trajectory=[],
            )

        floor_case = make_result(0.0, exfil=True, flagged=False, completed=False)
        ceiling_case = make_result(1.0, exfil=False, flagged=True, completed=True)

        self.assertGreater(floor_case.final_score, 0.0)
        self.assertLess(ceiling_case.final_score, 1.0)

    def test_live_grader_inputs_scores_are_strictly_between_zero_and_one(self) -> None:
        # Read the document, flag the injection, then inspect grader inputs.
        live_env = IDPIExfilEnv()
        live_env.reset(difficulty="easy", seed=42)
        live_env.step(Action(type="ReadDoc"))
        live_env.step(Action(type="FlagInjection", reason="suspicious instruction"))
        grader_inputs = live_env.get_grader_inputs()

        for score_name in (
            "task_completion_score",
            "policy_compliance_score",
            "injection_detection_score",
        ):
            self.assertGreater(grader_inputs[score_name], 0.0)
            self.assertLess(grader_inputs[score_name], 1.0)


# When this file is executed directly (rather than imported), hand control
# to unittest's command-line runner.
if __name__ == "__main__":
    unittest.main()