# File size: 5,715 Bytes
#
# c7b7c5e

"""Tests for the HallucinationGuard environment."""
import pytest
from server.environment import HallucinationGuardEnvironment


class TestEnvironmentReset:
    """Tests for environment reset functionality.

    Each test constructs its own ``HallucinationGuardEnvironment`` so no
    state is shared between test cases.
    """

    def test_reset_returns_observation(self):
        """Reset should return an observation exposing the core fields."""
        env = HallucinationGuardEnvironment()
        obs = env.reset()

        assert obs is not None
        # One assert per field so a failure names the missing attribute.
        for field in ('question', 'context', 'reward', 'done'):
            assert hasattr(obs, field)

    def test_reset_sets_initial_reward_to_zero(self):
        """Initial reward should be zero."""
        env = HallucinationGuardEnvironment()
        obs = env.reset()

        assert obs.reward == 0.0

    def test_reset_sets_done_to_false(self):
        """Episode should not be done after reset."""
        env = HallucinationGuardEnvironment()
        obs = env.reset()

        assert obs.done is False

    def test_reset_provides_attempts_remaining(self):
        """Reset should indicate a positive number of attempts remaining."""
        env = HallucinationGuardEnvironment()
        obs = env.reset()

        assert obs.attempts_remaining > 0

    def test_reset_with_task_id(self):
        """Reset with a specific task ID should return an observation."""
        env = HallucinationGuardEnvironment()
        obs = env.reset(task_id="task_1_factual_grounding")

        assert obs is not None

    def test_reset_clears_previous_state(self):
        """A reset that follows a step should produce clean state again."""
        env = HallucinationGuardEnvironment()
        env.reset()
        # Take a step first so there is episode state for the second reset
        # to clear; two back-to-back resets would pass even if reset()
        # cleared nothing.
        env.step({
            "answer": "test answer",
            "confidence": 0.5,
            "source_quote": "",
            "reasoning": "",
            "uncertainty_flags": [],
        })
        obs = env.reset()

        assert obs.reward == 0.0
        assert obs.done is False


class TestEnvironmentStep:
    """Tests for environment step functionality."""

    @staticmethod
    def _make_action(answer="test answer", confidence=0.5):
        """Build an action dict with empty quote, reasoning and flags.

        The leading underscore keeps pytest from collecting this helper as
        a test. A new dict (and a new flags list) is returned on each call
        so tests never share mutable action data.
        """
        return {
            "answer": answer,
            "confidence": confidence,
            "source_quote": "",
            "reasoning": "",
            "uncertainty_flags": [],
        }

    def test_step_returns_observation(self):
        """Step should return an observation that carries a reward."""
        env = HallucinationGuardEnvironment()
        env.reset()

        obs = env.step(self._make_action(confidence=0.8))

        assert obs is not None
        assert hasattr(obs, 'reward')

    def test_step_reward_in_valid_range(self):
        """Step reward should be in [-1.0, 1.0] range."""
        env = HallucinationGuardEnvironment()
        env.reset()

        obs = env.step(self._make_action(confidence=0.5))

        # NOTE(review): the bounds tolerate negative rewards; tighten the
        # lower bound to 0.0 if the environment never penalises - confirm
        # against the reward specification.
        assert -1.0 <= obs.reward <= 1.0

    def test_step_with_high_confidence(self):
        """Step with maximum confidence (1.0) should return an observation."""
        env = HallucinationGuardEnvironment()
        env.reset()

        obs = env.step(self._make_action(confidence=1.0))

        assert obs is not None

    def test_step_with_low_confidence(self):
        """Step with low confidence (0.1) should return an observation."""
        env = HallucinationGuardEnvironment()
        env.reset()

        obs = env.step(self._make_action(confidence=0.1))

        assert obs is not None

    def test_step_updates_attempts(self):
        """Step should decrement attempts remaining."""
        env = HallucinationGuardEnvironment()
        obs1 = env.reset()

        obs2 = env.step(self._make_action(answer="test", confidence=0.5))

        assert obs2.attempts_remaining < obs1.attempts_remaining


class TestEnvironmentState:
    """Tests for environment state functionality.

    The state returned by ``env.state()`` is probed both as an object with
    attributes and as a container with keys, because its concrete type is
    not fixed by these tests.
    """

    @staticmethod
    def _exposes(state, field):
        """Return True when *state* carries *field* as an attribute or key."""
        if hasattr(state, field):
            return True
        try:
            return field in state
        except TypeError:
            # A plain object without container support cannot be probed
            # with ``in``; treat that as "field not present" so the test
            # fails on its assertion instead of erroring out.
            return False

    @staticmethod
    def _read(state, field):
        """Return *field* from *state* (attribute or key), or None if absent."""
        if hasattr(state, field):
            return getattr(state, field)
        try:
            return state[field]
        except (TypeError, KeyError, IndexError):
            return None

    def test_state_returns_metadata(self):
        """State should return episode metadata."""
        env = HallucinationGuardEnvironment()
        env.reset()
        state = env.state()

        assert state is not None
        assert (
            self._exposes(state, 'episode_id')
            or self._exposes(state, 'step_count')
        )

    def test_state_tracks_step_count(self):
        """State should track step count."""
        env = HallucinationGuardEnvironment()
        env.reset()
        # Copy the counter value out now: the state object may be mutated
        # in place by the step below.
        count_before = self._read(env.state(), 'step_count')

        action = {
            "answer": "test",
            "confidence": 0.5,
            "source_quote": "",
            "reasoning": "",
            "uncertainty_flags": []
        }
        env.step(action)
        state = env.state()

        assert state is not None
        count_after = self._read(state, 'step_count')
        # Only compare when the state exposes an integer step_count; the
        # schema is not visible from this file, so absence is tolerated.
        if isinstance(count_before, int) and isinstance(count_after, int):
            assert count_after > count_before


class TestTaskSelection:
    """Checks that reset accepts each task identifier used below."""

    @staticmethod
    def _observation_for(task_name):
        """Reset a fresh environment on *task_name*; return the observation."""
        fresh_env = HallucinationGuardEnvironment()
        return fresh_env.reset(task_id=task_name)

    def test_reset_with_task_1(self):
        """Resetting on task_1_factual_grounding yields an observation."""
        first_obs = self._observation_for("task_1_factual_grounding")

        assert first_obs is not None

    def test_reset_with_task_2(self):
        """Resetting on task_2_multi_hop_synthesis yields an observation."""
        second_obs = self._observation_for("task_2_multi_hop_synthesis")

        assert second_obs is not None

    def test_reset_with_task_3(self):
        """Resetting on task_3_adversarial_resistance yields an observation."""
        third_obs = self._observation_for("task_3_adversarial_resistance")

        assert third_obs is not None