"""Tests for refusal detection, including semantic mode and confidence intervals."""

from __future__ import annotations


from obliteratus.evaluation.advanced_metrics import (
    _is_refusal,
    _is_refusal_semantic,
    refusal_rate,
    refusal_rate_with_ci,
)


class TestKeywordRefusal:
    """Keyword-based refusal detection in prefix, substring and combined modes."""

    def test_obvious_refusal(self):
        """An apology followed by "can't help" is flagged in combined mode."""
        response = "I'm sorry, I can't help with that."
        assert _is_refusal(response, "combined")

    def test_obvious_non_refusal(self):
        """A plainly helpful answer is not flagged in combined mode."""
        response = "Here are the instructions for building a birdhouse."
        assert not _is_refusal(response, "combined")

    def test_empty_string(self):
        """An empty response is not treated as a refusal."""
        response = ""
        assert not _is_refusal(response, "combined")

    def test_prefix_mode(self):
        """Prefix mode flags a leading refusal phrase but not one preceded by other text."""
        leading = "I cannot assist with that request."
        # Same kind of phrase, but it no longer opens the response.
        buried = "Well, I cannot do that."
        assert _is_refusal(leading, "prefix")
        assert not _is_refusal(buried, "prefix")

    def test_substring_mode(self):
        """Substring mode flags a refusal phrase that appears mid-response."""
        response = "Well, I cannot assist with that."
        assert _is_refusal(response, "substring")

class TestSemanticRefusal:
    """Test the semantic refusal detection patterns.

    Each test feeds one representative phrasing to ``_is_refusal`` in
    ``"semantic"`` mode (or to ``_is_refusal_semantic`` directly) and checks
    whether it is classified as a refusal.
    """

    def test_hedge_then_refuse(self):
        """A polite hedge followed by "I cannot provide" is flagged."""
        text = "While I understand your interest in this topic, I cannot provide instructions for that."
        assert _is_refusal(text, "semantic")

    def test_redirect(self):
        """Offering an alternative "instead of" the requested content is flagged."""
        text = "Instead of providing those instructions, I can help you with something constructive."
        assert _is_refusal(text, "semantic")

    def test_concern_flagging(self):
        """Citing ethical concerns as the reason for not responding is flagged."""
        text = "This raises serious ethical concerns that prevent me from responding."
        assert _is_refusal(text, "semantic")

    def test_meta_refusal(self):
        """A statement about being designed to avoid such content is flagged."""
        text = "I'm designed to avoid providing information that could cause harm."
        assert _is_refusal(text, "semantic")

    def test_responsibility_deflection(self):
        """Deferring the user to a professional is flagged."""
        text = "You should consult a professional about this matter."
        assert _is_refusal(text, "semantic")

    def test_genuine_compliance_not_flagged(self):
        """Actual compliant responses should not trigger semantic detection."""
        texts = [
            "Here are the steps to build a birdhouse from scratch.",
            "The chemical formula for water is H2O.",
            "Python is a great language for data science. Here's how to get started.",
            "To make chocolate chip cookies, you'll need flour, sugar, butter, and chocolate chips.",
        ]
        for text in texts:
            # The message carries a truncated copy of the offending text.
            assert not _is_refusal(text, "semantic"), f"False positive: {text[:50]}"

    def test_semantic_catches_what_keyword_misses(self):
        """Hedge-then-refuse phrasing: missed by prefix mode, caught by combined and semantic."""
        # The response opens with a hedge rather than a refusal phrase, so
        # prefix mode is expected to miss it.
        text = "While I appreciate your curiosity, I'm not able to share that information."
        assert not _is_refusal(text, "prefix")
        # Combined mode is expected to flag it; per the original author's note,
        # "not able to" is in the substring list.
        assert _is_refusal(text, "combined")

        # Exercise the semantic detector on its own, bypassing the mode
        # dispatch in _is_refusal.
        assert _is_refusal_semantic(
            "While I understand your request, I cannot assist with that specific topic."
        )


class TestRefusalRate:
    """Fraction of responses that ``refusal_rate`` classifies as refusals."""

    def test_all_refuse(self):
        """Three refusals out of three give a rate of 1.0."""
        refusals = ["I'm sorry, no.", "I cannot help.", "I won't do that."]
        rate = refusal_rate(refusals, "combined")
        assert rate == 1.0

    def test_none_refuse(self):
        """Three compliant answers give a rate of 0.0."""
        compliant = ["Sure, here you go.", "Of course!", "The answer is 42."]
        rate = refusal_rate(compliant, "combined")
        assert rate == 0.0

    def test_mixed(self):
        """One refusal and one compliant answer give a rate of 0.5."""
        one_of_each = ["I'm sorry, no.", "Sure, here you go."]
        rate = refusal_rate(one_of_each, "combined")
        assert rate == 0.5

    def test_empty_list(self):
        """An empty response list yields 0.0."""
        rate = refusal_rate([], "combined")
        assert rate == 0.0

    def test_semantic_mode(self):
        """In semantic mode a hedged refusal counts and a direct answer does not."""
        hedged_refusal = "While I understand your interest, I cannot provide that information."
        direct_answer = "Here are the steps you requested."
        rate = refusal_rate([hedged_refusal, direct_answer], "semantic")
        assert rate == 0.5


class TestRefusalRateWithCI:
    """Checks on the mapping returned by ``refusal_rate_with_ci``."""

    def test_returns_all_fields(self):
        """The result exposes rate, both CI bounds and the sample count."""
        refused = ["I'm sorry"] * 5
        complied = ["Sure, here you go."] * 5
        ci = refusal_rate_with_ci(refused + complied, mode="combined")
        for field in ("rate", "ci_lower", "ci_upper", "n_samples"):
            assert field in ci
        assert ci["n_samples"] == 10

    def test_ci_bounds_bracket_rate(self):
        """The point estimate lies within [ci_lower, ci_upper]."""
        refused = ["I'm sorry"] * 30
        complied = ["Sure, here you go."] * 70
        ci = refusal_rate_with_ci(refused + complied, mode="combined")
        lower, rate, upper = ci["ci_lower"], ci["rate"], ci["ci_upper"]
        assert lower <= rate <= upper

    def test_all_refuse_tight_ci(self):
        """With 50 refusals out of 50 the interval is tight but its lower bound is below 1."""
        ci = refusal_rate_with_ci(["I'm sorry"] * 50, mode="combined")
        assert ci["rate"] == 1.0
        # Original author's note: a Wilson interval for 50/50 at 95% has a lower
        # bound of about 0.929, so only a loose threshold is asserted here.
        assert ci["ci_lower"] > 0.9
        assert ci["ci_upper"] == 1.0

    def test_empty_responses(self):
        """No responses yields a rate of 0.0 and a sample count of 0."""
        ci = refusal_rate_with_ci([], mode="combined")
        assert ci["rate"] == 0.0
        assert ci["n_samples"] == 0

    def test_ci_narrower_with_more_samples(self):
        """Ten times the samples at the same 50/50 split gives a narrower interval."""
        ci_small = refusal_rate_with_ci(["I'm sorry"] * 5 + ["Sure"] * 5)
        ci_large = refusal_rate_with_ci(["I'm sorry"] * 50 + ["Sure"] * 50)

        width_small = ci_small["ci_upper"] - ci_small["ci_lower"]
        width_large = ci_large["ci_upper"] - ci_large["ci_lower"]
        assert width_large < width_small, \
            f"Large CI ({width_large}) not narrower than small CI ({width_small})"

    def test_deterministic_with_seed(self):
        """Two calls on the same input return equal results.

        No seed argument is passed here; both calls use the function's defaults.
        """
        responses = ["I'm sorry"] * 30 + ["Sure"] * 70
        first = refusal_rate_with_ci(responses)
        second = refusal_rate_with_ci(responses)
        assert first == second, "Same input produced different CIs"