"""Tests for diversity-aware compute_optimal_k in adaptive_sizer."""

from __future__ import annotations

import json

from headroom.transforms.adaptive_sizer import compute_optimal_k


def _make_unique_items(n: int) -> list[str]:
    """Create n completely unique JSON items (high diversity)."""
    return [
        json.dumps(
            {
                "id": i,
                "title": f"Unique topic number {i} about subject area {chr(65 + i % 26)}",
                "content": (
                    f"This is document {i} discussing a completely different subject. "
                    f"It covers concepts like {chr(65 + i % 26)}-theory, "
                    f"methodology-{i * 7 % 100}, and framework-{i * 13 % 50}. "
                    f"The key finding is result-{i} which has implications for field-{i % 10}."
                ),
                "source": f"source_{i}.pdf",
                "score": round(0.99 - i * 0.03, 2),
            }
        )
        for i in range(n)
    ]


def _make_repetitive_items(n: int, templates: int = 3) -> list[str]:
    """Create n items from a few templates (low diversity)."""
    base_templates = [
        {
            "status": "ok",
            "message": "Health check passed",
            "latency_ms": 12,
            "service": "api-gateway",
        },
        {
            "status": "ok",
            "message": "Health check passed",
            "latency_ms": 15,
            "service": "auth-service",
        },
        {
            "status": "ok",
            "message": "Health check passed",
            "latency_ms": 8,
            "service": "db-proxy",
        },
    ]
    return [
        json.dumps({**base_templates[i % templates], "timestamp": f"2026-03-25T10:{i:02d}:00Z"})
        for i in range(n)
    ]


def _make_mixed_items(n: int, unique_fraction: float) -> list[str]:
    """Build a list that starts with unique items and ends with exact duplicates.

    Args:
        n: Target total number of items.
        unique_fraction: Share of ``n`` to generate as unique items; the
            product is truncated with ``int()``.

    Returns:
        The unique items followed by identical copies of a single
        health-check payload filling the remainder.
    """
    n_unique = int(n * unique_fraction)
    n_repeated = n - n_unique
    mixed = _make_unique_items(n_unique)

    # Nothing left to pad: return the unique portion as-is.
    if n_repeated <= 0:
        return mixed

    filler = json.dumps(
        {
            "status": "ok",
            "message": "Routine health check passed successfully",
            "latency_ms": 10,
        }
    )
    mixed += [filler] * n_repeated
    return mixed


class TestSmallArrays:
    """Inputs of eight items or fewer are expected to come back at full size."""

    def test_small_array_returns_n(self):
        """A five-item array (n <= 8) should yield k equal to n."""
        assert compute_optimal_k(_make_unique_items(5)) == 5

    def test_eight_items_returns_eight(self):
        """Eight items is the upper edge of the small-array case: k == 8."""
        batch = _make_unique_items(8)
        chosen = compute_optimal_k(batch)
        assert chosen == 8


class TestNearTotalRedundancy:
    """Almost fully duplicated inputs should collapse to a very small k."""

    def test_identical_items_returns_min(self):
        """Twenty copies of one payload should give k of at most 3."""
        payload = json.dumps({"status": "ok", "msg": "healthy"})
        chosen = compute_optimal_k([payload] * 20)
        assert chosen <= 3

    def test_two_groups_returns_small_k(self):
        """Two groups of ten identical items each should give k of at most 5."""
        group_a = [json.dumps({"type": "A", "val": 1})] * 10
        group_b = [json.dumps({"type": "B", "val": 2})] * 10
        chosen = compute_optimal_k(group_a + group_b)
        assert chosen <= 5


class TestHighDiversity:
    """Fully unique inputs should retain most of their items."""

    def test_all_unique_keeps_most(self):
        """15 unique items → at least 10 kept (guards against the earlier result of 4)."""
        kept = compute_optimal_k(_make_unique_items(15))
        assert kept >= 10, f"Expected k >= 10 for 15 unique items, got k={kept}"

    def test_twenty_unique_keeps_most(self):
        """20 unique items → at least 14 kept."""
        kept = compute_optimal_k(_make_unique_items(20))
        assert kept >= 14, f"Expected k >= 14 for 20 unique items, got k={kept}"

    def test_twelve_unique_rag_chunks(self):
        """12 unique RAG-style chunks → at least 8 kept."""
        kept = compute_optimal_k(_make_unique_items(12))
        assert kept >= 8, f"Expected k >= 8 for 12 unique RAG chunks, got k={kept}"


class TestLowDiversity:
    """Template-driven, repetitive inputs should keep k small."""

    def test_repetitive_items_unchanged(self):
        """15 items drawn from 3 templates → k of at most 8, as before."""
        kept = compute_optimal_k(_make_repetitive_items(15, templates=3))
        assert kept <= 8, f"Expected k <= 8 for repetitive items, got k={kept}"

    def test_twenty_repetitive_stays_small(self):
        """20 items drawn from 3 templates → k of at most 10."""
        kept = compute_optimal_k(_make_repetitive_items(20, templates=3))
        assert kept <= 10, f"Expected k <= 10 for 20 repetitive items, got k={kept}"


class TestModerateDiversity:
    """Inputs that are part unique, part duplicated should land mid-range."""

    def test_half_unique_scales(self):
        """20 items at 50% unique → k between 6 and 16 inclusive."""
        half_and_half = _make_mixed_items(20, unique_fraction=0.5)
        kept = compute_optimal_k(half_and_half)
        assert 6 <= kept <= 16, f"Expected 6 <= k <= 16 for 50% unique, got k={kept}"


class TestKneeInteraction:
    """How a knee-derived k is expected to interact with the diversity floor."""

    def test_knee_with_high_diversity_gets_floor(self):
        """A low knee should still be lifted when the items are highly diverse."""
        # Intended as a weak-bigram-knee input whose items are all distinct
        # under SimHash; uses the same 15-item fixture as the high-diversity tests.
        diverse = _make_unique_items(15)
        kept = compute_optimal_k(diverse)
        # A diversity ratio near 1.0 is expected to raise k via the diversity floor.
        assert kept >= 10, f"Expected k >= 10 with high diversity floor, got k={kept}"

    def test_knee_with_low_diversity_stays(self):
        """With low diversity and a knee present, k should remain at the knee."""
        repetitive = _make_repetitive_items(15, templates=3)
        kept = compute_optimal_k(repetitive)
        assert kept <= 8, f"Expected knee-derived k <= 8 for low diversity, got k={kept}"


class TestBiasAndCaps:
    """Checks for the ``bias``, ``max_k`` and ``min_k`` keyword arguments."""

    def test_bias_increases_k(self):
        """A bias above 1 must not produce a smaller k than the neutral bias."""
        docs = _make_unique_items(15)
        baseline = compute_optimal_k(docs, bias=1.0)
        boosted = compute_optimal_k(docs, bias=1.5)
        assert boosted >= baseline

    def test_bias_decreases_k(self):
        """A bias below 1 must not produce a larger k than the neutral bias."""
        docs = _make_unique_items(15)
        baseline = compute_optimal_k(docs, bias=1.0)
        reduced = compute_optimal_k(docs, bias=0.5)
        assert reduced <= baseline

    def test_max_k_cap_respected(self):
        """The max_k ceiling holds even for highly diverse input."""
        capped = compute_optimal_k(_make_unique_items(20), max_k=5)
        assert capped <= 5

    def test_min_k_floor_respected(self):
        """The min_k floor holds even for fully duplicated input."""
        payload = json.dumps({"x": 1})
        floored = compute_optimal_k([payload] * 20, min_k=3)
        assert floored >= 3