import asyncio
import sys
import types
from pathlib import Path

# Put the parent of this file's directory at the front of sys.path before the
# `app` import below — presumably that directory is the project root holding
# the `app` package (TODO confirm against the repository layout).
sys.path.insert(0, str(Path(__file__).parent.parent))

import pytest
import app.vector_store as vector_store


def test_verification_thresholds_calibrated_for_bge_m3():
    """Pin the conservative, user-preferred verification thresholds for the BGE-M3 embedding space."""
    expected_thresholds = {
        "VERIFIED_DENSE_THRESHOLD": 0.70,
        "VERIFIED_HYBRID_THRESHOLD": 0.65,
    }
    for constant_name, expected_value in expected_thresholds.items():
        assert getattr(vector_store, constant_name) == expected_value


def test_embed_text_rejects_non_1024_vector(monkeypatch):
    """embed_text must raise a ValueError matching "expected 1024" for a 2-element vector."""

    class StubVector:
        """Embedding stand-in exposing only ``tolist()``."""

        def __init__(self, values):
            self._values = values

        def tolist(self):
            return self._values

    class StubModel:
        """Model stand-in whose ``query_embed`` yields one too-short embedding."""

        def query_embed(self, texts):
            yield StubVector([0.1, 0.2])

    # The class itself is a zero-argument factory, so it can replace the model getter directly.
    monkeypatch.setattr(vector_store, "_get_embedding_model", StubModel)

    expected_failure = pytest.raises(ValueError, match="expected 1024")
    with expected_failure:
        vector_store.embed_text("lettuce tipburn")


class FakeResponse:
    """Minimal stand-in for an HTTP response object.

    Exposes ``status_code``, ``text`` and ``json()``.

    Args:
        status_code: Value stored on ``status_code`` (default 200).
        payload: Object returned verbatim by ``json()``; ``None`` means "no
            payload given" and yields an empty list.
        text: Value stored on ``text`` (default "ok").
    """

    def __init__(self, status_code=200, payload=None, text="ok"):
        self.status_code = status_code
        # Substitute the default only when the payload was omitted. Using
        # ``payload or []`` would silently turn an explicit falsy payload
        # (``{}``, ``0``, ``""``) into a list.
        self._payload = [] if payload is None else payload
        self.text = text

    def json(self):
        """Return the payload supplied at construction time."""
        return self._payload


class FakeAsyncClient:
    """Async-context-manager test double that records every POST and answers with one canned row."""

    def __init__(self, *args, **kwargs):
        # Constructor arguments are accepted and ignored; only the call log is kept.
        self.calls = []

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc, tb):
        # Falsy return value: exceptions raised inside the ``async with`` body propagate.
        return False

    async def post(self, url, headers=None, json=None):
        """Log the request and return a FakeResponse wrapping a single result row."""
        request_record = {"url": url, "headers": headers, "json": json}
        self.calls.append(request_record)

        canned_row = {
            "source": "Doc",
            "filename": "doc.pdf",
            "page_number": 2,
            "content": "Expanded horticultural context",
            "similarity": 0.91,
        }
        return FakeResponse(payload=[canned_row])


def test_search_knowledge_logs_hyde_query_label_and_embeds_transformed_query(monkeypatch, capsys):
    """search_knowledge embeds the query it was given and tags its stdout output with the "hyde" label."""
    hyde_query = "Expanded agronomic explanation for lettuce humidity"
    recorded = {}
    fake_client = FakeAsyncClient()

    def recording_embed_text(text):
        # Remember what was embedded so the assertion below can compare it to the query.
        recorded["embedded_query"] = text
        return [0.1, 0.2]

    monkeypatch.setattr(vector_store, "is_configured", lambda: True)
    monkeypatch.setattr(vector_store, "SUPABASE_URL", "https://example.supabase.co")
    monkeypatch.setattr(vector_store, "embed_text", recording_embed_text)
    monkeypatch.setattr(vector_store.httpx, "AsyncClient", lambda timeout=10.0: fake_client)

    pending_search = vector_store.search_knowledge(query=hyde_query, query_label="hyde")
    chunks = asyncio.run(pending_search)
    printed = capsys.readouterr().out

    first_hit = chunks[0]
    assert first_hit["source"] == "Doc"
    assert "filename" in first_hit, "Supabase response must include 'filename' for parent expansion"
    assert recorded["embedded_query"] == hyde_query

    posted_body = fake_client.calls[0]["json"]
    assert posted_body["match_count"] == vector_store.DEFAULT_MATCH_COUNT
    assert "[VectorRAG:hyde]" in printed


def test_search_knowledge_defaults_to_raw_query_label(monkeypatch, capsys):
    """When no query_label is passed, search_knowledge's stdout output carries the "raw" tag."""
    fake_client = FakeAsyncClient()
    replacements = (
        (vector_store, "is_configured", lambda: True),
        (vector_store, "SUPABASE_URL", "https://example.supabase.co"),
        (vector_store, "embed_text", lambda text: [0.1, 0.2]),
        (vector_store.httpx, "AsyncClient", lambda timeout=10.0: fake_client),
    )
    for target, attribute, replacement in replacements:
        monkeypatch.setattr(target, attribute, replacement)

    asyncio.run(vector_store.search_knowledge(query="plain query"))

    printed = capsys.readouterr().out
    assert "[VectorRAG:raw]" in printed


def test_merge_knowledge_results_deduplicates_by_filename_page_content_keeps_higher_similarity():
    """A chunk present in both result lists appears once in the merge, with the higher similarity."""
    from app.vector_store import merge_knowledge_results

    # Same filename/page/content in both lists; only the similarity differs.
    shared_fields = {
        "filename": "cornell.pdf",
        "source": "Cornell-Lettuce",
        "page_number": 5,
        "content": "tipburn info",
    }
    primary = [
        dict(shared_fields, similarity=0.75),
        {"filename": "kubis.pdf", "source": "Kubis-Guide", "page_number": 2, "content": "cabbage info", "similarity": 0.82},
    ]
    english = [
        dict(shared_fields, similarity=0.85),
        {"filename": "new.pdf", "source": "New-Source", "page_number": 1, "content": "new content", "similarity": 0.90},
    ]

    merged = merge_knowledge_results([primary, english])

    merged_sources = {row["source"] for row in merged}
    for expected_source in ("Cornell-Lettuce", "Kubis-Guide", "New-Source"):
        assert expected_source in merged_sources
    assert len(merged) == 3  # no duplicates

    cornell_chunk = next(filter(lambda row: row["source"] == "Cornell-Lettuce", merged))
    assert cornell_chunk["similarity"] == 0.85  # higher similarity kept


def test_merge_knowledge_results_does_not_dedup_different_content_same_page():
    """Two chunks on the same file and page but with different content must both survive the merge."""
    from app.vector_store import merge_knowledge_results

    first_paragraph = {"filename": "doc.pdf", "page_number": 3, "content": "first paragraph", "similarity": 0.80}
    second_paragraph = {"filename": "doc.pdf", "page_number": 3, "content": "second paragraph", "similarity": 0.79}

    merged = merge_knowledge_results([[first_paragraph, second_paragraph]])

    assert len(merged) == 2  # different content → not deduped


def test_merge_knowledge_results_respects_top_k():
    """With top_k=2, merging four distinct chunks returns exactly two."""
    from app.vector_store import merge_knowledge_results

    single_result_list = []
    for i in range(4):
        single_result_list.append(
            {"source": f"Doc{i}", "page_number": i, "content": "x", "similarity": 0.9 - i * 0.01}
        )

    merged = merge_knowledge_results([single_result_list], top_k=2)

    assert len(merged) == 2


def test_merge_knowledge_results_handles_empty_inputs():
    """No result lists, or only empty result lists, merge to an empty list."""
    from app.vector_store import merge_knowledge_results

    no_lists = []
    only_empty_lists = [[], []]
    for empty_input in (no_lists, only_empty_lists):
        assert merge_knowledge_results(empty_input) == []


# =============================================================================
# expand_knowledge_results tests
# =============================================================================

def test_expand_knowledge_results_passthrough_on_empty_corpus(monkeypatch):
    """With an empty corpus, every chunk comes back in order paired with a None window."""
    from app import vector_store

    for attribute, empty_value in (("_corpus", []), ("_corpus_lookup", {})):
        monkeypatch.setattr(vector_store, attribute, empty_value)

    chunks = [
        {"filename": "a.pdf", "page_number": 1, "content": "hello", "similarity": 0.80},
        {"filename": "b.pdf", "page_number": 2, "content": "world", "similarity": 0.75},
    ]

    pairs = vector_store.expand_knowledge_results(chunks)

    assert len(pairs) == 2
    assert all(window is None for _, window in pairs)
    returned_filenames = [pair[0]["filename"] for pair in pairs]
    assert returned_filenames == ["a.pdf", "b.pdf"]


def test_expand_knowledge_results_returns_none_window_when_corpus_empty(monkeypatch):
    """A single chunk expanded against an empty corpus keeps its content and gets no window."""
    from app import vector_store

    monkeypatch.setattr(vector_store, "_corpus", [])
    monkeypatch.setattr(vector_store, "_corpus_lookup", {})

    lone_chunk = {"filename": "doc.pdf", "page_number": 1, "content": "some text", "similarity": 0.85}
    pairs = vector_store.expand_knowledge_results([lone_chunk])

    assert len(pairs) == 1
    (original, window), = pairs
    assert original["content"] == "some text"
    assert window is None


def test_expand_knowledge_results_returns_window_when_match_found(monkeypatch):
    """When find_and_expand yields a window, the hit is returned paired with that exact window."""
    import app.parent_context as pc_mod
    from app import vector_store
    from app.knowledge_chunking import NormalizedChildChunk
    from app.parent_context import ParentWindow

    hit = {"filename": "guide.pdf", "page_number": 2, "content": "matched text", "similarity": 0.91}

    corpus_child = NormalizedChildChunk(
        child_id="guide.pdf::p2::i0",
        source="Guide",
        filename="guide.pdf",
        page_number=2,
        content="matched text",
        corpus_ordinal=0,
    )
    expected_window = ParentWindow(
        primary_child=corpus_child,
        left_neighbor=None,
        right_neighbor=None,
        combined_text="matched text",
    )

    lookup_key = ("guide.pdf", 2, "matched text")
    monkeypatch.setattr(vector_store, "_corpus", [corpus_child])
    monkeypatch.setattr(vector_store, "_corpus_lookup", {lookup_key: corpus_child})

    def fake_find_and_expand(hit, corpus, lookup):
        # Ignore the arguments and always hand back the prepared window.
        return expected_window

    monkeypatch.setattr(pc_mod, "find_and_expand", fake_find_and_expand)

    pairs = vector_store.expand_knowledge_results([hit])

    assert len(pairs) == 1
    original, window = pairs[0]
    assert original["similarity"] == 0.91
    assert window is not None
    assert window is expected_window


# =============================================================================
# format_knowledge_context with parent windows
# =============================================================================

def test_format_knowledge_context_renders_matched_paragraph_label(monkeypatch):
    """The rendered context contains the [MATCHED PARAGRAPH] label, the chunk text and a 📖 citation."""
    from app import vector_store

    for attribute, empty_value in (("_corpus", []), ("_corpus_lookup", {})):
        monkeypatch.setattr(vector_store, attribute, empty_value)

    tipburn_chunk = {
        "source": "Guide",
        "page_number": 1,
        "content": "tipburn info",
        "similarity": 0.91,
        "filename": "guide.pdf",
    }
    rendered = vector_store.format_knowledge_context([tipburn_chunk])

    for expected_fragment in ("[MATCHED PARAGRAPH]", "tipburn info", "CITE AS 📖"):
        assert expected_fragment in rendered


def test_format_knowledge_context_renders_supporting_context_when_window_present(monkeypatch):
    """With a window that has both neighbors, the output includes a Supporting context section."""
    from app import vector_store
    from app.knowledge_chunking import NormalizedChildChunk
    from app.parent_context import ParentWindow

    left_child = NormalizedChildChunk("f::p0::i0", "Guide", "guide.pdf", 0, "left neighbor text", 0)
    primary_child = NormalizedChildChunk("f::p1::i1", "Guide", "guide.pdf", 1, "primary text", 1)
    right_child = NormalizedChildChunk("f::p2::i2", "Guide", "guide.pdf", 2, "right neighbor text", 2)
    window_with_neighbors = ParentWindow(
        primary_child=primary_child,
        left_neighbor=left_child,
        right_neighbor=right_child,
        combined_text="left neighbor text\n\nprimary text\n\nright neighbor text",
    )

    hit = {"filename": "guide.pdf", "page_number": 1, "content": "primary text", "similarity": 0.91, "source": "Guide"}

    def fake_expand(chunks):
        # Pair the first incoming chunk with the prepared window.
        return [(chunks[0], window_with_neighbors)]

    monkeypatch.setattr(vector_store, "expand_knowledge_results", fake_expand)

    rendered = vector_store.format_knowledge_context([hit])

    expected_fragments = (
        "[MATCHED PARAGRAPH]",
        "primary text",
        "Supporting context",
        "left neighbor text",
        "right neighbor text",
        "CITE AS",
    )
    for fragment in expected_fragments:
        assert fragment in rendered


# =============================================================================
# format_knowledge_context — plant_aliases citation filter
# =============================================================================

def test_format_knowledge_context_plant_alias_filter_promotes_matching_chunk(monkeypatch):
    """A chunk whose CONTENT mentions one of the aliases keeps its 📖 citation."""
    from app import vector_store

    def without_windows(chunks):
        return [(c, None) for c in chunks]

    monkeypatch.setattr(vector_store, "expand_knowledge_results", without_windows)

    kangkung_chunk = {
        "source": "Petunjuk Teknis Budidaya Sayuran Dataran Rendah",
        "filename": "sayuran.pdf",
        "page_number": 22,
        "content": "Hama yang menyerang tanaman kangkung antara lain ulat grayak.",
        "similarity": 0.72,
        "retrieval_modes": ["dense"],
    }
    aliases = ["kangkung", "Water Spinach", "Ipomoea aquatica"]

    rendered = vector_store.format_knowledge_context([kangkung_chunk], plant_aliases=aliases)

    assert "CITE AS 📖" in rendered
    assert "Background Context" not in rendered


def test_format_knowledge_context_plant_alias_matches_source_name(monkeypatch):
    """An alias found in the SOURCE NAME is enough, even when the content never mentions it.

    Guards dedicated crop documents (e.g. 'Budidaya Cabe Di Perkotaan') where
    ~59% of chunks never repeat the crop name inside the paragraph body.
    """
    from app import vector_store

    def without_windows(chunks):
        return [(c, None) for c in chunks]

    monkeypatch.setattr(vector_store, "expand_knowledge_results", without_windows)

    fusarium_chunk = {
        "source": "Budidaya Cabe Di Perkotaan",  # the alias "Cabe" appears only here
        "filename": "budidaya-cabe.pdf",
        "page_number": 33,
        "content": "Layu Fusarium / Fusarium wilt disebabkan oleh jamur Fusarium oxysporum.",
        "similarity": 0.72,
        "retrieval_modes": ["dense"],
    }
    aliases = ["Cabe", "Cabai", "Chili", "Capsicum annuum"]

    rendered = vector_store.format_knowledge_context([fusarium_chunk], plant_aliases=aliases)

    assert "CITE AS 📖" in rendered
    assert "Background Context" not in rendered


def test_format_knowledge_context_plant_alias_filter_demotes_non_matching_chunk(monkeypatch):
    """No alias in the content and none in the source name → rendered as Background Context."""
    from app import vector_store

    def without_windows(chunks):
        return [(c, None) for c in chunks]

    monkeypatch.setattr(vector_store, "expand_knowledge_results", without_windows)

    unrelated_chunk = {
        "source": "Melon Pest Guide",
        "filename": "melon.pdf",
        "page_number": 38,
        "content": "Patogen masuk ke dalam tanaman melalui ujung-ujung akar.",
        "similarity": 0.72,
        "retrieval_modes": ["dense"],
    }
    aliases = ["kangkung", "Water Spinach", "Ipomoea aquatica"]

    rendered = vector_store.format_knowledge_context([unrelated_chunk], plant_aliases=aliases)

    assert "CITE AS 📖" not in rendered
    assert "Background Context" in rendered


def test_format_knowledge_context_no_plant_aliases_skips_filter(monkeypatch):
    """With plant_aliases=None (general query) the chunk is still rendered with a 📖 citation."""
    from app import vector_store

    def without_windows(chunks):
        return [(c, None) for c in chunks]

    monkeypatch.setattr(vector_store, "expand_knowledge_results", without_windows)

    melon_chunk = {
        "source": "Melon Pest Guide",
        "filename": "melon.pdf",
        "page_number": 38,
        "content": "Patogen masuk ke dalam tanaman melon melalui ujung-ujung akar.",
        "similarity": 0.72,
        "retrieval_modes": ["dense"],
    }

    rendered = vector_store.format_knowledge_context([melon_chunk], plant_aliases=None)

    assert "CITE AS 📖" in rendered


def test_format_knowledge_context_plant_alias_case_insensitive(monkeypatch):
    """A lowercase alias matches the capitalised crop name in the chunk content."""
    from app import vector_store

    def without_windows(chunks):
        return [(c, None) for c in chunks]

    monkeypatch.setattr(vector_store, "expand_knowledge_results", without_windows)

    capitalised_chunk = {
        "source": "Guide",
        "filename": "guide.pdf",
        "page_number": 1,
        "content": "Water Spinach is susceptible to Pythium root rot.",
        "similarity": 0.75,
        "retrieval_modes": ["dense"],
    }
    lowercase_aliases = ["water spinach"]

    rendered = vector_store.format_knowledge_context([capitalised_chunk], plant_aliases=lowercase_aliases)

    assert "CITE AS 📖" in rendered


def test_format_knowledge_context_uses_selected_chunk_order(monkeypatch):
    """The rendered text follows the order returned by select_knowledge_chunks, not the input order."""
    from app import vector_store

    def fake_select(chunks, plant_aliases=None, stage=None, max_verified_chunks=3, max_background_chunks=1):
        # Return the two inputs in reversed order, annotated with selection metadata.
        return [
            {**chunks[1], "selection_score": 0.93, "selection_promoted_background": False},
            {**chunks[0], "selection_score": 0.51, "selection_promoted_background": False},
        ]

    def without_windows(chunks):
        return [(c, None) for c in chunks]

    monkeypatch.setattr(vector_store, "select_knowledge_chunks", fake_select)
    monkeypatch.setattr(vector_store, "expand_knowledge_results", without_windows)

    generic_chunk = {
        "filename": "generic.pdf",
        "page_number": 1,
        "source": "Generic",
        "content": "generic",
        "similarity": 0.72,
        "retrieval_modes": ["dense"],
    }
    lettuce_chunk = {
        "filename": "lettuce.pdf",
        "page_number": 2,
        "source": "Lettuce",
        "content": "lettuce",
        "similarity": 0.68,
        "retrieval_modes": ["dense", "lexical"],
    }

    rendered = vector_store.format_knowledge_context(
        [generic_chunk, lettuce_chunk],
        plant_aliases=["lettuce"],
        stage="vegetative",
    )

    lettuce_position = rendered.index("lettuce")
    generic_position = rendered.index("generic")
    assert lettuce_position < generic_position


def test_format_knowledge_context_drops_extra_background_chunks(monkeypatch):
    """Chunks the selector leaves out must not appear in the rendered context."""
    from app import vector_store

    def keep_first_only(chunks, plant_aliases=None, stage=None, max_verified_chunks=3, max_background_chunks=1):
        kept = dict(chunks[0], selection_score=0.44, selection_promoted_background=False)
        return [kept]

    def without_windows(chunks):
        return [(c, None) for c in chunks]

    monkeypatch.setattr(vector_store, "select_knowledge_chunks", keep_first_only)
    monkeypatch.setattr(vector_store, "expand_knowledge_results", without_windows)

    background_chunks = [
        {"filename": "bg-0.pdf", "page_number": 1, "source": "BG 0", "content": "first context", "similarity": 0.41, "retrieval_modes": ["dense"]},
        {"filename": "bg-1.pdf", "page_number": 2, "source": "BG 1", "content": "second context", "similarity": 0.40, "retrieval_modes": ["dense"]},
    ]

    rendered = vector_store.format_knowledge_context(
        background_chunks,
        plant_aliases=["lettuce"],
        stage="vegetative",
    )

    assert "first context" in rendered
    assert "second context" not in rendered


def test_format_knowledge_context_returns_empty_when_selected_empty(monkeypatch):
    """If the selector returns nothing, format_knowledge_context returns the empty string."""
    from app import vector_store

    def select_nothing(chunks, plant_aliases=None, stage=None, max_verified_chunks=3, max_background_chunks=1):
        return []

    monkeypatch.setattr(vector_store, "select_knowledge_chunks", select_nothing)

    only_chunk = {
        "filename": "bg-0.pdf",
        "page_number": 1,
        "source": "BG 0",
        "content": "first context",
        "similarity": 0.41,
        "retrieval_modes": ["dense"],
    }

    rendered = vector_store.format_knowledge_context(
        [only_chunk],
        plant_aliases=["lettuce"],
        stage="vegetative",
    )

    assert rendered == ""


# =============================================================================
# Lexical retrieval and RRF utilities
# =============================================================================

def test_search_knowledge_fts_posts_query_text_to_match_knowledge_fts(monkeypatch):
    """search_knowledge_fts POSTs query_text and match_count to the match_knowledge_fts RPC."""
    fake_client = FakeAsyncClient()
    replacements = (
        (vector_store, "is_configured", lambda: True),
        (vector_store, "SUPABASE_URL", "https://example.supabase.co"),
        (vector_store.httpx, "AsyncClient", lambda timeout=10.0: fake_client),
    )
    for target, attribute, replacement in replacements:
        monkeypatch.setattr(target, attribute, replacement)

    rows = asyncio.run(vector_store.search_knowledge_fts("pythium root rot", match_count=6))

    first_call = fake_client.calls[0]
    assert first_call["url"].endswith("/rpc/match_knowledge_fts")
    assert first_call["json"]["query_text"] == "pythium root rot"
    assert first_call["json"]["match_count"] == 6
    assert rows[0]["filename"] == "doc.pdf"


def test_search_knowledge_fts_returns_empty_when_not_configured(monkeypatch):
    """When is_configured() is False, search_knowledge_fts returns an empty list."""
    monkeypatch.setattr(vector_store, "is_configured", lambda: False)

    pending_search = vector_store.search_knowledge_fts("pythium root rot")
    rows = asyncio.run(pending_search)

    assert rows == []


def test_search_knowledge_fts_returns_empty_on_non_200_response(monkeypatch):
    """A 503 answer from the backend makes search_knowledge_fts return an empty list."""

    class UnavailableClient:
        """Async client double whose every POST answers 503 Service Unavailable."""

        async def __aenter__(self):
            return self

        async def __aexit__(self, *exc_info):
            return False

        async def post(self, *args, **kwargs):
            return FakeResponse(status_code=503, payload=[], text="Service Unavailable")

    monkeypatch.setattr(vector_store, "is_configured", lambda: True)
    monkeypatch.setattr(vector_store, "SUPABASE_URL", "https://example.supabase.co")
    monkeypatch.setattr(vector_store.httpx, "AsyncClient", lambda timeout=10.0: UnavailableClient())

    rows = asyncio.run(vector_store.search_knowledge_fts("query"))

    assert rows == []


def test_reciprocal_rank_fuse_prefers_chunk_seen_by_both_lists():
    """The chunk present in both the dense and lexical lists ranks first and lists both modes."""
    dense_hits = [
        {"filename": "leafy.pdf", "page_number": 79, "content": "Pythium Root Rot...", "similarity": 0.68},
        {"filename": "pumpkin.pdf", "page_number": 13, "content": "Fusarium crown rot...", "similarity": 0.67},
    ]
    lexical_hits = [
        {"filename": "leafy.pdf", "page_number": 79, "content": "Pythium Root Rot...", "lexical_score": 0.42},
        {"filename": "manual.pdf", "page_number": 47, "content": "Growing plants...", "lexical_score": 0.31},
    ]

    fused = vector_store.reciprocal_rank_fuse(dense_hits, lexical_hits, top_k=2, rrf_k=60)

    top_hit = fused[0]
    assert top_hit["filename"] == "leafy.pdf"
    assert top_hit["retrieval_modes"] == ["dense", "lexical"]


def test_search_knowledge_hybrid_falls_back_to_dense_only_on_lexical_failure(monkeypatch):
    """If the lexical search raises, the hybrid search still returns the dense hit."""
    dense_hit = {"filename": "doc.pdf", "page_number": 1, "content": "dense hit", "similarity": 0.8}

    async def dense_stub(query, match_count=7, match_threshold=0.30, query_label="raw"):
        return [dense_hit]

    async def failing_fts_stub(query, match_count=7):
        raise RuntimeError("fts down")

    monkeypatch.setattr(vector_store, "search_knowledge", dense_stub)
    monkeypatch.setattr(vector_store, "search_knowledge_fts", failing_fts_stub)

    hyde_query = types.SimpleNamespace(text="dense query", label="hyde")
    pending_search = vector_store.search_knowledge_hybrid(
        raw_query="raw query",
        dense_queries=[hyde_query],
    )
    fused_list, _ = asyncio.run(pending_search)

    assert fused_list[0]["content"] == "dense hit"


def test_select_knowledge_chunks_prefers_cross_modal_plant_match():
    """A dense+lexical chunk outranks an otherwise identical dense-only chunk.

    Both chunks share source, content and similarity, so plant and stage
    mentions are equal; only ``retrieval_modes`` differs between them.
    """
    from app import vector_store

    # Fields common to both candidates; inserted between page_number and retrieval_modes.
    shared_fields = {
        "source": "Generic Lettuce Guide",
        "content": "Lettuce crop management. Vegetative stage note.",
        "similarity": 0.67,
    }
    cross_modal_chunk = {
        "filename": "lettuce.pdf",
        "page_number": 8,
        **shared_fields,
        "retrieval_modes": ["dense", "lexical"],
    }
    dense_only_chunk = {
        "filename": "generic.pdf",
        "page_number": 2,
        **shared_fields,
        "retrieval_modes": ["dense"],
    }

    selected = vector_store.select_knowledge_chunks(
        [cross_modal_chunk, dense_only_chunk],
        plant_aliases=["lettuce"],
        stage="vegetative",
        max_verified_chunks=2,
        max_background_chunks=1,
    )

    winner, runner_up = selected[0], selected[1]
    assert winner["filename"] == "lettuce.pdf"
    assert winner["selection_score"] > runner_up["selection_score"]


def test_select_knowledge_chunks_limits_background_chunks():
    """With no verified slots and max_background_chunks=1, only the top background chunk is kept."""
    from app import vector_store

    background_chunks = []
    for i in range(4):
        background_chunks.append(
            {
                "filename": f"bg-{i}.pdf",
                "page_number": i,
                "source": f"Background {i}",
                "content": f"Generic context {i}",
                "similarity": 0.41 - (i * 0.01),
                "retrieval_modes": ["dense"],
            }
        )

    selected = vector_store.select_knowledge_chunks(
        background_chunks,
        plant_aliases=["lettuce"],
        stage="vegetative",
        max_verified_chunks=0,
        max_background_chunks=1,
    )

    assert len(selected) == 1
    only_selected = selected[0]
    assert only_selected["filename"] == "bg-0.pdf"


def test_select_knowledge_chunks_rewards_stage_match():
    """The chunk mentioning the requested stage wins when everything else is equal.

    The two candidates share source, similarity and retrieval_modes and mention
    the plant identically; only the stage named in the content differs, so the
    vegetative chunk must come first for stage="vegetative".
    """
    from app import vector_store

    def lettuce_chunk(filename, page_number, stage_word):
        # Everything except filename, page and the stage word is held constant.
        return {
            "filename": filename,
            "page_number": page_number,
            "source": "Generic Lettuce Guide",
            "content": f"Lettuce crop management. {stage_word} stage details.",
            "similarity": 0.62,
            "retrieval_modes": ["dense"],
        }

    candidates = [
        lettuce_chunk("veg.pdf", 4, "Vegetative"),
        lettuce_chunk("fruiting.pdf", 9, "Fruiting"),
    ]

    selected = vector_store.select_knowledge_chunks(
        candidates,
        plant_aliases=["lettuce"],
        stage="vegetative",
        max_verified_chunks=2,
        max_background_chunks=0,
    )

    assert selected[0]["filename"] == "veg.pdf"


def test_selection_promoted_background_flag():
    """Check the selection_promoted_background flag on each selected chunk.

    Expected outcome with two verified slots and one background slot:
    - the genuinely verified chunk comes first with the flag False;
    - the best background chunk fills the spare verified slot with the flag True;
    - the background chunk appended afterwards has the flag False.
    """
    from app import vector_store

    def dense_chunk(filename, page_number, source, content, similarity):
        return {
            "filename": filename,
            "page_number": page_number,
            "source": source,
            "content": content,
            "similarity": similarity,
            "retrieval_modes": ["dense"],
        }

    candidates = [
        # True verified chunk (similarity >= 0.70)
        dense_chunk("verified.pdf", 1, "Verified Guide", "Verified authoritative content", 0.72),
        # Background chunks (below verified threshold)
        dense_chunk("bg-promoted.pdf", 2, "Background Source", "Relevant background content A", 0.60),
        dense_chunk("bg-normal.pdf", 3, "Background Source", "Relevant background content B", 0.59),
    ]

    selected = vector_store.select_knowledge_chunks(
        candidates,
        plant_aliases=None,
        stage=None,
        max_verified_chunks=2,
        max_background_chunks=1,
    )

    # (filename, promoted?) in the order the selector is expected to return them.
    expected_order = [
        ("verified.pdf", False),
        ("bg-promoted.pdf", True),
        ("bg-normal.pdf", False),
    ]
    assert len(selected) == 3
    for chunk, (expected_filename, expected_promoted) in zip(selected, expected_order):
        assert chunk["filename"] == expected_filename
        assert chunk["selection_promoted_background"] is expected_promoted