Spaces:

Jacooo
/

PGC-AI-Chatbot

Running

App Files Files Community

PGC-AI-Chatbot / tests /test_vector_store.py

Jacooo

Deploy from GitHub: f66aedb

c397c36 verified 20 days ago

raw

history blame contribute delete

26.7 kB

	import asyncio
	import sys
	import types
	from pathlib import Path

	sys.path.insert(0, str(Path(__file__).parent.parent))

	import pytest
	import app.vector_store as vector_store


	def test_verification_thresholds_calibrated_for_bge_m3():
	    """Pin both verification cut-offs to the conservative values chosen for BGE-M3."""
	    dense_cutoff = vector_store.VERIFIED_DENSE_THRESHOLD
	    hybrid_cutoff = vector_store.VERIFIED_HYBRID_THRESHOLD

	    assert dense_cutoff == 0.70
	    assert hybrid_cutoff == 0.65


	def test_embed_text_rejects_non_1024_vector(monkeypatch):
	    """embed_text must raise ValueError ("expected 1024") for a 2-element embedding."""

	    class ShortVector:
	        """Embedding stand-in that only offers ``tolist``."""

	        def __init__(self, values):
	            self._values = values

	        def tolist(self):
	            return self._values

	    class TwoDimModel:
	        """Model stand-in whose ``query_embed`` yields one undersized vector."""

	        def query_embed(self, texts):
	            yield ShortVector([0.1, 0.2])

	    def model_factory():
	        return TwoDimModel()

	    monkeypatch.setattr(vector_store, "_get_embedding_model", model_factory)

	    with pytest.raises(ValueError, match="expected 1024"):
	        vector_store.embed_text("lettuce tipburn")



	class FakeResponse:
	    """Tiny HTTP-response double exposing ``status_code``, ``text`` and ``json()``."""

	    def __init__(self, status_code=200, payload=None, text="ok"):
	        # Any falsy payload (None, empty list, empty dict) is normalised to a fresh list.
	        body = payload if payload else []
	        self.text = text
	        self.status_code = status_code
	        self._payload = body

	    def json(self):
	        """Return the payload handed to the constructor (or ``[]`` when it was falsy)."""
	        return self._payload


	class FakeAsyncClient:
	    """Async HTTP client double that logs every POST and answers with one canned row.

	    The tests in this module install it in place of ``vector_store.httpx.AsyncClient``.
	    The constructor accepts and ignores any positional or keyword arguments, so
	    the fake can be created bare (``FakeAsyncClient()``) or with client options.
	    """

	    def __init__(self, *args, **kwargs):
	        # Construction arguments are irrelevant here; only the call log matters.
	        self.calls = []

	    async def __aenter__(self):
	        # ``async with`` hands back the same object so tests can inspect ``calls``.
	        return self

	    async def __aexit__(self, exc_type, exc, tb):
	        # A falsy return value lets exceptions raised in the ``async with`` body propagate.
	        return False

	    async def post(self, url, headers=None, json=None):
	        """Append the request to ``self.calls`` and return a one-row FakeResponse."""
	        self.calls.append({"url": url, "headers": headers, "json": json})
	        return FakeResponse(
	            payload=[
	                {
	                    "source": "Doc",
	                    "filename": "doc.pdf",
	                    "page_number": 2,
	                    "content": "Expanded horticultural context",
	                    "similarity": 0.91,
	                }
	            ]
	        )


	def test_search_knowledge_logs_hyde_query_label_and_embeds_transformed_query(monkeypatch, capsys):
	    """A "hyde"-labelled search embeds the given query text as-is and tags stdout with the label."""
	    hyde_text = "Expanded agronomic explanation for lettuce humidity"
	    embedded_texts = []
	    http_client = FakeAsyncClient()

	    def recording_embed(text):
	        # Remember what was embedded so it can be compared with the query below.
	        embedded_texts.append(text)
	        return [0.1, 0.2]

	    monkeypatch.setattr(vector_store, "is_configured", lambda: True)
	    monkeypatch.setattr(vector_store, "SUPABASE_URL", "https://example.supabase.co")
	    monkeypatch.setattr(vector_store, "embed_text", recording_embed)
	    monkeypatch.setattr(vector_store.httpx, "AsyncClient", lambda timeout=10.0: http_client)

	    results = asyncio.run(
	        vector_store.search_knowledge(query=hyde_text, query_label="hyde")
	    )
	    stdout = capsys.readouterr().out

	    top_hit = results[0]
	    assert top_hit["source"] == "Doc"
	    assert "filename" in top_hit, "Supabase response must include 'filename' for parent expansion"
	    assert embedded_texts[-1] == hyde_text
	    assert http_client.calls[0]["json"]["match_count"] == vector_store.DEFAULT_MATCH_COUNT
	    assert "[VectorRAG:hyde]" in stdout


	def test_search_knowledge_defaults_to_raw_query_label(monkeypatch, capsys):
	    """Without an explicit query_label the log line must carry the "raw" tag."""
	    http_client = FakeAsyncClient()

	    def constant_embed(text):
	        return [0.1, 0.2]

	    monkeypatch.setattr(vector_store, "is_configured", lambda: True)
	    monkeypatch.setattr(vector_store, "SUPABASE_URL", "https://example.supabase.co")
	    monkeypatch.setattr(vector_store, "embed_text", constant_embed)
	    monkeypatch.setattr(vector_store.httpx, "AsyncClient", lambda timeout=10.0: http_client)

	    asyncio.run(vector_store.search_knowledge(query="plain query"))

	    stdout = capsys.readouterr().out
	    assert "[VectorRAG:raw]" in stdout


	def test_merge_knowledge_results_deduplicates_by_filename_page_content_keeps_higher_similarity():
	    """The Cornell hit present in both lists must appear once, with the higher similarity."""
	    from app.vector_store import merge_knowledge_results

	    def hit(filename, source, page_number, content, similarity):
	        return {
	            "filename": filename,
	            "source": source,
	            "page_number": page_number,
	            "content": content,
	            "similarity": similarity,
	        }

	    primary_hits = [
	        hit("cornell.pdf", "Cornell-Lettuce", 5, "tipburn info", 0.75),
	        hit("kubis.pdf", "Kubis-Guide", 2, "cabbage info", 0.82),
	    ]
	    english_hits = [
	        hit("cornell.pdf", "Cornell-Lettuce", 5, "tipburn info", 0.85),
	        hit("new.pdf", "New-Source", 1, "new content", 0.90),
	    ]

	    merged = merge_knowledge_results([primary_hits, english_hits])

	    merged_sources = [entry["source"] for entry in merged]
	    for expected_source in ("Cornell-Lettuce", "Kubis-Guide", "New-Source"):
	        assert expected_source in merged_sources
	    assert len(merged) == 3  # four inputs, one duplicate removed

	    cornell = next(entry for entry in merged if entry["source"] == "Cornell-Lettuce")
	    assert cornell["similarity"] == 0.85  # the higher-scoring duplicate is the one kept


	def test_merge_knowledge_results_does_not_dedup_different_content_same_page():
	    """Two hits sharing file and page but with different text must both be kept."""
	    from app.vector_store import merge_knowledge_results

	    first_paragraph = {"filename": "doc.pdf", "page_number": 3, "content": "first paragraph", "similarity": 0.80}
	    second_paragraph = {"filename": "doc.pdf", "page_number": 3, "content": "second paragraph", "similarity": 0.79}

	    merged = merge_knowledge_results([[first_paragraph, second_paragraph]])

	    assert len(merged) == 2  # differing content means no deduplication


	def test_merge_knowledge_results_respects_top_k():
	    """Four distinct hits merged with top_k=2 must yield exactly two results."""
	    from app.vector_store import merge_knowledge_results

	    single_list = []
	    for i in range(4):
	        single_list.append(
	            {"source": f"Doc{i}", "page_number": i, "content": "x", "similarity": 0.9 - i * 0.01}
	        )

	    merged = merge_knowledge_results([single_list], top_k=2)

	    assert len(merged) == 2


	def test_merge_knowledge_results_handles_empty_inputs():
	    """Both "no lists" and "only empty lists" must merge to an empty list."""
	    from app.vector_store import merge_knowledge_results

	    no_lists = []
	    only_empty_lists = [[], []]

	    assert merge_knowledge_results(no_lists) == []
	    assert merge_knowledge_results(only_empty_lists) == []


	# =============================================================================
	# expand_knowledge_results tests
	# =============================================================================

	def test_expand_knowledge_results_passthrough_on_empty_corpus(monkeypatch):
	    """With an empty corpus each chunk comes back in order, paired with a None window."""
	    from app import vector_store
	    monkeypatch.setattr(vector_store, "_corpus", [])
	    monkeypatch.setattr(vector_store, "_corpus_lookup", {})

	    hits = [
	        {"filename": "a.pdf", "page_number": 1, "content": "hello", "similarity": 0.80},
	        {"filename": "b.pdf", "page_number": 2, "content": "world", "similarity": 0.75},
	    ]

	    expanded = vector_store.expand_knowledge_results(hits)

	    assert len(expanded) == 2
	    for _hit, window in expanded:
	        assert window is None
	    first_original = expanded[0][0]
	    second_original = expanded[1][0]
	    assert first_original["filename"] == "a.pdf"
	    assert second_original["filename"] == "b.pdf"


	def test_expand_knowledge_results_returns_none_window_when_corpus_empty(monkeypatch):
	    """A single hit against an empty corpus yields (original chunk, None)."""
	    from app import vector_store
	    monkeypatch.setattr(vector_store, "_corpus", [])
	    monkeypatch.setattr(vector_store, "_corpus_lookup", {})

	    lone_hit = {"filename": "doc.pdf", "page_number": 1, "content": "some text", "similarity": 0.85}

	    expanded = vector_store.expand_knowledge_results([lone_hit])

	    assert len(expanded) == 1
	    returned_chunk, returned_window = expanded[0]
	    assert returned_chunk["content"] == "some text"
	    assert returned_window is None


	def test_expand_knowledge_results_returns_window_when_match_found(monkeypatch):
	    """When the corpus holds the hit, the window produced by find_and_expand is passed through."""
	    from app import vector_store
	    from app.knowledge_chunking import NormalizedChildChunk
	    from app.parent_context import ParentWindow
	    import app.parent_context as pc_mod

	    hit = {"filename": "guide.pdf", "page_number": 2, "content": "matched text", "similarity": 0.91}

	    corpus_child = NormalizedChildChunk(
	        child_id="guide.pdf::p2::i0",
	        source="Guide",
	        filename="guide.pdf",
	        page_number=2,
	        content="matched text",
	        corpus_ordinal=0,
	    )
	    expected_window = ParentWindow(
	        primary_child=corpus_child,
	        left_neighbor=None,
	        right_neighbor=None,
	        combined_text="matched text",
	    )

	    def stub_find_and_expand(hit, corpus, lookup):
	        # Always hand back the prepared window, whatever is looked up.
	        return expected_window

	    lookup_table = {("guide.pdf", 2, "matched text"): corpus_child}
	    monkeypatch.setattr(vector_store, "_corpus", [corpus_child])
	    monkeypatch.setattr(vector_store, "_corpus_lookup", lookup_table)
	    monkeypatch.setattr(pc_mod, "find_and_expand", stub_find_and_expand)

	    expanded = vector_store.expand_knowledge_results([hit])

	    assert len(expanded) == 1
	    returned_chunk, returned_window = expanded[0]
	    assert returned_chunk["similarity"] == 0.91
	    assert returned_window is not None
	    assert returned_window is expected_window


	# =============================================================================
	# format_knowledge_context with parent windows
	# =============================================================================

	def test_format_knowledge_context_renders_matched_paragraph_label(monkeypatch):
	    """The rendered context must carry the [MATCHED PARAGRAPH] label, the chunk text and a 📖 cite marker."""
	    from app import vector_store
	    monkeypatch.setattr(vector_store, "_corpus", [])
	    monkeypatch.setattr(vector_store, "_corpus_lookup", {})

	    hit = {"source": "Guide", "page_number": 1, "content": "tipburn info", "similarity": 0.91, "filename": "guide.pdf"}

	    rendered = vector_store.format_knowledge_context([hit])

	    for expected_fragment in ("[MATCHED PARAGRAPH]", "tipburn info", "CITE AS 📖"):
	        assert expected_fragment in rendered


	def test_format_knowledge_context_renders_supporting_context_when_window_present(monkeypatch):
	    """With left and right neighbours in the window, their text shows up under "Supporting context"."""
	    from app import vector_store
	    from app.knowledge_chunking import NormalizedChildChunk
	    from app.parent_context import ParentWindow

	    left_child = NormalizedChildChunk("f::p0::i0", "Guide", "guide.pdf", 0, "left neighbor text", 0)
	    primary_child = NormalizedChildChunk("f::p1::i1", "Guide", "guide.pdf", 1, "primary text", 1)
	    right_child = NormalizedChildChunk("f::p2::i2", "Guide", "guide.pdf", 2, "right neighbor text", 2)
	    prepared_window = ParentWindow(
	        primary_child=primary_child,
	        left_neighbor=left_child,
	        right_neighbor=right_child,
	        combined_text="left neighbor text\n\nprimary text\n\nright neighbor text",
	    )

	    def stub_expand(chunks):
	        # Pair the first chunk with the prepared three-part window.
	        return [(chunks[0], prepared_window)]

	    monkeypatch.setattr(vector_store, "expand_knowledge_results", stub_expand)

	    hit = {"filename": "guide.pdf", "page_number": 1, "content": "primary text", "similarity": 0.91, "source": "Guide"}
	    rendered = vector_store.format_knowledge_context([hit])

	    expected_fragments = (
	        "[MATCHED PARAGRAPH]",
	        "primary text",
	        "Supporting context",
	        "left neighbor text",
	        "right neighbor text",
	        "CITE AS",
	    )
	    for fragment in expected_fragments:
	        assert fragment in rendered



	# =============================================================================
	# format_knowledge_context — plant_aliases citation filter
	# =============================================================================

	def test_format_knowledge_context_plant_alias_filter_promotes_matching_chunk(monkeypatch):
	    """A chunk whose CONTENT names one of the aliases keeps its 📖 citation."""
	    from app import vector_store

	    def no_expansion(chunks):
	        return [(c, None) for c in chunks]

	    monkeypatch.setattr(vector_store, "expand_knowledge_results", no_expansion)

	    kangkung_hit = {
	        "source": "Petunjuk Teknis Budidaya Sayuran Dataran Rendah",
	        "filename": "sayuran.pdf",
	        "page_number": 22,
	        "content": "Hama yang menyerang tanaman kangkung antara lain ulat grayak.",
	        "similarity": 0.72,
	        "retrieval_modes": ["dense"],
	    }
	    aliases = ["kangkung", "Water Spinach", "Ipomoea aquatica"]

	    rendered = vector_store.format_knowledge_context([kangkung_hit], plant_aliases=aliases)

	    assert "CITE AS 📖" in rendered
	    assert "Background Context" not in rendered


	def test_format_knowledge_context_plant_alias_matches_source_name(monkeypatch):
	    """An alias found in the SOURCE NAME is enough, even when the content never mentions it.

	    This protects dedicated crop documents (e.g. 'Budidaya Cabe Di Perkotaan'),
	    where roughly 59% of chunks do not repeat the crop name in the paragraph body.
	    """
	    from app import vector_store

	    def no_expansion(chunks):
	        return [(c, None) for c in chunks]

	    monkeypatch.setattr(vector_store, "expand_knowledge_results", no_expansion)

	    fusarium_hit = {
	        "source": "Budidaya Cabe Di Perkotaan",  # the alias "Cabe" lives here, not in the content
	        "filename": "budidaya-cabe.pdf",
	        "page_number": 33,
	        "content": "Layu Fusarium / Fusarium wilt disebabkan oleh jamur Fusarium oxysporum.",
	        "similarity": 0.72,
	        "retrieval_modes": ["dense"],
	    }
	    aliases = ["Cabe", "Cabai", "Chili", "Capsicum annuum"]

	    rendered = vector_store.format_knowledge_context([fusarium_hit], plant_aliases=aliases)

	    assert "CITE AS 📖" in rendered
	    assert "Background Context" not in rendered


	def test_format_knowledge_context_plant_alias_filter_demotes_non_matching_chunk(monkeypatch):
	    """No alias in the content and none in the source name: the chunk becomes Background Context."""
	    from app import vector_store

	    def no_expansion(chunks):
	        return [(c, None) for c in chunks]

	    monkeypatch.setattr(vector_store, "expand_knowledge_results", no_expansion)

	    melon_hit = {
	        "source": "Melon Pest Guide",
	        "filename": "melon.pdf",
	        "page_number": 38,
	        "content": "Patogen masuk ke dalam tanaman melalui ujung-ujung akar.",
	        "similarity": 0.72,
	        "retrieval_modes": ["dense"],
	    }
	    aliases = ["kangkung", "Water Spinach", "Ipomoea aquatica"]

	    rendered = vector_store.format_knowledge_context([melon_hit], plant_aliases=aliases)

	    assert "CITE AS 📖" not in rendered
	    assert "Background Context" in rendered


	def test_format_knowledge_context_no_plant_aliases_skips_filter(monkeypatch):
	    """With plant_aliases=None (a general query) the chunk is still rendered with the 📖 citation."""
	    from app import vector_store

	    def no_expansion(chunks):
	        return [(c, None) for c in chunks]

	    monkeypatch.setattr(vector_store, "expand_knowledge_results", no_expansion)

	    melon_hit = {
	        "source": "Melon Pest Guide",
	        "filename": "melon.pdf",
	        "page_number": 38,
	        "content": "Patogen masuk ke dalam tanaman melon melalui ujung-ujung akar.",
	        "similarity": 0.72,
	        "retrieval_modes": ["dense"],
	    }

	    rendered = vector_store.format_knowledge_context([melon_hit], plant_aliases=None)

	    assert "CITE AS 📖" in rendered


	def test_format_knowledge_context_plant_alias_case_insensitive(monkeypatch):
	    """A lowercase alias must match the capitalised plant name in the content."""
	    from app import vector_store

	    def no_expansion(chunks):
	        return [(c, None) for c in chunks]

	    monkeypatch.setattr(vector_store, "expand_knowledge_results", no_expansion)

	    pythium_hit = {
	        "source": "Guide",
	        "filename": "guide.pdf",
	        "page_number": 1,
	        "content": "Water Spinach is susceptible to Pythium root rot.",
	        "similarity": 0.75,
	        "retrieval_modes": ["dense"],
	    }
	    lowercase_aliases = ["water spinach"]  # content spells it "Water Spinach"

	    rendered = vector_store.format_knowledge_context([pythium_hit], plant_aliases=lowercase_aliases)

	    assert "CITE AS 📖" in rendered


	def test_format_knowledge_context_uses_selected_chunk_order(monkeypatch):
	    """Output must follow the order returned by select_knowledge_chunks, not the input order."""
	    from app import vector_store

	    def reversed_selection(chunks, plant_aliases=None, stage=None, max_verified_chunks=3, max_background_chunks=1):
	        # Put the second input chunk first so the ordering is observable in the output.
	        return [
	            {**chunks[1], "selection_score": 0.93, "selection_promoted_background": False},
	            {**chunks[0], "selection_score": 0.51, "selection_promoted_background": False},
	        ]

	    def no_expansion(chunks):
	        return [(c, None) for c in chunks]

	    monkeypatch.setattr(vector_store, "select_knowledge_chunks", reversed_selection)
	    monkeypatch.setattr(vector_store, "expand_knowledge_results", no_expansion)

	    generic_hit = {"filename": "generic.pdf", "page_number": 1, "source": "Generic", "content": "generic", "similarity": 0.72, "retrieval_modes": ["dense"]}
	    lettuce_hit = {"filename": "lettuce.pdf", "page_number": 2, "source": "Lettuce", "content": "lettuce", "similarity": 0.68, "retrieval_modes": ["dense", "lexical"]}

	    rendered = vector_store.format_knowledge_context(
	        [generic_hit, lettuce_hit],
	        plant_aliases=["lettuce"],
	        stage="vegetative",
	    )

	    lettuce_pos = rendered.index("lettuce")
	    generic_pos = rendered.index("generic")
	    assert lettuce_pos < generic_pos


	def test_format_knowledge_context_drops_extra_background_chunks(monkeypatch):
	    """Chunks left out by select_knowledge_chunks must not appear in the rendered context."""
	    from app import vector_store

	    def keep_first_only(chunks, plant_aliases=None, stage=None, max_verified_chunks=3, max_background_chunks=1):
	        return [{**chunks[0], "selection_score": 0.44, "selection_promoted_background": False}]

	    def no_expansion(chunks):
	        return [(c, None) for c in chunks]

	    monkeypatch.setattr(vector_store, "select_knowledge_chunks", keep_first_only)
	    monkeypatch.setattr(vector_store, "expand_knowledge_results", no_expansion)

	    kept_hit = {"filename": "bg-0.pdf", "page_number": 1, "source": "BG 0", "content": "first context", "similarity": 0.41, "retrieval_modes": ["dense"]}
	    dropped_hit = {"filename": "bg-1.pdf", "page_number": 2, "source": "BG 1", "content": "second context", "similarity": 0.40, "retrieval_modes": ["dense"]}

	    rendered = vector_store.format_knowledge_context(
	        [kept_hit, dropped_hit],
	        plant_aliases=["lettuce"],
	        stage="vegetative",
	    )

	    assert "first context" in rendered
	    assert "second context" not in rendered


	def test_format_knowledge_context_returns_empty_when_selected_empty(monkeypatch):
	    """If selection keeps nothing, the formatted context is the empty string."""
	    from app import vector_store

	    def select_nothing(chunks, plant_aliases=None, stage=None, max_verified_chunks=3, max_background_chunks=1):
	        return []

	    monkeypatch.setattr(vector_store, "select_knowledge_chunks", select_nothing)

	    background_hit = {"filename": "bg-0.pdf", "page_number": 1, "source": "BG 0", "content": "first context", "similarity": 0.41, "retrieval_modes": ["dense"]}

	    rendered = vector_store.format_knowledge_context(
	        [background_hit],
	        plant_aliases=["lettuce"],
	        stage="vegetative",
	    )

	    assert rendered == ""


	# =============================================================================
	# Lexical retrieval and RRF utilities
	# =============================================================================

	def test_search_knowledge_fts_posts_query_text_to_match_knowledge_fts(monkeypatch):
	    """FTS search must POST query_text and match_count to the match_knowledge_fts RPC URL."""
	    http_client = FakeAsyncClient()
	    monkeypatch.setattr(vector_store, "is_configured", lambda: True)
	    monkeypatch.setattr(vector_store, "SUPABASE_URL", "https://example.supabase.co")
	    monkeypatch.setattr(vector_store.httpx, "AsyncClient", lambda timeout=10.0: http_client)

	    rows = asyncio.run(
	        vector_store.search_knowledge_fts("pythium root rot", match_count=6)
	    )

	    first_request = http_client.calls[0]
	    assert first_request["url"].endswith("/rpc/match_knowledge_fts")
	    assert first_request["json"]["query_text"] == "pythium root rot"
	    assert first_request["json"]["match_count"] == 6
	    assert rows[0]["filename"] == "doc.pdf"


	def test_search_knowledge_fts_returns_empty_when_not_configured(monkeypatch):
	    """When is_configured() reports False the FTS search returns an empty list."""

	    def not_configured():
	        return False

	    monkeypatch.setattr(vector_store, "is_configured", not_configured)

	    rows = asyncio.run(vector_store.search_knowledge_fts("pythium root rot"))

	    assert rows == []


	def test_search_knowledge_fts_returns_empty_on_non_200_response(monkeypatch):
	    """A 503 reply from the RPC endpoint must make the FTS search return an empty list."""

	    class ErrorClient:
	        """Async client double whose every POST answers 503 Service Unavailable."""

	        async def __aenter__(self):
	            return self

	        async def __aexit__(self, *a):
	            # A falsy return value lets exceptions from the ``async with`` body propagate.
	            return False

	        async def post(self, *a, **kw):
	            # Accept any call shape (positional URL, headers=/json= keywords) so the
	            # test exercises the status-code handling instead of failing on the signature.
	            return FakeResponse(status_code=503, payload=[], text="Service Unavailable")

	    monkeypatch.setattr(vector_store, "is_configured", lambda: True)
	    monkeypatch.setattr(vector_store, "SUPABASE_URL", "https://example.supabase.co")
	    monkeypatch.setattr(vector_store.httpx, "AsyncClient", lambda timeout=10.0: ErrorClient())

	    result = asyncio.run(vector_store.search_knowledge_fts("query"))
	    assert result == []


	def test_reciprocal_rank_fuse_prefers_chunk_seen_by_both_lists():
	    """The chunk present in both the dense and the lexical list must rank first and list both modes."""
	    dense_hits = [
	        {"filename": "leafy.pdf", "page_number": 79, "content": "Pythium Root Rot...", "similarity": 0.68},
	        {"filename": "pumpkin.pdf", "page_number": 13, "content": "Fusarium crown rot...", "similarity": 0.67},
	    ]
	    lexical_hits = [
	        {"filename": "leafy.pdf", "page_number": 79, "content": "Pythium Root Rot...", "lexical_score": 0.42},
	        {"filename": "manual.pdf", "page_number": 47, "content": "Growing plants...", "lexical_score": 0.31},
	    ]

	    fused = vector_store.reciprocal_rank_fuse(dense_hits, lexical_hits, top_k=2, rrf_k=60)

	    winner = fused[0]
	    assert winner["filename"] == "leafy.pdf"
	    assert winner["retrieval_modes"] == ["dense", "lexical"]


	def test_search_knowledge_hybrid_falls_back_to_dense_only_on_lexical_failure(monkeypatch):
	    """If the lexical search raises, the hybrid search must still return the dense hit."""
	    dense_hit = {"filename": "doc.pdf", "page_number": 1, "content": "dense hit", "similarity": 0.8}

	    async def dense_stub(query, match_count=7, match_threshold=0.30, query_label="raw"):
	        return [dense_hit]

	    async def failing_fts_stub(query, match_count=7):
	        raise RuntimeError("fts down")

	    monkeypatch.setattr(vector_store, "search_knowledge", dense_stub)
	    monkeypatch.setattr(vector_store, "search_knowledge_fts", failing_fts_stub)

	    hyde_query = types.SimpleNamespace(text="dense query", label="hyde")
	    fused_list, _ = asyncio.run(
	        vector_store.search_knowledge_hybrid(raw_query="raw query", dense_queries=[hyde_query])
	    )

	    assert fused_list[0]["content"] == "dense hit"


	def test_select_knowledge_chunks_prefers_cross_modal_plant_match():
	    """A dense+lexical chunk must outrank an otherwise identical dense-only chunk.

	    The two candidates share source, content and similarity, so plant and stage
	    mentions are equal; the only difference is the extra "lexical" retrieval mode.
	    """
	    from app import vector_store

	    shared_fields = {
	        "source": "Generic Lettuce Guide",
	        "content": "Lettuce crop management. Vegetative stage note.",
	        "similarity": 0.67,
	    }
	    cross_modal_hit = {
	        "filename": "lettuce.pdf",
	        "page_number": 8,
	        **shared_fields,
	        "retrieval_modes": ["dense", "lexical"],
	    }
	    dense_only_hit = {
	        "filename": "generic.pdf",
	        "page_number": 2,
	        **shared_fields,
	        "retrieval_modes": ["dense"],
	    }

	    ranked = vector_store.select_knowledge_chunks(
	        [cross_modal_hit, dense_only_hit],
	        plant_aliases=["lettuce"],
	        stage="vegetative",
	        max_verified_chunks=2,
	        max_background_chunks=1,
	    )

	    best, runner_up = ranked[0], ranked[1]
	    assert best["filename"] == "lettuce.pdf"
	    assert best["selection_score"] > runner_up["selection_score"]


	def test_select_knowledge_chunks_limits_background_chunks():
	    """With no verified slots and one background slot, only the top background chunk survives."""
	    from app import vector_store

	    background_hits = []
	    for i in range(4):
	        background_hits.append(
	            {
	                "filename": f"bg-{i}.pdf",
	                "page_number": i,
	                "source": f"Background {i}",
	                "content": f"Generic context {i}",
	                "similarity": 0.41 - (i * 0.01),
	                "retrieval_modes": ["dense"],
	            }
	        )

	    kept = vector_store.select_knowledge_chunks(
	        background_hits,
	        plant_aliases=["lettuce"],
	        stage="vegetative",
	        max_verified_chunks=0,
	        max_background_chunks=1,
	    )

	    assert len(kept) == 1
	    assert kept[0]["filename"] == "bg-0.pdf"


	def test_select_knowledge_chunks_rewards_stage_match():
	    """The stage mention must break the tie between two otherwise equal chunks.

	    Both candidates share source, similarity and retrieval modes and mention the
	    plant identically; only the stage word in the content differs. With
	    stage="vegetative" the chunk mentioning the vegetative stage must come first.
	    """
	    from app import vector_store

	    def lettuce_hit(filename, page_number, content):
	        # Everything except file, page and content is held constant on purpose.
	        return {
	            "filename": filename,
	            "page_number": page_number,
	            "source": "Generic Lettuce Guide",
	            "content": content,
	            "similarity": 0.62,
	            "retrieval_modes": ["dense"],
	        }

	    candidates = [
	        lettuce_hit("veg.pdf", 4, "Lettuce crop management. Vegetative stage details."),
	        lettuce_hit("fruiting.pdf", 9, "Lettuce crop management. Fruiting stage details."),
	    ]

	    ranked = vector_store.select_knowledge_chunks(
	        candidates,
	        plant_aliases=["lettuce"],
	        stage="vegetative",
	        max_verified_chunks=2,
	        max_background_chunks=0,
	    )

	    assert ranked[0]["filename"] == "veg.pdf"


	def test_selection_promoted_background_flag():
	    """Check which selected chunks carry selection_promoted_background=True.

	    Expected outcome with two verified slots and one background slot:
	    - the genuinely verified chunk comes first, flag False;
	    - the best-scoring background chunk fills the spare verified slot, flag True;
	    - the remaining background chunk is appended as background, flag False.
	    """
	    from app import vector_store

	    def dense_hit(filename, page_number, source, content, similarity):
	        return {
	            "filename": filename,
	            "page_number": page_number,
	            "source": source,
	            "content": content,
	            "similarity": similarity,
	            "retrieval_modes": ["dense"],
	        }

	    candidates = [
	        # Meets the verified threshold (similarity >= 0.70).
	        dense_hit("verified.pdf", 1, "Verified Guide", "Verified authoritative content", 0.72),
	        # The next two fall below the verified threshold.
	        dense_hit("bg-promoted.pdf", 2, "Background Source", "Relevant background content A", 0.60),
	        dense_hit("bg-normal.pdf", 3, "Background Source", "Relevant background content B", 0.59),
	    ]

	    selected = vector_store.select_knowledge_chunks(
	        candidates,
	        plant_aliases=None,
	        stage=None,
	        max_verified_chunks=2,
	        max_background_chunks=1,
	    )

	    assert len(selected) == 3

	    # Order: verified, then the promoted background chunk, then the plain background chunk.
	    verified_pick, promoted_pick, background_pick = selected[0], selected[1], selected[2]

	    assert verified_pick["filename"] == "verified.pdf"
	    assert verified_pick["selection_promoted_background"] is False

	    assert promoted_pick["filename"] == "bg-promoted.pdf"
	    assert promoted_pick["selection_promoted_background"] is True

	    assert background_pick["filename"] == "bg-normal.pdf"
	    assert background_pick["selection_promoted_background"] is False