Spaces:
Sleeping
Sleeping
"""Recommendation-transparency (deploy-#2 follow-up, 2026-05-16).

CONTEXT (owner Image#8 diagnosis, CONFIRMED): the recommendation-fit gate
is CORRECT — when a new hard constraint appears (e.g. user says "zero
co-pay, individual only" after "Royal Sundaram Multiplier" was shown), the
gate rightly drops Multiplier because it carries a co-pay. The BUG is purely
conversational: the assistant SILENTLY swaps the recommendation set with no
explanation, so it feels "random / dropped a policy" to the user.

These tests pin the FIX (single_brain.py only — the gate logic is NOT
touched):

  1. Pure-derivation layer (`_recommendation_change_note` /
     `_constraint_reason_clause`):
       • constraint added → previously-cited policy dropped → returns a
         one-line note that NAMES the real dropped policy and ties the
         removal to the REAL constraint the user stated (from this turn's
         save_profile_field updates) — nothing invented.
       • set unchanged / no new constraint / no prior snapshot / empty
         current set → returns "" (NO spurious explanation).
       • canonical identity: a doctype-sibling re-id of a still-cited
         policy is NOT mis-reported as dropped.

  2. End-to-end `handle_turn` integration (Gemini + retrieve_policies
     mocked at their network seams):
       • Turn 1: Multiplier (has co-pay) is in the cited set, snapshot
         persisted, NO drop note.
       • Turn 2: user states "zero co-pay" → fit gate drops Multiplier →
         reply is PREPENDED with a transparent line naming Multiplier and
         citing the zero-co-pay constraint.
       • Turn where the set is unchanged → NO note prepended.

Run:
    .venv/bin/python -m pytest -q tests/test_recommendation_transparency.py
"""
| from __future__ import annotations | |
| import asyncio | |
| import sys | |
| import unittest | |
| import uuid | |
| from pathlib import Path | |
| from unittest import mock | |
# Put the directory two levels above this file at the front of sys.path
# (once) so the `backend` package imports below resolve when the file is
# executed directly.
_REPO_ROOT = Path(__file__).resolve().parent.parent
if not any(entry == str(_REPO_ROOT) for entry in sys.path):
    sys.path[:0] = [str(_REPO_ROOT)]
| from backend import brain_tools, single_brain # noqa: E402 | |
| from backend.single_brain import ( # noqa: E402 | |
| _constraint_reason_clause, | |
| _recommendation_change_note, | |
| ) | |
def _run(coro):
    """Run *coro* to completion on a private event loop and return its result.

    A fresh loop is created for every call so tests never share loop state.
    The loop is always closed afterwards, including when the coroutine
    raises: an event loop left open keeps its internal resources alive and
    makes the interpreter emit ``ResourceWarning: unclosed event loop``.
    """
    loop = asyncio.new_event_loop()
    try:
        return loop.run_until_complete(coro)
    finally:
        loop.close()
def _cite(pid, name):
    """Build a cited-card dict in the shape _build_recommendation_citations emits.

    The insurer slug is whatever precedes the first ``__`` in *pid* (the whole
    id when there is no ``__``); the remaining fields are fixed test values
    derived from *pid* and *name*.
    """
    insurer = pid.partition("__")[0]
    card = dict(chunk_id=f"{pid}#c", policy_id=pid, policy_name=name)
    card["insurer_slug"] = insurer
    card["doc_type"] = "policy"
    card["source_url"] = f"https://example.com/{pid}.pdf"
    card["score"] = 0.5
    return card
# ────────────────────────────────────────────────────────────────────────────
# LAYER 1 — pure derivation: _constraint_reason_clause
# ────────────────────────────────────────────────────────────────────────────
class TestConstraintReasonClause(unittest.TestCase):
    """Pins the phrase `_constraint_reason_clause` returns for a given
    profile-updates dict."""

    def test_zero_copay_is_the_canonical_reason(self):
        # The Image#8 scenario: the user said "zero co-pay" and the LLM
        # persisted copay_pct=0 this turn. The string and the int spelling
        # must both produce the same clause.
        for zero in ("0", 0):
            reason = _constraint_reason_clause({"copay_pct": zero})
            self.assertEqual(reason, "you want zero co-pay")

    def test_nonzero_copay_does_not_claim_zero(self):
        # The user accepted SOME co-pay, so the clause may mention co-pay
        # but must NOT say "you want zero co-pay".
        reason = _constraint_reason_clause({"copay_pct": "20"})
        self.assertNotIn("zero", reason)
        self.assertIn("co-pay", reason)

    def test_other_known_fields_map_to_their_phrase(self):
        expectations = (
            ({"budget_band": "under_15k"}, "you gave a budget"),
            ({"parents_to_insure": True}, "you're now insuring parents"),
        )
        for updates, phrase in expectations:
            self.assertEqual(_constraint_reason_clause(updates), phrase)

    def test_unknown_field_falls_back_to_generic_not_invented(self):
        # An unrecognised field must NOT fabricate a specific reason.
        reason = _constraint_reason_clause({"some_future_field": "x"})
        self.assertEqual(reason, "based on the preference you just shared")

    def test_no_updates_is_generic(self):
        reason = _constraint_reason_clause({})
        self.assertEqual(reason, "based on the preference you just shared")
# ────────────────────────────────────────────────────────────────────────────
# LAYER 1 — pure derivation: _recommendation_change_note
# ────────────────────────────────────────────────────────────────────────────
class TestRecommendationChangeNote(unittest.TestCase):
    """Pins when `_recommendation_change_note` produces a note and when it
    returns the empty string."""

    def test_drop_due_to_zero_copay_is_explained_and_named(self):
        # Royal Sundaram Multiplier (has a co-pay) WAS shown last turn; this
        # turn it is gone and the user just set copay_pct=0.
        shown_before = {
            "royal-sundaram__multiplier": "Royal Sundaram Multiplier",
            "niva-bupa__reassure-3": "ReAssure 3.0",
        }
        shown_now = [_cite("niva-bupa__reassure-3", "ReAssure 3.0")]
        note = _recommendation_change_note(
            prev_snapshot=shown_before,
            current_citations=shown_now,
            profile_updates={"copay_pct": "0"},
        )
        self.assertTrue(note, "a drop+constraint must produce a note")
        # Names the REAL dropped policy (from the snapshot, not invented).
        self.assertIn("Royal Sundaram Multiplier", note)
        # Ties it to the REAL constraint the user stated this turn.
        self.assertIn("zero co-pay", note)
        # Does NOT wrongly name a policy that is still cited.
        self.assertNotIn("ReAssure", note)
        ends_with_colon = note.rstrip().endswith(":")
        self.assertTrue(ends_with_colon,
                        "note should lead INTO the new shortlist")

    def test_no_note_when_set_unchanged(self):
        shown_before = {"niva-bupa__reassure-3": "ReAssure 3.0"}
        shown_now = [_cite("niva-bupa__reassure-3", "ReAssure 3.0")]
        # Even WITH a constraint update, nothing was dropped → no note.
        note = _recommendation_change_note(
            prev_snapshot=shown_before,
            current_citations=shown_now,
            profile_updates={"copay_pct": "0"},
        )
        self.assertEqual(note, "")

    def test_no_note_when_set_only_grew(self):
        shown_before = {"niva-bupa__reassure-3": "ReAssure 3.0"}
        shown_now = [
            _cite("niva-bupa__reassure-3", "ReAssure 3.0"),
            _cite("care__supreme", "Care Supreme"),
        ]
        note = _recommendation_change_note(
            prev_snapshot=shown_before,
            current_citations=shown_now,
            profile_updates={"copay_pct": "0"},
        )
        self.assertEqual(note, "")

    def test_no_note_without_a_new_constraint(self):
        # A set change with NO constraint persisted this turn is a normal
        # refinement, not a silent constraint-driven drop → stay quiet.
        shown_before = {
            "royal-sundaram__multiplier": "Royal Sundaram Multiplier",
        }
        shown_now = [_cite("niva-bupa__reassure-3", "ReAssure 3.0")]
        note = _recommendation_change_note(
            prev_snapshot=shown_before,
            current_citations=shown_now,
            profile_updates={},
        )
        self.assertEqual(note, "")

    def test_no_note_without_prior_snapshot(self):
        # First recommendation ever → nothing to diff against.
        shown_now = [_cite("niva-bupa__reassure-3", "ReAssure 3.0")]
        note = _recommendation_change_note(
            prev_snapshot={},
            current_citations=shown_now,
            profile_updates={"copay_pct": "0"},
        )
        self.assertEqual(note, "")

    def test_no_note_when_current_set_empty(self):
        # Empty cited set is a separate "no plan fits" path — there is no
        # "and these now fit better" to lead into.
        shown_before = {
            "royal-sundaram__multiplier": "Royal Sundaram Multiplier",
        }
        note = _recommendation_change_note(
            prev_snapshot=shown_before,
            current_citations=[],
            profile_updates={"copay_pct": "0"},
        )
        self.assertEqual(note, "")

    def test_doctype_sibling_reid_not_misreported_as_dropped(self):
        # Same product, different doctype-sibling id this turn. Canonical
        # identity must treat it as STILL cited → no false "removed" note.
        product = "New National Parivar Mediclaim"
        shown_before = {
            "national-insurance__new-national-parivar-mediclaim__brochure":
                product,
        }
        shown_now = [
            _cite(
                "national-insurance__new-national-parivar-mediclaim__wordings",
                product,
            ),
        ]
        note = _recommendation_change_note(
            prev_snapshot=shown_before,
            current_citations=shown_now,
            profile_updates={"copay_pct": "0"},
        )
        self.assertEqual(
            note,
            "",
            "a re-id of a STILL-cited product must not be reported dropped",
        )

    def test_multiple_dropped_policies_all_named(self):
        shown_before = {
            "royal-sundaram__multiplier": "Royal Sundaram Multiplier",
            "star__assure": "Star Assure",
            "niva-bupa__reassure-3": "ReAssure 3.0",
        }
        shown_now = [_cite("niva-bupa__reassure-3", "ReAssure 3.0")]
        note = _recommendation_change_note(
            prev_snapshot=shown_before,
            current_citations=shown_now,
            profile_updates={"copay_pct": "0"},
        )
        # Both dropped names appear, and the wording switches to the plural.
        for fragment in ("Royal Sundaram Multiplier", "Star Assure",
                         "they don't"):
            self.assertIn(fragment, note)
# ────────────────────────────────────────────────────────────────────────────
# LAYER 2 — end-to-end handle_turn (Gemini + retrieve_policies mocked)
# ────────────────────────────────────────────────────────────────────────────
def _fc_part(name, args):
    """Return one response part that asks for tool *name* to be called with *args*."""
    call = dict(name=name, args=args)
    return dict(functionCall=call)
def _text_payload(text):
    """Return a single-candidate response payload whose only part is *text*."""
    part = {"text": text}
    candidate = {"content": {"parts": [part]}}
    return {"candidates": [candidate]}
def _tool_payload(parts):
    """Return a single-candidate response payload carrying *parts* as given."""
    candidate = {"content": {"parts": parts}}
    return {"candidates": [candidate]}
# Chunks the fake retrieve_policies hands back. In the scenario under test
# Multiplier carries a co-pay, while ReAssure 3.0 and Care Supreme are
# zero-co-pay.
_MULTIPLIER = dict(
    chunk_id="rs1",
    policy_id="royal-sundaram__multiplier",
    policy_name="Royal Sundaram Multiplier",
    insurer_slug="royal-sundaram",
    doc_type="policy",
    source_url="https://example.com/multiplier.pdf",
    score=0.61,
    uin_code="RSAHLIP21001V010001",
)
_REASSURE = dict(
    chunk_id="nb1",
    policy_id="niva-bupa__reassure-3",
    policy_name="ReAssure 3.0",
    insurer_slug="niva-bupa",
    doc_type="policy",
    source_url="https://example.com/reassure.pdf",
    score=0.55,
    uin_code="MAXHLIP21177V032122",
)
_CARE = dict(
    chunk_id="ch1",
    policy_id="care__supreme",
    policy_name="Care Supreme",
    insurer_slug="care",
    doc_type="policy",
    source_url="https://example.com/caresupreme.pdf",
    score=0.50,
    uin_code="CARHLIP21001V010001",
)
class _HandleTurnHarness(unittest.TestCase):
    """Drives single_brain.handle_turn with the two network seams stubbed:
    _gemini_call (scripted per-turn payloads) and brain_tools.retrieve_policies
    (scripted chunk lists). The fit gate, citation builder and transparency
    layer all run for real.

    Tests set ``self._gemini_script`` (payloads replayed one per call, in
    order) and ``self._retrieve_chunks`` (chunks served by every retrieval)
    before each turn.

    Each patcher is registered with ``addCleanup`` immediately after it is
    started. unittest skips ``tearDown`` when ``setUp`` raises but still runs
    cleanups, so a failure part-way through ``setUp`` cannot leak a live
    patch (or the fake API key) into later tests.
    """

    def setUp(self):
        import os

        self._env = mock.patch.dict(os.environ,
                                    {"GOOGLE_API_KEY": "test-key"})
        self._env.start()
        self.addCleanup(self._env.stop)

        self._gemini_script: list = []
        self._retrieve_chunks: list = []

        async def _fake_gemini(*_a, **_k):
            # Replay the scripted payloads in order; once the script is
            # exhausted, answer with a recognisable plain-text payload.
            if not self._gemini_script:
                return _text_payload("(no more scripted turns)")
            return self._gemini_script.pop(0)

        async def _fake_retrieve(*_a, **_k):
            chunks = list(self._retrieve_chunks)
            # mark_recommendation gates on session.last_retrieved_chunks —
            # mirror what the real retrieve_policies stamps.
            sess = _k.get("session")
            if sess is not None:
                sess.last_retrieved_chunks = list(chunks)
                sess.slug_to_insurer = {
                    c["policy_id"]: c["insurer_slug"] for c in chunks
                }
            return {"chunks": chunks, "count": len(chunks)}

        self._gp = mock.patch.object(single_brain, "_gemini_call",
                                     _fake_gemini)
        self._rp = mock.patch.object(brain_tools, "retrieve_policies",
                                     _fake_retrieve)
        # Cleanups run last-in-first-out, so the patches are undone in the
        # reverse of the order they were applied: retrieve, gemini, env.
        self._gp.start()
        self.addCleanup(self._gp.stop)
        self._rp.start()
        self.addCleanup(self._rp.stop)

    def _fresh_session(self):
        """Return a new SessionState with a random ``t_``-prefixed session id."""
        from backend.session_state import SessionState
        return SessionState(session_id=f"t_{uuid.uuid4().hex[:8]}")

    def _ready_session(self):
        """Session with the 7 required slots filled + the post-recap pricing
        bundle marked skipped — the realistic precondition for a
        RECOMMENDATION turn. Bug #107 only attaches policy citations once
        brain_tools._profile_complete is satisfied, and Bug #108's one-shot
        bundle re-ask gate is bypassed when the user skipped the pricing
        inputs; recommendation-transparency assertions (snapshot persists,
        drop note fires) must reflect that real flow."""
        sess = self._fresh_session()
        sess.profile.name = "Asha"
        sess.profile.age = 35
        sess.profile.dependents = "self+spouse"
        sess.profile.location_tier = "metro"
        sess.profile.income_band = "10L-25L"
        sess.profile.primary_goal = "first_buy"
        sess.profile.health_conditions = ["none"]
        sess.pricing_bundle_skipped = True
        return sess
class TestHandleTurnTransparency(_HandleTurnHarness):
    """End-to-end checks that a constraint-driven shortlist change is
    explained in the reply, and that no explanation appears otherwise."""

    def test_silent_swap_becomes_explicit_on_new_constraint(self):
        sess = self._ready_session()

        # ---- Turn 1: recommend Multiplier + ReAssure (no constraint yet).
        self._retrieve_chunks = [_MULTIPLIER, _REASSURE]
        self._gemini_script = [
            _tool_payload([_fc_part("retrieve_policies",
                                    {"query": "comprehensive health cover"})]),
            _tool_payload([_fc_part("mark_recommendation", {
                "policy_ids": ["royal-sundaram__multiplier",
                               "niva-bupa__reassure-3"]})]),
            _text_payload(
                "I recommend Royal Sundaram Multiplier and ReAssure 3.0 "
                "for comprehensive cover."),
        ]
        r1 = _run(single_brain.handle_turn(sess, "I want health insurance"))
        self.assertNotIn("I've removed", r1.reply_text,
                         "no prior shortlist → no drop note on turn 1")
        cited1 = {c["policy_id"] for c in r1.citations}
        self.assertIn("royal-sundaram__multiplier", cited1)
        # Snapshot of THIS turn's cited set must persist for the next diff.
        snap = getattr(sess, "last_recommendation_snapshot", {})
        self.assertEqual(
            snap.get("royal-sundaram__multiplier"),
            "Royal Sundaram Multiplier")

        # ---- Turn 2: user demands zero co-pay. The gate drops Multiplier
        # (it has a co-pay); retrieval now only surfaces zero-co-pay plans.
        self._retrieve_chunks = [_REASSURE, _CARE]
        self._gemini_script = [
            _tool_payload([_fc_part("save_profile_field",
                                    {"field": "copay_pct", "value": "0"})]),
            _tool_payload([_fc_part("retrieve_policies",
                                    {"query": "zero co-pay comprehensive"})]),
            _tool_payload([_fc_part("mark_recommendation", {
                "policy_ids": ["niva-bupa__reassure-3", "care__supreme"]})]),
            _text_payload(
                "ReAssure 3.0 and Care Supreme are strong zero-co-pay "
                "options."),
        ]
        r2 = _run(single_brain.handle_turn(
            sess, "I want zero co-pay, individual only"))
        # The swap is now EXPLAINED, naming the real dropped policy and the
        # real stated constraint — and prepended ahead of the rec prose.
        self.assertIn("Royal Sundaram Multiplier", r2.reply_text)
        self.assertIn("zero co-pay", r2.reply_text)
        self.assertIn("I've removed", r2.reply_text)
        self.assertLess(
            r2.reply_text.index("Royal Sundaram Multiplier"),
            r2.reply_text.index("ReAssure 3.0"),
            "transparency line must be PREPENDED before the rec prose")
        # Gate behaviour unchanged: Multiplier is no longer cited.
        cited2 = {c["policy_id"] for c in r2.citations}
        self.assertNotIn("royal-sundaram__multiplier", cited2)
        self.assertIn("niva-bupa__reassure-3", cited2)

    def test_no_spurious_note_when_set_unchanged(self):
        sess = self._ready_session()
        self._retrieve_chunks = [_REASSURE, _CARE]
        self._gemini_script = [
            _tool_payload([_fc_part("retrieve_policies", {"query": "cover"})]),
            _tool_payload([_fc_part("mark_recommendation", {
                "policy_ids": ["niva-bupa__reassure-3", "care__supreme"]})]),
            _text_payload("ReAssure 3.0 and Care Supreme look good."),
        ]
        _run(single_brain.handle_turn(sess, "show me plans"))

        # Turn 2: a constraint is stated BUT the same set still fits → no
        # policy dropped → must NOT fabricate a removal note.
        self._retrieve_chunks = [_REASSURE, _CARE]
        self._gemini_script = [
            _tool_payload([_fc_part("save_profile_field",
                                    {"field": "copay_pct", "value": "0"})]),
            _tool_payload([_fc_part("retrieve_policies", {"query": "cover"})]),
            _tool_payload([_fc_part("mark_recommendation", {
                "policy_ids": ["niva-bupa__reassure-3", "care__supreme"]})]),
            _text_payload("Both still fit with zero co-pay."),
        ]
        r2 = _run(single_brain.handle_turn(sess, "I want zero co-pay"))
        self.assertNotIn("I've removed", r2.reply_text)
        self.assertEqual(r2.reply_text, "Both still fit with zero co-pay.")

    def test_no_note_on_pure_qa_turn_after_recommendation(self):
        sess = self._ready_session()
        self._retrieve_chunks = [_MULTIPLIER, _REASSURE]
        self._gemini_script = [
            _tool_payload([_fc_part("retrieve_policies", {"query": "cover"})]),
            _tool_payload([_fc_part("mark_recommendation", {
                "policy_ids": ["royal-sundaram__multiplier",
                               "niva-bupa__reassure-3"]})]),
            _text_payload("Royal Sundaram Multiplier and ReAssure 3.0."),
        ]
        _run(single_brain.handle_turn(sess, "recommend plans"))
        snap_before = dict(
            getattr(sess, "last_recommendation_snapshot", {}))
        # Precondition: the recommendation turn must actually have written a
        # snapshot. Without this, a missing/empty snapshot would compare
        # equal to itself below and the "not wiped" check would pass
        # without testing anything.
        self.assertTrue(
            snap_before,
            "recommendation turn must persist a non-empty shortlist snapshot")

        # Pure QA follow-up: no shortlist named, is_recommendation False.
        self._gemini_script = [
            _text_payload("A waiting period is the time before a benefit "
                          "becomes claimable."),
        ]
        r2 = _run(single_brain.handle_turn(
            sess, "what does waiting period mean?"))
        self.assertNotIn("I've removed", r2.reply_text)
        # Active shortlist identity must NOT be wiped by a QA turn.
        self.assertEqual(
            getattr(sess, "last_recommendation_snapshot", {}),
            snap_before,
            "QA turn must not erase the active shortlist snapshot")
# Allow running this file directly as a script; unittest.main() discovers and
# runs the TestCase classes defined in this module.
if __name__ == "__main__":
    unittest.main()