InsuranceBot / tests /test_upload_uin_dedup.py
rohitsar567's picture
#47: UIN-based net-new dedup for user-uploaded policy PDFs
1a7161d
Raw
History Blame Contribute Delete
2.84 kB
"""Regression test for #47 β€” UIN net-new dedup on user uploads (2026-05-21).
When a user uploads a policy PDF whose IRDAI UIN already belongs to a
catalogue policy, it is NOT net-new: the upload route must short-circuit
and return the existing card (UploadResponse.already_in_catalogue=True)
instead of indexing a duplicate. These pin the helper logic + the
response-model contract; both symbols did not exist pre-#47.
"""
import unittest
from backend.main import (
UploadResponse,
_catalogue_uin_index,
_match_catalogue_uin,
)
class TestUploadUinDedup(unittest.TestCase):
def test_catalogue_uin_index_is_built(self):
idx = _catalogue_uin_index()
self.assertGreater(len(idx), 50, "catalogue UIN index looks empty")
for uin, val in idx.items():
self.assertRegex(uin, r"^[A-Z]{5,9}\d{5}V\d{6}$")
self.assertEqual(len(val), 2, "value must be (policy_id, name)")
def test_known_catalogue_uin_matches(self):
# Every indexed (modern-format) UIN must round-trip through the
# matcher when embedded in wording text. Order-independent β€” not
# fragile to dict-iteration / module-cache state.
idx = _catalogue_uin_index()
self.assertTrue(idx, "catalogue UIN index is empty")
misses = []
for uin, val in idx.items():
if _match_catalogue_uin(f"policy wording UIN {uin} terms") != val:
misses.append(uin)
if _match_catalogue_uin(uin.lower()) != val: # case-insensitive
misses.append(uin + " (lowercase)")
self.assertEqual(misses, [], f"UINs that did not round-trip: {misses[:8]}")
def test_unknown_or_fake_uin_is_not_matched(self):
self.assertIsNone(_match_catalogue_uin("plain text, no identifier"))
self.assertIsNone(_match_catalogue_uin(""))
# valid UIN *shape* but not a real catalogue policy
self.assertIsNone(_match_catalogue_uin("ZZZHLIP00000V000000"))
def test_uploadresponse_dedup_fields(self):
hit = UploadResponse(
policy_id="acko__acko-health-ii", policy_name="Acko Health Ii",
chunks_added=0, pages_indexed=10, elapsed_ms=5,
already_in_catalogue=True,
existing_policy_id="acko__acko-health-ii",
existing_policy_name="Acko Health Ii")
self.assertTrue(hit.already_in_catalogue)
self.assertEqual(hit.existing_policy_id, "acko__acko-health-ii")
# net-new upload β€” fields default to the not-a-dedup-hit state
fresh = UploadResponse(
policy_id="user-upload__x__y", policy_name="Y",
chunks_added=42, pages_indexed=45, elapsed_ms=9)
self.assertFalse(fresh.already_in_catalogue)
self.assertIsNone(fresh.existing_policy_id)
if __name__ == "__main__":
unittest.main()