Spaces:
Sleeping
Sleeping
| """Regression test for #47 β UIN net-new dedup on user uploads (2026-05-21). | |
| When a user uploads a policy PDF whose IRDAI UIN already belongs to a | |
| catalogue policy, it is NOT net-new: the upload route must short-circuit | |
| and return the existing card (UploadResponse.already_in_catalogue=True) | |
| instead of indexing a duplicate. These pin the helper logic + the | |
| response-model contract; both symbols did not exist pre-#47. | |
| """ | |
| import unittest | |
| from backend.main import ( | |
| UploadResponse, | |
| _catalogue_uin_index, | |
| _match_catalogue_uin, | |
| ) | |
| class TestUploadUinDedup(unittest.TestCase): | |
| def test_catalogue_uin_index_is_built(self): | |
| idx = _catalogue_uin_index() | |
| self.assertGreater(len(idx), 50, "catalogue UIN index looks empty") | |
| for uin, val in idx.items(): | |
| self.assertRegex(uin, r"^[A-Z]{5,9}\d{5}V\d{6}$") | |
| self.assertEqual(len(val), 2, "value must be (policy_id, name)") | |
| def test_known_catalogue_uin_matches(self): | |
| # Every indexed (modern-format) UIN must round-trip through the | |
| # matcher when embedded in wording text. Order-independent β not | |
| # fragile to dict-iteration / module-cache state. | |
| idx = _catalogue_uin_index() | |
| self.assertTrue(idx, "catalogue UIN index is empty") | |
| misses = [] | |
| for uin, val in idx.items(): | |
| if _match_catalogue_uin(f"policy wording UIN {uin} terms") != val: | |
| misses.append(uin) | |
| if _match_catalogue_uin(uin.lower()) != val: # case-insensitive | |
| misses.append(uin + " (lowercase)") | |
| self.assertEqual(misses, [], f"UINs that did not round-trip: {misses[:8]}") | |
| def test_unknown_or_fake_uin_is_not_matched(self): | |
| self.assertIsNone(_match_catalogue_uin("plain text, no identifier")) | |
| self.assertIsNone(_match_catalogue_uin("")) | |
| # valid UIN *shape* but not a real catalogue policy | |
| self.assertIsNone(_match_catalogue_uin("ZZZHLIP00000V000000")) | |
| def test_uploadresponse_dedup_fields(self): | |
| hit = UploadResponse( | |
| policy_id="acko__acko-health-ii", policy_name="Acko Health Ii", | |
| chunks_added=0, pages_indexed=10, elapsed_ms=5, | |
| already_in_catalogue=True, | |
| existing_policy_id="acko__acko-health-ii", | |
| existing_policy_name="Acko Health Ii") | |
| self.assertTrue(hit.already_in_catalogue) | |
| self.assertEqual(hit.existing_policy_id, "acko__acko-health-ii") | |
| # net-new upload β fields default to the not-a-dedup-hit state | |
| fresh = UploadResponse( | |
| policy_id="user-upload__x__y", policy_name="Y", | |
| chunks_added=42, pages_indexed=45, elapsed_ms=9) | |
| self.assertFalse(fresh.already_in_catalogue) | |
| self.assertIsNone(fresh.existing_policy_id) | |
| if __name__ == "__main__": | |
| unittest.main() | |