Spaces:
Running
Running
"""Regression tests for alias data-layer bugs fixed on 2026-06-14.

These tests lock in the fixes for the three alias bugs that the
GitHub audit flagged:

1. dahi/curd/yogurt resolved to different canonicals depending on
   which resolver was called (FIXED: all three now resolve to "curd")
2. padavalakayi (Kannada for snake gourd) was listed as both a carrot
   alias and a snake_gourd alias (FIXED: removed from carrot)
3. The combo parser hardcoded sambar veg components that didn't
   match the actual Swiggy data (FIXED: components now match the
   description, and a description-based parser is the primary path)

Per motto_v3 §0.8 (Data Layer and Configuration Rule), alias maps
are product architecture. These tests ensure the fixes stick.
"""
| from __future__ import annotations | |
| import re | |
| from pathlib import Path | |
| import pytest | |
| from shopstack.domain import unit_price | |
| from shopstack.domain.product_matching import _ALIAS_TO_CANONICAL | |
| from shopstack.domain.unit_price import ( | |
| _CANONICAL_MAP, | |
| ITEM_ALIASES, | |
| canonicalize_name, | |
| normalize_item_name, | |
| resolve_canonical, | |
| ) | |
# ── Regression: dahi/curd/yogurt unified to "curd" ─────────────────────
class TestDahiCurdYogurtUnification:
    """Lock in the unification of 'dahi', 'curd' and 'yogurt'.

    Every resolver exercised here must map all three spellings to the
    single canonical name 'curd'.
    """

    # The three spellings that must share one canonical name.
    _QUERIES = ("dahi", "curd", "yogurt")

    @pytest.mark.parametrize("query", _QUERIES)
    def test_resolve_canonical_returns_curd(self, query: str):
        """resolve_canonical() must return 'curd' for each spelling."""
        # Resolve once so the failure message reports the value that was
        # actually compared.
        result = resolve_canonical(query)
        assert result == "curd", (
            f"resolve_canonical({query!r}) must return 'curd' "
            f"(Indian market canonical), got {result!r}"
        )

    @pytest.mark.parametrize("query", _QUERIES)
    def test_normalize_item_name_returns_curd(self, query: str):
        """normalize_item_name() must return 'curd' for each spelling."""
        result = normalize_item_name(query)
        assert result == "curd", (
            f"normalize_item_name({query!r}) must return 'curd', "
            f"got {result!r}"
        )

    def test_canonical_map_no_longer_routes_dahi_to_yogurt(self):
        """Any of the three spellings present in _CANONICAL_MAP maps to 'curd'.

        Spellings that are absent from the map are skipped; only existing
        entries are checked.
        """
        for query in self._QUERIES:
            if query not in _CANONICAL_MAP:
                continue
            target = _CANONICAL_MAP[query]
            assert target == "curd", (
                f"_CANONICAL_MAP[{query!r}] = {target!r} "
                f"must be 'curd', not 'yogurt'. This is the dahi->yogurt "
                f"divergence that broke inventory search."
            )

    def test_product_matching_canonical_is_curd_not_yogurt(self):
        """_ALIAS_TO_CANONICAL must contain 'curd' and never favour 'yogurt'.

        'yogurt' may be missing from the mapping altogether; if it is
        present, its value must be 'curd'.
        """
        assert "curd" in _ALIAS_TO_CANONICAL, (
            "_ALIAS_TO_CANONICAL must include 'curd' as a canonical"
        )
        assert (
            "yogurt" not in _ALIAS_TO_CANONICAL
            or _ALIAS_TO_CANONICAL.get("yogurt") == "curd"
        ), "If 'yogurt' is in _ALIAS_TO_CANONICAL, it must resolve to 'curd'"
# ── Regression: padavalakayi no longer misclassified as carrot ─────────
class TestPadavalakayiSnakeGourd:
    """Guard the alias data for 'padavalakayi' (Kannada for snake gourd)."""

    def test_padavalakayi_not_a_carrot_alias(self):
        """The 'carrot' entry of ITEM_ALIASES must not list padavalakayi."""
        # A missing 'carrot' entry is treated as an empty alias list.
        listed_under_carrot = "padavalakayi" in ITEM_ALIASES.get("carrot", [])
        assert not listed_under_carrot, (
            "padavalakayi is Kannada for snake gourd, not carrot. "
            "Remove it from ITEM_ALIASES['carrot']."
        )

    def test_padavalakayi_resolves_to_snake_gourd(self):
        """resolve_canonical('padavalakayi') must yield 'snake_gourd'."""
        canonical = resolve_canonical("padavalakayi")
        assert canonical == "snake_gourd", (
            f"padavalakayi must resolve to 'snake_gourd', got {canonical!r}"
        )

    def test_padavalakayi_is_a_snake_gourd_alias(self):
        """The 'snake_gourd' entry of ITEM_ALIASES must list padavalakayi."""
        # A missing 'snake_gourd' entry is treated as an empty alias list,
        # which makes this assertion fail.
        listed_under_snake_gourd = (
            "padavalakayi" in ITEM_ALIASES.get("snake_gourd", [])
        )
        assert listed_under_snake_gourd, (
            "padavalakayi should be in ITEM_ALIASES['snake_gourd']"
        )
# ── Regression: combo parser matches actual Swiggy data ────────────────
class TestSambarVegComboComponents:
    """Check the Sambar Veg Combo components against the Swiggy snapshot."""

    def _parse_snapshot_combo(self):
        """Return the components parsed for the snapshot's Sambar Veg Combo.

        Calls canonicalize_name() with the product name and the snapshot
        description, and keeps only the third element of the returned
        triple (the components).
        """
        parsed = canonicalize_name(
            "Sambar Veg Combo",
            description="Drumstick, Brinjal, Raw Banana and Pumpkin Fresh veggies combo for Vishu festive cooking",
        )
        _, _, components = parsed
        return components

    def test_sambar_veg_uses_actual_swiggy_components(self):
        """All four vegetables named in the snapshot description are found.

        The description reads: 'Drumstick, Brinjal, Raw Banana and Pumpkin
        Fresh veggies combo for Vishu festive cooking'.
        """
        components = self._parse_snapshot_combo()
        required = ("drumstick", "brinjal", "raw_banana", "pumpkin")
        for expected in required:
            assert expected in components, (
                f"Sambar Veg Combo should include {expected!r}, "
                f"got {components!r}. The combo parser isn't matching "
                f"the actual Swiggy data."
            )

    def test_sambar_veg_old_wrong_components_absent(self):
        """The previously hardcoded components must not be returned.

        radish, cluster_beans and ladys_finger are not in the snapshot
        description, so none of them may appear in the parsed components.
        """
        components = self._parse_snapshot_combo()
        stale = ("radish", "cluster_beans", "ladys_finger")
        for wrong in stale:
            assert wrong not in components, (
                f"{wrong!r} was the OLD hardcoded component that didn't "
                f"match the actual Swiggy data. The description-based "
                f"parser should not return it."
            )
# ── Regression: combo parser v2 uses description when available ────────
class TestComboParserV2PrimaryPath:
    """The description-based parser is the primary path.

    The name-based fallback is only expected to apply when no description
    is supplied.
    """

    def test_description_takes_precedence_over_name_heuristic(self):
        """A supplied description decides the components, not the name."""
        _, _, components = canonicalize_name(
            "Sambar Veg Combo",
            description="Okra, Pumpkin Fresh combo",
        )
        # Components named in the description must be present. Okra is
        # accepted under either 'okra' or 'ladys_finger'.
        assert "okra" in components or "ladys_finger" in components
        assert "pumpkin" in components
        # 'brinjal' and 'raw_banana' are not in this description; they can
        # only come from the name-based fallback for "Sambar Veg Combo",
        # which must not run when a description is given.
        for fallback_only in ("brinjal", "raw_banana"):
            assert fallback_only not in components, (
                f"{fallback_only!r} is not in the description and must "
                f"not be added by the name-based fallback, "
                f"got {components!r}"
            )

    def test_no_description_falls_back_to_name_heuristic(self):
        """Without a description, the known combo name supplies components.

        This keeps backward compatibility for callers that pass only the
        product name.
        """
        _, _, components = canonicalize_name("Sambar Veg Combo")
        for expected in ("drumstick", "brinjal", "raw_banana", "pumpkin"):
            assert expected in components, (
                f"name-based fallback should include {expected!r}, "
                f"got {components!r}"
            )