shopstack / tests /test_alias_data_fixes.py
pranaysuyash's picture
Sync ShopStack HEAD 6f8adfc
d999bba verified
Raw
History Blame Contribute Delete
7.41 kB
"""Regression tests for alias data-layer bugs fixed in 2026-06-14.
These tests lock in the fixes for the three alias bugs that the
GitHub audit flagged:
1. dahi/curd/yogurt resolved to different canonicals depending on
which resolver was called (FIXED: all three now resolve to "curd")
2. padavalakayi (Kannada for snake gourd) was listed as both a carrot
alias and a snake_gourd alias (FIXED: removed from carrot)
3. The combo parser hardcoded sambar veg components that didn't
match the actual Swiggy data (FIXED: components now match the
description, and a description-based parser is the primary path)
Per motto_v3 §0.8 (Data Layer and Configuration Rule), alias maps
are product architecture. These tests ensure the fixes stick.
"""
from __future__ import annotations
import re
from pathlib import Path
import pytest
from shopstack.domain import unit_price
from shopstack.domain.product_matching import _ALIAS_TO_CANONICAL
from shopstack.domain.unit_price import (
_CANONICAL_MAP,
ITEM_ALIASES,
canonicalize_name,
normalize_item_name,
resolve_canonical,
)
# ── Regression: dahi/curd/yogurt unified to "curd" ─────────────────────
class TestDahiCurdYogurtUnification:
    """Pin 'dahi', 'curd' and 'yogurt' to one shared canonical: 'curd'."""

    # Every spelling that has to collapse onto the 'curd' canonical.
    _SPELLINGS = ("dahi", "curd", "yogurt")

    @pytest.mark.parametrize("query", _SPELLINGS)
    def test_resolve_canonical_returns_curd(self, query: str):
        """resolve_canonical must answer 'curd' for each spelling."""
        resolved = resolve_canonical(query)
        assert resolved == "curd", (
            f"resolve_canonical({query!r}) must return 'curd' "
            f"(Indian market canonical), got {resolve_canonical(query)!r}"
        )

    @pytest.mark.parametrize("query", _SPELLINGS)
    def test_normalize_item_name_returns_curd(self, query: str):
        """normalize_item_name must answer 'curd' for each spelling."""
        normalized = normalize_item_name(query)
        assert normalized == "curd", (
            f"normalize_item_name({query!r}) must return 'curd', "
            f"got {normalized!r}"
        )

    def test_canonical_map_no_longer_routes_dahi_to_yogurt(self):
        """Any spelling that is a key of _CANONICAL_MAP must map to 'curd'.

        Spellings that are not keys of the map are skipped rather than
        treated as failures.
        """
        for spelling in self._SPELLINGS:
            if spelling not in _CANONICAL_MAP:
                continue
            assert _CANONICAL_MAP[spelling] == "curd", (
                f"_CANONICAL_MAP[{spelling!r}] = {_CANONICAL_MAP[spelling]!r} "
                f"must be 'curd', not 'yogurt'. This is the dahi→yogurt "
                f"divergence that broke inventory search."
            )

    def test_product_matching_canonical_is_curd_not_yogurt(self):
        """product_matching's _ALIAS_TO_CANONICAL must favour 'curd'.

        'curd' has to be present as a key, and a 'yogurt' key, when one
        exists, has to map to 'curd'.
        """
        assert "curd" in _ALIAS_TO_CANONICAL, (
            "_ALIAS_TO_CANONICAL must include 'curd' as a canonical"
        )
        # The only failing case: 'yogurt' is present but points elsewhere.
        yogurt_points_elsewhere = (
            "yogurt" in _ALIAS_TO_CANONICAL
            and _ALIAS_TO_CANONICAL.get("yogurt") != "curd"
        )
        assert not yogurt_points_elsewhere, (
            "If 'yogurt' is in _ALIAS_TO_CANONICAL, it must resolve to 'curd'"
        )
# ── Regression: padavalakayi no longer misclassified as carrot ─────────
class TestPadavalakayiSnakeGourd:
    """'padavalakayi' is the Kannada word for snake gourd, never carrot."""

    def test_padavalakayi_not_a_carrot_alias(self):
        """ITEM_ALIASES['carrot'] must not list 'padavalakayi'."""
        # A missing 'carrot' entry counts as an empty alias list.
        listed_under_carrot = "padavalakayi" in ITEM_ALIASES.get("carrot", [])
        assert not listed_under_carrot, (
            "padavalakayi is Kannada for snake gourd, not carrot. "
            "Remove it from ITEM_ALIASES['carrot']."
        )

    def test_padavalakayi_resolves_to_snake_gourd(self):
        """resolve_canonical('padavalakayi') must give 'snake_gourd'."""
        canonical = resolve_canonical("padavalakayi")
        assert canonical == "snake_gourd", (
            f"padavalakayi must resolve to 'snake_gourd', got {canonical!r}"
        )

    def test_padavalakayi_is_a_snake_gourd_alias(self):
        """ITEM_ALIASES['snake_gourd'] must still list 'padavalakayi'."""
        # A missing 'snake_gourd' entry counts as an empty alias list.
        listed_under_snake_gourd = (
            "padavalakayi" in ITEM_ALIASES.get("snake_gourd", [])
        )
        assert listed_under_snake_gourd, (
            "padavalakayi should be in ITEM_ALIASES['snake_gourd']"
        )
# ── Regression: combo parser matches actual Swiggy data ───────────────
class TestSambarVegComboComponents:
    """Sambar veg combo components have to agree with the Swiggy snapshot."""

    # Sambar Veg Combo description, quoted from the Swiggy snapshot.
    _SWIGGY_DESCRIPTION = "Drumstick, Brinjal, Raw Banana and Pumpkin Fresh veggies combo for Vishu festive cooking"

    def _parsed_components(self):
        """Return the components canonicalize_name yields for the snapshot combo."""
        # Only the third element of the returned triple is of interest here.
        _first, _second, parsed = canonicalize_name(
            "Sambar Veg Combo",
            description=self._SWIGGY_DESCRIPTION,
        )
        return parsed

    def test_sambar_veg_uses_actual_swiggy_components(self):
        """All four vegetables named in the description must be extracted.

        The snapshot description reads: 'Drumstick, Brinjal, Raw Banana and
        Pumpkin Fresh veggies combo for Vishu festive cooking'.
        """
        components = self._parsed_components()
        for expected in ("drumstick", "brinjal", "raw_banana", "pumpkin"):
            assert expected in components, (
                f"Sambar Veg Combo should include {expected!r}, "
                f"got {components!r}. The combo parser isn't matching "
                f"the actual Swiggy data."
            )

    def test_sambar_veg_old_wrong_components_absent(self):
        """The previously hardcoded radish, cluster_beans and ladys_finger
        must not show up when the combo is parsed from the Swiggy
        description."""
        components = self._parsed_components()
        for wrong in ("radish", "cluster_beans", "ladys_finger"):
            assert wrong not in components, (
                f"{wrong!r} was the OLD hardcoded component that didn't "
                f"match the actual Swiggy data. The description-based "
                f"parser should not return it."
            )
# ── Regression: combo parser v2 uses description when available ────────
class TestComboParserV2PrimaryPath:
    """Check precedence between description-based and name-based parsing.

    A supplied description is the primary source of combo components; the
    name-based heuristic is only the fallback for a missing description.
    """

    def test_description_takes_precedence_over_name_heuristic(self):
        """A provided description must be used instead of the hardcoded
        name-based heuristic."""
        _, _, components = canonicalize_name(
            "Sambar Veg Combo",
            description="Okra, Pumpkin Fresh combo",
        )
        # The description's own components must be present. Either key is
        # accepted for okra.
        assert "okra" in components or "ladys_finger" in components, (
            f"Description lists okra, but neither 'okra' nor 'ladys_finger' "
            f"is in {components!r}"
        )
        assert "pumpkin" in components, (
            f"Description lists pumpkin, but it is missing from {components!r}"
        )
        # "brinjal" and "raw_banana" are produced by the name-based fallback
        # (see the no-description test below) and are not named in this
        # description, so their presence would mean the fallback leaked in.
        for fallback_only in ("brinjal", "raw_banana"):
            assert fallback_only not in components, (
                f"{fallback_only!r} is not in the description; the name-based "
                f"fallback must not add it. Got {components!r}"
            )

    def test_no_description_falls_back_to_name_heuristic(self):
        """Without a description, the name-based heuristic still resolves
        known combos (backward compat)."""
        _, _, components = canonicalize_name("Sambar Veg Combo")
        for expected in ("drumstick", "brinjal", "raw_banana", "pumpkin"):
            assert expected in components, (
                f"Name-based fallback should include {expected!r}, "
                f"got {components!r}"
            )