Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import re | |
| from typing import Iterable, List | |
| try: | |
| import inflect | |
| _INFLECT = inflect.engine() | |
| except Exception: # pragma: no cover | |
| _INFLECT = None | |
# Matches a leading quantity and an optional unit of measure at the start of
# an ingredient line, e.g. "2 cups ", "1/2 tsp " or "200g ".
#
# A unit is only consumed when it is a complete word (the trailing \b).
# Without that boundary the alternation settles for the first alternative that
# fits, so "2 cups flour" lost only "2 cup" (leaving "s flour") and
# "2 large eggs" lost "2 l" (leaving "arge eggs").  When no whole-word unit
# follows the quantity, only the quantity itself is matched.
AMOUNT_PREFIX_RE = re.compile(
    r"""^\s*
    [\d\s\/\.½¼¾⅓⅔]+        # quantity: digits, fractions and their separators
    \s*
    (?:
        (?:g|kg|mg|ml|l|oz|lb|pound|pounds|cup|cups|tbsp|tablespoon|tablespoons|tsp|teaspoon|teaspoons|
        clove|cloves|can|cans|slice|slices|piece|pieces|pinch|dash|handful|sprig|sprigs|bunch|bunches|
        package|packages|jar|jars|stalk|stalks|stick|sticks|packet|packets|quart|quarts|pint|pints)
        \b                  # the unit must end on a word boundary
    )?
    \s*
    """,
    re.IGNORECASE | re.VERBOSE,
)
# A parenthesised or bracketed aside, together with the whitespace around it.
PAREN_RE = re.compile(r"\s*[\(\[].*?[\)\]]\s*")

# Any run of whitespace characters.
MULTISPACE_RE = re.compile(r"\s+")

# Leading words that describe an ingredient rather than name it.
DESCRIPTOR_PREFIXES = {
    # state / preparation
    "fresh",
    "dried",
    "ground",
    "minced",
    "chopped",
    "sliced",
    "grated",
    # size / grade
    "large",
    "small",
    "medium",
    "extra",
    "extra-virgin",
    # seasoning / cut
    "unsalted",
    "salted",
    "boneless",
    "skinless",
    "whole",
    # fat content
    "low-fat",
    "reduced-fat",
    "fat-free",
    # colour / appearance
    "light",
    "dark",
    "white",
    "black",
    "red",
    "green",
    # other
    "ripe",
    "plain",
    "organic",
}
def normalize_text(text: str) -> str:
    """Return *text* lowercased and reduced to words, hyphens and slashes.

    Curly quotes are straightened, newlines become spaces, parenthesised or
    bracketed asides are dropped, and every character that is not a word
    character, whitespace, ``-`` or ``/`` is replaced by a space.  Runs of
    whitespace are then collapsed to one space and the ends are trimmed.
    A falsy *text* (``None`` or ``""``) yields ``""``.
    """
    cleaned = (text or "").lower().strip()
    for curly, straight in (("’", "'"), ("“", '"'), ("”", '"')):
        cleaned = cleaned.replace(curly, straight)
    cleaned = cleaned.replace("\n", " ")
    cleaned = PAREN_RE.sub(" ", cleaned)
    cleaned = re.sub(r"[^\w\s\-/]", " ", cleaned)
    return MULTISPACE_RE.sub(" ", cleaned).strip()
def strip_amounts_and_preps(text: str) -> str:
    """Return *text* normalised, with its leading amount and bullets removed.

    The text is first passed through ``normalize_text``.  Leading list-bullet
    characters (``-``, ``•``, ``*``) are then removed, followed by the leading
    quantity/unit matched by ``AMOUNT_PREFIX_RE``, followed by any bullet
    characters that the amount removal exposed.  Whitespace is collapsed and
    trimmed before returning.
    """
    bullet_chars = "-•*"
    text = normalize_text(text)
    # normalize_text keeps hyphens, so a dash bullet ("- 2 cups flour") would
    # sit in front of the amount and stop the ^-anchored pattern from
    # matching.  Strip bullets before looking for the amount.
    text = text.lstrip(bullet_chars).strip()
    text = AMOUNT_PREFIX_RE.sub("", text)
    # A separator may also follow the amount ("2 - flour").
    text = text.lstrip(bullet_chars).strip()
    text = MULTISPACE_RE.sub(" ", text).strip()
    return text
def singularize(text: str) -> str:
    """Return the normalised *text* in singular form when possible.

    The text is normalised with ``normalize_text`` first.  An empty result is
    returned as ``""``.  When the optional ``inflect`` engine is unavailable,
    or its ``singular_noun`` call returns a falsy value, the normalised text
    is returned unchanged.
    """
    normalized = normalize_text(text)
    if not normalized or _INFLECT is None:
        return normalized
    return _INFLECT.singular_noun(normalized) or normalized
def dedupe_preserve_order(items: Iterable[str]) -> List[str]:
    """Return the distinct, stripped entries of *items* in first-seen order.

    Falsy entries are skipped, each remaining entry is stripped of surrounding
    whitespace, and entries that are empty after stripping are dropped.
    """
    stripped = (item.strip() for item in items if item)
    # dict keys keep insertion order, so the first occurrence of each value wins.
    return list(dict.fromkeys(value for value in stripped if value))
def as_aliases(value: str | None) -> List[str]:
    """Split a ``|``-separated alias string into normalised, unique aliases.

    A falsy *value* yields an empty list.  Otherwise *value* is converted to
    ``str``, split on ``|``, each piece is passed through ``normalize_text``,
    empty pieces are dropped, and duplicates are removed in first-seen order.
    """
    if not value:
        return []
    normalized_parts = (normalize_text(piece) for piece in str(value).split("|"))
    return dedupe_preserve_order([alias for alias in normalized_parts if alias])
def ingredient_variants(text: str) -> List[str]:
    """Return candidate lookup strings derived from an ingredient line.

    The line is reduced to its base form with ``strip_amounts_and_preps`` and
    ``normalize_text``; an empty base yields an empty list.  The candidates
    are, in order: the base itself; the base without its first word when that
    word is in ``DESCRIPTOR_PREFIXES``; the last word; everything but the last
    word; the first two words; the last two words.  The last five apply only
    to bases of two or more words.  Duplicates are removed in first-seen
    order.
    """
    base = normalize_text(strip_amounts_and_preps(text))
    if not base:
        return []

    words = base.split()
    candidates = [base]
    if len(words) > 1:
        if words[0] in DESCRIPTOR_PREFIXES:
            candidates.append(" ".join(words[1:]))
        candidates += [
            words[-1],
            " ".join(words[:-1]),
            " ".join(words[:2]),
            " ".join(words[-2:]),
        ]
    return dedupe_preserve_order(candidates)
def normalize_ingredient_for_lookup(text: str) -> str:
    """Return the lookup key for *text*.

    The key is the result of ``strip_amounts_and_preps`` passed through
    ``normalize_text``.
    """
    without_amount = strip_amounts_and_preps(text)
    return normalize_text(without_amount)
def ingredient_lookup_variants(text: str) -> List[str]:
    """Return the lookup variants for *text*; delegates to ``ingredient_variants``."""
    variants = ingredient_variants(text)
    return variants