Spaces:
Sleeping
Sleeping
File size: 3,184 Bytes
72bf70a 9373226 72bf70a 8517b5a 72bf70a 9373226 094cf40 72bf70a 9373226 72bf70a 9373226 72bf70a 8517b5a 72bf70a 094cf40 9373226 094cf40 9373226 094cf40 9373226 094cf40 72bf70a 9373226 72bf70a | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 | from __future__ import annotations
import re
from typing import Iterable, List
try:
import inflect
_INFLECT = inflect.engine()
except Exception: # pragma: no cover
_INFLECT = None
# Matches a leading quantity (digits, whitespace, "/", "." and the common
# vulgar-fraction characters) optionally followed by a unit of measure.
#
# The unit must end on a word boundary. Without it the first alternative that
# merely *prefixes* the next word wins: "2 cups flour" would leave "s flour"
# and "2 garlic cloves" would leave "arlic cloves". With the boundary the
# engine backtracks to the longer spelling ("cups") or skips the optional
# unit entirely ("garlic").
AMOUNT_PREFIX_RE = re.compile(
    r"""^\s*
    [\d\s\/\.½¼¾⅓⅔]+
    \s*
    (?:
        (?:g|kg|mg|ml|l|oz|lb|pound|pounds|cup|cups|tbsp|tablespoon|tablespoons|tsp|teaspoon|teaspoons|
           clove|cloves|can|cans|slice|slices|piece|pieces|pinch|dash|handful|sprig|sprigs|bunch|bunches|
           package|packages|jar|jars|stalk|stalks|stick|sticks|packet|packets|quart|quarts|pint|pints)
        \b
    )?
    \s*
    """,
    re.IGNORECASE | re.VERBOSE,
)
# Matches a segment opened by "(" or "[" and closed by the nearest ")" or "]"
# (non-greedy; the opener and closer kinds are not required to match),
# together with any whitespace on either side of it.
PAREN_RE = re.compile(r"\s*[\(\[].*?[\)\]]\s*")
# Matches a run of one or more whitespace characters.
MULTISPACE_RE = re.compile(r"\s+")
# Leading words that ingredient_variants() drops (once) to produce an extra
# variant of the ingredient name. Grouped here by rough theme for readability;
# membership is all that matters.
DESCRIPTOR_PREFIXES = {
    # state / preparation
    "fresh", "dried", "ground", "minced", "chopped", "sliced", "grated", "ripe",
    # size
    "large", "small", "medium", "extra", "whole",
    # fat / salt / processing
    "extra-virgin", "unsalted", "salted", "boneless", "skinless",
    "low-fat", "reduced-fat", "fat-free", "plain", "organic",
    # colour / shade
    "light", "dark", "white", "black", "red", "green",
}
def normalize_text(text: str) -> str:
    """Return *text* lower-cased, de-punctuated and whitespace-collapsed.

    A falsy *text* is treated as the empty string. Bracketed segments
    (``(...)`` / ``[...]``) are dropped, every character that is not a word
    character, whitespace, ``-`` or ``/`` becomes a space, and runs of
    whitespace are collapsed to a single space.
    """
    cleaned = (text or "").lower().strip()
    # Typographic quotes -> ASCII quotes, newlines -> spaces. The quotes are
    # subsequently turned into spaces by the punctuation filter below.
    for fancy, plain in (("’", "'"), ("“", '"'), ("”", '"'), ("\n", " ")):
        cleaned = cleaned.replace(fancy, plain)
    cleaned = PAREN_RE.sub(" ", cleaned)
    cleaned = re.sub(r"[^\w\s\-/]", " ", cleaned)
    return MULTISPACE_RE.sub(" ", cleaned).strip()
def strip_amounts_and_preps(text: str) -> str:
    """Normalise *text* and remove a leading bullet and quantity/unit prefix.

    The input is first passed through ``normalize_text``. A leading list
    bullet is removed *before* the amount pattern is applied: that pattern is
    anchored at the start of the string, so a line such as
    ``"- 2 cups flour"`` would otherwise keep its ``"2 cups"`` prefix.

    Despite the name, no preparation words are removed here; only the bullet
    and the amount/unit prefix are stripped.

    Returns the remaining text with whitespace collapsed, possibly ``""``.
    """
    text = normalize_text(text)
    # Drop a leading bullet first so the start-anchored amount regex can see
    # the quantity that follows it.
    text = text.lstrip("-•*").strip()
    text = AMOUNT_PREFIX_RE.sub("", text)
    # A separator may also sit between the amount and the name ("2 - flour").
    text = text.lstrip("-•*").strip()
    return MULTISPACE_RE.sub(" ", text).strip()
def singularize(text: str) -> str:
    """Return the normalised *text* converted to singular form when possible.

    The text is normalised with ``normalize_text`` first. If the result is
    empty, or the optional ``inflect`` engine is unavailable, the normalised
    text is returned unchanged. Otherwise the engine's ``singular_noun``
    result is used when it is truthy, falling back to the normalised text.
    """
    cleaned = normalize_text(text)
    if not cleaned or _INFLECT is None:
        return cleaned
    return _INFLECT.singular_noun(cleaned) or cleaned
def dedupe_preserve_order(items: Iterable[str]) -> List[str]:
    """Return the stripped, non-empty entries of *items* without duplicates.

    Falsy entries are skipped, each remaining entry is stripped of surrounding
    whitespace, and entries that are empty after stripping are dropped. The
    first occurrence of each value wins and the original order is kept.
    """
    trimmed = (item.strip() for item in items if item)
    # dict keys are unique and keep insertion order, giving an ordered dedupe.
    return list(dict.fromkeys(key for key in trimmed if key))
def as_aliases(value: str | None) -> List[str]:
    """Split a ``|``-separated alias string into normalised, unique aliases.

    A falsy *value* yields an empty list. Otherwise the value is converted to
    ``str``, split on ``"|"``, each piece is normalised with
    ``normalize_text``, empty results are discarded, and duplicates are
    removed while keeping first-seen order.
    """
    if not value:
        return []
    normalized = (normalize_text(piece) for piece in str(value).split("|"))
    return dedupe_preserve_order([alias for alias in normalized if alias])
def ingredient_variants(text: str) -> List[str]:
    """Return candidate lookup strings derived from an ingredient line.

    The line is stripped of its amount prefix and normalised. If nothing
    remains, an empty list is returned. Otherwise the candidates are, in
    order: the full cleaned text; the text without its first word when that
    word is in ``DESCRIPTOR_PREFIXES``; and, for multi-word text, the last
    word, everything but the last word, the first two words and the last two
    words. Duplicates are removed, keeping the first occurrence.
    """
    base = normalize_text(strip_amounts_and_preps(text))
    if not base:
        return []

    tokens = base.split()
    candidates = [base]
    if len(tokens) < 2:
        # A single word has no shorter variants to offer.
        return dedupe_preserve_order(candidates)

    if tokens[0] in DESCRIPTOR_PREFIXES:
        candidates.append(" ".join(tokens[1:]))
    candidates += [
        tokens[-1],
        " ".join(tokens[:-1]),
        " ".join(tokens[:2]),
        " ".join(tokens[-2:]),
    ]
    return dedupe_preserve_order(candidates)
def normalize_ingredient_for_lookup(text: str) -> str:
    """Return *text* with its amount prefix stripped, then normalised."""
    without_amount = strip_amounts_and_preps(text)
    return normalize_text(without_amount)
def ingredient_lookup_variants(text: str) -> List[str]:
    """Return the lookup candidates for *text*.

    This is an alias that delegates directly to ``ingredient_variants``.
    """
    variants = ingredient_variants(text)
    return variants
|