from __future__ import annotations from functools import lru_cache from typing import List from .text_utils import dedupe_preserve_order, normalize_text, strip_amounts_and_preps try: from ingredient_parser import parse_ingredient except Exception: # pragma: no cover parse_ingredient = None def _clean_fragment(fragment: str) -> str: fragment = (fragment or "").strip() fragment = fragment.lstrip("-•*").strip() fragment = fragment.removeprefix("and ").removeprefix("or ").strip() fragment = strip_amounts_and_preps(fragment) return fragment def _parsed_name(parsed) -> str: if parsed is None: return "" name = getattr(parsed, "name", None) if not name: return "" try: first = name[0] text = getattr(first, "text", "") or "" return normalize_text(text) except Exception: pass text = getattr(name, "text", "") or "" return normalize_text(text) @lru_cache(maxsize=4096) def parse_single_ingredient(fragment: str) -> str: fragment = _clean_fragment(fragment) if not fragment: return "" if parse_ingredient is not None: try: parsed = parse_ingredient(fragment) name = _parsed_name(parsed) if name: return name except Exception: pass return normalize_text(fragment) def extract_ingredients(recipe_text: str, max_items: int = 48) -> List[str]: text = (recipe_text or "").strip() if not text: return [] parts = [part.strip() for part in text.split(",")] out: List[str] = [] for part in parts: candidate = parse_single_ingredient(part) candidate = normalize_text(candidate) if not candidate: continue if len(candidate) < 2: continue if candidate in {"and", "or", "the", "a", "an"}: continue out.append(candidate) if len(out) >= max_items: break return dedupe_preserve_order(out)