Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| from functools import lru_cache | |
| from typing import List | |
| from .text_utils import dedupe_preserve_order, normalize_text, strip_amounts_and_preps | |
| try: | |
| from ingredient_parser import parse_ingredient | |
| except Exception: # pragma: no cover | |
| parse_ingredient = None | |
def _clean_fragment(fragment: str) -> str:
    """Tidy one raw ingredient fragment before it is parsed.

    Surrounding whitespace and leading list markers (``-``, ``•``, ``*``)
    are removed, then a leading ``"and "`` and/or ``"or "`` connective
    (lowercase only, checked in that order). The remainder is passed
    through ``strip_amounts_and_preps`` -- presumably it drops quantities
    and preparation words; confirm against ``text_utils``.

    A falsy ``fragment`` (``None`` or ``""``) is treated as the empty string.
    """
    text = (fragment or "").strip()
    text = text.lstrip("-•*").strip()
    # Drop at most one leading "and " followed by at most one leading "or ".
    for connective in ("and ", "or "):
        text = text.removeprefix(connective)
    return strip_amounts_and_preps(text.strip())
| def _parsed_name(parsed) -> str: | |
| if parsed is None: | |
| return "" | |
| name = getattr(parsed, "name", None) | |
| if not name: | |
| return "" | |
| try: | |
| first = name[0] | |
| text = getattr(first, "text", "") or "" | |
| return normalize_text(text) | |
| except Exception: | |
| pass | |
| text = getattr(name, "text", "") or "" | |
| return normalize_text(text) | |
def parse_single_ingredient(fragment: str) -> str:
    """Turn one ingredient fragment into a normalized ingredient name.

    The fragment is cleaned with ``_clean_fragment`` first; an empty result
    yields ``""``. When the optional ``ingredient_parser`` dependency was
    imported successfully, its ``parse_ingredient`` is tried and the parsed
    name is returned if non-empty. In every other case (parser unavailable,
    parser raised, or no name found) the cleaned fragment is returned after
    ``normalize_text``.
    """
    cleaned = _clean_fragment(fragment)
    if not cleaned:
        return ""

    parsed_name = ""
    if parse_ingredient is not None:
        try:
            parsed_name = _parsed_name(parse_ingredient(cleaned))
        except Exception:
            # Best effort: any parser failure falls back to the plain text.
            parsed_name = ""

    return parsed_name if parsed_name else normalize_text(cleaned)
def extract_ingredients(recipe_text: str, max_items: int = 48) -> List[str]:
    """Extract up to ``max_items`` distinct ingredient names from text.

    ``recipe_text`` is split on commas; each piece is run through
    ``parse_single_ingredient`` and ``normalize_text``. Empty results,
    single-character results and bare connectives/articles are skipped.

    Args:
        recipe_text: Comma-separated ingredient text. ``None`` or blank
            text yields an empty list.
        max_items: Maximum number of ingredients to collect. A value of
            zero or less yields an empty list.

    Returns:
        The collected names in first-seen order, passed through
        ``dedupe_preserve_order``.
    """
    text = (recipe_text or "").strip()
    # A non-positive cap previously still let one item through, because the
    # limit was only checked after the first append.
    if not text or max_items <= 0:
        return []

    stopwords = {"and", "or", "the", "a", "an"}
    out: List[str] = []
    seen = set()
    for part in text.split(","):
        candidate = normalize_text(parse_single_ingredient(part.strip()))
        if not candidate or len(candidate) < 2:
            continue
        if candidate in stopwords:
            continue
        # Skip repeats here so they do not consume the max_items budget;
        # otherwise duplicates early in the text crowd out later, distinct
        # ingredients before the final dedupe runs.
        if candidate in seen:
            continue
        seen.add(candidate)
        out.append(candidate)
        if len(out) >= max_items:
            break
    # Kept for any additional equivalence rules the helper may apply.
    return dedupe_preserve_order(out)