Spaces:
Sleeping
Sleeping
File size: 2,039 Bytes
9373226 d33a661 c5e7cb1 d33a661 8387c0e d33a661 9373226 d33a661 8387c0e d33a661 8387c0e 9373226 8387c0e 9373226 8387c0e d33a661 8387c0e 9373226 8387c0e d33a661 8387c0e d33a661 8387c0e d33a661 9373226 d33a661 9373226 d33a661 8387c0e 9373226 8387c0e | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 | from __future__ import annotations
from functools import lru_cache
from typing import List
from .text_utils import dedupe_preserve_order, normalize_text, strip_amounts_and_preps
try:
from ingredient_parser import parse_ingredient
except Exception: # pragma: no cover
parse_ingredient = None
def _clean_fragment(fragment: str) -> str:
    """Tidy one raw list fragment before it is handed to the parser.

    Steps, in order: trim surrounding whitespace, drop any leading run of
    ``-``, ``•`` or ``*`` characters, remove a leading ``"and "`` and then a
    leading ``"or "`` (case-sensitive, at most once each), and finally pass
    the result through ``strip_amounts_and_preps``.

    A falsy ``fragment`` (``None`` or ``""``) is treated as the empty string.
    """
    text = (fragment or "").strip()
    # Bullet markers may be followed by whitespace, so trim again after them.
    text = text.lstrip("-•*").strip()
    for connective in ("and ", "or "):
        text = text.removeprefix(connective)
    return strip_amounts_and_preps(text.strip())
def _parsed_name(parsed) -> str:
    """Pull a normalized name string out of a parser result.

    Returns ``""`` when ``parsed`` is ``None`` or its ``name`` attribute is
    missing or falsy. Otherwise the ``text`` attribute of ``name[0]`` is
    tried first; if anything in that attempt raises, the ``text`` attribute
    of ``name`` itself is used instead. The chosen text is passed through
    ``normalize_text``.
    """
    name_field = None if parsed is None else getattr(parsed, "name", None)
    if not name_field:
        return ""

    try:
        # First attempt: treat ``name`` as an indexable collection.
        return normalize_text(getattr(name_field[0], "text", "") or "")
    except Exception:
        # Best-effort: any failure above falls through to the object form.
        pass

    return normalize_text(getattr(name_field, "text", "") or "")
@lru_cache(maxsize=4096)
def parse_single_ingredient(fragment: str) -> str:
    """Reduce one ingredient fragment to a normalized ingredient name.

    The fragment is cleaned with ``_clean_fragment``; an empty result yields
    ``""``. When ``parse_ingredient`` is available, its result is converted
    with ``_parsed_name`` and returned if non-empty. If the parser is
    unavailable, raises, or produces no name, the cleaned fragment is
    returned after ``normalize_text``.

    Results are memoized per input string (up to 4096 entries).
    """
    cleaned = _clean_fragment(fragment)
    if not cleaned:
        return ""

    parser_name = ""
    if parse_ingredient is not None:
        try:
            parser_name = _parsed_name(parse_ingredient(cleaned))
        except Exception:
            # Parser failures are non-fatal; use the plain-text fallback.
            parser_name = ""

    return parser_name if parser_name else normalize_text(cleaned)
def extract_ingredients(recipe_text: str, max_items: int = 48) -> List[str]:
    """Extract ingredient names from comma-separated recipe text.

    Each comma-delimited fragment is passed through
    ``parse_single_ingredient`` and ``normalize_text``. Empty results,
    results shorter than two characters, and the bare words ``and``, ``or``,
    ``the``, ``a`` and ``an`` are discarded.

    Args:
        recipe_text: Comma-separated ingredient text. ``None`` or blank
            input yields an empty list.
        max_items: Maximum number of distinct names to collect. Zero or a
            negative value yields an empty list.

    Returns:
        The collected names, in first-seen order, passed through
        ``dedupe_preserve_order``.
    """
    text = (recipe_text or "").strip()
    # A non-positive cap means "collect nothing"; checking it up front avoids
    # appending one item before the limit is ever tested.
    if not text or max_items <= 0:
        return []

    skip_words = {"and", "or", "the", "a", "an"}
    seen = set()
    out: List[str] = []
    for part in text.split(","):
        candidate = normalize_text(parse_single_ingredient(part.strip()))
        if not candidate or len(candidate) < 2:
            continue
        if candidate in skip_words:
            continue
        # Skip repeats while collecting so duplicates do not consume the
        # max_items budget and push later, distinct ingredients out.
        if candidate in seen:
            continue
        seen.add(candidate)
        out.append(candidate)
        if len(out) >= max_items:
            break

    # Final pass kept so the shared helper's own notion of "duplicate"
    # still governs the returned list.
    return dedupe_preserve_order(out)
|