File size: 2,039 Bytes
9373226
 
 
 
 
d33a661
 
 
 
c5e7cb1
d33a661
 
 
8387c0e
 
 
 
 
 
d33a661
 
 
 
9373226
 
d33a661
 
 
 
8387c0e
d33a661
8387c0e
 
 
 
9373226
8387c0e
 
9373226
 
8387c0e
 
 
d33a661
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8387c0e
 
 
9373226
 
8387c0e
d33a661
8387c0e
d33a661
8387c0e
d33a661
 
 
9373226
d33a661
9373226
d33a661
 
 
8387c0e
 
 
9373226
8387c0e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from __future__ import annotations

from functools import lru_cache
from typing import List

from .text_utils import dedupe_preserve_order, normalize_text, strip_amounts_and_preps

# ingredient_parser is treated as an optional dependency: if importing it
# fails for any reason, parse_ingredient is set to None and callers in this
# module check for None before using it.
try:
    from ingredient_parser import parse_ingredient
except Exception:  # pragma: no cover
    parse_ingredient = None


def _clean_fragment(fragment: str) -> str:
    """Tidy one raw ingredient fragment before it is parsed.

    Trims surrounding whitespace, drops leading list-bullet characters
    (``-``, ``•``, ``*``), removes a leading lowercase ``"and "`` and then
    ``"or "``, and finally hands the result to ``strip_amounts_and_preps``.
    A falsy ``fragment`` (``None`` or ``""``) is treated as the empty string.
    """
    text = (fragment or "").strip().lstrip("-•*").strip()
    # Order matters: "and " is removed first, then "or ".
    for lead in ("and ", "or "):
        text = text.removeprefix(lead)
    return strip_amounts_and_preps(text.strip())


def _parsed_name(parsed) -> str:
    if parsed is None:
        return ""

    name = getattr(parsed, "name", None)
    if not name:
        return ""

    try:
        first = name[0]
        text = getattr(first, "text", "") or ""
        return normalize_text(text)
    except Exception:
        pass

    text = getattr(name, "text", "") or ""
    return normalize_text(text)


@lru_cache(maxsize=4096)
def parse_single_ingredient(fragment: str) -> str:
    """Reduce one ingredient fragment to a normalized ingredient name.

    The fragment is cleaned with ``_clean_fragment``; an empty result yields
    ``""``. When ``parse_ingredient`` is available it is tried first and a
    non-empty parsed name is returned. If the parser is unavailable, raises,
    or produces no name, the cleaned fragment is returned after
    ``normalize_text``. Results are memoized per input string.
    """
    cleaned = _clean_fragment(fragment)
    if not cleaned:
        return ""

    if parse_ingredient is not None:
        try:
            parsed_name = _parsed_name(parse_ingredient(cleaned))
        except Exception:
            # Best effort: any parser failure falls back to plain normalization.
            parsed_name = ""
        if parsed_name:
            return parsed_name

    return normalize_text(cleaned)


def extract_ingredients(recipe_text: str, max_items: int = 48) -> List[str]:
    """Extract up to ``max_items`` distinct ingredient names from text.

    The text is split on commas; each piece is run through
    ``parse_single_ingredient`` and ``normalize_text``. Empty results,
    results shorter than two characters, and bare filler words
    (``and``, ``or``, ``the``, ``a``, ``an``) are dropped.

    Args:
        recipe_text: Comma-separated ingredient text. ``None`` or blank
            input yields an empty list.
        max_items: Maximum number of distinct names to collect. A value of
            zero or less yields an empty list.

    Returns:
        Ingredient names in first-seen order, passed through
        ``dedupe_preserve_order``.
    """
    text = (recipe_text or "").strip()
    if not text or max_items <= 0:
        return []

    filler_words = {"and", "or", "the", "a", "an"}
    seen = set()
    out: List[str] = []
    for part in text.split(","):
        candidate = normalize_text(parse_single_ingredient(part.strip()))

        if not candidate or len(candidate) < 2:
            continue
        if candidate in filler_words:
            continue
        # Skip repeats while collecting so duplicates do not use up the
        # max_items budget before later, distinct ingredients are reached.
        if candidate in seen:
            continue

        seen.add(candidate)
        out.append(candidate)
        if len(out) >= max_items:
            break

    return dedupe_preserve_order(out)