File size: 2,373 Bytes
d9d1fcf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import dateparser
import re
from datetime import datetime
import json
import os
import inflect

# Load shelf-life data from spoilage_data.json (path is relative to the
# current working directory). parse_ingredients() looks up lower-cased
# ingredient names in this mapping and uses integer values as "days left".
# The encoding is given explicitly so decoding does not depend on the
# platform's default locale.
with open("spoilage_data.json", "r", encoding="utf-8") as f:
    shelf_life_data = json.load(f)

# Shared inflect engine, used to singularise ingredient names before lookup.
p = inflect.engine()

# Keywords that introduce an expiry date. Longer phrases are listed before
# their prefixes so that "exp dt" is not cut short by the bare "exp".
_EXPIRY_KEYWORD_RE = re.compile(
    r"\b(?:expiration date|best before|expiring|expires|expired|expire|expiry"
    r"|exp dt|use by|exp|by|from|on)\b",
    flags=re.IGNORECASE,
)
# "<quantity>[unit] <name>", e.g. "2 kg rice", "1.5l milk", "3 eggs".
_QUANTITY_FIRST_RE = re.compile(r"(\d+(?:\.\d+)?)\s*([a-zA-Z]+)?\s+([a-zA-Z ]+)")
# "<name> <quantity>[unit]", e.g. "milk 2l", "eggs 12".
_NAME_FIRST_RE = re.compile(r"([a-zA-Z ]+)\s+(\d+(?:\.\d+)?)([a-zA-Z]*)")
# Fallback when neither layout matches: drop everything from the first digit on.
_FROM_FIRST_DIGIT_RE = re.compile(r"\d+.*")


def _split_expiry(line):
    """Return ``(line without the expiry phrase, expiry date or None)``.

    Every keyword occurrence is tried from left to right; the first one whose
    remainder is accepted by ``dateparser.parse`` wins. If none parses, the
    line is returned unchanged together with ``None``.
    """
    for keyword in _EXPIRY_KEYWORD_RE.finditer(line):
        candidate = line[keyword.end():].strip()
        if not candidate:
            continue
        parsed_date = dateparser.parse(candidate)
        if parsed_date:
            return line[:keyword.start()].strip(), parsed_date.date()
    return line, None


def _split_quantity(line):
    """Return ``(quantity, unit, name)``; quantity defaults to ``"1"``."""
    found = _QUANTITY_FIRST_RE.match(line)
    if found:
        return found.group(1), found.group(2) or "", found.group(3).strip()
    found = _NAME_FIRST_RE.match(line)
    if found:
        return found.group(2), found.group(3) or "", found.group(1).strip()
    return "1", "", _FROM_FIRST_DIGIT_RE.sub("", line).strip()


def _estimate_days_left(name):
    """Look up *name* (then its singular form) in ``shelf_life_data``.

    Returns the stored value when it is an integer, otherwise ``None``.
    """
    if not name:
        # Nothing to look up; also avoids handing inflect an empty word,
        # which some inflect releases may reject.
        return None
    shelf_life = shelf_life_data.get(name)
    if shelf_life is None:
        # singular_noun() returns a falsy value when it has no singular form
        # to offer, in which case the name is retried as-is.
        shelf_life = shelf_life_data.get(p.singular_noun(name) or name)
    # bool is a subclass of int; a JSON true/false is not a day count.
    if isinstance(shelf_life, int) and not isinstance(shelf_life, bool):
        return shelf_life
    return None


def parse_ingredients(text):
    """Parse a comma-separated ingredient string into structured records.

    Each non-blank item may carry a quantity, an optional unit and an
    optional expiry phrase (for example ``"2 kg rice use by 5 May"`` or
    ``"milk 1.5l"``). Blank items, such as those produced by a trailing
    comma, are skipped; empty or ``None`` input yields an empty list.

    Args:
        text: Comma-separated ingredient descriptions.

    Returns:
        A list of dicts, one per item, with the keys:
        ``raw`` (the stripped item text), ``name`` (lower-cased),
        ``quantity`` (string, ``"1"`` when absent), ``unit`` (lower-cased,
        possibly empty), ``expiry_date`` (ISO date string or ``None``),
        ``days_left`` (days until the parsed expiry date, else the value
        from ``shelf_life_data``, else ``None``) and ``note`` (always
        ``"ok"``).
    """
    if not text:
        return []

    # Computed once so every item is measured against the same day.
    today = datetime.today().date()
    parsed = []

    for raw_line in (item.strip() for item in text.split(',')):
        if not raw_line:
            continue

        line, expiry = _split_expiry(raw_line)
        quantity, unit, name = _split_quantity(line)
        name = name.lower()

        if expiry:
            days_left = (expiry - today).days
        else:
            days_left = _estimate_days_left(name)

        parsed.append({
            "raw": raw_line,
            "name": name,
            "quantity": quantity,
            "unit": unit.lower(),
            "expiry_date": expiry.isoformat() if expiry else None,
            "days_left": days_left,
            "note": "ok",
        })

    return parsed