import json
import os
import re
from datetime import datetime

import dateparser
import inflect

# Load shelf life data from spoilage_data.json. The path is relative, so it is
# resolved against the current working directory at import time.
with open("spoilage_data.json", "r", encoding="utf-8") as f:
    shelf_life_data = json.load(f)

p = inflect.engine()

# Keyword that introduces an expiry phrase, followed by the rest of the line.
# Longer alternatives come first: with "exp" listed before "exp dt", the
# shorter keyword always won and "dt" leaked into the date string.
_EXPIRY_RE = re.compile(
    r"\b(?:expiration date|best before|expiring|expire|exp dt|exp|use by"
    r"|by|from|on)\b\s*(.*)",
    flags=re.IGNORECASE,
)

# Either "<qty>[unit] <name>" (groups 1-3) or "<name> <qty>[unit]" (groups
# 4-6). The quantity may carry a fractional part such as "1.5".
_QUANTITY_RE = re.compile(
    r"(?:(\d+(?:\.\d+)?)\s*([a-zA-Z]+)?\s+([a-zA-Z ]+))"
    r"|([a-zA-Z ]+)\s+(\d+(?:\.\d+)?)([a-zA-Z]*)"
)


def _extract_expiry(line):
    """Return ``(expiry_date_or_None, line_without_expiry_phrase)``.

    The line is returned unchanged when no keyword is found or when the text
    after the keyword cannot be parsed as a date.
    """
    match = _EXPIRY_RE.search(line)
    if match is None:
        return None, line
    date_text = match.group(1).strip()
    if not date_text:
        return None, line
    # NOTE(review): relative phrases ("friday", "in 3 days") are resolved with
    # dateparser's default settings - confirm whether future dates should be
    # preferred for expiry input.
    parsed_date = dateparser.parse(date_text)
    if parsed_date is None:
        return None, line
    remainder = (line[:match.start()] + line[match.end():]).strip()
    return parsed_date.date(), remainder


def _split_quantity(line):
    """Split ``line`` into ``(quantity, unit, name)`` strings.

    Quantity defaults to ``"1"`` and unit to ``""`` when the line carries no
    recognisable number. A word directly after a leading number is taken as
    the unit whenever another word follows it ("2 red apples" gives unit
    "red"), since no list of known units is consulted.
    """
    match = _QUANTITY_RE.match(line)
    if match is None:
        # No quantity pattern: keep whatever precedes the first digit.
        return "1", "", re.sub(r"\d+.*", "", line).strip()
    if match.group(1):
        return match.group(1), match.group(2) or "", match.group(3).strip()
    return match.group(5), match.group(6) or "", match.group(4).strip()


def _estimate_days_left(name):
    """Look up ``name`` (then its singular form) in ``shelf_life_data``.

    Returns the stored value when it is an ``int``, otherwise ``None``.
    """
    key = name.lower()
    if not key:
        # Never hand an empty word to inflect.
        return None
    shelf_life = shelf_life_data.get(key)
    if shelf_life is None:
        # singular_noun() returns False when the word is not a plural.
        shelf_life = shelf_life_data.get(p.singular_noun(key) or key)
    return shelf_life if isinstance(shelf_life, int) else None


def parse_ingredients(text: str) -> list:
    """Parse a comma-separated ingredient list into structured entries.

    Each non-blank item yields a dict with these keys:

    * ``raw`` - the stripped item exactly as given.
    * ``name`` - lower-cased ingredient name.
    * ``quantity`` - quantity as a string (``"1"`` when absent).
    * ``unit`` - lower-cased unit, or ``""``.
    * ``expiry_date`` - ISO date string when an expiry phrase was parsed,
      else ``None``.
    * ``days_left`` - days until the parsed expiry date, or the shelf-life
      value from ``shelf_life_data`` when no date was given; ``None`` if
      neither is available.
    * ``note`` - always ``"ok"``.

    Items are split on commas, so a date that itself contains a comma
    ("Jan 5, 2025") is split across two items. Blank items (empty input,
    doubled or trailing commas) are skipped.
    """
    today = datetime.today().date()
    parsed = []
    for raw_line in (item.strip() for item in text.split(",")):
        if not raw_line:
            continue

        expiry, remainder = _extract_expiry(raw_line)
        quantity, unit, name = _split_quantity(remainder)

        if expiry is not None:
            days_left = (expiry - today).days
        else:
            days_left = _estimate_days_left(name)

        parsed.append({
            "raw": raw_line,
            "name": name.lower(),
            "quantity": quantity,
            "unit": unit.lower(),
            "expiry_date": expiry.isoformat() if expiry else None,
            "days_left": days_left,
            "note": "ok",
        })
    return parsed