Spaces:
Running
Running
| """Pantry-grounded seasoning planner for the Epicurean Simmer node. | |
| The hard, non-deterministic problem here is NOT temperature — a thermostat does | |
| that. It's flavour: given the specific, messy set of things in your kitchen and | |
| what you're trying to cook, what should you add, what can stand in for what you | |
| lack, and in what order? That space is combinatorial and grounded in real data | |
| (the Epicure flavour model, 1,790 ingredients from ~4M recipes), so it's where | |
| the agent earns its place. | |
| Division of labour: | |
| - The LLM (Mellum 2) ORCHESTRATES: it reads the messy pantry, decides which | |
| Epicure tools to call, matches surfaced pairings against what's on hand, | |
| finds substitutes for gaps, respects constraints, and sequences additions | |
| by cooking stage. None of that is a lookup table. | |
| - Epicure provides the grounded flavour facts (pairing scores, neighbours). | |
| - Temperature is just the playhead: it advances through the plan the agent | |
| built and decides WHEN each step surfaces on the LCD. | |
| `build_plan` uses the real model. `scripted_plan` is an offline approximation | |
| that still calls Epicure live, so local/MOCK runs show real flavour facts even | |
| without a GPU — but the adaptive reasoning quality comes from the model. | |
| """ | |
| import json | |
| import re | |
| from sandbox import run_code | |
# Budget of tool/python calls advertised to the model in SYSTEM_PROMPT; the
# planning loop in build_plan runs MAX_STEPS + 2 iterations before falling back.
MAX_STEPS = 8
# Cooking stages, in order. Each maps to the thermal phase at which it fires.
STAGES = ["bloom", "aromatics", "body", "finish"]
# Human-readable label per stage; scripted_plan lower-cases it for the "why" text.
STAGE_LABEL = {
    "bloom": "Bloom whole spices",
    "aromatics": "Add aromatics",
    "body": "Season the simmer",
    "finish": "Finish off heat",
}
# Keyword -> stage, used by the scripted planner to sequence ingredients the way
# a cook would. The model is free to override this with its own reasoning.
# There is deliberately no "body" entry: stage_for() returns "body" whenever no
# keyword here matches the ingredient name.
STAGE_KEYWORDS = {
    "bloom": ["cumin", "coriander seed", "mustard seed", "mustard", "fenugreek",
              "cardamom", "cinnamon", "clove", "bay", "peppercorn", "black pepper",
              "fennel", "nigella", "star anise", "asafoetida", "curry leaf",
              "curry", "ras el hanout", "garam masala", "turmeric", "cayenne"],
    "aromatics": ["onion", "garlic", "ginger", "shallot", "leek", "scallion",
                  "spring onion", "chili", "chilli", "green chili", "pepper"],
    "finish": ["lemon", "lime", "lemon juice", "lime juice", "vinegar", "tamarind",
               "yogurt", "yoghurt", "cilantro", "coriander", "coriander leaf",
               "mint", "cream", "parsley", "basil", "amchur", "lemon zest"],
}
# Constraint phrase -> ingredient fragments to ban. apply_constraints() looks for
# each phrase inside the lower-cased constraints text and then removes every
# pantry item that CONTAINS one of the fragments (substring match).
CONSTRAINT_BANS = {
    "low sodium": ["salt", "soy sauce", "miso", "stock cube", "bouillon"],
    "no salt": ["salt", "soy sauce", "miso"],
    "no dairy": ["yogurt", "yoghurt", "cream", "butter", "ghee", "milk", "cheese"],
    "vegan": ["yogurt", "yoghurt", "cream", "butter", "ghee", "milk", "cheese",
              "honey", "fish sauce"],
    "no nightshade": ["tomato", "pepper", "chili", "chilli", "paprika", "cayenne",
                      "potato", "eggplant", "aubergine"],
}
# Vague category words that aren't real ingredients — vision sometimes emits
# these ("I see produce"); they can't be paired or planned around, so drop them.
_GENERIC = {
    "produce", "vegetable", "vegetables", "veggies", "veg", "fruit", "fruits",
    "herb", "herbs", "spice", "spices", "seasoning", "seasonings", "ingredient",
    "ingredients", "item", "items", "food", "foods", "groceries", "stuff",
    "various", "other", "misc", "condiment", "condiments", "pantry",
}
| # --------------------------------------------------------------------- helpers | |
def parse_pantry(text: str) -> list[str]:
    """Split a free-text pantry into normalised ingredient names.

    The text is split on commas, semicolons, newlines and the word "and". Each
    piece is lower-cased, stripped of leading filler words ("some", "a", "fresh",
    "ground", "couple of", ...), whitespace-normalised and trimmed of trailing
    dots. Names shorter than two characters and vague category words listed in
    ``_GENERIC`` are dropped.

    Args:
        text: Free-text pantry description; ``None`` or ``""`` yields ``[]``.

    Returns:
        Unique ingredient names in first-seen order.
    """
    # The filler group repeats so STACKED qualifiers are all removed:
    # "some fresh basil" -> "basil" (not "fresh basil"), and "some fresh herbs"
    # -> "herbs", which the _GENERIC filter can then drop.
    qualifiers = (r"^(?:(?:some|a|an|the|fresh|dried|ground|whole|few|bit of|couple of)"
                  r"\s+)+")
    names = []
    for part in re.split(r"[,\n;]+|\band\b", text or ""):
        name = part.strip().lower()
        name = re.sub(qualifiers, "", name)
        name = re.sub(r"\s+", " ", name).strip(" .")
        if len(name) > 1 and name not in _GENERIC:
            names.append(name)
    # dict keys keep insertion order, so this de-dupes while preserving order.
    return list(dict.fromkeys(names))
def apply_constraints(pantry: list[str], constraints: str) -> tuple[list[str], list[str]]:
    """Partition the pantry into what the constraints permit and what they ban.

    Every phrase of ``CONSTRAINT_BANS`` found in the lower-cased constraints
    text contributes its banned fragments; a pantry item is removed when it
    contains any banned fragment as a substring.

    Returns:
        ``(allowed, removed)``, each in the pantry's original order.
    """
    wanted = (constraints or "").lower()
    banned = {
        fragment
        for phrase, fragments in CONSTRAINT_BANS.items()
        if phrase in wanted
        for fragment in fragments
    }
    allowed: list[str] = []
    removed: list[str] = []
    for item in pantry:
        is_banned = any(fragment in item for fragment in banned)
        (removed if is_banned else allowed).append(item)
    return allowed, removed
def stage_for(ingredient: str) -> str:
    """Return the cooking stage an ingredient belongs to.

    Keywords from ``STAGE_KEYWORDS`` are matched as substrings of the
    ingredient name, and the LONGEST matching keyword decides the stage. That
    way the most specific cue wins: "garlic cloves" is an aromatic ("garlic"
    beats "clove"), "black pepper" is bloomed ("black pepper" beats "pepper").
    On equal length the stage listed first in ``STAGE_KEYWORDS`` wins.

    Returns:
        The matching stage name, or ``"body"`` when no keyword matches.
    """
    best_stage, best_len = "body", 0
    for stage, keywords in STAGE_KEYWORDS.items():
        for keyword in keywords:
            # Strict '>' keeps the earlier stage on ties.
            if len(keyword) > best_len and keyword in ingredient:
                best_stage, best_len = stage, len(keyword)
    return best_stage
def dish_core(dish: str) -> list[str]:
    """Best-effort core ingredient(s) of a dish, for seeding pairing queries.

    Keywords are matched as whole words (an optional plural "s"/"es" is
    tolerated), so short keys do not fire inside unrelated words: "dal" no
    longer matches "dalgona" or "medallions", nor "rice" inside "licorice".

    Args:
        dish: Free-text dish name; ``None``/empty is allowed.

    Returns:
        Unique core ingredients in the order of the keyword table, or
        ``["onion"]`` when nothing is recognised.
    """
    text = (dish or "").lower()
    known = {
        "lentil": "lentil", "dal": "lentil", "dahl": "lentil", "daal": "lentil",
        "chickpea": "chickpea", "chana": "chickpea", "coffee": "coffee",
        "tomato": "tomato", "rice": "rice", "chicken": "chicken", "bean": "bean",
        "soup": "vegetable broth", "curry": "curry", "pasta": "pasta",
    }
    cores = [
        core for key, core in known.items()
        if re.search(rf"\b{re.escape(key)}(?:e?s)?\b", text)
    ]
    # De-dupe preserving order (several keys map to the same core).
    return list(dict.fromkeys(cores)) or ["onion"]
def parse_servings(text: str, default: int = 2) -> int:
    """Pull a serving count out of the dish text ('dal for 4', '6 people').

    Recognises "for N", "serve(s) N", "make(s) N" and "N people/servings/
    portions/cups". Only whole one- or two-digit numbers count, so
    "for 100 people" is not read as 10, and a number followed by a time unit
    ("for 20 minutes") is treated as a duration, not a head-count.

    Args:
        text: Dish text; ``None``/empty is allowed.
        default: Value returned when no usable count is found.

    Returns:
        The first count in the range 1..24, else ``default``.
    """
    pattern = (
        # "for 4" / "serves 4" — \b stops a partial match inside "100";
        # the lookahead rejects durations such as "for 20 min".
        r"\b(?:for|serves?|makes?)\s+(\d{1,2})\b(?!\s*(?:min|hour|hr|sec))"
        # "6 people" — the lookbehind stops a match on the tail of "100".
        r"|(?<!\d)(\d{1,2})\s*(?:people|servings|portions|cups)"
    )
    # Scan every candidate so an out-of-range first hit doesn't hide a valid one.
    for match in re.finditer(pattern, (text or "").lower()):
        count = int(match.group(1) or match.group(2))
        if 1 <= count <= 24:
            return count
    return default
def dish_kind(dish: str, core: str) -> str:
    """Classify the dish so the kitchen math fits it (a salad gets no simmer).

    Whole-word matching throughout, so short keywords don't match inside other
    words ('tea' in 'steak', 'raw' in 'strawberry', 'tart' in 'tartare'). A
    trailing plural "s"/"es" is tolerated, so "cookies", "salads" and "stews"
    classify the same as their singular forms.

    Args:
        dish: Free-text dish name; ``None``/empty is allowed.
        core: Core ingredient (see ``dish_core``) used as an extra hint.

    Returns:
        One of ``"beverage"``, ``"raw"``, ``"sweet"``, ``"simmered"`` or
        ``"generic"``; checks run in that order and the first hit wins.
    """
    t = (dish or "").lower()

    def has(*words):
        # \b ... \b keeps matches to whole words; (?:e?s)? admits plurals.
        return re.search(r"\b(?:" + "|".join(words) + r")(?:e?s)?\b", t) is not None

    if core == "coffee" or has("coffee", "espresso", "latte", "tea", "brew", "matcha"):
        return "beverage"
    if has("salad", "slaw", "raw", "ceviche", "smoothie", "juice", "salsa",
           "guacamole", "dip", "tartare", "carpaccio"):
        return "raw"
    # No bare 'sweet' — it would mis-flag 'sweet potato'. The \w*cakes? pattern
    # catches cupcake/shortcake/cheesecake; the rest are whole words.
    if (has("tart", "pie", "cookie", "biscuit", "brownie", "muffin", "scone",
            "pastry", "crumble", "cobbler", "pudding", "custard", "dessert",
            "mousse", "fudge", "parfait", "jam", "compote", "sorbet", "gelato")
            or re.search(r"\b\w*cakes?\b", t) or "ice cream" in t):
        return "sweet"
    if core in ("lentil", "chickpea", "bean", "rice", "pasta") or has(
            "soup", "stew", "curry", "dal", "dahl", "daal", "braise", "simmer",
            "rice", "pasta", "risotto", "porridge", "congee", "chili", "chilli", "broth"):
        return "simmered"
    return "generic"
def kitchen_math_script(core: str, servings: int, allow_salt: bool, dish: str = "") -> str:
    """Build a small, readable Python script for the quantitative side of the cook.

    The script is tailored to the dish type reported by ``dish_kind`` and is
    meant to be run in the sandbox (deterministic, not model-written, so it's
    always real arithmetic rather than the LLM's mental math).

    Args:
        core: Core ingredient; only printed by the "simmered" script. It may
            originate from model output, so it is embedded as a quoted literal
            and can never alter the generated code.
        servings: Number of servings; coerced with ``int()``.
        allow_salt: When false, the salt line is replaced by a "skipped" notice.
        dish: Free-text dish name used for classification.

    Returns:
        Python source code as a string.
    """
    servings = int(servings)
    # repr() yields a safely quoted/escaped literal, so quotes, braces or
    # newlines inside `core` cannot break or inject into the script.
    core_literal = repr(str(core))
    kind = dish_kind(dish, core)
    if kind == "beverage":
        return f"""
servings = {servings}
water_ml = servings * 250   # 250 ml per cup
ratio = 16                  # 1:16 grounds:water by weight
grounds_g = round(water_ml / ratio, 1)
print(f"water: {{water_ml}} ml")
print(f"grounds: {{grounds_g}} g (1:{{ratio}} ratio)")
print(f"bloom: 30 s with ~{{round(grounds_g*2)}} g water, then pour the rest")
"""
    if kind == "sweet":
        return f"""
servings = {servings}
filling_g = servings * 120    # ~120 g fruit/nut filling per serving
sweetener_g = servings * 25   # ~25 g sugar/honey/syrup per serving
print(f"makes: {{servings}} servings")
print(f"fruit/nut filling: ~{{filling_g}} g total")
print(f"sweetener: ~{{sweetener_g}} g total, to taste")
print("salt: just a pinch, to lift the sweetness")
print("if baked: ~180C / 350F until set")
"""
    if kind == "raw":
        salt = ('print(f"salt: {round(produce_g*0.006)} g (light, 0.6%)")'
                if allow_salt else 'print("salt: skipped (constraint)")')
        return f"""
servings = {servings}
produce_g = servings * 150   # ~150 g produce per serving
acid_ml = servings * 10      # ~10 ml citrus/acid dressing per serving
print(f"produce: {{produce_g}} g total")
print(f"dressing acid: {{acid_ml}} ml")
{salt}
print("toss just before serving — no cooking")
"""
    if kind == "simmered":
        salt = ('print(f"salt: {round(water_ml*0.008)} g (0.8% of water)")'
                if allow_salt else 'print("salt: skipped (constraint)")')
        return f"""
servings = {servings}
core_name = {core_literal}
base_g = servings * 75               # 75 g dry pulse/grain per serving
water_ml = base_g * 3                # 3:1 hydration for a soft simmer
bloom_tsp = round(base_g / 100, 1)   # ~1 tsp whole spice per 100 g
print(f"{{core_name}}: {{base_g}} g dry")
print(f"water: {{water_ml}} ml")
{salt}
print(f"whole spices to bloom: ~{{bloom_tsp}} tsp total")
print(f"simmer: ~{{20 + (servings-2)*2}} min, lid ajar")
"""
    # Generic fallback: scale the main ingredient only.
    salt = ('print(f"salt: {round(base_g*0.008)} g (0.8%)")'
            if allow_salt else 'print("salt: skipped (constraint)")')
    return f"""
servings = {servings}
base_g = servings * 200   # ~200 g main ingredient per serving
print(f"main ingredient: ~{{base_g}} g total for {{servings}}")
{salt}
"""
def compute_kitchen_math(core: str, servings: int, allow_salt: bool, dish: str = "") -> dict | None:
    """Generate the kitchen-math script, execute it in the sandbox and package the result.

    Returns:
        ``{"code", "output", "backend"}`` when the sandbox reports success —
        ``code`` is the script with per-line leading whitespace removed for
        display — otherwise ``None``.
    """
    script = kitchen_math_script(core, servings, allow_salt, dish)
    outcome = run_code(script)
    if outcome["ok"]:
        display_code = re.sub(r"^\s+", "", script, flags=re.M).strip()
        return {
            "code": display_code,
            "output": outcome["stdout"].strip(),
            "backend": outcome["backend"],
        }
    return None
| _SEASONINGS = { | |
| "salt", "pepper", "black pepper", "oil", "olive oil", "water", "sugar", | |
| "cumin", "coriander", "coriander seed", "turmeric", "garlic", "onion", | |
| "ginger", "chili", "chilli", "cinnamon", "cardamom", "lemon", "lime", | |
| "vinegar", "bay", "clove", "nutmeg", "paprika", "cayenne", | |
| } | |
| def _heuristic_dish(pantry: list[str]) -> str: | |
| """Offline dish guess from what's on hand (no model).""" | |
| for key, name in [ | |
| ("lentil", "red lentil dal"), ("dal", "red lentil dal"), | |
| ("chickpea", "chickpea curry"), ("coffee", "pour-over coffee"), | |
| ("rice", "fried rice"), ("pasta", "tomato pasta"), | |
| ("spaghetti", "tomato pasta"), ("egg", "spiced scramble"), | |
| ("potato", "spiced potatoes"), ("chicken", "braised chicken"), | |
| ("bean", "stewed beans"), ("tomato", "tomato soup"), | |
| ]: | |
| if any(key in p for p in pantry): | |
| return name | |
| mains = [p for p in pantry if p not in _SEASONINGS][:2] | |
| return ("a simple " + " & ".join(mains)) if mains else "a simple sauté" | |
def suggest_dish(pantry_text: str, constraints: str = "", generate_fn=None) -> str:
    """Pick a dish to cook from the ingredients on hand.

    The pantry is parsed and filtered by the constraints first. With a
    ``generate_fn`` the model is asked for a dish name and its first non-empty
    line is cleaned up and accepted if it is 2-60 characters long; any model
    failure or unusable reply falls back to the offline heuristic.

    Returns:
        The dish name, or ``''`` when no usable pantry items remain.
    """
    pantry, _ = apply_constraints(parse_pantry(pantry_text), constraints)
    if not pantry:
        return ""
    if generate_fn is None:
        return _heuristic_dish(pantry)

    system_prompt = (
        "Suggest ONE simple, realistic dish to cook from the ingredients "
        "on hand. Reply with ONLY the dish name (2-5 words) — no prose, no "
        "list, no punctuation."
    )
    user_prompt = "Ingredients: " + ", ".join(pantry)
    if constraints:
        user_prompt += f"\nConstraints: {constraints}"
    user_prompt += "\nDish name:"
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    try:
        reply = generate_fn(conversation)
        first = ""
        for candidate in reply.splitlines():
            if candidate.strip():
                first = candidate.strip()
                break
        # Strip list bullets, numbering and quotes the model may have added.
        first = re.sub(r"^[\"'\-\*\d\.\)\s]+", "", first).strip(" .\"'")
        if 2 <= len(first) <= 60:
            return first
    except Exception:
        # Best-effort: any model failure falls through to the heuristic.
        pass
    return _heuristic_dish(pantry)
| def extract_json(text: str) -> dict | None: | |
| start = text.find("{") | |
| if start == -1: | |
| return None | |
| depth = 0 | |
| for i, char in enumerate(text[start:], start): | |
| if char == "{": | |
| depth += 1 | |
| elif char == "}": | |
| depth -= 1 | |
| if depth == 0: | |
| try: | |
| return json.loads(text[start : i + 1]) | |
| except json.JSONDecodeError: | |
| return None | |
| return None | |
| # --------------------------------------------------------- scripted planner (offline) | |
| def _parse_score(raw: str) -> tuple[float | None, str]: | |
| try: | |
| data = json.loads(raw) | |
| return data.get("pairing_score"), data.get("percentile_label", "") | |
| except (json.JSONDecodeError, AttributeError): | |
| return None, "" | |
| def _singularize(name: str) -> str: | |
| """Cheap de-pluralisation so 'lemons' resolves as 'lemon' in Epicure.""" | |
| if name.endswith("ies") and len(name) > 4: | |
| return name[:-3] + "y" | |
| if name.endswith(("ses", "xes", "zes", "ches", "shes")): | |
| return name[:-2] | |
| if name.endswith("s") and not name.endswith("ss") and len(name) > 3: | |
| return name[:-1] | |
| return name | |
def ground_evidence(core: str, steps: list[dict], mcp) -> None:
    """Replace each step's evidence text with a score fetched from Epicure.

    For every step (except one naming the core itself) the ``pairing_score``
    tool is queried with the ingredient name and, if different, its singular
    form. The first reply carrying a score overwrites the step's ``evidence``
    and switches ``ingredient`` to the name Epicure resolved. Steps for which
    no query succeeds are left untouched. Mutates ``steps`` in place.
    """
    if not core:
        return
    for step in steps:
        name = step.get("ingredient")
        if not name or name == core:
            continue
        candidates = [name]
        singular = _singularize(name)
        if singular != name:
            candidates.append(singular)
        for candidate in candidates:
            try:
                reply = mcp.call_tool(
                    "pairing_score",
                    {"ingredient_a": core, "ingredient_b": candidate},
                )
                payload = json.loads(reply)
            except Exception:
                # Tool or decoding failure: try the next spelling.
                continue
            if not isinstance(payload, dict):
                continue
            score = payload.get("pairing_score")
            if score is None:
                continue
            step["ingredient"] = payload.get("resolved_b", candidate)
            resolved_core = payload.get("resolved_a", core)
            label = payload.get("percentile_label", "")
            step["evidence"] = f"pairs {round(score, 3)} with " f"{resolved_core} ({label})"
            break
def scripted_plan(dish: str, pantry_text: str, constraints: str, mcp) -> dict:
    """Offline approximation that still calls Epicure live.

    For each pantry item, score it against the dish's core ingredient, keep the
    ones that genuinely pair, sequence them by cooking stage, and look up a
    substitute (via `neighbors`) for any core the cook lacks. Grounded in real
    scores from the server — not a hardcoded table — but rigid in its pipeline.

    Args:
        dish: Free-text dish name (also searched for a serving count).
        pantry_text: Free-text pantry, parsed with ``parse_pantry``.
        constraints: Free-text dietary constraints.
        mcp: Client object exposing ``call_tool(name, arguments)``; replies are
            decoded here as JSON text.

    Returns:
        A plan dict with keys ``dish``, ``core``, ``steps``, ``notes``, ``math``
        and ``source``. When every scoring call failed, an early result is
        returned instead that has empty ``steps`` and no ``math`` key.
    """
    pantry = parse_pantry(pantry_text)
    allowed, removed = apply_constraints(pantry, constraints)
    cores = dish_core(dish)
    # dish_core never returns an empty list, so indexing is safe.
    core = cores[0]
    scored, errors = [], 0
    for item in allowed[:10]:  # cap calls to the rate-limited public server
        if item == core:
            continue
        try:
            raw = mcp.call_tool("pairing_score", {"ingredient_a": core, "ingredient_b": item})
            score, label = _parse_score(raw)
        except Exception:
            # Any failure is counted and reported as a skipped ingredient.
            errors += 1
            continue
        if score is not None and score >= 0.092:  # corpus median; weaker pairings dropped
            scored.append({"ingredient": item, "score": round(score, 3), "label": label})
    # Don't pretend a busy server means "nothing pairs": say so plainly.
    if errors and not scored:
        return {
            "dish": dish, "core": core, "steps": [],
            "notes": [f"Epicure server was busy ({errors} calls rate-limited) — "
                      "try again in a moment."],
            "source": "scripted (Epicure unavailable)",
        }
    # Strongest pairings first; the stage grouping below keeps this order
    # within each stage.
    scored.sort(key=lambda s: s["score"], reverse=True)
    steps = []
    for stage in STAGES:
        members = [s for s in scored if stage_for(s["ingredient"]) == stage]
        for s in members:
            steps.append({
                "stage": stage,
                "ingredient": s["ingredient"],
                "evidence": f"pairs {s['score']} with {core} ({s['label']})",
                "why": f"{STAGE_LABEL[stage].lower()}",
            })
    # Substitution: if the dish wants a hallmark ingredient the cook lacks, point
    # to the nearest thing they DO have in Epicure's flavour space. NB neighbours
    # are co-occurrence-similar, not functional swaps — so we surface it as
    # "cooks reach for this in the same dishes", not "use X for the sourness of Y".
    notes = []
    used = {s["ingredient"] for s in steps}
    hallmark = {"lentil": "tamarind", "chickpea": "tahini", "coffee": "cardamom"}.get(core)
    # The presence check uses the unfiltered pantry, so a hallmark that was only
    # removed by a constraint does not trigger a substitution.
    if hallmark and not any(hallmark in p for p in pantry):
        try:
            raw = mcp.call_tool("neighbors", {"ingredient": hallmark, "top_k": 12})
            # Neighbour names have underscores replaced by spaces so they can
            # be compared with pantry names.
            neigh = {n["name"].replace("_", " ") for n in json.loads(raw).get("neighbors", [])}
            # Exact match only: suggest a pantry item that genuinely IS one of the
            # hallmark's flavour neighbours, not a substring lookalike (so 'lemon'
            # never gets mistaken for 'lemongrass'). Skip anything already planned.
            sub = next((p for p in allowed if p in neigh and p not in used), None)
            if sub:
                notes.append(f"No {hallmark} on hand — {sub} sits closest to it in flavour space.")
                steps.append({
                    "stage": "finish", "ingredient": sub,
                    "evidence": f"flavour-space neighbour of {hallmark}",
                    "why": f"cooks reach for it in the same dishes as {hallmark}",
                })
        except Exception:
            # Best-effort: the substitution hint is optional, so failures are ignored.
            pass
    if removed:
        notes.append(f"Skipped per constraints: {', '.join(removed)}.")
    if errors:
        notes.append(f"{errors} ingredient(s) skipped — Epicure was rate-limiting.")
    # Salt is allowed unless an active constraint phrase lists "salt" among its bans.
    allow_salt = "salt" not in {b for p, items in CONSTRAINT_BANS.items()
                                if p in (constraints or "").lower() for b in items}
    # May be None when the sandbox run fails (see compute_kitchen_math).
    math = compute_kitchen_math(core, parse_servings(dish), allow_salt, dish)
    return {
        "dish": dish, "core": core, "steps": steps, "notes": notes, "math": math,
        "source": "scripted (model offline — real Epicure scores)",
    }
| # ------------------------------------------------------------- LLM planner (Space) | |
# System prompt for the LLM planner. It is a str.format() template: build_plan
# fills in {tools} and {max_steps}. Every literal brace of the JSON examples is
# therefore doubled ({{ / }}), and "\\n" yields a literal backslash-n inside the
# run_python example.
SYSTEM_PROMPT = """\
You are the flavour brain of a small stovetop cooking node. The cook gives you a
dish, the actual contents of their kitchen (messy free text), and any dietary
constraints. Your job is to turn that into a STAGED seasoning plan, grounded in
the Epicure flavour-science tools — never invent pairings.
Sequence the additions sensibly for THIS dish. Treat the stage labels as a
generic order — robust/early elements first, aromatics next, the main body, then
delicate finishing touches (acids, fresh herbs, garnishes). Describe each step
TRUTHFULLY for the actual ingredient: never call something a "whole spice" or
"aromatic" if it isn't (a nut is not a spice; lemon is not an allium). Do NOT
assume savoury stovetop cooking — adapt your wording when the dish is a salad,
a drink, or a dessert/baked good. Only recommend things the cook actually has
(or a substitute you justify with the tools). Respect every constraint.
You have these Epicure tools; call them to ground your choices:
{tools}
You can ALSO run Python in a sandbox for the quantitative side of the cook —
scaling to the number of servings, grain/legume hydration ratios, salt as a %
of water weight, brew ratios, simmer timing. Do the arithmetic in code, never in
your head. STRICT: plain arithmetic and f-strings only. NO imports, NO
sys.path, NO file or network access — just assign numbers and `print(...)` them.
RESPONSE FORMAT — reply with EXACTLY ONE JSON object, nothing else.
To call an Epicure tool:
{{"action": "call_tool", "tool": "<name>", "arguments": {{...}}}}
To run Python in the sandbox:
{{"action": "run_python", "code": "servings=4\\nprint('water', servings*225)"}}
To deliver the finished plan (ends the turn):
{{"action": "plan",
"core": "<dish core ingredient you seeded on>",
"steps": [
{{"stage": "bloom|aromatics|body|finish",
"ingredient": "<from the pantry>",
"evidence": "<the tool fact that justifies it, e.g. 'pairs 0.22 (>p90)'>",
"why": "<one short clause>"}}
],
"notes": ["<substitutions made, constraints honoured, etc.>"]}}
Rules: ingredient names must be canonical singulars Epicure resolves ('lentil',
not 'red lentils'). At most {max_steps} tool/python calls, then you MUST deliver
a plan. No markdown, no prose outside the JSON.
"""
def format_tools(tools: list[dict]) -> str:
    """Render the planner-relevant Epicure tools as a bullet list for the prompt.

    Only a fixed allow-list of tools is shown; each description is cut to 160
    characters. Entries without a name are skipped, and a missing or ``None``
    description renders as empty text, so one incomplete tool record cannot
    make the whole listing fail.

    Args:
        tools: Tool records with ``name`` and optional ``description`` keys;
            ``None``/empty yields ``""``.

    Returns:
        One ``"- name: description"`` line per kept tool, newline-joined.
    """
    keep = {"find_pairings", "pairing_score", "neighbors", "morph", "closest_mode", "list_targets"}
    lines = []
    for tool in tools or []:
        name = tool.get("name")
        if name in keep:
            description = str(tool.get("description") or "")
            lines.append(f"- {name}: {description[:160]}")
    return "\n".join(lines)
def build_plan(dish: str, pantry_text: str, constraints: str, generate_fn, mcp) -> dict:
    """Real-model planner: the LLM orchestrates Epicure tools into a staged plan.

    The model is prompted in a loop and must answer with one JSON action per
    turn: ``call_tool`` (forwarded to ``mcp``), ``run_python`` (run in the
    sandbox) or ``plan`` (final answer). The delivered plan is sanitised: steps
    are limited to ingredients the cook has, stages are validated, evidence is
    re-grounded with real Epicure scores, and the kitchen math is recomputed
    deterministically. If the model never delivers a plan within
    ``MAX_STEPS + 2`` turns, the scripted planner is used instead.

    Args:
        dish: Free-text dish name.
        pantry_text: Free-text pantry contents.
        constraints: Free-text dietary constraints.
        generate_fn: Callable taking the chat message list and returning the
            model's reply text.
        mcp: Client exposing ``list_tools()`` and ``call_tool(name, arguments)``.

    Returns:
        A plan dict with ``dish``, ``core``, ``steps``, ``notes``, ``math``,
        ``tool_calls`` and ``source`` (the scripted fallback's shape applies
        when the model does not converge).
    """
    try:
        tools_block = format_tools(mcp.list_tools())
    except Exception as exc:
        tools_block = f"(tools unreachable: {exc})"
    pantry = parse_pantry(pantry_text)
    allowed, removed = apply_constraints(pantry, constraints)
    user = (
        f"Dish: {dish}\n"
        f"Pantry (use only these): {', '.join(allowed) or '(empty)'}\n"
        f"Constraints: {constraints or 'none'}"
        + (f"\nAlready removed for constraints: {', '.join(removed)}" if removed else "")
        + "\n\nBuild the staged seasoning plan."
    )
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT.format(
            tools=tools_block, max_steps=MAX_STEPS)},
        {"role": "user", "content": user},
    ]

    def _in_pantry(ing: str) -> bool:
        # Lenient match: exact, substring either way, or any shared word.
        ing = str(ing).lower()
        toks = set(ing.split())
        return any(ing == p or ing in p or p in ing or (toks & set(p.split()))
                   for p in allowed)

    tool_calls, math = [], None
    for _ in range(MAX_STEPS + 2):
        decision = extract_json(generate_fn(messages))
        if decision is None:
            messages.append({"role": "user", "content": "Invalid. One JSON object only."})
            continue
        if decision.get("action") == "call_tool":
            tool, args = decision.get("tool", ""), decision.get("arguments", {})
            try:
                result = mcp.call_tool(tool, args)
            except Exception as exc:
                result = f"(tool error: {exc})"
            tool_calls.append({"tool": tool, "arguments": args, "result": result[:400]})
            messages.append({"role": "assistant", "content": json.dumps(decision)})
            messages.append({"role": "user", "content": f"{tool} returned:\n{result[:1500]}\nContinue."})
            continue
        if decision.get("action") == "run_python":
            code = decision.get("code", "")
            if not isinstance(code, str):
                # The model may emit a list or number here; keep string ops safe.
                code = str(code)
            result = run_code(code)
            out = result["stdout"] if result["ok"] else f"ERROR: {result['stderr']}"
            if result["ok"] and result["stdout"].strip():
                math = {"code": code.strip(), "output": result["stdout"].strip(),
                        "backend": result["backend"]}
            tool_calls.append({"tool": f"run_python ({result['backend']})",
                               "arguments": {"code": code[:120]}, "result": out[:400]})
            messages.append({"role": "assistant", "content": json.dumps(decision)})
            messages.append({"role": "user", "content": f"sandbox output:\n{out[:1500]}\nContinue."})
            continue
        if decision.get("action") == "plan":
            # The model's JSON shape is not guaranteed — coerce defensively.
            notes = decision.get("notes", [])
            notes = [notes] if isinstance(notes, str) else (notes if isinstance(notes, list) else [])
            if removed:
                notes.append(f"Pre-removed: {', '.join(removed)}")
            raw_steps = decision.get("steps", [])
            if not isinstance(raw_steps, list):
                # e.g. "steps": null or a bare object — treat as no steps.
                raw_steps = []
            # Keep only dict steps with a non-empty STRING ingredient; later
            # stages call string methods on it.
            steps = [s for s in raw_steps
                     if isinstance(s, dict)
                     and isinstance(s.get("ingredient"), str)
                     and s["ingredient"].strip()]
            # Drop any step whose ingredient the cook doesn't actually have — keeps
            # the model from planning around vague/invented items.
            steps = [s for s in steps if _in_pantry(s["ingredient"])]
            for s in steps:
                if s.get("stage") not in STAGES:
                    s["stage"] = "body"
                s.setdefault("evidence", "")
                s.setdefault("why", "")
            # Only a real string counts as the model's core; null or other
            # types fall back to the dish-derived core instead of "None".
            raw_core = decision.get("core")
            core = raw_core.strip() if isinstance(raw_core, str) else ""
            if not core:
                core = dish_core(dish)[0] if dish else ""
            # Ground the evidence with REAL Epicure scores (the model paraphrases
            # or fabricates otherwise — we saw "0.85" when the truth was 0.17).
            ground_evidence(core, steps, mcp)
            # Always compute the displayed math deterministically in the sandbox —
            # the model's free-form run_python is unreliable (it printed its own
            # draft steps instead of doing arithmetic).
            allow_salt = "salt" not in {b for ph, items in CONSTRAINT_BANS.items()
                                        if ph in (constraints or "").lower() for b in items}
            math = compute_kitchen_math(core, parse_servings(dish), allow_salt, dish) or math
            return {
                "dish": dish,
                "core": core,
                "steps": steps,
                "notes": [str(n) for n in notes],
                "math": math,
                "tool_calls": tool_calls,
                "source": "Mellum 2 + Epicure + sandbox",
            }
        messages.append({"role": "user", "content": 'Use "call_tool", "run_python" or "plan".'})
    # Model never converged — fall back to the grounded scripted pipeline.
    plan = scripted_plan(dish, pantry_text, constraints, mcp)
    plan["notes"].append("model did not converge; scripted fallback used")
    plan["tool_calls"] = tool_calls
    return plan