"""Pantry-grounded seasoning planner for the Epicurean Simmer node.

The hard, non-deterministic problem here is NOT temperature — a thermostat does
that. It's flavour: given the specific, messy set of things in your kitchen and
what you're trying to cook, what should you add, what can stand in for what you
lack, and in what order? That space is combinatorial and grounded in real data
(the Epicure flavour model, 1,790 ingredients from ~4M recipes), so it's where
the agent earns its place.

Division of labour:
  - The LLM (Mellum 2) ORCHESTRATES: it reads the messy pantry, decides which
    Epicure tools to call, matches surfaced pairings against what's on hand,
    finds substitutes for gaps, respects constraints, and sequences additions
    by cooking stage. None of that is a lookup table.
  - Epicure provides the grounded flavour facts (pairing scores, neighbours).
  - Temperature is just the playhead: it advances through the plan the agent
    built and decides WHEN each step surfaces on the LCD.

`build_plan` uses the real model. `scripted_plan` is an offline approximation
that still calls Epicure live, so local/MOCK runs show real flavour facts even
without a GPU — but the adaptive reasoning quality comes from the model.
"""

import json
import re

from sandbox import run_code

# Budget of tool/sandbox calls quoted to the model: build_plan substitutes it
# into SYSTEM_PROMPT and runs its decision loop at most MAX_STEPS + 2 times.
MAX_STEPS = 8

# Cooking stages, in order. Each maps to the thermal phase at which it fires.
STAGES = ["bloom", "aromatics", "body", "finish"]
# Human-readable label per stage; scripted_plan lowercases it to fill a step's
# "why" field.
STAGE_LABEL = {
    "bloom": "Bloom whole spices",
    "aromatics": "Add aromatics",
    "body": "Season the simmer",
    "finish": "Finish off heat",
}

# Keyword -> stage, used by the scripted planner to sequence ingredients the way
# a cook would. The model is free to override this with its own reasoning.
# stage_for tests each keyword as a SUBSTRING of the ingredient name, so 'clove'
# also hits 'cloves'. "body" has no entry on purpose: it is stage_for's fallback
# for ingredients that match nothing listed here.
STAGE_KEYWORDS = {
    "bloom": ["cumin", "coriander seed", "mustard seed", "mustard", "fenugreek",
              "cardamom", "cinnamon", "clove", "bay", "peppercorn", "black pepper",
              "fennel", "nigella", "star anise", "asafoetida", "curry leaf",
              "curry", "ras el hanout", "garam masala", "turmeric", "cayenne"],
    "aromatics": ["onion", "garlic", "ginger", "shallot", "leek", "scallion",
                  "spring onion", "chili", "chilli", "green chili", "pepper"],
    "finish": ["lemon", "lime", "lemon juice", "lime juice", "vinegar", "tamarind",
               "yogurt", "yoghurt", "cilantro", "coriander", "coriander leaf",
               "mint", "cream", "parsley", "basil", "amchur", "lemon zest"],
}

# Constraint phrase -> ingredient words to drop. apply_constraints looks for
# each phrase inside the lowercased constraints text and removes every pantry
# item that contains one of that phrase's words as a substring. The planners
# also switch the kitchen-math salt line off whenever "salt" is among the words
# of an active phrase.
CONSTRAINT_BANS = {
    "low sodium": ["salt", "soy sauce", "miso", "stock cube", "bouillon"],
    "no salt": ["salt", "soy sauce", "miso"],
    "no dairy": ["yogurt", "yoghurt", "cream", "butter", "ghee", "milk", "cheese"],
    "vegan": ["yogurt", "yoghurt", "cream", "butter", "ghee", "milk", "cheese",
              "honey", "fish sauce"],
    "no nightshade": ["tomato", "pepper", "chili", "chilli", "paprika", "cayenne",
                       "potato", "eggplant", "aubergine"],
}


# Vague category words that aren't real ingredients — vision sometimes emits
# these ("I see produce"); they can't be paired or planned around, so drop them.
_GENERIC = {
    "produce", "vegetable", "vegetables", "veggies", "veg", "fruit", "fruits",
    "herb", "herbs", "spice", "spices", "seasoning", "seasonings", "ingredient",
    "ingredients", "item", "items", "food", "foods", "groceries", "stuff",
    "various", "other", "misc", "condiment", "condiments", "pantry",
}


# --------------------------------------------------------------------- helpers

# Leading quantity/state words stripped from a pantry entry. The outer group
# repeats so stacked qualifiers all come off: "a few lemons" -> "lemons",
# "some fresh basil" -> "basil" (a single pass would leave "few lemons").
_PANTRY_PREFIX = re.compile(
    r"^(?:(?:some|a|an|the|fresh|dried|ground|whole|few|bit of|couple of)\s+)+"
)


def parse_pantry(text: str) -> list[str]:
    """Split a free-text pantry into normalised ingredient names.

    Entries are separated by commas, semicolons, newlines or the word "and".
    Each entry is lower-cased, has its leading qualifiers removed, has runs of
    whitespace collapsed and surrounding spaces/full stops trimmed.

    Args:
        text: Free-form pantry description; ``None`` or "" yields ``[]``.

    Returns:
        Ingredient names in first-seen order without duplicates. Names of one
        character or less and vague category words (``_GENERIC``) are dropped.
    """
    items = []
    for part in re.split(r"[,\n;]+|\band\b", text or ""):
        name = _PANTRY_PREFIX.sub("", part.strip().lower())
        name = re.sub(r"\s+", " ", name).strip(" .")
        if len(name) > 1 and name not in _GENERIC:
            items.append(name)
    # dict preserves insertion order, so this de-dupes while keeping order.
    return list(dict.fromkeys(items))


def apply_constraints(pantry: list[str], constraints: str) -> tuple[list[str], list[str]]:
    """Partition the pantry according to the dietary constraints in force.

    A constraint is in force when its phrase (a key of ``CONSTRAINT_BANS``)
    occurs in the lowercased ``constraints`` text. A pantry item is removed
    when any banned word of an active constraint is a substring of it.

    Args:
        pantry: Normalised ingredient names.
        constraints: Free-text constraints; ``None`` or "" bans nothing.

    Returns:
        ``(allowed, removed)``, each keeping the pantry's original order.
    """
    wanted = (constraints or "").lower()
    banned = {
        word
        for phrase, words in CONSTRAINT_BANS.items() if phrase in wanted
        for word in words
    }
    allowed, removed = [], []
    for item in pantry:
        is_banned = any(word in item for word in banned)
        (removed if is_banned else allowed).append(item)
    return allowed, removed


def stage_for(ingredient: str) -> str:
    """Return the cooking stage an ingredient should be added at.

    Every keyword in ``STAGE_KEYWORDS`` is tested as a substring of
    ``ingredient`` and the LONGEST hit decides the stage, so the most specific
    keyword wins: 'garlic clove' is an aromatic (via 'garlic'), not a whole
    spice to bloom (via the shorter 'clove'), while 'black pepper' still
    blooms because it beats the bare 'pepper' aromatic. On equal length the
    stage listed first in ``STAGE_KEYWORDS`` is kept.

    Args:
        ingredient: Normalised (lower-case) ingredient name.

    Returns:
        A stage name from ``STAGE_KEYWORDS``, or "body" when nothing matches.
    """
    best_stage, best_len = "body", 0
    for stage, keywords in STAGE_KEYWORDS.items():
        for kw in keywords:
            # Strict '>' keeps the earlier stage when two hits tie on length.
            if len(kw) > best_len and kw in ingredient:
                best_stage, best_len = stage, len(kw)
    return best_stage


def dish_core(dish: str) -> list[str]:
    """Best-effort core ingredient(s) of a dish, for seeding pairing queries.

    Keywords are matched as whole words (an optional plural 's'/'es' is
    accepted), so 'dal' no longer fires inside 'vindaloo' nor 'rice' inside
    'licorice', while 'lentils' and 'tomatoes' still resolve.

    Args:
        dish: Free-text dish name; ``None`` or "" is allowed.

    Returns:
        Distinct core ingredients in the order of the keyword table below
        (not the order they appear in ``dish``), or ``["onion"]`` when no
        keyword is recognised.
    """
    text = (dish or "").lower()
    known = {
        "lentil": "lentil", "dal": "lentil", "dahl": "lentil", "daal": "lentil",
        "chickpea": "chickpea", "chana": "chickpea", "coffee": "coffee",
        "tomato": "tomato", "rice": "rice", "chicken": "chicken", "bean": "bean",
        "soup": "vegetable broth", "curry": "curry", "pasta": "pasta",
    }
    cores = [
        core for key, core in known.items()
        if re.search(rf"\b{re.escape(key)}(?:e?s)?\b", text)
    ]
    # dict.fromkeys de-dupes (several keywords map to 'lentil') and keeps order.
    return list(dict.fromkeys(cores)) or ["onion"]


def parse_servings(text: str, default: int = 2) -> int:
    """Pull a serving count out of the dish text ('dal for 4', '6 people').

    Two shapes are recognised: ``for|serve(s)|make(s) <n>`` and
    ``<n> people|servings|portions|cups``. The number must stand alone, so
    'for 100' is not read as 10 and '124 people' is not read as 24. A count
    followed by a time unit ('for 30 minutes', 'for 3 hours') is a duration
    and is ignored. Candidates are tried left to right, so an out-of-range
    number does not hide a valid one later in the text.

    Args:
        text: Dish description; ``None`` or "" is allowed.
        default: Value returned when no usable count is found.

    Returns:
        The first count between 1 and 24 inclusive, else ``default``.
    """
    pattern = (
        r"\b(?:for|serves?|makes?)\s+(\d{1,2})(?!\d)"
        r"(?!\s*(?:mins?|minutes?|hrs?|hours?|secs?|seconds?|days?)\b)"
        r"|(?<!\d)(\d{1,2})\s*(?:people|servings|portions|cups)"
    )
    for m in re.finditer(pattern, (text or "").lower()):
        n = int(m.group(1) or m.group(2))
        if 1 <= n <= 24:
            return n
    return default


def dish_kind(dish: str, core: str) -> str:
    """Classify the dish so the kitchen math fits it (a salad gets no simmer).

    Whole-word matching throughout, so short keywords don't match inside other
    words ('tea' in 'steak', 'raw' in 'strawberry', 'tart' in 'tartare'). A
    trailing plural 's' is accepted, so 'cookies', 'muffins' and 'soups'
    classify the same as their singular forms.

    Args:
        dish: Free-text dish name; ``None`` or "" is allowed.
        core: Core ingredient chosen for the dish; some cores decide the kind
            on their own ('coffee', pulses, grains).

    Returns:
        One of "beverage", "raw", "sweet", "simmered" or "generic". The checks
        run in that order and the first hit wins.
    """
    t = (dish or "").lower()

    def has(*words):
        # \b...s?\b: the keyword as a whole word, optionally pluralised.
        return re.search(r"\b(?:" + "|".join(words) + r")s?\b", t) is not None

    if core == "coffee" or has("coffee", "espresso", "latte", "tea", "brew", "matcha"):
        return "beverage"
    if has("salad", "slaw", "raw", "ceviche", "smoothie", "juice", "salsa",
           "guacamole", "dip", "tartare", "carpaccio"):
        return "raw"
    # No bare 'sweet' — it would mis-flag 'sweet potato'. The \w*cakes? pattern
    # catches cupcake/shortcake/cheesecake; the rest are whole words. Plurals
    # that are not a plain '+s' ('pastries') are listed explicitly.
    if (has("tart", "pie", "cookie", "biscuit", "brownie", "muffin", "scone",
            "pastry", "pastries", "crumble", "cobbler", "pudding", "custard",
            "dessert", "mousse", "fudge", "parfait", "jam", "compote", "sorbet",
            "gelato")
            or re.search(r"\b\w*cakes?\b", t) or "ice cream" in t):
        return "sweet"
    if core in ("lentil", "chickpea", "bean", "rice", "pasta") or has(
            "soup", "stew", "curry", "curries", "dal", "dahl", "daal", "braise",
            "simmer", "rice", "pasta", "risotto", "porridge", "congee", "chili",
            "chilli", "chilies", "chillies", "broth"):
        return "simmered"
    return "generic"


def kitchen_math_script(core: str, servings: int, allow_salt: bool, dish: str = "") -> str:
    """A small, readable Python script for the quantitative side — tailored to the
    dish type. Run in the sandbox (deterministic, not model-written, so it's
    always real arithmetic rather than the LLM's mental math).

    Args:
        core: Core ingredient; only the "simmered" script prints it. It is
            embedded as a quoted string literal (``repr``), never spliced into
            the code raw, because the caller may pass a model-supplied value
            that contains quotes, braces or newlines.
        servings: Number of servings the quantities are scaled to.
        allow_salt: When False, every script that would print a salt line
            prints "salt: skipped (constraint)" instead — including the sweet
            script's pinch of salt.
        dish: Free-text dish name, used with ``core`` to pick the script.

    Returns:
        Source code of the script, indented as written below; the per-kind
        ratios are the constants visible in each template.
    """
    kind = dish_kind(dish, core)
    no_salt = 'print("salt: skipped (constraint)")'
    if kind == "beverage":
        return f"""
            servings = {servings}
            water_ml = servings * 250          # 250 ml per cup
            ratio = 16                         # 1:16 grounds:water by weight
            grounds_g = round(water_ml / ratio, 1)
            print(f"water: {{water_ml}} ml")
            print(f"grounds: {{grounds_g}} g  (1:{{ratio}} ratio)")
            print(f"bloom: 30 s with ~{{round(grounds_g*2)}} g water, then pour the rest")
        """
    if kind == "sweet":
        # The pinch of salt must honour the constraint like every other branch.
        salt = ('print("salt: just a pinch, to lift the sweetness")'
                if allow_salt else no_salt)
        return f"""
            servings = {servings}
            filling_g = servings * 120        # ~120 g fruit/nut filling per serving
            sweetener_g = servings * 25       # ~25 g sugar/honey/syrup per serving
            print(f"makes: {{servings}} servings")
            print(f"fruit/nut filling: ~{{filling_g}} g total")
            print(f"sweetener: ~{{sweetener_g}} g total, to taste")
            {salt}
            print("if baked: ~180C / 350F until set")
        """
    if kind == "raw":
        salt = ('print(f"salt: {round(produce_g*0.006)} g  (light, 0.6%)")'
                if allow_salt else no_salt)
        return f"""
            servings = {servings}
            produce_g = servings * 150         # ~150 g produce per serving
            acid_ml = servings * 10            # ~10 ml citrus/acid dressing per serving
            print(f"produce: {{produce_g}} g total")
            print(f"dressing acid: {{acid_ml}} ml")
            {salt}
            print("toss just before serving — no cooking")
        """
    if kind == "simmered":
        salt = ('print(f"salt: {round(water_ml*0.008)} g  (0.8% of water)")'
                if allow_salt else no_salt)
        return f"""
            core = {str(core)!r}
            servings = {servings}
            base_g = servings * 75             # 75 g dry pulse/grain per serving
            water_ml = base_g * 3              # 3:1 hydration for a soft simmer
            bloom_tsp = round(base_g / 100, 1) # ~1 tsp whole spice per 100 g
            print(f"{{core}}: {{base_g}} g dry")
            print(f"water: {{water_ml}} ml")
            {salt}
            print(f"whole spices to bloom: ~{{bloom_tsp}} tsp total")
            print(f"simmer: ~{{20 + (servings-2)*2}} min, lid ajar")
        """
    salt = ('print(f"salt: {round(base_g*0.008)} g  (0.8%)")'
            if allow_salt else no_salt)
    return f"""
        servings = {servings}
        base_g = servings * 200                # ~200 g main ingredient per serving
        print(f"main ingredient: ~{{base_g}} g total for {{servings}}")
        {salt}
    """


def compute_kitchen_math(core: str, servings: int, allow_salt: bool, dish: str = "") -> dict | None:
    """Generate the kitchen-math script and execute it in the sandbox.

    Returns:
        ``None`` when the sandbox reports failure (``ok`` falsy). Otherwise a
        dict with "code" (the script with each line's leading whitespace
        removed, for display), "output" (trimmed stdout) and "backend" (as
        reported by the sandbox).
    """
    script = kitchen_math_script(core, servings, allow_salt, dish)
    outcome = run_code(script)
    if outcome["ok"]:
        # Only the displayed copy is flattened; the sandbox ran `script` as-is.
        flattened = re.sub(r"^\s+", "", script, flags=re.M).strip()
        return {
            "code": flattened,
            "output": outcome["stdout"].strip(),
            "backend": outcome["backend"],
        }
    return None


# Staples that season a dish rather than define it; _heuristic_dish skips them
# when it has to name a dish after whatever "main" ingredients are on hand.
_SEASONINGS = {
    "salt", "pepper", "black pepper", "oil", "olive oil", "water", "sugar",
    "cumin", "coriander", "coriander seed", "turmeric", "garlic", "onion",
    "ginger", "chili", "chilli", "cinnamon", "cardamom", "lemon", "lime",
    "vinegar", "bay", "clove", "nutmeg", "paprika", "cayenne",
}


def _heuristic_dish(pantry: list[str]) -> str:
    """Offline dish guess from what's on hand (no model).

    The keyword table is scanned top to bottom and the first keyword found in
    any pantry item decides the dish. Keywords match as whole words with an
    optional plural ('eggs', 'potatoes'), so 'eggplant' is not taken for
    'egg' nor 'beansprouts' for 'bean'.

    Args:
        pantry: Normalised (lower-case) ingredient names.

    Returns:
        A dish name from the table; otherwise "a simple X & Y" built from the
        first two non-seasoning items; otherwise "a simple sauté".
    """
    guesses = [
        ("lentil", "red lentil dal"), ("dal", "red lentil dal"),
        ("chickpea", "chickpea curry"), ("coffee", "pour-over coffee"),
        ("rice", "fried rice"), ("pasta", "tomato pasta"),
        ("spaghetti", "tomato pasta"), ("egg", "spiced scramble"),
        ("potato", "spiced potatoes"), ("chicken", "braised chicken"),
        ("bean", "stewed beans"), ("tomato", "tomato soup"),
    ]
    for key, name in guesses:
        word = re.compile(rf"\b{key}(?:e?s)?\b")
        if any(word.search(p) for p in pantry):
            return name
    mains = [p for p in pantry if p not in _SEASONINGS][:2]
    return ("a simple " + " & ".join(mains)) if mains else "a simple sauté"


def suggest_dish(pantry_text: str, constraints: str = "", generate_fn=None) -> str:
    """Pick a dish to cook from the ingredients on hand.

    The pantry is parsed and filtered by the constraints first. With a
    ``generate_fn`` the model is asked for a dish name and the first non-blank
    line of its reply is used once leading list markers/quotes are trimmed,
    provided 2-60 characters remain. Any failure or unusable reply falls back
    to the offline heuristic.

    Returns:
        The dish name, or "" when no usable pantry item remains.
    """
    pantry, _ = apply_constraints(parse_pantry(pantry_text), constraints)
    if not pantry:
        return ""
    if generate_fn is None:
        return _heuristic_dish(pantry)

    system_text = (
        "Suggest ONE simple, realistic dish to cook from the ingredients "
        "on hand. Reply with ONLY the dish name (2-5 words) — no prose, no "
        "list, no punctuation."
    )
    user_text = "Ingredients: " + ", ".join(pantry)
    if constraints:
        user_text += f"\nConstraints: {constraints}"
    user_text += "\nDish name:"
    prompt = [
        {"role": "system", "content": system_text},
        {"role": "user", "content": user_text},
    ]
    try:
        reply = generate_fn(prompt)
        first = ""
        for candidate in reply.splitlines():
            if candidate.strip():
                first = candidate.strip()
                break
        first = re.sub(r"^[\"'\-\*\d\.\)\s]+", "", first).strip(" .\"'")
        if 2 <= len(first) <= 60:
            return first
    except Exception:
        # Best effort: a failing or malformed model reply must not block the
        # offline guess below.
        pass
    return _heuristic_dish(pantry)


def extract_json(text: str) -> dict | None:
    start = text.find("{")
    if start == -1:
        return None
    depth = 0
    for i, char in enumerate(text[start:], start):
        if char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
            if depth == 0:
                try:
                    return json.loads(text[start : i + 1])
                except json.JSONDecodeError:
                    return None
    return None


# --------------------------------------------------------- scripted planner (offline)

def _parse_score(raw: str) -> tuple[float | None, str]:
    try:
        data = json.loads(raw)
        return data.get("pairing_score"), data.get("percentile_label", "")
    except (json.JSONDecodeError, AttributeError):
        return None, ""


def _singularize(name: str) -> str:
    """Cheap de-pluralisation so 'lemons' resolves as 'lemon' in Epicure.

    Rules, first match wins:
      - '...ies' (longer than 4 chars)  -> '...y'    (berries -> berry)
      - '...ses/xes/zes/ches/shes'      -> drop 'es' (peaches -> peach)
      - '...oes' (longer than 4 chars)  -> drop 'es' (tomatoes -> tomato);
        the length guard keeps short words such as 'roes' -> 'roe'
      - '...s' but not '...ss' (longer than 3 chars) -> drop 's'
    Anything else is returned unchanged. This is a heuristic, not a stemmer.
    """
    if name.endswith("ies") and len(name) > 4:
        return name[:-3] + "y"
    if name.endswith(("ses", "xes", "zes", "ches", "shes")):
        return name[:-2]
    if name.endswith("oes") and len(name) > 4:
        return name[:-2]
    if name.endswith("s") and not name.endswith("ss") and len(name) > 3:
        return name[:-1]
    return name


def ground_evidence(core: str, steps: list[dict], mcp) -> None:
    """Overwrite each step's evidence with the REAL Epicure pairing score, so the
    numbers come from the tool, never the model's imagination. Tries the name and
    a singular form; on success also adopts Epicure's canonical resolved name.

    Steps are modified in place. A step is left untouched when its ingredient
    is missing, is not a string, equals ``core``, or when no candidate name
    yields a numeric score (tool error, unparsable reply, non-numeric value).

    Args:
        core: Ingredient every step is scored against; "" disables grounding.
        steps: Plan steps; each is a dict with at least an "ingredient" key.
        mcp: Client exposing ``call_tool(name, arguments)``; the reply is
            parsed as JSON text.
    """
    if not core:
        return
    for s in steps:
        ing = s.get("ingredient")
        # Step dicts come from model output, so the ingredient may be any JSON
        # value; only a non-empty string can be singularised and looked up.
        if not isinstance(ing, str) or not ing or ing == core:
            continue
        for cand in dict.fromkeys([ing, _singularize(ing)]):  # de-dup, keep order
            try:
                raw = mcp.call_tool("pairing_score",
                                    {"ingredient_a": core, "ingredient_b": cand})
                data = json.loads(raw)
            except Exception:
                # Best effort: a failed lookup just moves on to the next name.
                continue
            score = data.get("pairing_score") if isinstance(data, dict) else None
            # Only a real number can be rounded and displayed; bool is excluded
            # because it is an int subclass.
            if isinstance(score, bool) or not isinstance(score, (int, float)):
                continue
            s["ingredient"] = data.get("resolved_b", cand)
            s["evidence"] = (f"pairs {round(score, 3)} with "
                             f"{data.get('resolved_a', core)} ({data.get('percentile_label', '')})")
            break


def scripted_plan(dish: str, pantry_text: str, constraints: str, mcp) -> dict:
    """Offline approximation that still calls Epicure live.

    For each pantry item, score it against the dish's core ingredient, keep the
    ones that genuinely pair, sequence them by cooking stage, and look up a
    substitute (via `neighbors`) for any core the cook lacks. Grounded in real
    scores from the server — not a hardcoded table — but rigid in its pipeline.

    Args:
        dish: Free-text dish name (also scanned for a serving count).
        pantry_text: Free-text pantry contents.
        constraints: Free-text dietary constraints.
        mcp: Client exposing ``call_tool(name, arguments)``.

    Returns:
        A dict with the keys "dish", "core", "steps", "notes", "math" and
        "source" on every path. When every scoring call failed, "steps" is
        empty, "math" is ``None`` and "notes" says the server was busy.
    """
    pantry = parse_pantry(pantry_text)
    allowed, removed = apply_constraints(pantry, constraints)
    core = dish_core(dish)[0]

    scored, errors = [], 0
    for item in allowed[:10]:  # cap calls to the rate-limited public server
        if item == core:
            continue
        try:
            raw = mcp.call_tool("pairing_score", {"ingredient_a": core, "ingredient_b": item})
            score, label = _parse_score(raw)
        except Exception:
            errors += 1
            continue
        # A non-numeric score is treated like a missing one instead of being
        # compared (which would raise). 0.092 is the corpus median; weaker
        # pairings are dropped.
        if isinstance(score, (int, float)) and score >= 0.092:
            scored.append({"ingredient": item, "score": round(score, 3), "label": label})

    # Don't pretend a busy server means "nothing pairs": say so plainly.
    if errors and not scored:
        return {
            "dish": dish, "core": core, "steps": [],
            "notes": [f"Epicure server was busy ({errors} calls rate-limited) — "
                      "try again in a moment."],
            "math": None,  # same keys as the normal return below
            "source": "scripted (Epicure unavailable)",
        }

    scored.sort(key=lambda s: s["score"], reverse=True)

    # Walk the stages in cooking order; within a stage the best pairing leads.
    steps = [
        {
            "stage": stage,
            "ingredient": s["ingredient"],
            "evidence": f"pairs {s['score']} with {core} ({s['label']})",
            "why": STAGE_LABEL[stage].lower(),
        }
        for stage in STAGES
        for s in scored
        if stage_for(s["ingredient"]) == stage
    ]

    # Substitution: if the dish wants a hallmark ingredient the cook lacks, point
    # to the nearest thing they DO have in Epicure's flavour space. NB neighbours
    # are co-occurrence-similar, not functional swaps — so we surface it as
    # "cooks reach for this in the same dishes", not "use X for the sourness of Y".
    notes = []
    used = {s["ingredient"] for s in steps}
    hallmark = {"lentil": "tamarind", "chickpea": "tahini", "coffee": "cardamom"}.get(core)
    if hallmark and not any(hallmark in p for p in pantry):
        try:
            raw = mcp.call_tool("neighbors", {"ingredient": hallmark, "top_k": 12})
            neigh = {n["name"].replace("_", " ") for n in json.loads(raw).get("neighbors", [])}
            # Exact match only: suggest a pantry item that genuinely IS one of the
            # hallmark's flavour neighbours, not a substring lookalike (so 'lemon'
            # never gets mistaken for 'lemongrass'). Skip anything already planned.
            sub = next((p for p in allowed if p in neigh and p not in used), None)
            if sub:
                notes.append(f"No {hallmark} on hand — {sub} sits closest to it in flavour space.")
                steps.append({
                    "stage": "finish", "ingredient": sub,
                    "evidence": f"flavour-space neighbour of {hallmark}",
                    "why": f"cooks reach for it in the same dishes as {hallmark}",
                })
        except Exception:
            # Best effort: the substitute hint is optional, the plan is not.
            pass

    if removed:
        notes.append(f"Skipped per constraints: {', '.join(removed)}.")
    if errors:
        notes.append(f"{errors} ingredient(s) skipped — Epicure was rate-limiting.")

    # Salt is allowed unless an active constraint phrase bans the word "salt".
    allow_salt = "salt" not in {b for p, items in CONSTRAINT_BANS.items()
                                if p in (constraints or "").lower() for b in items}
    math = compute_kitchen_math(core, parse_servings(dish), allow_salt, dish)

    return {
        "dish": dish, "core": core, "steps": steps, "notes": notes, "math": math,
        "source": "scripted (model offline — real Epicure scores)",
    }


# ------------------------------------------------------------- LLM planner (Space)

# System prompt template for build_plan, rendered with str.format: `{tools}`
# receives the formatted tool list and `{max_steps}` receives MAX_STEPS. Literal
# braces in the JSON examples are doubled ({{ }}) so that format() leaves them
# as single braces. The three "action" values described here are exactly the
# ones build_plan dispatches on.
SYSTEM_PROMPT = """\
You are the flavour brain of a small stovetop cooking node. The cook gives you a
dish, the actual contents of their kitchen (messy free text), and any dietary
constraints. Your job is to turn that into a STAGED seasoning plan, grounded in
the Epicure flavour-science tools — never invent pairings.

Sequence the additions sensibly for THIS dish. Treat the stage labels as a
generic order — robust/early elements first, aromatics next, the main body, then
delicate finishing touches (acids, fresh herbs, garnishes). Describe each step
TRUTHFULLY for the actual ingredient: never call something a "whole spice" or
"aromatic" if it isn't (a nut is not a spice; lemon is not an allium). Do NOT
assume savoury stovetop cooking — adapt your wording when the dish is a salad,
a drink, or a dessert/baked good. Only recommend things the cook actually has
(or a substitute you justify with the tools). Respect every constraint.

You have these Epicure tools; call them to ground your choices:
{tools}

You can ALSO run Python in a sandbox for the quantitative side of the cook —
scaling to the number of servings, grain/legume hydration ratios, salt as a %
of water weight, brew ratios, simmer timing. Do the arithmetic in code, never in
your head. STRICT: plain arithmetic and f-strings only. NO imports, NO
sys.path, NO file or network access — just assign numbers and `print(...)` them.

RESPONSE FORMAT — reply with EXACTLY ONE JSON object, nothing else.
  To call an Epicure tool:
    {{"action": "call_tool", "tool": "<name>", "arguments": {{...}}}}
  To run Python in the sandbox:
    {{"action": "run_python", "code": "servings=4\\nprint('water', servings*225)"}}
  To deliver the finished plan (ends the turn):
    {{"action": "plan",
      "core": "<dish core ingredient you seeded on>",
      "steps": [
        {{"stage": "bloom|aromatics|body|finish",
          "ingredient": "<from the pantry>",
          "evidence": "<the tool fact that justifies it, e.g. 'pairs 0.22 (>p90)'>",
          "why": "<one short clause>"}}
      ],
      "notes": ["<substitutions made, constraints honoured, etc.>"]}}

Rules: ingredient names must be canonical singulars Epicure resolves ('lentil',
not 'red lentils'). At most {max_steps} tool/python calls, then you MUST deliver
a plan. No markdown, no prose outside the JSON.
"""


def format_tools(tools: list[dict]) -> str:
    """Render the planner-relevant Epicure tools as a bullet list for the prompt.

    Only tools whose name is in the allow-list below are shown, one
    ``- name: description`` line each, with the description cut to 160
    characters. Entries without a name are skipped and a missing or ``None``
    description renders as empty text, so one sparse tool record cannot make
    the whole listing fail.

    Args:
        tools: Tool descriptors, each a dict with "name" and optionally
            "description".

    Returns:
        The lines joined by newlines; "" when no listed tool is relevant.
    """
    keep = {"find_pairings", "pairing_score", "neighbors", "morph", "closest_mode", "list_targets"}
    lines = []
    for tool in tools:
        name = tool.get("name")
        if name in keep:
            summary = str(tool.get("description") or "")
            lines.append(f"- {name}: {summary[:160]}")
    return "\n".join(lines)


def build_plan(dish: str, pantry_text: str, constraints: str, generate_fn, mcp) -> dict:
    """Real-model planner: the LLM orchestrates Epicure tools into a staged plan.

    The model is prompted with the dish, the constraint-filtered pantry and the
    tool list, then asked for one JSON decision per turn, for at most
    ``MAX_STEPS + 2`` turns:
      - "call_tool": the Epicure tool is invoked and its reply fed back;
      - "run_python": the code runs in the sandbox and its output is fed back;
      - "plan": the plan is sanitised, its evidence re-grounded with real
        pairing scores, the kitchen math recomputed, and the result returned.
    Anything else earns a corrective message. If no plan arrives in time, the
    scripted planner is used instead.

    The model's JSON shape is not trusted: "notes" and "steps" are coerced to
    lists, steps need a non-blank string ingredient that overlaps the allowed
    pantry, unknown stages become "body", and a missing or non-string "core"
    falls back to the dish's own core.

    Args:
        dish: Free-text dish name.
        pantry_text: Free-text pantry contents.
        constraints: Free-text dietary constraints.
        generate_fn: Callable taking the chat message list and returning the
            model's text reply.
        mcp: Client exposing ``list_tools()`` and ``call_tool(name, arguments)``.

    Returns:
        A plan dict with "dish", "core", "steps", "notes", "math",
        "tool_calls" and "source"; on fallback, the scripted plan with an extra
        note and the recorded tool calls.
    """
    try:
        tools_block = format_tools(mcp.list_tools())
    except Exception as exc:
        tools_block = f"(tools unreachable: {exc})"

    pantry = parse_pantry(pantry_text)
    allowed, removed = apply_constraints(pantry, constraints)
    user = (
        f"Dish: {dish}\n"
        f"Pantry (use only these): {', '.join(allowed) or '(empty)'}\n"
        f"Constraints: {constraints or 'none'}"
        + (f"\nAlready removed for constraints: {', '.join(removed)}" if removed else "")
        + "\n\nBuild the staged seasoning plan."
    )
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT.format(
            tools=tools_block, max_steps=MAX_STEPS)},
        {"role": "user", "content": user},
    ]

    def _in_pantry(ing: str) -> bool:
        # Loose on purpose: equality, containment either way, or a shared word.
        ing = str(ing).lower()
        toks = set(ing.split())
        return any(ing == p or ing in p or p in ing or (toks & set(p.split()))
                   for p in allowed)

    tool_calls, math = [], None
    for _ in range(MAX_STEPS + 2):
        decision = extract_json(generate_fn(messages))
        if decision is None:
            messages.append({"role": "user", "content": "Invalid. One JSON object only."})
            continue
        action = decision.get("action")
        if action == "call_tool":
            tool = decision.get("tool", "")
            args = decision.get("arguments") or {}  # tolerate "arguments": null
            try:
                result = mcp.call_tool(tool, args)
            except Exception as exc:
                result = f"(tool error: {exc})"
            tool_calls.append({"tool": tool, "arguments": args, "result": result[:400]})
            messages.append({"role": "assistant", "content": json.dumps(decision)})
            messages.append({"role": "user", "content": f"{tool} returned:\n{result[:1500]}\nContinue."})
            continue
        if action == "run_python":
            code = decision.get("code", "")
            if not isinstance(code, str):
                # The code is sliced and stripped below, so it must be text.
                code = "" if code is None else str(code)
            result = run_code(code)
            out = result["stdout"] if result["ok"] else f"ERROR: {result['stderr']}"
            if result["ok"] and result["stdout"].strip():
                math = {"code": code.strip(), "output": result["stdout"].strip(),
                        "backend": result["backend"]}
            tool_calls.append({"tool": f"run_python ({result['backend']})",
                               "arguments": {"code": code[:120]}, "result": out[:400]})
            messages.append({"role": "assistant", "content": json.dumps(decision)})
            messages.append({"role": "user", "content": f"sandbox output:\n{out[:1500]}\nContinue."})
            continue
        if action == "plan":
            # The model's JSON shape is not guaranteed — coerce defensively.
            notes = decision.get("notes", [])
            if isinstance(notes, str):
                notes = [notes]
            elif not isinstance(notes, list):
                notes = []
            if removed:
                notes.append(f"Pre-removed: {', '.join(removed)}")

            raw_steps = decision.get("steps")
            if not isinstance(raw_steps, list):  # e.g. "steps": null or an object
                raw_steps = []
            # Keep only well-formed steps whose ingredient the cook actually has —
            # this stops the model planning around vague/invented items, and
            # non-string ingredients cannot be matched or looked up at all.
            steps = [
                s for s in raw_steps
                if isinstance(s, dict)
                and isinstance(s.get("ingredient"), str)
                and s["ingredient"].strip()
                and _in_pantry(s["ingredient"])
            ]
            for s in steps:
                if s.get("stage") not in STAGES:
                    s["stage"] = "body"
                s.setdefault("evidence", "")
                s.setdefault("why", "")

            # "core": null must not turn into the literal string "None".
            raw_core = decision.get("core")
            core = raw_core.strip() if isinstance(raw_core, str) else ""
            if not core and dish:
                core = dish_core(dish)[0]
            # Ground the evidence with REAL Epicure scores (the model paraphrases
            # or fabricates otherwise — we saw "0.85" when the truth was 0.17).
            ground_evidence(core, steps, mcp)
            # Always compute the displayed math deterministically in the sandbox —
            # the model's free-form run_python is unreliable (it printed its own
            # draft steps instead of doing arithmetic).
            allow_salt = "salt" not in {b for ph, items in CONSTRAINT_BANS.items()
                                        if ph in (constraints or "").lower() for b in items}
            math = compute_kitchen_math(core, parse_servings(dish), allow_salt, dish) or math
            return {
                "dish": dish,
                "core": core,
                "steps": steps,
                "notes": [str(n) for n in notes],
                "math": math,
                "tool_calls": tool_calls,
                "source": "Mellum 2 + Epicure + sandbox",
            }
        messages.append({"role": "user", "content": 'Use "call_tool", "run_python" or "plan".'})

    # Model never converged — fall back to the grounded scripted pipeline.
    plan = scripted_plan(dish, pantry_text, constraints, mcp)
    plan["notes"].append("model did not converge; scripted fallback used")
    plan["tool_calls"] = tool_calls
    return plan