"""Pinch — Build Small Hackathon 2026 (Backyard AI track).

Photograph your ingredients; a small vision model reads them, a 12B agent
(JetBrains Mellum 2) decides a dish and builds a seasoning plan grounded in the
Epicure flavour model — what to add, what to substitute for what you lack, in
what order — and runs code for the amounts. Optionally FLUX renders the dish.

Modes:
  - full:       MiniCPM-V (vision) + Mellum 2 (reasoning) + Epicure + sandbox
  - MOCK_LLM=1: scripted planner, still calling Epicure live (no model needed)
"""

import json
import os

# Load .env for local dev BEFORE importing modules that read os.environ at import
# time (agent/vision/imagery). No-op on a Space (no file) or if dotenv is absent.
try:
    from dotenv import load_dotenv

    load_dotenv()
except ImportError:
    pass

import gradio as gr
import pandas as pd

from agent import build_plan, parse_pantry, scripted_plan, suggest_dish
from epicure_client import EpicureMCP
from imagery import generate_dish_image
from vision import detect_ingredients

MOCK_LLM = os.environ.get("MOCK_LLM") == "1"
MODEL_ID = os.environ.get("MODEL_ID", "JetBrains/Mellum2-12B-A2.5B-Instruct")
# Where real inference runs: "zerogpu" (in-Space, free, 40min/day cap) or
# "modal" (own GPU endpoint, paid from credits, no quota). Ignored when MOCK_LLM.
BACKEND = os.environ.get("INFERENCE_BACKEND", "zerogpu")
MODAL_REASON_URL = os.environ.get("MODAL_REASON_URL", "")

try:
    import spaces
except ImportError:
    import types

    # Outside a Space the `spaces` package is absent: substitute a decorator
    # factory that returns the function unchanged so `@spaces.GPU(...)` still works.
    spaces = types.SimpleNamespace(GPU=lambda **kwargs: (lambda fn: fn))

if not MOCK_LLM and BACKEND == "zerogpu":
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype=torch.bfloat16).to("cuda")

    @spaces.GPU(duration=120)
    def _reason_zerogpu(messages: list[dict]) -> str:
        """Generate a reply to ``messages`` with the locally loaded model.

        The chat template is applied to ``messages``, up to 400 new tokens are
        sampled at temperature 0.3, and only the tokens produced after the
        prompt are decoded and returned.
        """
        enc = tokenizer.apply_chat_template(
            messages, add_generation_prompt=True, return_tensors="pt", return_dict=True
        ).to(model.device)
        input_len = enc["input_ids"].shape[-1]
        output = model.generate(
            **enc,
            max_new_tokens=400,
            temperature=0.3,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )
        # Slice off the prompt so the caller only sees the newly generated text.
        return tokenizer.decode(output[0][input_len:], skip_special_tokens=True)


def _reason_modal(messages: list[dict]) -> str:
    """POST ``messages`` to the Modal endpoint and return the ``"text"`` field.

    Raises:
        RuntimeError: if ``MODAL_REASON_URL`` is not configured.
        httpx.HTTPStatusError: if the endpoint answers with an error status.
    """
    if not MODAL_REASON_URL:
        raise RuntimeError(
            "INFERENCE_BACKEND is 'modal' but MODAL_REASON_URL is not set."
        )

    import httpx

    resp = httpx.post(
        MODAL_REASON_URL,
        json={"messages": messages, "max_tokens": 400},
        timeout=300,  # 300s tolerates a cold start
        follow_redirects=True,
    )
    resp.raise_for_status()
    return resp.json()["text"]


def llm_generate(messages: list[dict]) -> str:
    """Route a reasoning call to the configured backend.

    Raises:
        RuntimeError: if no usable backend is configured (unknown
            ``INFERENCE_BACKEND`` value, or the in-Space model was not loaded
            because ``MOCK_LLM`` is set).
    """
    if BACKEND == "modal":
        return _reason_modal(messages)
    if BACKEND == "zerogpu" and not MOCK_LLM:
        return _reason_zerogpu(messages)
    # `_reason_zerogpu` only exists when the model was loaded above; fail with
    # an actionable message instead of a NameError.
    raise RuntimeError(
        f"No reasoning backend available (INFERENCE_BACKEND={BACKEND!r}, "
        f"MOCK_LLM={MOCK_LLM}); expected 'zerogpu' or 'modal'."
    )


MCP = EpicureMCP()

# --------------------------------------------------------------------- state
# ----------------------------------------------------------- planning (the AI)
# NOTE(review): module-level state, so it is shared by every session served by
# this process; it is only read here to render the initial empty plan.
CURRENT_PLAN = {"dish": "", "core": "", "steps": [], "notes": [], "source": ""}


def make_plan(dish: str, pantry: str, constraints: str) -> dict:
    """Run the agent once to build the plan (and remember it for the UI).

    Uses the scripted planner when ``MOCK_LLM`` is set, otherwise the
    LLM-backed planner. The result also replaces ``CURRENT_PLAN``.
    """
    global CURRENT_PLAN
    if MOCK_LLM:
        plan = scripted_plan(dish, pantry, constraints, MCP)
    else:
        plan = build_plan(dish, pantry, constraints, llm_generate, MCP)
    CURRENT_PLAN = plan
    return plan


# ------------------------------------------------------------------------- ui
STAGE_ORDER = ["bloom", "aromatics", "body", "finish"]
STAGE_EMOJI = {"bloom": "🔥", "aromatics": "🧅", "body": "🥘", "finish": "🍋"}

# Used when neither the cook nor the suggester comes up with a dish name.
_FALLBACK_DISH = "a simple dish"


def render_plan(plan: dict) -> str:
    """Render a plan dict as Markdown for the "plan" panel.

    Steps are grouped under their stage in ``STAGE_ORDER``; steps whose stage
    is not listed there are not rendered. Notes and the optional "math"
    section follow the stages. A plan without steps yields a short hint.
    """
    steps = plan.get("steps")
    if not steps:
        return ("_No plan yet — add a photo or type your ingredients, then "
                "**Decide a dish & plan**._")

    # Group once instead of re-scanning the step list for every stage.
    by_stage: dict = {}
    for step in steps:
        by_stage.setdefault(step.get("stage"), []).append(step)

    # Build discrete blocks and join with blank lines, so each stage header is its
    # own paragraph (otherwise markdown folds it into the preceding list item).
    blocks = [f"### Plan for **{plan.get('dish', '')}** \n"
              f"*seeded on `{plan.get('core', '')}` · {plan.get('source', '')}*"]
    for stage in STAGE_ORDER:
        members = by_stage.get(stage)
        if not members:
            continue
        lines = [f"**{STAGE_EMOJI.get(stage, '•')} {stage.title()}**", ""]
        for step in members:
            # Two trailing spaces before "\n" force a Markdown hard line break,
            # putting the evidence on its own line inside the list item.
            lines.append(f"- **{step['ingredient']}** — {step.get('why', '')}  \n"
                         f"  ↳ _{step.get('evidence', '')}_")
        blocks.append("\n".join(lines))

    for note in plan.get("notes", []):
        blocks.append(f"> 📝 {note}")  # each its own block so they don't merge

    kitchen_math = plan.get("math")
    if kitchen_math:
        blocks.append(
            f"**🧮 Kitchen math** _(computed in {kitchen_math.get('backend', '?')} sandbox)_\n\n"
            f"```\n{kitchen_math.get('output', '')}\n```"
        )
    return "\n\n".join(blocks)


@spaces.GPU(duration=60)
def detect_from_photo(image, current_pantry):
    """Run the VLM on the uploaded photo and merge its findings into the pantry
    box (the cook reviews/edits before planning — human in the loop).

    Returns:
        A ``(pantry_text, status_markdown)`` pair. The pantry text is left
        untouched when nothing was detected.
    """
    names, note = detect_ingredients(image)
    if not names:
        return current_pantry, f"📷 {note}"

    existing = parse_pantry(current_pantry)
    seen = set(existing)
    added = []
    for name in names:
        # Skip what is already listed AND repeats within the detection itself.
        if name not in seen:
            seen.add(name)
            added.append(name)

    if not added:
        return ", ".join(existing), f"📷 {note} — nothing new to add"
    return ", ".join(existing + added), f"📷 {note} — added: {', '.join(added)}"


def _resolve_dish(dish, pantry, constraints) -> str:
    """Return the cook's dish, or a suggested one when the box is blank."""
    if (dish or "").strip():
        return dish
    gen = None if MOCK_LLM else llm_generate
    return suggest_dish(pantry, constraints, generate_fn=gen) or _FALLBACK_DISH


def on_plan(dish, pantry, constraints):
    """Handle the "plan" button: pick a dish if needed, build and render the plan.

    Returns:
        ``(dish, plan_markdown, tool_calls_dataframe)`` for the three outputs.
    """
    # If the cook didn't name a dish, work it out from the ingredients on hand.
    dish = _resolve_dish(dish, pantry, constraints)
    plan = make_plan(dish, pantry, constraints)
    calls = pd.DataFrame(
        # Truncate so long payloads stay readable in the table; str() guards
        # against a non-string result.
        [(c["tool"], json.dumps(c["arguments"])[:60], str(c["result"])[:90])
         for c in plan.get("tool_calls", [])],
        columns=["tool", "arguments", "result"],
    )
    return dish, render_plan(plan), calls


def on_see_dish(dish, pantry, constraints, photo):
    """Optional FLUX.2-klein render: transforms the ingredients photo into the
    plated dish (separate click).

    Returns:
        ``(dish, image, note)`` for the dish box, preview image and status text.
    """
    # Same fallback as on_plan, so a blank suggestion never reaches the renderer.
    dish = _resolve_dish(dish, pantry, constraints)
    image, note = generate_dish_image(dish, parse_pantry(pantry), input_image=photo)
    return dish, image, note


with gr.Blocks(title="Pinch") as demo:
    gr.Markdown(
        "# 🤏 Pinch\n"
        "**Snap a photo of your ingredients — I'll work out what to cook and plan the "
        "seasoning, grounded in real flavour science, with the amounts worked out.**  \n"
        "_Vision reads your shelf; a 12B agent picks a dish, reasons over the Epicure "
        "flavour model (1,790 ingredients from ~4M recipes), and runs code for the "
        "quantities._"
    )
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 1 · Photograph your ingredients")
            photo = gr.Image(label="Spice rack / counter / open fridge", type="pil", height=200)
            detect_btn = gr.Button("👁️ Detect ingredients", variant="primary")
            vision_note = gr.Markdown("")
            gr.Markdown("### 2 · Keep only what you actually have")
            pantry = gr.Textbox(label="Detected ingredients (edit freely)", lines=3, value="",
                                placeholder="upload a photo and press Detect, or type here…")
            constraints = gr.Textbox(label="Constraints (optional)", value="",
                                     placeholder="low sodium, no dairy, vegan…")
            dish = gr.Textbox(label="Dish (left blank = I'll decide from your ingredients)",
                              value="", placeholder="auto-suggested — override if you like")
            plan_btn = gr.Button("🍳 Decide a dish & plan", variant="primary")
            tool_log = gr.Dataframe(headers=["tool", "arguments", "result"],
                                    label="Agent's tool calls (Epicure + sandbox)", wrap=True)
        with gr.Column(scale=1):
            gr.Markdown("### 3 · The plan")
            plan_md = gr.Markdown(render_plan(CURRENT_PLAN))
            see_btn = gr.Button("✨ See the dish (FLUX.2 klein)")
            dish_image = gr.Image(label="Dish preview", height=260)
            image_note = gr.Markdown("")

    detect_btn.click(detect_from_photo, [photo, pantry], [pantry, vision_note])
    plan_btn.click(on_plan, [dish, pantry, constraints], [dish, plan_md, tool_log])
    see_btn.click(on_see_dish, [dish, pantry, constraints, photo],
                  [dish, dish_image, image_note])


if __name__ == "__main__":
    demo.launch()