pinch / app.py
Alptraum's picture
Upload app.py with huggingface_hub
ca47b0e verified
Raw
History Blame Contribute Delete
8.77 kB
"""Pinch — Build Small Hackathon 2026 (Backyard AI track).
Photograph your ingredients; a small vision model reads them, a 12B agent
(JetBrains Mellum 2) decides a dish and builds a seasoning plan grounded in the
Epicure flavour model — what to add, what to substitute for what you lack, in
what order — and runs code for the amounts. Optionally FLUX renders the dish.
Modes:
- full: MiniCPM-V (vision) + Mellum 2 (reasoning) + Epicure + sandbox
- MOCK_LLM=1: scripted planner, still calling Epicure live (no model needed)
"""
import json
import os
# Load .env for local dev BEFORE importing modules that read os.environ at import
# time (agent/vision/imagery). No-op on a Space (no file) or if dotenv is absent.
try:
from dotenv import load_dotenv
load_dotenv()
except ImportError:
pass
import gradio as gr
import pandas as pd
from agent import build_plan, parse_pantry, scripted_plan, suggest_dish
from epicure_client import EpicureMCP
from imagery import generate_dish_image
from vision import detect_ingredients
# Runtime configuration, read once from the environment at import time.
# MOCK_LLM=1 selects the scripted planner, so no reasoning model is needed.
MOCK_LLM = os.getenv("MOCK_LLM") == "1"
MODEL_ID = os.getenv("MODEL_ID", "JetBrains/Mellum2-12B-A2.5B-Instruct")
# Where real inference runs: "zerogpu" (in-Space, free, 40min/day cap) or
# "modal" (own GPU endpoint, paid from credits, no quota). Ignored when MOCK_LLM.
BACKEND = os.getenv("INFERENCE_BACKEND", "zerogpu")
# Endpoint used by the "modal" backend; empty when not configured.
MODAL_REASON_URL = os.getenv("MODAL_REASON_URL", "")
# The `spaces` package may be missing (the import is guarded). When it is,
# substitute a stand-in whose `GPU` decorator hands the function back
# untouched, so the `@spaces.GPU(...)` decorations in this file still work.
try:
    import spaces
except ImportError:
    import types

    def _gpu_passthrough(fn=None, **_options):
        """No-op replacement for `spaces.GPU`.

        Supports both decorator spellings: bare `@spaces.GPU` (the function
        arrives as `fn`) and configured `@spaces.GPU(duration=...)` (only
        keyword options arrive, so a decorator must be returned).
        """
        if callable(fn):
            return fn
        return lambda wrapped: wrapped

    spaces = types.SimpleNamespace(GPU=_gpu_passthrough)
# Load the reasoning model in-process only when it will actually be used:
# real inference (MOCK_LLM off) on the in-Space "zerogpu" backend.
if BACKEND == "zerogpu" and not MOCK_LLM:
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype=torch.bfloat16)
    model = model.to("cuda")
@spaces.GPU(duration=120)
def _reason_zerogpu(messages: list[dict]) -> str:
    """Generate a reply with the in-process model and return only the new text.

    `messages` is the chat history passed to the tokenizer's chat template.
    Uses the module-level `tokenizer` and `model`.
    """
    prompt = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    )
    prompt = prompt.to(model.device)
    prompt_len = prompt["input_ids"].shape[-1]
    generated = model.generate(
        **prompt,
        max_new_tokens=400,
        temperature=0.3,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Skip the first `prompt_len` tokens so only the continuation is decoded.
    completion = generated[0][prompt_len:]
    return tokenizer.decode(completion, skip_special_tokens=True)
def _reason_modal(messages: list[dict]) -> str:
    """Send the chat to the Modal inference endpoint and return its reply text.

    Args:
        messages: Chat messages, forwarded as the `messages` field of the
            JSON request body.

    Returns:
        The `text` field of the endpoint's JSON response.

    Raises:
        RuntimeError: If `MODAL_REASON_URL` is empty (endpoint not configured).
        httpx.HTTPStatusError: If the endpoint answers with an error status.
    """
    # MODAL_REASON_URL defaults to "", which would otherwise surface as an
    # opaque URL error from httpx; fail early with an actionable message.
    if not MODAL_REASON_URL:
        raise RuntimeError(
            "MODAL_REASON_URL is not set; it is required for the 'modal' backend"
        )

    # Imported here rather than at module top, so httpx is only needed on
    # this code path.
    import httpx

    resp = httpx.post(
        MODAL_REASON_URL,
        json={"messages": messages, "max_tokens": 400},
        timeout=300,  # 300s tolerates a cold start
        follow_redirects=True,
    )
    resp.raise_for_status()
    return resp.json()["text"]
def llm_generate(messages: list[dict]) -> str:
    """Route a reasoning call to the configured backend.

    Args:
        messages: Chat messages to hand to the selected backend.

    Returns:
        The generated reply text.

    Raises:
        ValueError: If `BACKEND` is neither "modal" nor "zerogpu".
    """
    if BACKEND == "modal":
        return _reason_modal(messages)
    if BACKEND == "zerogpu":
        return _reason_zerogpu(messages)
    # The in-process model is only loaded when BACKEND == "zerogpu", so any
    # other value (e.g. a typo) cannot be served; say so explicitly instead of
    # failing later on names that were never defined.
    raise ValueError(
        f"Unknown INFERENCE_BACKEND {BACKEND!r}; expected 'zerogpu' or 'modal'"
    )
# Epicure client, created once at import time and passed to the planners.
MCP = EpicureMCP()
# --------------------------------------------------------------------- state
# ----------------------------------------------------------- planning (the AI)
# Most recently built plan. Starts empty (no steps); make_plan() rebinds it.
# NOTE(review): module-level, so presumably shared by every session of the
# app — confirm that is intended.
CURRENT_PLAN = {"dish": "", "core": "", "steps": [], "notes": [], "source": ""}
def make_plan(dish: str, pantry: str, constraints: str) -> dict:
    """Build the plan with a single agent run and keep it for the UI.

    Uses the scripted planner when MOCK_LLM is set, otherwise the LLM-backed
    one. The result also replaces the module-level CURRENT_PLAN.
    """
    global CURRENT_PLAN
    CURRENT_PLAN = (
        scripted_plan(dish, pantry, constraints, MCP)
        if MOCK_LLM
        else build_plan(dish, pantry, constraints, llm_generate, MCP)
    )
    return CURRENT_PLAN
# ------------------------------------------------------------------------- ui
# Display order of the known seasoning stages, and the emoji for each header.
STAGE_ORDER = ["bloom", "aromatics", "body", "finish"]
STAGE_EMOJI = {"bloom": "🔥", "aromatics": "🧅", "body": "🥘", "finish": "🍋"}


def render_plan(plan: dict) -> str:
    """Render a plan dict as the markdown shown in the plan panel.

    Args:
        plan: Plan mapping. Keys read here: `steps` (list of step dicts with
            `stage`, `ingredient`, `why`, `evidence`), `dish`, `core`,
            `source`, `notes` and optional `math` (`backend`, `output`).
            Every key is optional; missing ones render as empty text.

    Returns:
        Markdown text. A placeholder message when the plan has no steps;
        otherwise a header, one section per stage (known stages in
        STAGE_ORDER first, then any other stages in first-seen order so no
        step is silently dropped), the notes, and the kitchen-math output.
    """
    steps = plan.get("steps") or []
    if not steps:
        return ("_No plan yet — add a photo or type your ingredients, then "
                "**Decide a dish & plan**._")

    # Group steps by normalised stage name. Plans can come from a model, so
    # tolerate case/whitespace differences and a missing stage.
    by_stage: dict[str, list] = {}
    for step in steps:
        stage = str(step.get("stage") or "other").strip().lower() or "other"
        by_stage.setdefault(stage, []).append(step)
    extra_stages = [stage for stage in by_stage if stage not in STAGE_ORDER]

    # Build discrete blocks and join with blank lines, so each stage header is
    # its own paragraph (otherwise markdown folds it into the preceding list
    # item).
    blocks = [f"### Plan for **{plan.get('dish', '')}** \n"
              f"*seeded on `{plan.get('core', '')}` · {plan.get('source', '')}*"]
    for stage in [*STAGE_ORDER, *extra_stages]:
        members = by_stage.get(stage)
        if not members:
            continue
        lines = [f"**{STAGE_EMOJI.get(stage, '•')} {stage.title()}**", ""]
        for s in members:
            # Two trailing spaces before the newline make a markdown hard line
            # break; the indented continuation stays inside the list item.
            lines.append(f"- **{s.get('ingredient', '?')}** — {s.get('why', '')}  \n"
                         f"  ↳ _{s.get('evidence', '')}_")
        blocks.append("\n".join(lines))
    for n in plan.get("notes") or []:
        blocks.append(f"> 📝 {n}")  # each its own block so they don't merge
    kitchen_math = plan.get("math")
    if kitchen_math:
        blocks.append(
            f"**🧮 Kitchen math** _(computed in {kitchen_math.get('backend', '?')} sandbox)_\n\n"
            f"```\n{kitchen_math.get('output', '')}\n```"
        )
    return "\n\n".join(blocks)
@spaces.GPU(duration=60)
def detect_from_photo(image, current_pantry):
    """Run the VLM on the uploaded photo and merge its findings into the pantry
    box (the cook reviews/edits before planning — human in the loop).

    Args:
        image: The uploaded photo, passed straight to `detect_ingredients`.
        current_pantry: Current text of the pantry box.

    Returns:
        `(pantry_text, status_markdown)`. When nothing is detected the pantry
        text is returned unchanged; otherwise it is the existing entries
        followed by the newly detected ones, and the status lists only the
        names that were actually added.
    """
    names, note = detect_ingredients(image)
    if not names:
        return current_pantry, f"📷 {note}"

    existing = parse_pantry(current_pantry)
    # Keep first-seen order while skipping names already in the pantry and
    # repeats within the detection itself.
    seen = set(existing)
    added = []
    for name in names:
        if name not in seen:
            seen.add(name)
            added.append(name)

    pantry_text = ", ".join(existing + added)
    if not added:
        return pantry_text, f"📷 {note} — nothing new (already in your list)"
    return pantry_text, f"📷 {note} — added: {', '.join(added)}"
def on_plan(dish, pantry, constraints):
    """Handle the plan button: settle on a dish, build the plan, list tool calls.

    Returns `(dish, plan_markdown, tool_calls_dataframe)`.
    """
    # If the cook didn't name a dish, work it out from the ingredients on hand.
    if not (dish or "").strip():
        generate_fn = llm_generate if not MOCK_LLM else None
        suggestion = suggest_dish(pantry, constraints, generate_fn=generate_fn)
        dish = suggestion or "a simple dish"

    plan = make_plan(dish, pantry, constraints)

    # One table row per tool call, with arguments and result truncated.
    rows = []
    for call in plan.get("tool_calls", []):
        tool = call["tool"]
        arguments = json.dumps(call["arguments"])[:60]
        result = call["result"][:90]
        rows.append((tool, arguments, result))
    calls = pd.DataFrame(rows, columns=["tool", "arguments", "result"])

    return dish, render_plan(plan), calls
def on_see_dish(dish, pantry, constraints, photo):
    """Optional FLUX.2-klein render: transforms the ingredients photo into the
    plated dish (separate click).

    Args:
        dish: Dish name from the UI; when blank, one is suggested from the
            pantry.
        pantry: Pantry text, parsed with `parse_pantry` before rendering.
        constraints: Constraint text, used only when suggesting a dish.
        photo: The ingredients photo, passed as `input_image`.

    Returns:
        `(dish, image, note)` — the dish name used, plus the image and note
        returned by `generate_dish_image`.
    """
    if not (dish or "").strip():
        gen = None if MOCK_LLM else llm_generate
        # Same fallback as on_plan: never hand an empty/None dish name to the
        # renderer or write it back into the dish box.
        dish = suggest_dish(pantry, constraints, generate_fn=gen) or "a simple dish"
    image, note = generate_dish_image(dish, parse_pantry(pantry), input_image=photo)
    return dish, image, note
# Build the Gradio UI. Components render in the order they are created inside
# each context manager, so statement order here is the on-screen order.
# NOTE(review): nesting was reconstructed from statement order — confirm that
# `tool_log` belongs at the bottom of the first column.
with gr.Blocks(title="Pinch") as demo:
    gr.Markdown(
        "# 🤏 Pinch\n"
        "**Snap a photo of your ingredients — I'll work out what to cook and plan the "
        "seasoning, grounded in real flavour science, with the amounts worked out.** \n"
        "_Vision reads your shelf; a 12B agent picks a dish, reasons over the Epicure "
        "flavour model (1,790 ingredients from ~4M recipes), and runs code for the "
        "quantities._"
    )
    with gr.Row():
        # First column: photo, editable ingredient list, constraints, dish,
        # and the table of the agent's tool calls.
        with gr.Column(scale=1):
            gr.Markdown("### 1 · Photograph your ingredients")
            photo = gr.Image(label="Spice rack / counter / open fridge", type="pil", height=200)
            detect_btn = gr.Button("👁️ Detect ingredients", variant="primary")
            vision_note = gr.Markdown("")
            gr.Markdown("### 2 · Keep only what you actually have")
            pantry = gr.Textbox(label="Detected ingredients (edit freely)", lines=3, value="",
                                placeholder="upload a photo and press Detect, or type here…")
            constraints = gr.Textbox(label="Constraints (optional)", value="",
                                     placeholder="low sodium, no dairy, vegan…")
            dish = gr.Textbox(label="Dish (left blank = I'll decide from your ingredients)",
                              value="", placeholder="auto-suggested — override if you like")
            plan_btn = gr.Button("🍳 Decide a dish & plan", variant="primary")
            tool_log = gr.Dataframe(headers=["tool", "arguments", "result"],
                                    label="Agent's tool calls (Epicure + sandbox)", wrap=True)
        # Second column: the rendered plan and the optional dish preview.
        with gr.Column(scale=1):
            gr.Markdown("### 3 · The plan")
            # Initial content is the render of CURRENT_PLAN at build time.
            plan_md = gr.Markdown(render_plan(CURRENT_PLAN))
            see_btn = gr.Button("✨ See the dish (FLUX.2 klein)")
            dish_image = gr.Image(label="Dish preview", height=260)
            image_note = gr.Markdown("")
    # Event wiring: each click passes the listed input components to the
    # handler and writes its return values to the listed outputs, in order.
    detect_btn.click(detect_from_photo, [photo, pantry], [pantry, vision_note])
    plan_btn.click(on_plan, [dish, pantry, constraints], [dish, plan_md, tool_log])
    see_btn.click(on_see_dish, [dish, pantry, constraints, photo], [dish, dish_image, image_note])
# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()