Spaces:

build-small-hackathon
/

pinch

Running

App Files Files Community

pinch / app.py

Alptraum

Upload app.py with huggingface_hub

ca47b0e verified 20 days ago

Raw

History Blame Contribute Delete

8.77 kB

	"""Pinch — Build Small Hackathon 2026 (Backyard AI track).

	Photograph your ingredients; a small vision model reads them, a 12B agent
	(JetBrains Mellum 2) decides a dish and builds a seasoning plan grounded in the
	Epicure flavour model — what to add, what to substitute for what you lack, in
	what order — and runs code for the amounts. Optionally FLUX renders the dish.

	Modes:
	- full: MiniCPM-V (vision) + Mellum 2 (reasoning) + Epicure + sandbox
	- MOCK_LLM=1: scripted planner, still calling Epicure live (no model needed)
	"""

	import json
	import os

	# Load .env for local dev BEFORE importing modules that read os.environ at import
	# time (agent/vision/imagery). No-op on a Space (no file) or if dotenv is absent.
	try:
	from dotenv import load_dotenv

	load_dotenv()
	except ImportError:
	pass

	import gradio as gr
	import pandas as pd

	from agent import build_plan, parse_pantry, scripted_plan, suggest_dish
	from epicure_client import EpicureMCP
	from imagery import generate_dish_image
	from vision import detect_ingredients

	# Runtime configuration, read once from the environment at import time.
	# MOCK_LLM is True only when the variable is exactly the string "1".
	MOCK_LLM = os.environ.get("MOCK_LLM") == "1"
	# Model id handed to both from_pretrained() calls further down.
	MODEL_ID = os.environ.get("MODEL_ID", "JetBrains/Mellum2-12B-A2.5B-Instruct")
	# Where real inference runs: "zerogpu" (in-Space, free, 40min/day cap) or
	# "modal" (own GPU endpoint, paid from credits, no quota). Ignored when MOCK_LLM.
	BACKEND = os.environ.get("INFERENCE_BACKEND", "zerogpu")
	# Endpoint that _reason_modal POSTs to; an empty string when the variable is unset.
	MODAL_REASON_URL = os.environ.get("MODAL_REASON_URL", "")

	# `spaces` provides the @spaces.GPU(...) decorator used on the GPU-bound
	# functions in this file. When the package is not installed, substitute a
	# stand-in whose GPU(**kwargs) returns an identity decorator, so decorated
	# functions are left unwrapped.
	# NOTE(review): the stand-in only supports the keyword-call form
	# (@spaces.GPU(duration=...)), which is the only form used in this file.
	try:
	import spaces
	except ImportError:
	import types
	spaces = types.SimpleNamespace(GPU=lambda **kwargs: (lambda fn: fn))

	# Load the tokenizer and model at import time, but only for the in-process
	# backend. With MOCK_LLM set, or with any other BACKEND value, `tokenizer` and
	# `model` are never defined, so _reason_zerogpu must not be called then.
	if not MOCK_LLM and BACKEND == "zerogpu":
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer

	tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
	# Weights are loaded in bfloat16 and moved to the CUDA device.
	model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype=torch.bfloat16).to("cuda")


	@spaces.GPU(duration=120)
	def _reason_zerogpu(messages: list[dict]) -> str:
	    """Generate one chat completion with the in-process model.

	    The messages are run through the tokenizer's chat template (with the
	    generation prompt appended), moved to the model's device, and up to 400
	    new tokens are sampled at temperature 0.3.

	    Args:
	        messages: Chat messages in the form accepted by
	            ``tokenizer.apply_chat_template``.

	    Returns:
	        The decoded completion only: prompt tokens are sliced off and special
	        tokens are skipped.
	    """
	    prompt = tokenizer.apply_chat_template(
	        messages,
	        add_generation_prompt=True,
	        return_tensors="pt",
	        return_dict=True,
	    )
	    prompt = prompt.to(model.device)
	    # Remember how long the prompt is so it can be cut from the output below.
	    prompt_tokens = prompt["input_ids"].shape[-1]

	    sampling = {
	        "max_new_tokens": 400,
	        "temperature": 0.3,
	        "do_sample": True,
	        "pad_token_id": tokenizer.eos_token_id,
	    }
	    generated = model.generate(**prompt, **sampling)

	    completion = generated[0][prompt_tokens:]
	    return tokenizer.decode(completion, skip_special_tokens=True)


	def _reason_modal(messages: list[dict]) -> str:
	    """Send the chat to the remote endpoint and return its ``text`` field.

	    Args:
	        messages: Chat messages, forwarded unchanged in the JSON payload
	            together with ``max_tokens`` set to 400.

	    Returns:
	        The value stored under ``"text"`` in the JSON response.

	    Raises:
	        httpx.HTTPStatusError: when the endpoint answers with an error status
	            (via ``raise_for_status``).
	    """
	    import httpx

	    payload = {"messages": messages, "max_tokens": 400}
	    response = httpx.post(
	        MODAL_REASON_URL,
	        json=payload,
	        timeout=300,  # 300s tolerates a cold start
	        follow_redirects=True,
	    )
	    response.raise_for_status()
	    body = response.json()
	    return body["text"]


	def llm_generate(messages: list[dict]) -> str:
	    """Route a reasoning call to the configured backend.

	    Args:
	        messages: Chat messages passed through unchanged to the backend.

	    Returns:
	        The text produced by the selected backend.

	    Raises:
	        ValueError: if ``BACKEND`` is neither ``"modal"`` nor ``"zerogpu"``.
	    """
	    if BACKEND == "modal":
	        return _reason_modal(messages)
	    if BACKEND == "zerogpu":
	        return _reason_zerogpu(messages)
	    # The in-process model is only loaded when BACKEND == "zerogpu", so any
	    # other value used to fall through to _reason_zerogpu and fail with a
	    # NameError on the missing tokenizer. Name the misconfiguration instead.
	    raise ValueError(
	        f"Unknown INFERENCE_BACKEND {BACKEND!r}; expected 'zerogpu' or 'modal'."
	    )


	# Epicure client constructed once at import time; make_plan passes this same
	# instance to whichever planner it runs.
	MCP = EpicureMCP()


	# --------------------------------------------------------------------- state

	# ----------------------------------------------------------- planning (the AI)

	# Most recently built plan; starts out empty so the UI can render a placeholder.
	CURRENT_PLAN = dict(dish="", core="", steps=[], notes=[], source="")


	def make_plan(dish: str, pantry: str, constraints: str) -> dict:
	    """Build a plan with the active planner and store it in ``CURRENT_PLAN``.

	    With ``MOCK_LLM`` set the scripted planner is used; otherwise the agent is
	    run with ``llm_generate`` as its generation callback. Both receive the
	    shared ``MCP`` client.

	    Args:
	        dish: Name of the dish to plan for.
	        pantry: The cook's ingredients as free text.
	        constraints: Optional constraints as free text.

	    Returns:
	        The plan returned by the planner (also left in ``CURRENT_PLAN``).
	    """
	    global CURRENT_PLAN
	    CURRENT_PLAN = (
	        scripted_plan(dish, pantry, constraints, MCP)
	        if MOCK_LLM
	        else build_plan(dish, pantry, constraints, llm_generate, MCP)
	    )
	    return CURRENT_PLAN


	# ------------------------------------------------------------------------- ui

	# Cooking stages in the order they are rendered, and the emoji shown per stage.
	STAGE_ORDER = ["bloom", "aromatics", "body", "finish"]
	STAGE_EMOJI = {"bloom": "🔥", "aromatics": "🧅", "body": "🥘", "finish": "🍋"}


	def render_plan(plan: dict) -> str:
	    """Render a plan dict as the Markdown shown in the plan panel.

	    Args:
	        plan: Plan dict. Keys read: ``steps`` (list of step dicts with
	            ``stage``, ``ingredient``, ``why``, ``evidence``), ``dish``,
	            ``core``, ``source``, ``notes`` and ``math`` (dict with
	            ``backend`` and ``output``). Every key is optional; missing
	            ones render as empty text instead of raising ``KeyError``.

	    Returns:
	        A placeholder hint when the plan has no steps; otherwise a title
	        block, one block per non-empty stage in ``STAGE_ORDER`` order, one
	        blockquote per note, and the kitchen-math block when present.
	        Steps whose stage is not in ``STAGE_ORDER`` are not rendered.
	    """
	    steps = plan.get("steps")
	    if not steps:
	        return ("_No plan yet — add a photo or type your ingredients, then "
	                "Decide a dish & plan._")

	    # Build discrete blocks and join with blank lines, so each stage header is
	    # its own paragraph (otherwise markdown folds it into the preceding list
	    # item).
	    blocks = [f"### Plan for {plan.get('dish', '')} \n"
	              f"seeded on `{plan.get('core', '')}` · {plan.get('source', '')}"]

	    for stage in STAGE_ORDER:
	        members = [step for step in steps if step.get("stage") == stage]
	        if not members:
	            continue
	        lines = [f"{STAGE_EMOJI.get(stage, '•')} {stage.title()}", ""]
	        # Two trailing spaces form a Markdown hard line break, so the evidence
	        # sits on its own line under the ingredient; a single space would fold
	        # it into the same line.
	        lines.extend(
	            f"- {step.get('ingredient', '')} — {step.get('why', '')}  \n"
	            f"  ↳ _{step.get('evidence', '')}_"
	            for step in members
	        )
	        blocks.append("\n".join(lines))

	    # Each note is its own block so consecutive blockquotes don't merge.
	    blocks.extend(f"> 📝 {note}" for note in plan.get("notes", []))

	    # Named kitchen_math rather than math to avoid shadowing the stdlib module.
	    kitchen_math = plan.get("math")
	    if kitchen_math:
	        blocks.append(
	            f"🧮 Kitchen math _(computed in {kitchen_math.get('backend', '?')} sandbox)_\n\n"
	            f"```\n{kitchen_math.get('output', '')}\n```"
	        )
	    return "\n\n".join(blocks)


	@spaces.GPU(duration=60)
	def detect_from_photo(image, current_pantry):
	    """Run the VLM on the uploaded photo and merge its findings into the pantry
	    box (the cook reviews/edits before planning — human in the loop).

	    Args:
	        image: The uploaded photo, passed straight to ``detect_ingredients``.
	        current_pantry: Current text of the pantry box.

	    Returns:
	        A ``(pantry_text, note)`` pair. When nothing is detected the pantry
	        text is returned unchanged. Otherwise the detected names that are not
	        already present are appended, each at most once, and the note lists
	        exactly the names that were appended.
	    """
	    names, note = detect_ingredients(image)
	    if not names:
	        return current_pantry, f"📷 {note}"

	    existing = parse_pantry(current_pantry)
	    # Track what is already listed so a name repeated in the detector output
	    # is appended only once. Assumes names are hashable strings — TODO confirm
	    # against detect_ingredients / parse_pantry.
	    seen = set(existing)
	    added = []
	    for name in names:
	        if name not in seen:
	            seen.add(name)
	            added.append(name)

	    merged = [*existing, *added]
	    # Report only what actually changed, not every detected name.
	    if added:
	        summary = f"added: {', '.join(added)}"
	    else:
	        summary = "nothing new to add"
	    return ", ".join(merged), f"📷 {note} — {summary}"


	def on_plan(dish, pantry, constraints):
	    """Handle the plan button: settle on a dish, build the plan, log tool calls.

	    Args:
	        dish: Dish text from the UI; blank or whitespace means "decide for me".
	        pantry: Ingredients text from the UI.
	        constraints: Constraints text from the UI.

	    Returns:
	        ``(dish, plan_markdown, tool_call_table)`` where the table is a
	        DataFrame with columns ``tool``, ``arguments`` and ``result``; the
	        JSON-encoded arguments are cut to 60 characters and the result to 90.
	    """
	    dish_missing = not (dish or "").strip()
	    if dish_missing:
	        # No dish named by the cook: work it out from the ingredients on hand.
	        generate_fn = llm_generate if not MOCK_LLM else None
	        suggestion = suggest_dish(pantry, constraints, generate_fn=generate_fn)
	        dish = suggestion or "a simple dish"

	    plan = make_plan(dish, pantry, constraints)

	    rows = []
	    for call in plan.get("tool_calls", []):
	        arguments = json.dumps(call["arguments"])[:60]
	        rows.append((call["tool"], arguments, call["result"][:90]))
	    calls = pd.DataFrame(rows, columns=["tool", "arguments", "result"])

	    return dish, render_plan(plan), calls


	def on_see_dish(dish, pantry, constraints, photo):
	    """Optional FLUX.2-klein render: transforms the ingredients photo into the
	    plated dish (separate click).

	    Args:
	        dish: Dish text from the UI; blank or whitespace means "decide for me".
	        pantry: Ingredients text from the UI; parsed with ``parse_pantry``.
	        constraints: Constraints text from the UI.
	        photo: The ingredients photo, forwarded as ``input_image``.

	    Returns:
	        ``(dish, image, note)`` as produced by ``generate_dish_image``, with
	        ``dish`` being the name actually used for the render.
	    """
	    if not (dish or "").strip():
	        gen = None if MOCK_LLM else llm_generate
	        # Same fallback as on_plan: without it an empty suggestion would be
	        # handed to the image generator and written back to the Dish box.
	        dish = suggest_dish(pantry, constraints, generate_fn=gen) or "a simple dish"
	    image, note = generate_dish_image(dish, parse_pantry(pantry), input_image=photo)
	    return dish, image, note


	# Page layout and event wiring. `demo` is the Blocks app launched at the bottom
	# of the file.
	# NOTE(review): the nesting of the layout below cannot be read off this listing
	# (indentation was lost) — confirm which components sit in which column before
	# restructuring.
	with gr.Blocks(title="Pinch") as demo:
	# Title and one-paragraph pitch shown at the top of the page.
	gr.Markdown(
	"# 🤏 Pinch\n"
	"**Snap a photo of your ingredients — I'll work out what to cook and plan the "
	"seasoning, grounded in real flavour science, with the amounts worked out.** \n"
	"_Vision reads your shelf; a 12B agent picks a dish, reasons over the Epicure "
	"flavour model (1,790 ingredients from ~4M recipes), and runs code for the "
	"quantities._"
	)

	with gr.Row():
	with gr.Column(scale=1):
	# Step 1: the photo (delivered to handlers as a PIL image) and the detect button.
	gr.Markdown("### 1 · Photograph your ingredients")
	photo = gr.Image(label="Spice rack / counter / open fridge", type="pil", height=200)
	detect_btn = gr.Button("👁️ Detect ingredients", variant="primary")
	# Receives the note returned by detect_from_photo.
	vision_note = gr.Markdown("")
	# Step 2: editable ingredient list plus optional constraints and dish.
	gr.Markdown("### 2 · Keep only what you actually have")
	pantry = gr.Textbox(label="Detected ingredients (edit freely)", lines=3, value="",
	placeholder="upload a photo and press Detect, or type here…")
	constraints = gr.Textbox(label="Constraints (optional)", value="",
	placeholder="low sodium, no dairy, vegan…")
	dish = gr.Textbox(label="Dish (left blank = I'll decide from your ingredients)",
	value="", placeholder="auto-suggested — override if you like")
	plan_btn = gr.Button("🍳 Decide a dish & plan", variant="primary")
	# Table filled by on_plan with the tool calls recorded in the plan.
	tool_log = gr.Dataframe(headers=["tool", "arguments", "result"],
	label="Agent's tool calls (Epicure + sandbox)", wrap=True)
	with gr.Column(scale=1):
	gr.Markdown("### 3 · The plan")
	# Rendered once at build time from the empty CURRENT_PLAN, so the panel
	# starts with the "No plan yet" hint.
	plan_md = gr.Markdown(render_plan(CURRENT_PLAN))
	see_btn = gr.Button("✨ See the dish (FLUX.2 klein)")
	dish_image = gr.Image(label="Dish preview", height=260)
	# Receives the note returned by on_see_dish.
	image_note = gr.Markdown("")

	# Each click passes the values of the listed input components to the handler
	# and writes the handler's return values to the listed outputs, in order.
	detect_btn.click(detect_from_photo, [photo, pantry], [pantry, vision_note])
	plan_btn.click(on_plan, [dish, pantry, constraints], [dish, plan_md, tool_log])
	see_btn.click(on_see_dish, [dish, pantry, constraints, photo], [dish, dish_image, image_note])

	# Start the app only when this file is run as a script.
	if __name__ == "__main__":
	demo.launch()