Cook_with_a_LLM / src /agents /step_illustrator.py
Fred1e4's picture
Complete Cook App (#5)
75c5414
"""Step image generator — delegates to the deployed Modal FLUX.2 endpoint."""
from __future__ import annotations
import base64
import logging
from typing import Optional
from src import config
from src.pipeline import Recipe, RecipeStep
log = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _b64(png_bytes: bytes) -> str:
return base64.b64encode(png_bytes).decode()
def _step_prompt(visual: str, cuisine: str, n: int) -> str:
desc = visual.strip() or f"cooking step {n}"
return (
f"Top-down photo of a kitchen pan or plate showing {desc}. "
f"{cuisine} home cooking. Warm natural lighting. "
"Recipe magazine style. Photorealistic. Appetizing."
)
def _dish_prompt(visual: str, cuisine: str) -> str:
desc = visual.strip() or "the finished plated dish, garnished and beautifully presented"
return (
f"Top-down photo of a {desc} on a rustic wooden table. "
f"{cuisine} home cooking. Warm natural lighting. "
"Recipe magazine style. Photorealistic. Appetizing."
)
# ---------------------------------------------------------------------------
# Modal call
# ---------------------------------------------------------------------------
def _call_modal(prompt: str, seed: int = 42) -> Optional[bytes]:
    """Render *prompt* through the deployed Modal class.

    The class is looked up by the app and class names held in ``config`` and
    its ``render_step`` method is invoked remotely with the prompt and seed.

    Returns:
        Whatever the remote call returns (expected to be PNG bytes), or
        ``None`` if anything in the lookup or call raises.
    """
    try:
        # Imported lazily, inside the guarded region, so a failing import is
        # handled like any other call failure.
        import modal

        renderer_cls = modal.Cls.from_name(
            config.MODAL_APP_NAME,
            config.MODAL_CLS_NAME,
        )
        rendered = renderer_cls().render_step.remote(prompt, seed=seed)
    except Exception as exc:
        # Best-effort: report the failure and let the caller carry on
        # without an image.
        log.warning("Modal FLUX call failed: %s", exc)
        return None
    return rendered
# ---------------------------------------------------------------------------
# Public function
# ---------------------------------------------------------------------------
def illustrate_recipe(recipe: Recipe) -> Recipe:
    """Attach generated images to a recipe's final dish and each of its steps.

    One image is requested for the finished dish (seed 0), then one per step
    (seeded with the step number). Each image that comes back is stored
    base64-encoded on ``recipe.final_dish_image_b64`` or ``step.image_b64``.
    When a request yields nothing, the corresponding field is left untouched.

    The recipe is modified in place and the same object is returned.
    """
    cuisine = recipe.cuisine or "International"

    # Hero image of the finished dish.
    hero_prompt = _dish_prompt(recipe.final_dish_visual, cuisine)
    hero_png = _call_modal(hero_prompt, seed=0)
    if hero_png:
        recipe.final_dish_image_b64 = _b64(hero_png)
        log.info("Generated final dish image.")

    # Step images are requested one at a time (sequential to respect GPU
    # limits on Modal).
    for step in recipe.steps:
        png = _call_modal(_step_prompt(step.visual, cuisine, step.n), seed=step.n)
        if not png:
            continue
        step.image_b64 = _b64(png)
        log.info("Generated image for step %d.", step.n)

    return recipe