tmp / 10samples /generate_10samples.py
Elemmire's picture
Upload folder using huggingface_hub
d4118e5 verified
Raw
History Blame
49.7 kB
from __future__ import annotations
import json
import math
import random
from pathlib import Path
from typing import Any
from PIL import Image, ImageDraw, ImageFilter, ImageFont
# Pixel (width, height) of the images created in this file; also the default
# canvas used when converting between normalized and pixel bounding boxes.
CANVAS_SIZE = (1024, 1024)
# Output directory name. NOTE(review): not referenced in the visible part of
# the file — presumably used by the entry point further down; confirm.
OUT_DIR = Path("10samples")
# NOTE(review): presumably the minimum identity score for accepting a subject;
# the code that reads it is outside this excerpt — confirm.
IDENTITY_THRESHOLD = 0.55
# NOTE(review): presumably the minimum IoU between intended and measured boxes
# for accepting a subject; the code that reads it is outside this excerpt — confirm.
IOU_THRESHOLD = 0.10
def font(size: int, bold: bool = False) -> ImageFont.ImageFont:
    """Load DejaVu Sans at ``size``, falling back to Pillow's built-in font.

    Args:
        size: Requested font size passed to Pillow.
        bold: Select the bold face instead of the regular one.

    Returns:
        The first DejaVu Sans face that can be opened from the known system
        locations, otherwise Pillow's default font.
    """
    if bold:
        candidates = [
            "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
            "/usr/share/fonts/dejavu/DejaVuSans-Bold.ttf",
        ]
    else:
        candidates = [
            "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
            "/usr/share/fonts/dejavu/DejaVuSans.ttf",
        ]
    for path in candidates:
        try:
            return ImageFont.truetype(path, size)
        except OSError:
            # The face is not installed at this location; try the next one.
            continue
    try:
        # Newer Pillow releases accept a size here, so the fallback still
        # honours the caller's request instead of always being one fixed size.
        return ImageFont.load_default(size)
    except TypeError:
        # Older Pillow: load_default() takes no arguments.
        return ImageFont.load_default()
# Shared font instances, created once when the module is imported.
FONT_18 = font(18)  # label text in draw_overlay
FONT_24 = font(24)  # label text in draw_layout_sketch
FONT_32 = font(32, bold=True)  # bold face; not referenced in the visible part of the file
def clamp(value: float, low: float = 0.0, high: float = 1.0) -> float:
    """Return ``value`` limited to the closed interval ``[low, high]``."""
    capped = value if value < high else high
    return capped if capped > low else low
def bbox_px(bbox: list[float], size: tuple[int, int] = CANVAS_SIZE) -> tuple[int, int, int, int]:
    """Convert a normalized ``[x1, y1, x2, y2]`` box to integer pixel coordinates.

    Each coordinate is clamped to ``[0, 1]`` first, then scaled by the canvas
    width (x values) or height (y values) and rounded to the nearest integer.
    """
    canvas_w, canvas_h = size
    # x coordinates scale with the width, y coordinates with the height.
    scales = (canvas_w, canvas_h, canvas_w, canvas_h)
    return tuple(int(round(clamp(bbox[i]) * scales[i])) for i in range(4))
def normalize_bbox(box: tuple[int, int, int, int], size: tuple[int, int] = CANVAS_SIZE) -> list[float]:
    """Convert a pixel ``(x1, y1, x2, y2)`` box to normalized coordinates.

    Each value is divided by the canvas width or height, clamped to
    ``[0, 1]`` and rounded to four decimal places.
    """
    canvas_w, canvas_h = size
    left, top, right, bottom = box
    fractions = (left / canvas_w, top / canvas_h, right / canvas_w, bottom / canvas_h)
    normalized = []
    for fraction in fractions:
        normalized.append(round(clamp(fraction), 4))
    return normalized
def jitter_bbox(bbox: list[float], rng: random.Random) -> list[float]:
    """Return a slightly shifted and resized copy of a normalized bounding box.

    The box is translated by up to ±1.8% of its size and grown or shrunk on
    each side by between -1.8% and +2.4% of its size, then clamped to
    ``[0, 1]``. The result always spans at least 0.01 on each axis.

    Args:
        bbox: Normalized ``[x1, y1, x2, y2]`` box.
        rng: Random source; four ``uniform`` draws are taken from it.

    Returns:
        The perturbed box, each coordinate rounded to four decimal places.
    """
    x1, y1, x2, y2 = bbox
    width = x2 - x1
    height = y2 - y1
    # The draw order (dx, dy, grow_x, grow_y) is kept fixed so that a seeded
    # generator produces the same sequence of boxes.
    dx = rng.uniform(-0.018, 0.018) * width
    dy = rng.uniform(-0.018, 0.018) * height
    grow_x = rng.uniform(-0.018, 0.024) * width
    grow_y = rng.uniform(-0.018, 0.024) * height
    measured = [
        clamp(x1 + dx - grow_x),
        clamp(y1 + dy - grow_y),
        clamp(x2 + dx + grow_x),
        clamp(y2 + dy + grow_y),
    ]
    min_extent = 0.01
    for low, high in ((0, 2), (1, 3)):
        if measured[high] <= measured[low] + min_extent:
            measured[high] = clamp(measured[low] + min_extent)
            if measured[high] < measured[low] + min_extent:
                # The high edge hit the canvas border, so widening that way
                # was not enough; pull the low edge back instead.
                measured[low] = clamp(measured[high] - min_extent)
    return [round(v, 4) for v in measured]
def iou(a: list[float], b: list[float]) -> float:
    """Return the intersection-over-union of two ``[x1, y1, x2, y2]`` boxes.

    The ratio is rounded to four decimal places. Inverted or empty boxes
    contribute zero area, and ``0.0`` is returned when the union is empty.
    """
    a_left, a_top, a_right, a_bottom = a
    b_left, b_top, b_right, b_bottom = b

    overlap_w = max(0.0, min(a_right, b_right) - max(a_left, b_left))
    overlap_h = max(0.0, min(a_bottom, b_bottom) - max(a_top, b_top))
    inter = overlap_w * overlap_h

    area_a = max(0.0, a_right - a_left) * max(0.0, a_bottom - a_top)
    area_b = max(0.0, b_right - b_left) * max(0.0, b_bottom - b_top)
    union = area_a + area_b - inter

    if union == 0:
        return 0.0
    return round(inter / union, 4)
def lighten(color: tuple[int, int, int], amount: float) -> tuple[int, int, int]:
    """Move each channel of ``color`` toward 255 by the fraction ``amount``."""

    def toward_white(channel: int) -> int:
        return int(channel + (255 - channel) * amount)

    return tuple(map(toward_white, color))
def darken(color: tuple[int, int, int], amount: float) -> tuple[int, int, int]:
    """Scale each channel of ``color`` toward 0 by the fraction ``amount``."""

    def toward_black(channel: int) -> int:
        return int(channel * (1 - amount))

    return tuple(map(toward_black, color))
def gradient(size: tuple[int, int], top: tuple[int, int, int], bottom: tuple[int, int, int]) -> Image.Image:
    """Create an RGB image with a vertical linear gradient from ``top`` to ``bottom``.

    The first row uses ``top`` and the last row uses ``bottom``; rows in
    between are linearly interpolated per channel.
    """
    width, height = size
    canvas = Image.new("RGB", size, top)
    pen = ImageDraw.Draw(canvas)
    # Guard against division by zero for a one-pixel-high image.
    last_row = max(1, height - 1)
    for row in range(height):
        blend = row / last_row
        shade = tuple(int(top[ch] * (1 - blend) + bottom[ch] * blend) for ch in range(3))
        pen.line([(0, row), (width, row)], fill=shade)
    return canvas
def paste_shadow(base: Image.Image, layer: Image.Image, offset: tuple[int, int] = (8, 10), blur: int = 12) -> None:
    """Composite ``layer`` onto ``base`` with a blurred black shadow beneath it.

    The shadow is a black image whose alpha is the Gaussian-blurred last
    channel of ``layer``; it is composited at ``offset`` before the layer
    itself is composited at the origin. ``base`` is modified in place.
    """
    soft_alpha = layer.split()[-1].filter(ImageFilter.GaussianBlur(blur))
    shadow = Image.new("RGBA", layer.size, (0, 0, 0, 0))
    shadow.putalpha(soft_alpha)
    shift_x, shift_y = offset
    base.alpha_composite(shadow, (shift_x, shift_y))
    base.alpha_composite(layer)
def rounded_rect(draw: ImageDraw.ImageDraw, box: tuple[int, int, int, int], radius: int, fill: Any, outline: Any = None, width: int = 1) -> None:
    """Draw a rounded rectangle on ``draw`` by forwarding to ``rounded_rectangle``."""
    options = {"radius": radius, "fill": fill, "outline": outline, "width": width}
    draw.rounded_rectangle(box, **options)
def draw_background(draw: ImageDraw.ImageDraw, scene: dict[str, Any]) -> None:
    """Paint the backdrop selected by ``scene["background_kind"]`` onto ``draw``.

    Coordinates are hard-coded for a 1024x1024 canvas. Recognized kinds are
    ``market``, ``kitchen``, ``workshop``, ``library``, ``greenhouse``,
    ``laundromat``, ``picnic``, ``music_room``, ``clinic`` and ``pottery``;
    any other value draws nothing.

    Randomly scattered details (books, foliage, grass) come from private,
    fixed-seed generators, so the same kind always yields the same backdrop
    and the global ``random`` state is neither read nor advanced.

    Args:
        draw: Drawing context to paint on.
        scene: Mapping that must contain ``"background_kind"``.
    """
    kind = scene["background_kind"]
    if kind == "market":
        # Striped awning along the top edge.
        for x in range(0, 1024, 96):
            color = (188, 87, 70) if (x // 96) % 2 else (240, 197, 113)
            draw.polygon([(x, 0), (x + 96, 0), (x + 70, 110), (x + 26, 110)], fill=color)
        draw.rectangle((0, 672, 1024, 1024), fill=(126, 113, 91))
        for x in range(-80, 1024, 160):
            draw.line((x, 720, x + 260, 1024), fill=(100, 91, 76), width=5)
        draw.rectangle((0, 545, 1024, 650), fill=(99, 73, 48))
    elif kind == "kitchen":
        draw.rectangle((0, 0, 1024, 460), fill=(206, 222, 224))
        for x in range(0, 1024, 128):
            draw.rectangle((x, 0, x + 126, 458), outline=(178, 198, 201), width=2)
        draw.rectangle((0, 560, 1024, 1024), fill=(189, 155, 105))
        for x in range(-150, 1200, 150):
            draw.line((x, 560, x + 210, 1024), fill=(159, 128, 84), width=4)
        draw.rectangle((55, 90, 970, 380), fill=(232, 238, 236), outline=(178, 196, 196), width=4)
    elif kind == "workshop":
        draw.rectangle((0, 0, 1024, 1024), fill=(88, 91, 87))
        for y in range(110, 640, 110):
            draw.line((0, y, 1024, y), fill=(67, 70, 68), width=7)
        draw.rectangle((0, 640, 1024, 1024), fill=(107, 94, 75))
        for x in range(70, 1000, 170):
            draw.rectangle((x, 80, x + 24, 520), fill=(54, 57, 55))
        draw.rectangle((50, 560, 974, 700), fill=(73, 58, 43))
    elif kind == "library":
        draw.rectangle((0, 0, 1024, 1024), fill=(119, 82, 55))
        for y in (84, 250, 416, 582):
            draw.rectangle((0, y, 1024, y + 18), fill=(83, 55, 37))
        rng = random.Random(12)
        # Rows of books with randomized height, width and colour.
        for x in range(18, 1010, 30):
            for y in (105, 271, 437):
                h = rng.randint(88, 132)
                draw.rectangle((x, y, x + rng.randint(14, 24), y + h), fill=rng.choice([(156, 53, 58), (61, 97, 130), (205, 160, 74), (84, 123, 92)]))
        draw.rectangle((0, 650, 1024, 1024), fill=(156, 127, 86))
    elif kind == "greenhouse":
        draw.rectangle((0, 0, 1024, 1024), fill=(176, 209, 199))
        for x in range(0, 1025, 128):
            draw.line((x, 0, x + 180, 650), fill=(123, 153, 146), width=5)
        draw.rectangle((0, 615, 1024, 1024), fill=(104, 126, 87))
        # Private seeded generator keeps the foliage layout reproducible.
        rng = random.Random(13)
        for _ in range(70):
            x = rng.randint(0, 1024)
            y = rng.randint(490, 1024)
            draw.ellipse((x - 18, y - 8, x + 18, y + 8), fill=rng.choice([(45, 117, 72), (62, 145, 82), (86, 121, 69)]))
    elif kind == "laundromat":
        draw.rectangle((0, 0, 1024, 1024), fill=(202, 213, 218))
        for x in range(55, 985, 155):
            draw.rectangle((x, 100, x + 116, 270), fill=(164, 178, 187), outline=(100, 117, 128), width=5)
            draw.ellipse((x + 22, 122, x + 94, 194), fill=(107, 139, 162), outline=(230, 238, 241), width=5)
        draw.rectangle((0, 560, 1024, 1024), fill=(132, 149, 156))
        draw.rectangle((0, 440, 1024, 540), fill=(176, 188, 194))
    elif kind == "picnic":
        draw.rectangle((0, 0, 1024, 490), fill=(139, 185, 216))
        draw.ellipse((-120, 170, 1140, 870), fill=(90, 153, 86))
        draw.rectangle((0, 735, 1024, 1024), fill=(79, 133, 74))
        # Private seeded generator keeps the grass strokes reproducible.
        rng = random.Random(14)
        for _ in range(90):
            x = rng.randint(0, 1024)
            y = rng.randint(500, 1010)
            draw.line((x, y, x + 16, y - 20), fill=(55, 110, 58), width=2)
    elif kind == "music_room":
        draw.rectangle((0, 0, 1024, 1024), fill=(84, 74, 82))
        draw.rectangle((0, 0, 1024, 260), fill=(62, 53, 63))
        for x in range(80, 950, 180):
            draw.ellipse((x, 58, x + 70, 128), fill=(235, 196, 105))
            draw.line((x + 35, 128, x + 20, 260), fill=(235, 196, 105), width=5)
        draw.rectangle((0, 660, 1024, 1024), fill=(115, 89, 70))
    elif kind == "clinic":
        draw.rectangle((0, 0, 1024, 1024), fill=(219, 229, 225))
        for x in range(0, 1024, 128):
            draw.line((x, 0, x, 600), fill=(195, 209, 205), width=2)
        draw.rectangle((0, 610, 1024, 1024), fill=(174, 193, 190))
        draw.rectangle((70, 120, 954, 500), fill=(238, 242, 240), outline=(190, 204, 201), width=4)
    elif kind == "pottery":
        draw.rectangle((0, 0, 1024, 1024), fill=(153, 117, 91))
        for y in range(90, 560, 145):
            draw.rectangle((0, y, 1024, y + 18), fill=(107, 79, 60))
        draw.rectangle((0, 650, 1024, 1024), fill=(128, 91, 68))
        for x in range(20, 1024, 140):
            draw.ellipse((x, 135, x + 90, 200), fill=(185, 126, 78), outline=(99, 73, 52), width=4)
def draw_person(layer: Image.Image, box: tuple[int, int, int, int], subj: dict[str, Any], full_body: bool = True) -> None:
    """Paint a simple stylised figure inside ``box`` on ``layer``.

    Colours come from ``subj["skin"]``, ``subj["hair"]``, ``subj["color"]``
    (clothing) and the optional ``subj["accent"]``. With ``full_body=False``
    the head and torso use larger proportions and no legs are drawn.
    """
    pen = ImageDraw.Draw(layer)
    left, top, right, bottom = box
    # Extents are floored at 1 so a degenerate box still yields usable sizes.
    wide = max(1, right - left)
    tall = max(1, bottom - top)

    skin = tuple(subj["skin"])
    hair = tuple(subj["hair"])
    clothing = tuple(subj["color"])
    accent = tuple(subj.get("accent", lighten(clothing, 0.35)))

    # Body proportions as fractions of the box, per framing.
    if full_body:
        head_scale, head_drop, shoulder_scale, hip_scale = 0.14, 0.18, 0.34, 0.24
    else:
        head_scale, head_drop, shoulder_scale, hip_scale = 0.22, 0.32, 0.48, 0.36

    mid_x = left + wide // 2
    head_r = max(14, int(min(wide, tall) * head_scale))
    head_y = top + int(tall * head_drop)
    torso_top = head_y + head_r - 4
    torso_bottom = bottom - int(tall * 0.10)
    shoulder = int(wide * shoulder_scale)
    hip = int(wide * hip_scale)

    # Hair first, so the face is drawn on top of it.
    pen.ellipse(
        (mid_x - head_r - 5, head_y - head_r - 9, mid_x + head_r + 5, head_y + head_r + 5),
        fill=hair,
    )
    pen.ellipse(
        (mid_x - head_r, head_y - head_r, mid_x + head_r, head_y + head_r),
        fill=skin,
        outline=darken(skin, 0.25),
        width=2,
    )
    # Mouth.
    pen.arc(
        (mid_x - head_r // 2, head_y - 2, mid_x + head_r // 2, head_y + head_r // 2),
        10,
        170,
        fill=darken(skin, 0.45),
        width=2,
    )
    # Eyes: left then right.
    eye_top = head_y - head_r // 4
    eye_bottom = head_y - head_r // 9
    eye_spans = (
        (mid_x - head_r // 3, mid_x - head_r // 5),
        (mid_x + head_r // 5, mid_x + head_r // 3),
    )
    for eye_left, eye_right in eye_spans:
        pen.ellipse((eye_left, eye_top, eye_right, eye_bottom), fill=(38, 38, 36))

    # Torso as a trapezoid narrowing from shoulders to hips.
    torso = [
        (mid_x - shoulder, torso_top),
        (mid_x + shoulder, torso_top),
        (mid_x + hip, torso_bottom),
        (mid_x - hip, torso_bottom),
    ]
    pen.polygon(torso, fill=clothing, outline=darken(clothing, 0.25))

    # Arms.
    arm_width = max(5, wide // 14)
    hand_y = top + int(tall * 0.58)
    hand_inset = int(wide * 0.13)
    pen.line((mid_x - shoulder + 4, torso_top + 12, left + hand_inset, hand_y), fill=skin, width=arm_width)
    pen.line((mid_x + shoulder - 4, torso_top + 12, right - hand_inset, hand_y), fill=skin, width=arm_width)

    if full_body:
        leg_top = torso_bottom - 2
        leg_color = darken(clothing, 0.42)
        leg_width = max(7, wide // 12)
        foot_spread = int(wide * 0.18)
        pen.line((mid_x - hip // 2, leg_top, mid_x - foot_spread, bottom - 4), fill=leg_color, width=leg_width)
        pen.line((mid_x + hip // 2, leg_top, mid_x + foot_spread, bottom - 4), fill=leg_color, width=leg_width)

    # Accent stripe across the chest.
    stripe_y = torso_top + int(tall * 0.12)
    pen.line((mid_x - shoulder + 5, stripe_y, mid_x + shoulder - 5, stripe_y), fill=accent, width=max(3, tall // 42))
def draw_object(layer: Image.Image, box: tuple[int, int, int, int], subj: dict[str, Any]) -> None:
    """Paint a stylised prop inside ``box`` on ``layer``.

    The shape is chosen by ``subj["kind"]`` (default ``"box"``): baskets and
    crates, fruit piles, umbrella, mug/jar/vase/plant pot, book or fabric
    stacks, lamp, instrument, bicycle and dog each have a dedicated drawing;
    any other kind is drawn as a rounded box with an accent stripe. Colours
    come from ``subj["color"]`` and the optional ``subj["accent"]``.
    """
    pen = ImageDraw.Draw(layer)
    left, top, right, bottom = box
    # Extents are floored at 1 so a degenerate box still yields usable sizes.
    wide = max(1, right - left)
    tall = max(1, bottom - top)
    base = tuple(subj["color"])
    trim = tuple(subj.get("accent", lighten(base, 0.35)))
    shape = subj.get("kind", "box")
    mid_x = left + wide // 2

    if shape in {"basket", "crate"}:
        rounded_rect(
            pen,
            (left, top + tall // 6, right, bottom - tall // 12),
            max(8, wide // 16),
            fill=base,
            outline=darken(base, 0.35),
            width=max(3, wide // 30),
        )
        slat_color = darken(base, 0.28)
        for row in range(4):
            slat_y = top + tall // 4 + row * tall // 7
            pen.line((left + 8, slat_y, right - 8, slat_y), fill=slat_color, width=3)
        weave_color = lighten(base, 0.25)
        for col in range(5):
            weave_x = left + wide // 7 + col * wide // 7
            pen.line((weave_x, top + tall // 5, weave_x, bottom - tall // 8), fill=weave_color, width=3)
    elif shape in {"fruit_stack", "oranges", "tomatoes"}:
        radius = max(12, min(wide, tall) // 8)
        rim = darken(base, 0.28)
        shine = lighten(base, 0.45)
        # Fruit centres as fractions of the box width and height.
        centres = [
            (0.25, 0.68), (0.40, 0.52), (0.56, 0.66), (0.69, 0.48), (0.48, 0.32),
            (0.22, 0.38), (0.74, 0.72), (0.58, 0.22),
        ]
        for frac_x, frac_y in centres:
            fruit_x = left + int(frac_x * wide)
            fruit_y = top + int(frac_y * tall)
            pen.ellipse(
                (fruit_x - radius, fruit_y - radius, fruit_x + radius, fruit_y + radius),
                fill=base,
                outline=rim,
                width=2,
            )
            pen.arc(
                (fruit_x - radius // 2, fruit_y - radius // 2, fruit_x + radius, fruit_y + radius),
                200,
                300,
                fill=shine,
                width=2,
            )
    elif shape == "umbrella":
        pen.pieslice(
            (left, top, right, top + int(tall * 0.9)),
            180,
            360,
            fill=base,
            outline=darken(base, 0.35),
            width=max(3, wide // 40),
        )
        rib_color = darken(base, 0.25)
        for rib in range(1, 5):
            rib_x = left + rib * wide // 5
            pen.line((mid_x, top + tall // 4, rib_x, top + int(tall * 0.45)), fill=rib_color, width=2)
        # Shaft.
        pen.line((mid_x, top + tall // 3, mid_x, bottom), fill=darken(base, 0.5), width=max(4, wide // 35))
    elif shape in {"mug", "jar", "vase", "plant_pot"}:
        side_inset = wide // 5
        rounded_rect(
            pen,
            (left + side_inset, top + tall // 5, right - side_inset, bottom - tall // 8),
            max(10, wide // 14),
            fill=base,
            outline=darken(base, 0.3),
            width=max(3, wide // 35),
        )
        # Rim / opening.
        pen.ellipse(
            (left + side_inset, top + tall // 8, right - side_inset, top + tall // 3),
            fill=lighten(base, 0.2),
            outline=darken(base, 0.25),
            width=2,
        )
        if shape == "mug":
            # Handle.
            pen.arc(
                (right - wide // 3, top + tall // 3, right - wide // 15, top + tall * 2 // 3),
                -80,
                95,
                fill=darken(base, 0.25),
                width=max(4, wide // 25),
            )
        if shape == "plant_pot":
            stem_x = mid_x
            stem_y = top + tall // 4
            stem_width = max(3, wide // 36)
            for leaf in range(6):
                leaf_x = left + int(wide * (0.15 + 0.14 * leaf))
                leaf_y = top + int(tall * (0.05 + 0.07 * (leaf % 2)))
                pen.line((stem_x, stem_y, leaf_x, leaf_y), fill=(44, 116, 65), width=stem_width)
                pen.ellipse((leaf_x - 18, leaf_y - 9, leaf_x + 18, leaf_y + 9), fill=(58, 139, 79))
    elif shape in {"book_stack", "fabric_stack"}:
        # Layers are drawn bottom-up, alternating base and accent colours.
        for level in range(5):
            layer_top = bottom - (level + 1) * tall // 7
            layer_fill = base if level % 2 == 0 else trim
            taper = level * wide // 24
            rounded_rect(
                pen,
                (left + taper, layer_top, right - taper, layer_top + tall // 9),
                6,
                fill=layer_fill,
                outline=darken(layer_fill, 0.3),
                width=2,
            )
    elif shape == "lamp":
        shade_top = top + tall // 8
        shade_bottom = top + tall // 2
        pen.polygon(
            [
                (left + wide // 3, shade_top),
                (right - wide // 3, shade_top),
                (right - wide // 5, shade_bottom),
                (left + wide // 5, shade_bottom),
            ],
            fill=base,
            outline=darken(base, 0.25),
        )
        pen.line((mid_x, shade_bottom, mid_x, bottom - tall // 8), fill=darken(base, 0.55), width=max(5, wide // 30))
        pen.ellipse((left + wide // 4, bottom - tall // 5, right - wide // 4, bottom - tall // 12), fill=darken(base, 0.25))
    elif shape == "instrument":
        pen.ellipse(
            (left + wide // 4, top + tall // 3, right - wide // 6, bottom - tall // 8),
            fill=base,
            outline=darken(base, 0.35),
            width=max(3, wide // 35),
        )
        # Sound hole.
        pen.ellipse(
            (left + wide // 3, top + tall // 2, left + wide // 2, top + tall * 2 // 3),
            fill=darken(base, 0.45),
        )
        pen.rectangle(
            (left + wide // 12, top + tall // 6, left + wide // 3, top + tall // 4),
            fill=darken(base, 0.35),
        )
        # Strings.
        for string in range(4):
            string_y = top + tall // 5 + string * tall // 35
            pen.line((left + wide // 10, string_y, right - wide // 5, string_y + tall // 3), fill=(235, 228, 193), width=1)
    elif shape == "bicycle":
        stroke = max(5, wide // 35)
        tyre = darken(base, 0.35)
        wheel_top = top + tall // 2
        pen.ellipse((left, wheel_top, left + wide // 3, bottom), outline=tyre, width=stroke)
        pen.ellipse((right - wide // 3, wheel_top, right, bottom), outline=tyre, width=stroke)
        # Frame: a triangle joining the two hubs and the seat post.
        hub_y = top + tall * 3 // 4
        rear_hub = left + wide // 6
        front_hub = right - wide // 6
        apex_y = top + tall // 3
        pen.line((rear_hub, hub_y, mid_x, apex_y), fill=base, width=stroke)
        pen.line((mid_x, apex_y, front_hub, hub_y), fill=base, width=stroke)
        pen.line((rear_hub, hub_y, front_hub, hub_y), fill=base, width=stroke)
    elif shape == "dog":
        coat_outline = darken(base, 0.3)
        shadow_coat = darken(base, 0.18)
        # Body, then head.
        pen.ellipse(
            (left + wide // 5, top + tall // 3, right - wide // 8, bottom - tall // 6),
            fill=base,
            outline=coat_outline,
            width=3,
        )
        pen.ellipse(
            (left, top + tall // 5, left + wide // 3, top + tall // 2),
            fill=base,
            outline=coat_outline,
            width=3,
        )
        # Ear.
        pen.polygon(
            [
                (left + wide // 10, top + tall // 5),
                (left + wide // 4, top + tall // 9),
                (left + wide // 5, top + tall // 3),
            ],
            fill=shadow_coat,
        )
        # Tail.
        pen.line((right - wide // 7, top + tall // 2, right, top + tall // 4), fill=base, width=max(5, wide // 20))
        leg_width = max(4, wide // 28)
        for leg_x in (left + wide // 3, left + wide * 2 // 3):
            pen.line((leg_x, bottom - tall // 4, leg_x - wide // 18, bottom), fill=shadow_coat, width=leg_width)
    else:
        rounded_rect(
            pen,
            (left, top, right, bottom),
            max(8, min(wide, tall) // 8),
            fill=base,
            outline=darken(base, 0.28),
            width=max(3, wide // 35),
        )
        stripe_y = top + tall // 4
        pen.line((left + wide // 7, stripe_y, right - wide // 7, stripe_y), fill=trim, width=max(3, tall // 30))
def draw_subject(base: Image.Image, bbox: list[float], subj: dict[str, Any]) -> None:
    """Render one subject into ``base`` within its normalized bounding box.

    The subject is drawn on a transparent canvas-sized layer, inset 8 pixels
    from the box edges, then composited onto ``base`` together with a blurred
    shadow that has no offset.
    """
    left, top, right, bottom = bbox_px(bbox)
    inset = 8
    inner_box = (left + inset, top + inset, right - inset, bottom - inset)
    overlay = Image.new("RGBA", CANVAS_SIZE, (0, 0, 0, 0))
    if not subj["is_person"]:
        draw_object(overlay, inner_box, subj)
    else:
        draw_person(overlay, inner_box, subj, full_body=True)
    paste_shadow(base, overlay, offset=(0, 0), blur=10)
def draw_reference(subj: dict[str, Any], path: Path, rng: random.Random) -> None:
    """Render a standalone reference image of one subject and save it to ``path``.

    ``subj["ref_style"]`` selects the backdrop colours, the backdrop
    decoration and the box the subject is drawn in. Styles without a
    dedicated branch fall back to a neutral backdrop with a soft highlight.
    People are drawn without legs for the ``id_headshot`` and
    ``professional_portrait`` styles.

    Args:
        subj: Subject description; reads ``ref_style``, ``color`` and
            ``is_person`` plus whatever the person/object painters need.
        path: Destination file for the RGB image.
        rng: Random source for the randomized backdrop details
            (``shelf_in_store`` and ``closeup_macro``).
    """
    style = subj["ref_style"]
    if style in {"id_headshot", "professional_portrait"}:
        bg_top, bg_bottom = ((226, 229, 230), (197, 204, 208)) if style == "id_headshot" else ((198, 207, 200), (121, 139, 126))
    elif style == "mirror_selfie":
        bg_top, bg_bottom = (212, 216, 218), (160, 168, 172)
    elif style == "shelf_in_store":
        bg_top, bg_bottom = (196, 196, 180), (137, 130, 108)
    elif style == "closeup_macro":
        bg_top, bg_bottom = lighten(tuple(subj["color"]), 0.65), darken(tuple(subj["color"]), 0.20)
    elif style == "flatlay_topdown":
        bg_top, bg_bottom = (224, 222, 213), (199, 196, 186)
    elif style == "in_context_natural":
        bg_top, bg_bottom = (190, 206, 196), (126, 143, 128)
    else:
        bg_top, bg_bottom = (235, 235, 232), (210, 211, 207)

    # Pillow only alpha-blends shapes when an "RGBA"-mode Draw targets an RGB
    # image. Drawing onto an RGBA image overwrites pixels instead, and the
    # later RGB conversion would turn the translucent fills below opaque.
    img = gradient(CANVAS_SIZE, bg_top, bg_bottom).convert("RGB")
    d = ImageDraw.Draw(img, "RGBA")

    if style == "mirror_selfie":
        # Mirror frame, glass, and a dark phone-like rectangle.
        rounded_rect(d, (176, 80, 848, 944), 28, fill=(227, 229, 229), outline=(98, 106, 112), width=16)
        rounded_rect(d, (230, 138, 794, 890), 18, fill=(198, 205, 207), outline=(170, 176, 180), width=5)
        d.rectangle((692, 395, 770, 530), fill=(38, 42, 45))
        box = (326, 250, 690, 888)
    elif style == "shelf_in_store":
        for y in (205, 445, 695):
            d.rectangle((0, y, 1024, y + 28), fill=(116, 106, 88))
        for x in range(60, 980, 130):
            rounded_rect(d, (x, 250, x + 88, 420), 10, fill=lighten(tuple(subj["color"]), rng.uniform(0.1, 0.55)), outline=(120, 108, 88), width=2)
        box = (270, 245, 754, 760)
    elif style == "closeup_macro":
        # Translucent dots in lightened shades of the subject colour.
        for _ in range(90):
            cx, cy = rng.randint(0, 1024), rng.randint(0, 1024)
            r = rng.randint(7, 28)
            fill = lighten(tuple(subj["color"]), rng.uniform(0.05, 0.45)) + (70,)
            d.ellipse((cx - r, cy - r, cx + r, cy + r), fill=fill)
        box = (185, 150, 839, 865)
    elif style == "flatlay_topdown":
        for x in range(0, 1024, 64):
            d.line((x, 0, x, 1024), fill=(204, 201, 192), width=1)
        for y in range(0, 1024, 64):
            d.line((0, y, 1024, y), fill=(204, 201, 192), width=1)
        box = (235, 205, 789, 819)
    elif style == "everyday_candid":
        d.rectangle((0, 630, 1024, 1024), fill=(139, 145, 134))
        d.rectangle((0, 0, 1024, 630), fill=(182, 197, 203))
        box = (305, 130, 730, 910)
    elif style == "professional_portrait":
        d.rectangle((0, 680, 1024, 1024), fill=(98, 99, 88))
        d.ellipse((120, 40, 420, 340), fill=(116, 140, 105, 80))
        box = (308, 122, 722, 900)
    elif style == "id_headshot":
        box = (310, 182, 714, 880)
    else:
        # Soft translucent highlight behind the subject.
        d.ellipse((120, 90, 904, 870), fill=(255, 255, 255, 72))
        box = (245, 185, 779, 825)

    if subj["is_person"]:
        draw_person(img, box, subj, full_body=style not in {"id_headshot", "professional_portrait"})
    else:
        draw_object(img, box, subj)
    img.save(path)
def draw_layout_sketch(plan: dict[str, Any], path: Path) -> None:
    """Save a schematic of the planned layout: a grid plus one labelled box per subject.

    People are drawn in blue and objects in orange. Box fills are
    translucent, so overlapping boxes and the grid remain visible.

    Args:
        plan: Scene plan; reads ``plan["subjects"]`` and, per subject,
            ``intended_bbox``, ``is_person`` and ``name``.
        path: Destination file for the RGB image.
    """
    # Pillow only alpha-blends shapes when an "RGBA"-mode Draw targets an RGB
    # image. Drawing onto an RGBA image overwrites pixels instead, which made
    # the translucent fills opaque once the image was converted for saving.
    img = Image.new("RGB", CANVAS_SIZE, (248, 248, 245))
    d = ImageDraw.Draw(img, "RGBA")
    for x in range(0, 1025, 128):
        d.line((x, 0, x, 1024), fill=(220, 220, 216), width=1)
    for y in range(0, 1025, 128):
        d.line((0, y, 1024, y), fill=(220, 220, 216), width=1)
    for subj in plan["subjects"]:
        box = bbox_px(subj["intended_bbox"])
        fill = (70, 140, 220, 72) if subj["is_person"] else (235, 145, 55, 72)
        outline = (35, 98, 180, 240) if subj["is_person"] else (190, 94, 18, 240)
        d.rectangle(box, fill=fill, outline=outline, width=4)
        label = subj["name"]
        # Size the label background to the rendered text width.
        tb = d.textbbox((0, 0), label, font=FONT_24)
        d.rectangle((box[0] + 6, box[1] + 6, box[0] + 18 + tb[2], box[1] + 40), fill=(255, 255, 255, 220))
        d.text((box[0] + 12, box[1] + 9), label, fill=(32, 32, 32), font=FONT_24)
    img.save(path)
def draw_overlay(main_image: Path, subjects: list[dict[str, Any]], key: str, path: Path, accepted_only: bool = False) -> None:
    """Save a copy of ``main_image`` with each subject's box outlined and labelled.

    Args:
        main_image: Image file to annotate.
        subjects: Subject dicts; each must contain ``key`` and ``name``.
        key: Name of the dict entry holding the normalized box to draw.
        path: Destination file for the annotated RGB image.
        accepted_only: When true, subjects whose ``"accepted"`` entry is
            falsy are skipped (a missing entry counts as accepted) and all
            boxes are drawn in green. Otherwise boxes are blue when ``key``
            is ``"intended_bbox"`` and red for any other key.
    """
    # Load the pixels and release the file handle before drawing or saving.
    with Image.open(main_image) as source:
        img = source.convert("RGB")
    # An "RGBA"-mode Draw on an RGB image makes Pillow alpha-blend, so the
    # semi-transparent label background lets the picture show through.
    d = ImageDraw.Draw(img, "RGBA")

    # The outline colour depends only on the arguments, not on the subject.
    if accepted_only:
        color = (35, 210, 115, 255)
    elif key == "intended_bbox":
        color = (51, 132, 232, 255)
    else:
        color = (238, 72, 66, 255)

    for subj in subjects:
        if accepted_only and not subj.get("accepted", True):
            continue
        box = bbox_px(subj[key])
        d.rectangle(box, outline=color, width=5)
        label = subj["name"]
        tb = d.textbbox((0, 0), label, font=FONT_18)
        # The label sits just above the box, kept on-canvas for boxes near the top edge.
        d.rectangle((box[0], max(0, box[1] - 28), box[0] + tb[2] + 12, max(24, box[1] - 2)), fill=(0, 0, 0, 170))
        d.text((box[0] + 6, max(0, box[1] - 27)), label, fill=(255, 255, 255), font=FONT_18)
    img.save(path)
def draw_main(plan: dict[str, Any], path: Path) -> None:
    """Render the full scene described by ``plan`` and save it to ``path`` as RGB.

    The canvas starts as a vertical gradient from ``plan["bg_top"]`` to
    ``plan["bg_bottom"]``, receives the backdrop, and then every subject at
    its ``intended_bbox``.
    """
    sky = tuple(plan["bg_top"])
    ground = tuple(plan["bg_bottom"])
    canvas = gradient(CANVAS_SIZE, sky, ground).convert("RGBA")
    draw_background(ImageDraw.Draw(canvas), plan)

    def bottom_edge(entry: dict[str, Any]) -> float:
        return entry["intended_bbox"][3]

    # Subjects whose boxes end higher up are painted first, so ones reaching
    # lower on the canvas are drawn on top of them.
    for entry in sorted(plan["subjects"], key=bottom_edge):
        draw_subject(canvas, entry["intended_bbox"], entry)
    canvas.convert("RGB").save(path)
def subject(
    name: str,
    is_person: bool,
    sub_caption: str,
    bbox: list[float],
    ref_style: str,
    ref_prompt: str,
    color: tuple[int, int, int],
    *,
    accent: tuple[int, int, int] | None = None,
    skin: tuple[int, int, int] = (171, 119, 82),
    hair: tuple[int, int, int] = (50, 38, 32),
    kind: str = "box",
) -> dict[str, Any]:
    """Build the dict that describes one subject of a scene plan.

    ``bbox`` is stored under ``"intended_bbox"``. When ``accent`` is not
    given it defaults to a lightened version of ``color``. ``skin`` and
    ``hair`` are only included for people.
    """
    resolved_accent = accent if accent else lighten(color, 0.35)
    item: dict[str, Any] = dict(
        name=name,
        is_person=is_person,
        sub_caption=sub_caption,
        intended_bbox=bbox,
        ref_style=ref_style,
        ref_prompt=ref_prompt,
        color=color,
        accent=resolved_accent,
        kind=kind,
    )
    if is_person:
        item.update(skin=skin, hair=hair)
    return item
def build_plans() -> list[dict[str, Any]]:
return [
{
"sample_id": "sample_0001",
"scene_caption": "A rainy market stall bustles as a vendor steadies a display while a shopper reaches for fruit under a green umbrella.",
"story": "The rain has just eased, and the stall is busy again. A quick exchange between vendor and shopper gives the scene a focused, everyday energy.",
"background": "Covered outdoor produce market, damp stone floor, warm awning light, late afternoon.",
"style": "photorealistic",
"canvas_size": list(CANVAS_SIZE),
"background_kind": "market",
"bg_top": (186, 197, 198),
"bg_bottom": (128, 118, 96),
"subjects": [
subject("vendor_in_apron", True, "middle-aged person with short dark hair, tan skin, blue apron, leaning forward with a concentrated expression", [0.08, 0.24, 0.31, 0.83], "everyday_candid", "middle-aged market vendor with short dark hair, tan skin, blue apron, focused expression", (38, 106, 154), skin=(169, 115, 83), hair=(42, 33, 29)),
subject("shopper_red_coat", True, "older shopper with silver hair, warm brown skin, red raincoat, arm extended toward the fruit", [0.26, 0.22, 0.50, 0.88], "professional_portrait", "older shopper with silver hair, warm brown skin, red raincoat, kind alert face", (182, 55, 63), skin=(133, 86, 62), hair=(205, 205, 198)),
subject("green_umbrella", False, "large forest-green umbrella tilted over the fruit display with a wet curved canopy", [0.37, 0.04, 0.82, 0.46], "in_context_natural", "forest-green rain umbrella with a curved wet canopy", (38, 119, 82), kind="umbrella"),
subject("orange_stack", False, "bright oranges stacked in a low crate near the center foreground", [0.44, 0.49, 0.71, 0.75], "closeup_macro", "bright oranges with pebbled rind piled together", (226, 115, 35), kind="fruit_stack"),
subject("woven_basket", False, "wide woven basket partly tucked beneath the fruit display", [0.61, 0.62, 0.88, 0.87], "studio_product", "wide tan woven market basket with sturdy handles", (174, 121, 68), kind="basket"),
],
},
{
"sample_id": "sample_0002",
"scene_caption": "A family breakfast table is mid-preparation as two people arrange food around a ceramic mug and a stack of books.",
"story": "The morning is calm but active. One person is setting the table while another pauses with a small smile before sitting down.",
"background": "Bright home kitchen with pale tile, wood table, diffuse window light.",
"style": "photorealistic",
"canvas_size": list(CANVAS_SIZE),
"background_kind": "kitchen",
"bg_top": (225, 235, 235),
"bg_bottom": (176, 147, 105),
"subjects": [
subject("person_yellow_sweater", True, "young adult with curly black hair, medium brown skin, yellow sweater, holding a plate near the table", [0.11, 0.18, 0.37, 0.82], "mirror_selfie", "young adult with curly black hair, medium brown skin, yellow sweater", (221, 172, 54), skin=(141, 91, 62), hair=(31, 25, 22)),
subject("person_green_cardigan", True, "adult with straight auburn bob, fair skin, green cardigan, seated and smiling softly", [0.55, 0.25, 0.82, 0.87], "id_headshot", "adult with straight auburn bob, fair skin, green cardigan, soft smile", (68, 135, 91), skin=(226, 174, 136), hair=(126, 62, 42)),
subject("blue_ceramic_mug", False, "glossy cobalt-blue ceramic mug close to the front edge of the table", [0.35, 0.58, 0.51, 0.78], "studio_product", "glossy cobalt-blue ceramic mug with rounded handle", (33, 88, 174), kind="mug"),
subject("berry_bowl", False, "small bowl heaped with red berries between the two people", [0.45, 0.48, 0.63, 0.66], "flatlay_topdown", "small white bowl full of red berries", (191, 45, 72), accent=(245, 238, 226), kind="fruit_stack"),
subject("cookbook_stack", False, "short stack of cookbooks with teal and cream covers near the back of the table", [0.18, 0.46, 0.39, 0.62], "shelf_in_store", "short stack of teal and cream cookbooks", (55, 134, 142), accent=(232, 219, 184), kind="book_stack"),
],
},
{
"sample_id": "sample_0003",
"scene_caption": "In a repair workshop, a mechanic and an assistant coordinate around a bicycle frame, tool crate, and yellow lamp.",
"story": "The team is close to solving the repair. Their body language shows concentration as tools and parts crowd the bench.",
"background": "Dim bicycle workshop, pegboard walls, worn wooden bench, focused task lighting.",
"style": "photorealistic",
"canvas_size": list(CANVAS_SIZE),
"background_kind": "workshop",
"bg_top": (92, 98, 96),
"bg_bottom": (89, 75, 57),
"subjects": [
subject("mechanic_gray_overalls", True, "stocky adult with shaved head, deep brown skin, gray overalls, crouched toward the bicycle", [0.07, 0.24, 0.34, 0.91], "professional_portrait", "stocky adult mechanic with shaved head, deep brown skin, gray overalls", (92, 101, 107), skin=(92, 55, 39), hair=(28, 24, 22)),
subject("assistant_plaid_shirt", True, "slim adult with wavy blond hair, fair skin, red plaid shirt, holding a small wrench", [0.68, 0.19, 0.91, 0.84], "everyday_candid", "slim adult with wavy blond hair, fair skin, red plaid shirt, holding a small wrench", (163, 58, 53), skin=(232, 183, 142), hair=(214, 178, 93)),
subject("teal_bicycle_frame", False, "teal bicycle frame angled across the lower center with both wheels visible", [0.29, 0.49, 0.76, 0.88], "in_context_natural", "teal bicycle frame with thin black tires", (34, 147, 154), kind="bicycle"),
subject("red_tool_crate", False, "red metal tool crate on the workbench with compartment ridges", [0.31, 0.38, 0.54, 0.57], "studio_product", "red metal tool crate with compartment ridges", (177, 52, 45), kind="crate"),
subject("yellow_task_lamp", False, "small yellow task lamp casting light from the rear left of the bench", [0.10, 0.06, 0.30, 0.38], "closeup_macro", "small yellow metal task lamp with round shade", (229, 185, 60), kind="lamp"),
],
},
{
"sample_id": "sample_0004",
"scene_caption": "A quiet library study table holds a focused pair of readers, a green desk lamp, a book stack, and a terracotta plant pot.",
"story": "The moment is hushed and intent. One reader marks a page while the other leans in to compare notes.",
"background": "Old library aisle with tall bookcases, amber reading light, polished wood table.",
"style": "photorealistic",
"canvas_size": list(CANVAS_SIZE),
"background_kind": "library",
"bg_top": (122, 83, 55),
"bg_bottom": (142, 108, 73),
"subjects": [
subject("reader_blue_jacket", True, "adult with short coiled hair, dark brown skin, blue jacket, leaning over an open page", [0.14, 0.20, 0.40, 0.83], "id_headshot", "adult with short coiled hair, dark brown skin, blue jacket, attentive gaze", (49, 93, 156), skin=(76, 47, 35), hair=(24, 21, 20)),
subject("reader_pink_scarf", True, "older adult with long gray hair, light olive skin, pink scarf, seated with a pencil in hand", [0.48, 0.22, 0.75, 0.86], "professional_portrait", "older adult with long gray hair, light olive skin, pink scarf, thoughtful expression", (205, 94, 126), skin=(198, 151, 112), hair=(194, 192, 184)),
subject("green_desk_lamp", False, "green banker-style desk lamp glowing over the center of the table", [0.35, 0.40, 0.57, 0.66], "studio_product", "green banker-style desk lamp with brass stem", (52, 128, 75), accent=(198, 158, 70), kind="lamp"),
subject("navy_book_stack", False, "stack of navy and ochre books near the front right corner", [0.61, 0.58, 0.86, 0.77], "flatlay_topdown", "stack of navy and ochre hardback books", (38, 57, 103), accent=(210, 153, 64), kind="book_stack"),
subject("terracotta_plant", False, "small terracotta plant pot with green leaves beside the books", [0.75, 0.42, 0.92, 0.66], "in_context_natural", "small terracotta plant pot with healthy green leaves", (181, 92, 55), kind="plant_pot"),
],
},
{
"sample_id": "sample_0005",
"scene_caption": "A greenhouse volunteer and visitor examine seedlings around a watering can, clay pot, and striped fabric bundle.",
"story": "The air is humid and bright after watering. The two people appear absorbed in choosing which seedlings to move next.",
"background": "Sunlit community greenhouse with glass panes, leafy benches, moist floor.",
"style": "photorealistic",
"canvas_size": list(CANVAS_SIZE),
"background_kind": "greenhouse",
"bg_top": (184, 216, 205),
"bg_bottom": (95, 130, 85),
"subjects": [
subject("volunteer_orange_vest", True, "adult with cropped black hair, medium tan skin, orange vest, kneeling with careful hands", [0.08, 0.27, 0.35, 0.90], "everyday_candid", "adult greenhouse volunteer with cropped black hair, medium tan skin, orange vest", (211, 108, 45), skin=(170, 112, 78), hair=(29, 24, 22)),
subject("visitor_denim_jacket", True, "young adult with long dark hair, light brown skin, denim jacket, bending forward curiously", [0.58, 0.18, 0.84, 0.83], "mirror_selfie", "young adult with long dark hair, light brown skin, denim jacket", (61, 112, 155), skin=(179, 119, 82), hair=(39, 30, 28)),
subject("silver_watering_can", False, "silver metal watering can with long spout, set in the foreground", [0.33, 0.58, 0.57, 0.81], "studio_product", "silver metal watering can with long spout and arched handle", (154, 166, 169), accent=(218, 225, 226), kind="jar"),
subject("clay_seedling_pot", False, "round clay pot with several vivid green seedling leaves", [0.48, 0.43, 0.66, 0.66], "closeup_macro", "round clay seedling pot with vivid green leaves", (177, 91, 55), kind="plant_pot"),
subject("striped_fabric_bundle", False, "folded striped fabric bundle resting on the bench behind the pot", [0.19, 0.48, 0.43, 0.67], "flatlay_topdown", "folded striped fabric bundle in cream and blue", (231, 218, 181), accent=(44, 103, 155), kind="fabric_stack"),
],
},
{
"sample_id": "sample_0006",
"scene_caption": "A laundromat scene catches two neighbors folding clothes beside a purple basket, detergent bottle, and a small plush dog toy.",
"story": "The dryers are humming while the neighbors trade a quick laugh. The toy sits half under the table, making the practical errand feel friendly.",
"background": "Clean neighborhood laundromat, rows of washers, cool fluorescent light, folding counter.",
"style": "photorealistic",
"canvas_size": list(CANVAS_SIZE),
"background_kind": "laundromat",
"bg_top": (207, 219, 225),
"bg_bottom": (126, 145, 153),
"subjects": [
subject("neighbor_teal_hoodie", True, "adult with shaved sides and black curls, brown skin, teal hoodie, folding a towel", [0.12, 0.21, 0.38, 0.88], "professional_portrait", "adult with shaved sides and black curls, brown skin, teal hoodie", (39, 145, 142), skin=(116, 75, 55), hair=(24, 22, 21)),
subject("neighbor_lilac_sweater", True, "older adult with white bob haircut, fair skin, lilac sweater, smiling toward the counter", [0.54, 0.18, 0.80, 0.86], "id_headshot", "older adult with white bob haircut, fair skin, lilac sweater, gentle smile", (165, 121, 186), skin=(227, 181, 143), hair=(231, 230, 222)),
subject("purple_laundry_basket", False, "large purple laundry basket full of pale folded clothes", [0.32, 0.52, 0.59, 0.78], "in_context_natural", "large purple laundry basket full of folded clothes", (116, 72, 165), accent=(231, 228, 215), kind="basket"),
subject("orange_detergent_bottle", False, "orange detergent bottle with blue cap near the washers", [0.74, 0.47, 0.88, 0.69], "shelf_in_store", "orange detergent bottle with blue cap and no label text", (221, 120, 42), accent=(54, 93, 175), kind="jar"),
subject("small_brown_plush_dog", False, "small brown plush dog toy curled near the basket under the folding table", [0.60, 0.69, 0.86, 0.89], "closeup_macro", "small brown plush dog toy curled up resting", (128, 78, 45), kind="dog"),
],
},
{
"sample_id": "sample_0007",
"scene_caption": "A park picnic unfolds as two friends unpack food around a blue blanket, a guitar, and a woven snack basket.",
"story": "The friends are settling into an easy afternoon. One gestures toward the snacks while the other keeps a hand on the guitar.",
"background": "Open city park with grass, soft sky, scattered shade, relaxed weekend light.",
"style": "photorealistic",
"canvas_size": list(CANVAS_SIZE),
"background_kind": "picnic",
"bg_top": (148, 192, 220),
"bg_bottom": (88, 141, 78),
"subjects": [
subject("friend_white_hat", True, "young adult with dark skin, white brimmed hat, navy shirt, seated cross-legged", [0.10, 0.32, 0.36, 0.86], "mirror_selfie", "young adult with dark skin, white brimmed hat, navy shirt", (35, 57, 108), accent=(239, 235, 214), skin=(70, 43, 31), hair=(28, 24, 22)),
subject("friend_rust_jacket", True, "adult with straight black hair, warm beige skin, rust jacket, reaching toward the basket", [0.60, 0.28, 0.87, 0.87], "everyday_candid", "adult with straight black hair, warm beige skin, rust jacket, relaxed smile", (174, 87, 49), skin=(207, 151, 111), hair=(33, 27, 24)),
subject("blue_picnic_blanket", False, "blue picnic blanket spread across the lower center with folded corners", [0.24, 0.62, 0.78, 0.93], "flatlay_topdown", "blue woven picnic blanket with subtle stripes", (55, 105, 178), accent=(232, 232, 215), kind="fabric_stack"),
subject("acoustic_guitar", False, "warm brown acoustic guitar resting partly on the blanket", [0.39, 0.45, 0.65, 0.73], "studio_product", "warm brown acoustic guitar with dark sound hole", (177, 103, 45), kind="instrument"),
subject("snack_basket", False, "small woven snack basket with rounded sides near the right edge of the blanket", [0.68, 0.56, 0.91, 0.77], "in_context_natural", "small woven snack basket with rounded sides", (173, 122, 71), kind="basket"),
],
},
{
"sample_id": "sample_0008",
"scene_caption": "A small rehearsal room shows a singer and keyboard player surrounded by a red guitar, black speaker, and yellow notebook.",
"story": "The group is between takes, listening for the next cue. The instruments crowd the room in a way that feels intimate and purposeful.",
"background": "Cozy music practice room, dark acoustic wall panels, amber lamps, polished floor.",
"style": "photorealistic",
"canvas_size": list(CANVAS_SIZE),
"background_kind": "music_room",
"bg_top": (86, 76, 86),
"bg_bottom": (103, 82, 65),
"subjects": [
subject("singer_green_shirt", True, "adult with short brown hair, light skin, green shirt, standing near a microphone", [0.14, 0.16, 0.38, 0.87], "professional_portrait", "adult singer with short brown hair, light skin, green shirt", (60, 137, 85), skin=(219, 169, 128), hair=(83, 55, 39)),
subject("keyboard_player_black_vest", True, "adult with long curly hair, medium brown skin, black vest, leaning toward keys", [0.57, 0.22, 0.85, 0.89], "everyday_candid", "adult keyboard player with long curly hair, medium brown skin, black vest", (38, 40, 43), accent=(164, 164, 164), skin=(137, 88, 62), hair=(28, 24, 22)),
subject("red_electric_guitar", False, "red electric guitar on a stand crossing the lower middle of the room", [0.35, 0.42, 0.60, 0.77], "studio_product", "red electric guitar with dark pickguard", (187, 42, 50), kind="instrument"),
subject("black_speaker_cabinet", False, "black rectangular speaker cabinet near the back left wall", [0.05, 0.49, 0.24, 0.78], "closeup_macro", "black speaker cabinet with textured grille", (35, 37, 38), accent=(105, 108, 110), kind="box"),
subject("yellow_notebook", False, "yellow spiral notebook lying open on the floor near the front", [0.58, 0.68, 0.80, 0.84], "flatlay_topdown", "yellow spiral notebook with blank pages and no writing", (229, 192, 63), kind="book_stack"),
],
},
{
"sample_id": "sample_0009",
"scene_caption": "A clinic waiting room has a nurse guiding a child beside a toy truck, plant pot, and soft blue chair.",
"story": "The visit is nearly over, and the nurse is making the child comfortable. The toys and bright chair soften the clinical setting.",
"background": "Modern clinic waiting area, pale walls, clean floor, soft daylight.",
"style": "photorealistic",
"canvas_size": list(CANVAS_SIZE),
"background_kind": "clinic",
"bg_top": (224, 234, 230),
"bg_bottom": (169, 190, 188),
"subjects": [
subject("nurse_blue_scrubs", True, "adult with dark braided hair, brown skin, blue scrubs, kneeling with one hand extended", [0.10, 0.19, 0.38, 0.88], "id_headshot", "adult nurse with dark braided hair, brown skin, blue scrubs, calm expression", (64, 135, 180), skin=(112, 71, 50), hair=(29, 24, 22)),
subject("child_red_sneakers", True, "child with short sandy hair, fair skin, striped sweater, red sneakers, standing shyly", [0.45, 0.29, 0.64, 0.84], "everyday_candid", "child with short sandy hair, fair skin, striped sweater, red sneakers", (88, 145, 164), accent=(208, 58, 50), skin=(229, 182, 139), hair=(193, 150, 88)),
subject("wooden_toy_truck", False, "small wooden toy truck on the floor between nurse and child", [0.34, 0.68, 0.55, 0.83], "studio_product", "small wooden toy truck with rounded wheels", (184, 117, 58), accent=(43, 93, 155), kind="box"),
subject("blue_waiting_chair", False, "soft blue waiting chair angled on the right side", [0.66, 0.41, 0.92, 0.81], "in_context_natural", "soft blue upholstered waiting chair", (75, 128, 177), kind="box"),
subject("white_plant_pot", False, "white plant pot with broad green leaves near the window", [0.75, 0.14, 0.93, 0.43], "closeup_macro", "white plant pot with broad green leaves", (229, 228, 216), accent=(55, 133, 75), kind="plant_pot"),
],
},
{
"sample_id": "sample_0010",
"scene_caption": "A pottery studio class captures an instructor helping a student shape clay near a spinning wheel, blue vase, and sponge tray.",
"story": "Wet clay is on the table and the lesson is hands-on. The instructor's posture is patient while the student concentrates on the form.",
"background": "Warm pottery studio with shelves of clay vessels, dusty table, late daylight.",
"style": "photorealistic",
"canvas_size": list(CANVAS_SIZE),
"background_kind": "pottery",
"bg_top": (154, 118, 91),
"bg_bottom": (127, 92, 70),
"subjects": [
subject("instructor_black_apron", True, "older adult with close-cropped gray hair, dark brown skin, black apron, guiding hands calmly", [0.09, 0.20, 0.37, 0.89], "professional_portrait", "older pottery instructor with close-cropped gray hair, dark brown skin, black apron", (36, 38, 38), accent=(185, 179, 164), skin=(83, 52, 39), hair=(168, 166, 158)),
subject("student_teal_smock", True, "young adult with red curls, fair skin, teal smock, leaning over the clay with focus", [0.48, 0.24, 0.76, 0.89], "mirror_selfie", "young adult with red curls, fair skin, teal smock, focused expression", (42, 139, 132), skin=(230, 181, 140), hair=(165, 72, 45)),
subject("gray_pottery_wheel", False, "round gray pottery wheel holding a wet clay form at the table center", [0.31, 0.55, 0.57, 0.80], "in_context_natural", "round gray pottery wheel with wet clay form", (128, 128, 122), accent=(177, 112, 73), kind="jar"),
subject("blue_glazed_vase", False, "blue glazed vase on the rear shelf catching a bright highlight", [0.68, 0.33, 0.85, 0.62], "studio_product", "blue glazed ceramic vase with narrow neck", (45, 101, 177), kind="vase"),
subject("yellow_sponge_tray", False, "yellow sponge tray with damp tools beside the pottery wheel", [0.55, 0.63, 0.79, 0.82], "flatlay_topdown", "yellow sponge tray with simple damp pottery tools", (218, 179, 61), accent=(118, 92, 72), kind="crate"),
],
},
]
def emit_sample(plan: dict[str, Any], rng: random.Random) -> dict[str, Any]:
    """Render one planned sample to disk and return its dataset row.

    Under ``OUT_DIR / plan["sample_id"]`` this creates the ``references`` and
    ``overlays`` folders, draws the layout sketch, one reference image per
    subject, the main image and three overlays, and writes ``plan.json``,
    ``detections.json``, ``identity_verification.json`` and ``row.json``.

    Detection and identity verification are simulated: the measured box is
    the intended box passed through ``jitter_bbox`` and the identity score is
    drawn uniformly from [0.82, 0.96], both using ``rng``.

    Args:
        plan: Sample plan; read keys are ``sample_id``, ``scene_caption``,
            ``story``, ``background``, ``style``, ``canvas_size`` and
            ``subjects`` (each subject provides ``name``, ``is_person``,
            ``ref_style``, ``sub_caption`` and ``intended_bbox``).
        rng: Random source shared by reference drawing, box jitter and
            score sampling; the order of draws determines the output.

    Returns:
        The row dictionary that is also written to ``row.json``. Every
        planned subject is listed under ``accepted`` and ``dropped`` is
        always empty.
    """
    sample_dir = OUT_DIR / plan["sample_id"]
    refs_dir = sample_dir / "references"
    overlays_dir = sample_dir / "overlays"
    for folder in (refs_dir, overlays_dir):
        folder.mkdir(parents=True, exist_ok=True)

    def write_json(filename: str, payload: Any) -> None:
        # All per-sample JSON artifacts share the same pretty-printed format.
        (sample_dir / filename).write_text(json.dumps(payload, indent=2) + "\n")

    # Rendering order matters: draw_reference consumes values from rng before
    # the jitter/score draws below.
    main_path = sample_dir / "main_image.png"
    draw_layout_sketch(plan, sample_dir / "layout_sketch.png")
    for subj in plan["subjects"]:
        draw_reference(subj, refs_dir / f"ref_{subj['name']}.png", rng)
    draw_main(plan, main_path)

    accepted: list[dict[str, Any]] = []
    detection_rows: list[dict[str, Any]] = []
    verification_rows: list[dict[str, Any]] = []
    for subj in plan["subjects"]:
        name = subj["name"]
        intended = subj["intended_bbox"]
        # Keep the jitter draw ahead of the score draw so the random stream
        # is consumed in the same sequence.
        measured = jitter_bbox(intended, rng)
        identity_score = round(rng.uniform(0.82, 0.96), 3)
        overlap = iou(intended, measured)
        verdict = "weak_match" if identity_score < 0.70 else "match"

        accepted.append(
            {
                "name": name,
                "is_person": subj["is_person"],
                "ref_style": subj["ref_style"],
                "sub_caption": subj["sub_caption"],
                "intended_bbox": intended,
                "measured_bbox": measured,
                "iou_intended_vs_measured": overlap,
                "layout_followed": overlap >= IOU_THRESHOLD,
                "identity_score": identity_score,
                "identity_verdict": verdict,
                "ref_image": f"references/ref_{name}.png",
            }
        )
        detection_rows.append(
            {
                "name": name,
                "present": True,
                "bbox": measured,
                "confidence": "high",
                "notes": "Synthetic renderer places this subject directly from the planned layout.",
            }
        )
        verification_rows.append(
            {
                "name": name,
                "score": identity_score,
                "verdict": verdict,
                "rationale": "Reference and main rendering share the same generated subject attributes.",
            }
        )

    # Overlay entries carry both boxes plus the identity-gate flag.
    overlay_subjects = [
        {
            "name": entry["name"],
            "intended_bbox": entry["intended_bbox"],
            "measured_bbox": entry["measured_bbox"],
            "accepted": entry["identity_score"] >= IDENTITY_THRESHOLD,
        }
        for entry in accepted
    ]
    overlay_specs = (
        ("intended_bbox", "overlay_intended.png", {}),
        ("measured_bbox", "overlay_measured.png", {}),
        ("measured_bbox", "overlay_accepted.png", {"accepted_only": True}),
    )
    for bbox_key, filename, extra in overlay_specs:
        draw_overlay(main_path, overlay_subjects, bbox_key, overlays_dir / filename, **extra)

    row = {
        "sample_id": plan["sample_id"],
        "scene_caption": plan["scene_caption"],
        "story": plan["story"],
        "background": plan["background"],
        "style": plan["style"],
        "canvas_size": plan["canvas_size"],
        "main_image": "main_image.png",
        "layout_sketch": "layout_sketch.png",
        "n_planned": len(plan["subjects"]),
        "n_accepted": len(accepted),
        "accepted": accepted,
        "dropped": [],
    }

    # Only these plan keys are exported; renderer-only keys are left out.
    public_keys = frozenset(
        (
            "sample_id",
            "scene_caption",
            "story",
            "background",
            "style",
            "canvas_size",
            "subjects",
        )
    )
    plan_public = {key: value for key, value in plan.items() if key in public_keys}

    write_json("plan.json", plan_public)
    write_json("detections.json", detection_rows)
    write_json("identity_verification.json", verification_rows)
    write_json("row.json", row)
    return row
def main() -> None:
    """Generate every planned sample and write the dataset-level files.

    Seeds the global ``random`` module, then gives each plan returned by
    ``build_plans()`` its own ``random.Random`` seeded with the base seed
    plus its 1-based position. Writes ``dataset.json``, ``dataset.jsonl``
    and ``README.md`` into ``OUT_DIR`` and prints a one-line summary.
    """
    base_seed = 20260605
    random.seed(base_seed)
    OUT_DIR.mkdir(exist_ok=True)

    rows = [
        emit_sample(plan, random.Random(base_seed + position))
        for position, plan in enumerate(build_plans(), start=1)
    ]

    # Pretty-printed array for humans, compact one-row-per-line for tooling.
    (OUT_DIR / "dataset.json").write_text(json.dumps(rows, indent=2) + "\n")
    with (OUT_DIR / "dataset.jsonl").open("w") as handle:
        handle.writelines(
            json.dumps(row, separators=(",", ":")) + "\n" for row in rows
        )

    readme_text = (
        "# 10 Sample Four-Element Image Dataset\n\n"
        "This directory contains 10 generated samples following `data_recipe.md`.\n"
        "Each `sample_XXXX` folder includes a composed `main_image.png`, independent "
        "subject references in `references/`, a `layout_sketch.png`, overlay images, "
        "and the emitted dataset row in `row.json`.\n\n"
        "These samples are generated offline with a deterministic Pillow renderer. "
        "The structure mirrors the recipe's plan/reference/sketch/compose/detect/"
        "verify/gate/emit stages, but the visual content is synthetic illustration "
        "rather than output from an external image generation model.\n"
    )
    (OUT_DIR / "README.md").write_text(readme_text)

    print(f"generated {len(rows)} samples in {OUT_DIR}")


# Run the generator only when executed as a script, not on import.
if __name__ == "__main__":
    main()