from __future__ import annotations import json import math import random from pathlib import Path from typing import Any from PIL import Image, ImageDraw, ImageFilter, ImageFont CANVAS_SIZE = (1024, 1024) OUT_DIR = Path("10samples") IDENTITY_THRESHOLD = 0.55 IOU_THRESHOLD = 0.10 def font(size: int, bold: bool = False) -> ImageFont.ImageFont: candidates = [ "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf" if bold else "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", "/usr/share/fonts/dejavu/DejaVuSans-Bold.ttf" if bold else "/usr/share/fonts/dejavu/DejaVuSans.ttf", ] for path in candidates: try: return ImageFont.truetype(path, size) except OSError: pass return ImageFont.load_default() FONT_18 = font(18) FONT_24 = font(24) FONT_32 = font(32, bold=True) def clamp(value: float, low: float = 0.0, high: float = 1.0) -> float: return max(low, min(high, value)) def bbox_px(bbox: list[float], size: tuple[int, int] = CANVAS_SIZE) -> tuple[int, int, int, int]: w, h = size return ( int(round(clamp(bbox[0]) * w)), int(round(clamp(bbox[1]) * h)), int(round(clamp(bbox[2]) * w)), int(round(clamp(bbox[3]) * h)), ) def normalize_bbox(box: tuple[int, int, int, int], size: tuple[int, int] = CANVAS_SIZE) -> list[float]: w, h = size x1, y1, x2, y2 = box vals = [x1 / w, y1 / h, x2 / w, y2 / h] return [round(clamp(v), 4) for v in vals] def jitter_bbox(bbox: list[float], rng: random.Random) -> list[float]: x1, y1, x2, y2 = bbox width = x2 - x1 height = y2 - y1 dx = rng.uniform(-0.018, 0.018) * width dy = rng.uniform(-0.018, 0.018) * height grow_x = rng.uniform(-0.018, 0.024) * width grow_y = rng.uniform(-0.018, 0.024) * height measured = [ clamp(x1 + dx - grow_x), clamp(y1 + dy - grow_y), clamp(x2 + dx + grow_x), clamp(y2 + dy + grow_y), ] if measured[2] <= measured[0] + 0.01: measured[2] = clamp(measured[0] + 0.01) if measured[3] <= measured[1] + 0.01: measured[3] = clamp(measured[1] + 0.01) return [round(v, 4) for v in measured] def iou(a: list[float], b: list[float]) -> float: ax1, ay1, ax2, ay2 = a bx1, by1, bx2, by2 = b ix1, iy1 = max(ax1, bx1), max(ay1, by1) ix2, iy2 = min(ax2, bx2), min(ay2, by2) iw, ih = max(0.0, ix2 - ix1), max(0.0, iy2 - iy1) inter = iw * ih area_a = max(0.0, ax2 - ax1) * max(0.0, ay2 - ay1) area_b = max(0.0, bx2 - bx1) * max(0.0, by2 - by1) denom = area_a + area_b - inter return 0.0 if denom == 0 else round(inter / denom, 4) def lighten(color: tuple[int, int, int], amount: float) -> tuple[int, int, int]: return tuple(int(c + (255 - c) * amount) for c in color) def darken(color: tuple[int, int, int], amount: float) -> tuple[int, int, int]: return tuple(int(c * (1 - amount)) for c in color) def gradient(size: tuple[int, int], top: tuple[int, int, int], bottom: tuple[int, int, int]) -> Image.Image: w, h = size img = Image.new("RGB", size, top) draw = ImageDraw.Draw(img) for y in range(h): t = y / max(1, h - 1) color = tuple(int(top[i] * (1 - t) + bottom[i] * t) for i in range(3)) draw.line([(0, y), (w, y)], fill=color) return img def paste_shadow(base: Image.Image, layer: Image.Image, offset: tuple[int, int] = (8, 10), blur: int = 12) -> None: alpha = layer.split()[-1] shadow = Image.new("RGBA", layer.size, (0, 0, 0, 0)) shadow.putalpha(alpha.filter(ImageFilter.GaussianBlur(blur))) sx, sy = offset base.alpha_composite(shadow, (sx, sy)) base.alpha_composite(layer) def rounded_rect(draw: ImageDraw.ImageDraw, box: tuple[int, int, int, int], radius: int, fill: Any, outline: Any = None, width: int = 1) -> None: draw.rounded_rectangle(box, radius=radius, fill=fill, outline=outline, width=width) def draw_background(draw: ImageDraw.ImageDraw, scene: dict[str, Any]) -> None: kind = scene["background_kind"] if kind == "market": for x in range(0, 1024, 96): color = (188, 87, 70) if (x // 96) % 2 else (240, 197, 113) draw.polygon([(x, 0), (x + 96, 0), (x + 70, 110), (x + 26, 110)], fill=color) draw.rectangle((0, 672, 1024, 1024), fill=(126, 113, 91)) for x in range(-80, 1024, 160): draw.line((x, 720, x + 260, 1024), fill=(100, 91, 76), width=5) draw.rectangle((0, 545, 1024, 650), fill=(99, 73, 48)) elif kind == "kitchen": draw.rectangle((0, 0, 1024, 460), fill=(206, 222, 224)) for x in range(0, 1024, 128): draw.rectangle((x, 0, x + 126, 458), outline=(178, 198, 201), width=2) draw.rectangle((0, 560, 1024, 1024), fill=(189, 155, 105)) for x in range(-150, 1200, 150): draw.line((x, 560, x + 210, 1024), fill=(159, 128, 84), width=4) draw.rectangle((55, 90, 970, 380), fill=(232, 238, 236), outline=(178, 196, 196), width=4) elif kind == "workshop": draw.rectangle((0, 0, 1024, 1024), fill=(88, 91, 87)) for y in range(110, 640, 110): draw.line((0, y, 1024, y), fill=(67, 70, 68), width=7) draw.rectangle((0, 640, 1024, 1024), fill=(107, 94, 75)) for x in range(70, 1000, 170): draw.rectangle((x, 80, x + 24, 520), fill=(54, 57, 55)) draw.rectangle((50, 560, 974, 700), fill=(73, 58, 43)) elif kind == "library": draw.rectangle((0, 0, 1024, 1024), fill=(119, 82, 55)) for y in (84, 250, 416, 582): draw.rectangle((0, y, 1024, y + 18), fill=(83, 55, 37)) rng = random.Random(12) for x in range(18, 1010, 30): for y in (105, 271, 437): h = rng.randint(88, 132) draw.rectangle((x, y, x + rng.randint(14, 24), y + h), fill=rng.choice([(156, 53, 58), (61, 97, 130), (205, 160, 74), (84, 123, 92)])) draw.rectangle((0, 650, 1024, 1024), fill=(156, 127, 86)) elif kind == "greenhouse": draw.rectangle((0, 0, 1024, 1024), fill=(176, 209, 199)) for x in range(0, 1025, 128): draw.line((x, 0, x + 180, 650), fill=(123, 153, 146), width=5) draw.rectangle((0, 615, 1024, 1024), fill=(104, 126, 87)) for _ in range(70): x = random.randint(0, 1024) y = random.randint(490, 1024) draw.ellipse((x - 18, y - 8, x + 18, y + 8), fill=random.choice([(45, 117, 72), (62, 145, 82), (86, 121, 69)])) elif kind == "laundromat": draw.rectangle((0, 0, 1024, 1024), fill=(202, 213, 218)) for x in range(55, 985, 155): draw.rectangle((x, 100, x + 116, 270), fill=(164, 178, 187), outline=(100, 117, 128), width=5) draw.ellipse((x + 22, 122, x + 94, 194), fill=(107, 139, 162), outline=(230, 238, 241), width=5) draw.rectangle((0, 560, 1024, 1024), fill=(132, 149, 156)) draw.rectangle((0, 440, 1024, 540), fill=(176, 188, 194)) elif kind == "picnic": draw.rectangle((0, 0, 1024, 490), fill=(139, 185, 216)) draw.ellipse((-120, 170, 1140, 870), fill=(90, 153, 86)) draw.rectangle((0, 735, 1024, 1024), fill=(79, 133, 74)) for _ in range(90): x = random.randint(0, 1024) y = random.randint(500, 1010) draw.line((x, y, x + 16, y - 20), fill=(55, 110, 58), width=2) elif kind == "music_room": draw.rectangle((0, 0, 1024, 1024), fill=(84, 74, 82)) draw.rectangle((0, 0, 1024, 260), fill=(62, 53, 63)) for x in range(80, 950, 180): draw.ellipse((x, 58, x + 70, 128), fill=(235, 196, 105)) draw.line((x + 35, 128, x + 20, 260), fill=(235, 196, 105), width=5) draw.rectangle((0, 660, 1024, 1024), fill=(115, 89, 70)) elif kind == "clinic": draw.rectangle((0, 0, 1024, 1024), fill=(219, 229, 225)) for x in range(0, 1024, 128): draw.line((x, 0, x, 600), fill=(195, 209, 205), width=2) draw.rectangle((0, 610, 1024, 1024), fill=(174, 193, 190)) draw.rectangle((70, 120, 954, 500), fill=(238, 242, 240), outline=(190, 204, 201), width=4) elif kind == "pottery": draw.rectangle((0, 0, 1024, 1024), fill=(153, 117, 91)) for y in range(90, 560, 145): draw.rectangle((0, y, 1024, y + 18), fill=(107, 79, 60)) draw.rectangle((0, 650, 1024, 1024), fill=(128, 91, 68)) for x in range(20, 1024, 140): draw.ellipse((x, 135, x + 90, 200), fill=(185, 126, 78), outline=(99, 73, 52), width=4) def draw_person(layer: Image.Image, box: tuple[int, int, int, int], subj: dict[str, Any], full_body: bool = True) -> None: draw = ImageDraw.Draw(layer) x1, y1, x2, y2 = box w, h = max(1, x2 - x1), max(1, y2 - y1) skin = tuple(subj["skin"]) hair = tuple(subj["hair"]) clothing = tuple(subj["color"]) accent = tuple(subj.get("accent", lighten(clothing, 0.35))) cx = x1 + w // 2 head_r = max(14, int(min(w, h) * (0.14 if full_body else 0.22))) head_cy = y1 + int(h * (0.18 if full_body else 0.32)) body_top = head_cy + head_r - 4 body_bottom = y2 - int(h * 0.10) shoulder_w = int(w * (0.34 if full_body else 0.48)) hip_w = int(w * (0.24 if full_body else 0.36)) draw.ellipse((cx - head_r - 5, head_cy - head_r - 9, cx + head_r + 5, head_cy + head_r + 5), fill=hair) draw.ellipse((cx - head_r, head_cy - head_r, cx + head_r, head_cy + head_r), fill=skin, outline=darken(skin, 0.25), width=2) draw.arc((cx - head_r // 2, head_cy - 2, cx + head_r // 2, head_cy + head_r // 2), 10, 170, fill=darken(skin, 0.45), width=2) draw.ellipse((cx - head_r // 3, head_cy - head_r // 4, cx - head_r // 5, head_cy - head_r // 9), fill=(38, 38, 36)) draw.ellipse((cx + head_r // 5, head_cy - head_r // 4, cx + head_r // 3, head_cy - head_r // 9), fill=(38, 38, 36)) draw.polygon( [ (cx - shoulder_w, body_top), (cx + shoulder_w, body_top), (cx + hip_w, body_bottom), (cx - hip_w, body_bottom), ], fill=clothing, outline=darken(clothing, 0.25), ) draw.line((cx - shoulder_w + 4, body_top + 12, x1 + int(w * 0.13), y1 + int(h * 0.58)), fill=skin, width=max(5, w // 14)) draw.line((cx + shoulder_w - 4, body_top + 12, x2 - int(w * 0.13), y1 + int(h * 0.58)), fill=skin, width=max(5, w // 14)) if full_body: leg_top = body_bottom - 2 draw.line((cx - hip_w // 2, leg_top, cx - int(w * 0.18), y2 - 4), fill=darken(clothing, 0.42), width=max(7, w // 12)) draw.line((cx + hip_w // 2, leg_top, cx + int(w * 0.18), y2 - 4), fill=darken(clothing, 0.42), width=max(7, w // 12)) draw.line((cx - shoulder_w + 5, body_top + int(h * 0.12), cx + shoulder_w - 5, body_top + int(h * 0.12)), fill=accent, width=max(3, h // 42)) def draw_object(layer: Image.Image, box: tuple[int, int, int, int], subj: dict[str, Any]) -> None: draw = ImageDraw.Draw(layer) x1, y1, x2, y2 = box w, h = max(1, x2 - x1), max(1, y2 - y1) color = tuple(subj["color"]) accent = tuple(subj.get("accent", lighten(color, 0.35))) kind = subj.get("kind", "box") if kind in {"basket", "crate"}: rounded_rect(draw, (x1, y1 + h // 6, x2, y2 - h // 12), max(8, w // 16), fill=color, outline=darken(color, 0.35), width=max(3, w // 30)) for i in range(4): yy = y1 + h // 4 + i * h // 7 draw.line((x1 + 8, yy, x2 - 8, yy), fill=darken(color, 0.28), width=3) for i in range(5): xx = x1 + w // 7 + i * w // 7 draw.line((xx, y1 + h // 5, xx, y2 - h // 8), fill=lighten(color, 0.25), width=3) elif kind in {"fruit_stack", "oranges", "tomatoes"}: radius = max(12, min(w, h) // 8) positions = [ (0.25, 0.68), (0.40, 0.52), (0.56, 0.66), (0.69, 0.48), (0.48, 0.32), (0.22, 0.38), (0.74, 0.72), (0.58, 0.22), ] for px, py in positions: cx, cy = x1 + int(px * w), y1 + int(py * h) draw.ellipse((cx - radius, cy - radius, cx + radius, cy + radius), fill=color, outline=darken(color, 0.28), width=2) draw.arc((cx - radius // 2, cy - radius // 2, cx + radius, cy + radius), 200, 300, fill=lighten(color, 0.45), width=2) elif kind == "umbrella": draw.pieslice((x1, y1, x2, y1 + int(h * 0.9)), 180, 360, fill=color, outline=darken(color, 0.35), width=max(3, w // 40)) for i in range(1, 5): xx = x1 + i * w // 5 draw.line((x1 + w // 2, y1 + h // 4, xx, y1 + int(h * 0.45)), fill=darken(color, 0.25), width=2) draw.line((x1 + w // 2, y1 + h // 3, x1 + w // 2, y2), fill=darken(color, 0.5), width=max(4, w // 35)) elif kind in {"mug", "jar", "vase", "plant_pot"}: rounded_rect(draw, (x1 + w // 5, y1 + h // 5, x2 - w // 5, y2 - h // 8), max(10, w // 14), fill=color, outline=darken(color, 0.3), width=max(3, w // 35)) draw.ellipse((x1 + w // 5, y1 + h // 8, x2 - w // 5, y1 + h // 3), fill=lighten(color, 0.2), outline=darken(color, 0.25), width=2) if kind == "mug": draw.arc((x2 - w // 3, y1 + h // 3, x2 - w // 15, y1 + h * 2 // 3), -80, 95, fill=darken(color, 0.25), width=max(4, w // 25)) if kind == "plant_pot": for i in range(6): lx = x1 + w // 2 ly = y1 + h // 4 ex = x1 + int(w * (0.15 + 0.14 * i)) ey = y1 + int(h * (0.05 + 0.07 * (i % 2))) draw.line((lx, ly, ex, ey), fill=(44, 116, 65), width=max(3, w // 36)) draw.ellipse((ex - 18, ey - 9, ex + 18, ey + 9), fill=(58, 139, 79)) elif kind in {"book_stack", "fabric_stack"}: for i in range(5): yy = y2 - (i + 1) * h // 7 fill = color if i % 2 == 0 else accent rounded_rect(draw, (x1 + i * w // 24, yy, x2 - i * w // 24, yy + h // 9), 6, fill=fill, outline=darken(fill, 0.3), width=2) elif kind == "lamp": draw.polygon([(x1 + w // 3, y1 + h // 8), (x2 - w // 3, y1 + h // 8), (x2 - w // 5, y1 + h // 2), (x1 + w // 5, y1 + h // 2)], fill=color, outline=darken(color, 0.25)) draw.line((x1 + w // 2, y1 + h // 2, x1 + w // 2, y2 - h // 8), fill=darken(color, 0.55), width=max(5, w // 30)) draw.ellipse((x1 + w // 4, y2 - h // 5, x2 - w // 4, y2 - h // 12), fill=darken(color, 0.25)) elif kind == "instrument": draw.ellipse((x1 + w // 4, y1 + h // 3, x2 - w // 6, y2 - h // 8), fill=color, outline=darken(color, 0.35), width=max(3, w // 35)) draw.ellipse((x1 + w // 3, y1 + h // 2, x1 + w // 2, y1 + h * 2 // 3), fill=darken(color, 0.45)) draw.rectangle((x1 + w // 12, y1 + h // 6, x1 + w // 3, y1 + h // 4), fill=darken(color, 0.35)) for i in range(4): y = y1 + h // 5 + i * h // 35 draw.line((x1 + w // 10, y, x2 - w // 5, y + h // 3), fill=(235, 228, 193), width=1) elif kind == "bicycle": draw.ellipse((x1, y1 + h // 2, x1 + w // 3, y2), outline=darken(color, 0.35), width=max(5, w // 35)) draw.ellipse((x2 - w // 3, y1 + h // 2, x2, y2), outline=darken(color, 0.35), width=max(5, w // 35)) draw.line((x1 + w // 6, y1 + h * 3 // 4, x1 + w // 2, y1 + h // 3), fill=color, width=max(5, w // 35)) draw.line((x1 + w // 2, y1 + h // 3, x2 - w // 6, y1 + h * 3 // 4), fill=color, width=max(5, w // 35)) draw.line((x1 + w // 6, y1 + h * 3 // 4, x2 - w // 6, y1 + h * 3 // 4), fill=color, width=max(5, w // 35)) elif kind == "dog": draw.ellipse((x1 + w // 5, y1 + h // 3, x2 - w // 8, y2 - h // 6), fill=color, outline=darken(color, 0.3), width=3) draw.ellipse((x1, y1 + h // 5, x1 + w // 3, y1 + h // 2), fill=color, outline=darken(color, 0.3), width=3) draw.polygon([(x1 + w // 10, y1 + h // 5), (x1 + w // 4, y1 + h // 9), (x1 + w // 5, y1 + h // 3)], fill=darken(color, 0.18)) draw.line((x2 - w // 7, y1 + h // 2, x2, y1 + h // 4), fill=color, width=max(5, w // 20)) for lx in (x1 + w // 3, x1 + w * 2 // 3): draw.line((lx, y2 - h // 4, lx - w // 18, y2), fill=darken(color, 0.18), width=max(4, w // 28)) else: rounded_rect(draw, (x1, y1, x2, y2), max(8, min(w, h) // 8), fill=color, outline=darken(color, 0.28), width=max(3, w // 35)) draw.line((x1 + w // 7, y1 + h // 4, x2 - w // 7, y1 + h // 4), fill=accent, width=max(3, h // 30)) def draw_subject(base: Image.Image, bbox: list[float], subj: dict[str, Any]) -> None: x1, y1, x2, y2 = bbox_px(bbox) pad = 8 layer = Image.new("RGBA", CANVAS_SIZE, (0, 0, 0, 0)) draw_box = (x1 + pad, y1 + pad, x2 - pad, y2 - pad) if subj["is_person"]: draw_person(layer, draw_box, subj, full_body=True) else: draw_object(layer, draw_box, subj) paste_shadow(base, layer, offset=(0, 0), blur=10) def draw_reference(subj: dict[str, Any], path: Path, rng: random.Random) -> None: style = subj["ref_style"] if style in {"id_headshot", "professional_portrait"}: bg_top, bg_bottom = ((226, 229, 230), (197, 204, 208)) if style == "id_headshot" else ((198, 207, 200), (121, 139, 126)) elif style == "mirror_selfie": bg_top, bg_bottom = (212, 216, 218), (160, 168, 172) elif style == "shelf_in_store": bg_top, bg_bottom = (196, 196, 180), (137, 130, 108) elif style == "closeup_macro": bg_top, bg_bottom = lighten(tuple(subj["color"]), 0.65), darken(tuple(subj["color"]), 0.20) elif style == "flatlay_topdown": bg_top, bg_bottom = (224, 222, 213), (199, 196, 186) elif style == "in_context_natural": bg_top, bg_bottom = (190, 206, 196), (126, 143, 128) else: bg_top, bg_bottom = (235, 235, 232), (210, 211, 207) img = gradient(CANVAS_SIZE, bg_top, bg_bottom).convert("RGBA") d = ImageDraw.Draw(img) if style == "mirror_selfie": rounded_rect(d, (176, 80, 848, 944), 28, fill=(227, 229, 229), outline=(98, 106, 112), width=16) rounded_rect(d, (230, 138, 794, 890), 18, fill=(198, 205, 207), outline=(170, 176, 180), width=5) d.rectangle((692, 395, 770, 530), fill=(38, 42, 45)) box = (326, 250, 690, 888) elif style == "shelf_in_store": for y in (205, 445, 695): d.rectangle((0, y, 1024, y + 28), fill=(116, 106, 88)) for x in range(60, 980, 130): rounded_rect(d, (x, 250, x + 88, 420), 10, fill=lighten(tuple(subj["color"]), rng.uniform(0.1, 0.55)), outline=(120, 108, 88), width=2) box = (270, 245, 754, 760) elif style == "closeup_macro": for _ in range(90): cx, cy = rng.randint(0, 1024), rng.randint(0, 1024) r = rng.randint(7, 28) fill = lighten(tuple(subj["color"]), rng.uniform(0.05, 0.45)) + (70,) d.ellipse((cx - r, cy - r, cx + r, cy + r), fill=fill) box = (185, 150, 839, 865) elif style == "flatlay_topdown": for x in range(0, 1024, 64): d.line((x, 0, x, 1024), fill=(204, 201, 192), width=1) for y in range(0, 1024, 64): d.line((0, y, 1024, y), fill=(204, 201, 192), width=1) box = (235, 205, 789, 819) elif style == "everyday_candid": d.rectangle((0, 630, 1024, 1024), fill=(139, 145, 134)) d.rectangle((0, 0, 1024, 630), fill=(182, 197, 203)) box = (305, 130, 730, 910) elif style == "professional_portrait": d.rectangle((0, 680, 1024, 1024), fill=(98, 99, 88)) d.ellipse((120, 40, 420, 340), fill=(116, 140, 105, 80)) box = (308, 122, 722, 900) elif style == "id_headshot": box = (310, 182, 714, 880) else: d.ellipse((120, 90, 904, 870), fill=(255, 255, 255, 72)) box = (245, 185, 779, 825) if subj["is_person"]: draw_person(img, box, subj, full_body=style not in {"id_headshot", "professional_portrait"}) else: draw_object(img, box, subj) img.convert("RGB").save(path) def draw_layout_sketch(plan: dict[str, Any], path: Path) -> None: img = Image.new("RGBA", CANVAS_SIZE, (248, 248, 245, 255)) d = ImageDraw.Draw(img) for x in range(0, 1025, 128): d.line((x, 0, x, 1024), fill=(220, 220, 216), width=1) for y in range(0, 1025, 128): d.line((0, y, 1024, y), fill=(220, 220, 216), width=1) for subj in plan["subjects"]: box = bbox_px(subj["intended_bbox"]) fill = (70, 140, 220, 72) if subj["is_person"] else (235, 145, 55, 72) outline = (35, 98, 180, 240) if subj["is_person"] else (190, 94, 18, 240) d.rectangle(box, fill=fill, outline=outline, width=4) label = subj["name"] tb = d.textbbox((0, 0), label, font=FONT_24) d.rectangle((box[0] + 6, box[1] + 6, box[0] + 18 + tb[2], box[1] + 40), fill=(255, 255, 255, 220)) d.text((box[0] + 12, box[1] + 9), label, fill=(32, 32, 32), font=FONT_24) img.convert("RGB").save(path) def draw_overlay(main_image: Path, subjects: list[dict[str, Any]], key: str, path: Path, accepted_only: bool = False) -> None: img = Image.open(main_image).convert("RGBA") d = ImageDraw.Draw(img) for subj in subjects: if accepted_only and not subj.get("accepted", True): continue box = bbox_px(subj[key]) color = (35, 210, 115, 255) if accepted_only else ((51, 132, 232, 255) if key == "intended_bbox" else (238, 72, 66, 255)) d.rectangle(box, outline=color, width=5) label = subj["name"] tb = d.textbbox((0, 0), label, font=FONT_18) d.rectangle((box[0], max(0, box[1] - 28), box[0] + tb[2] + 12, max(24, box[1] - 2)), fill=(0, 0, 0, 170)) d.text((box[0] + 6, max(0, box[1] - 27)), label, fill=(255, 255, 255), font=FONT_18) img.convert("RGB").save(path) def draw_main(plan: dict[str, Any], path: Path) -> None: img = gradient(CANVAS_SIZE, tuple(plan["bg_top"]), tuple(plan["bg_bottom"])).convert("RGBA") d = ImageDraw.Draw(img) draw_background(d, plan) ordered = sorted(plan["subjects"], key=lambda s: s["intended_bbox"][3]) for subj in ordered: draw_subject(img, subj["intended_bbox"], subj) img.convert("RGB").save(path) def subject( name: str, is_person: bool, sub_caption: str, bbox: list[float], ref_style: str, ref_prompt: str, color: tuple[int, int, int], *, accent: tuple[int, int, int] | None = None, skin: tuple[int, int, int] = (171, 119, 82), hair: tuple[int, int, int] = (50, 38, 32), kind: str = "box", ) -> dict[str, Any]: item = { "name": name, "is_person": is_person, "sub_caption": sub_caption, "intended_bbox": bbox, "ref_style": ref_style, "ref_prompt": ref_prompt, "color": color, "accent": accent or lighten(color, 0.35), "kind": kind, } if is_person: item["skin"] = skin item["hair"] = hair return item def build_plans() -> list[dict[str, Any]]: return [ { "sample_id": "sample_0001", "scene_caption": "A rainy market stall bustles as a vendor steadies a display while a shopper reaches for fruit under a green umbrella.", "story": "The rain has just eased, and the stall is busy again. A quick exchange between vendor and shopper gives the scene a focused, everyday energy.", "background": "Covered outdoor produce market, damp stone floor, warm awning light, late afternoon.", "style": "photorealistic", "canvas_size": list(CANVAS_SIZE), "background_kind": "market", "bg_top": (186, 197, 198), "bg_bottom": (128, 118, 96), "subjects": [ subject("vendor_in_apron", True, "middle-aged person with short dark hair, tan skin, blue apron, leaning forward with a concentrated expression", [0.08, 0.24, 0.31, 0.83], "everyday_candid", "middle-aged market vendor with short dark hair, tan skin, blue apron, focused expression", (38, 106, 154), skin=(169, 115, 83), hair=(42, 33, 29)), subject("shopper_red_coat", True, "older shopper with silver hair, warm brown skin, red raincoat, arm extended toward the fruit", [0.26, 0.22, 0.50, 0.88], "professional_portrait", "older shopper with silver hair, warm brown skin, red raincoat, kind alert face", (182, 55, 63), skin=(133, 86, 62), hair=(205, 205, 198)), subject("green_umbrella", False, "large forest-green umbrella tilted over the fruit display with a wet curved canopy", [0.37, 0.04, 0.82, 0.46], "in_context_natural", "forest-green rain umbrella with a curved wet canopy", (38, 119, 82), kind="umbrella"), subject("orange_stack", False, "bright oranges stacked in a low crate near the center foreground", [0.44, 0.49, 0.71, 0.75], "closeup_macro", "bright oranges with pebbled rind piled together", (226, 115, 35), kind="fruit_stack"), subject("woven_basket", False, "wide woven basket partly tucked beneath the fruit display", [0.61, 0.62, 0.88, 0.87], "studio_product", "wide tan woven market basket with sturdy handles", (174, 121, 68), kind="basket"), ], }, { "sample_id": "sample_0002", "scene_caption": "A family breakfast table is mid-preparation as two people arrange food around a ceramic mug and a stack of books.", "story": "The morning is calm but active. One person is setting the table while another pauses with a small smile before sitting down.", "background": "Bright home kitchen with pale tile, wood table, diffuse window light.", "style": "photorealistic", "canvas_size": list(CANVAS_SIZE), "background_kind": "kitchen", "bg_top": (225, 235, 235), "bg_bottom": (176, 147, 105), "subjects": [ subject("person_yellow_sweater", True, "young adult with curly black hair, medium brown skin, yellow sweater, holding a plate near the table", [0.11, 0.18, 0.37, 0.82], "mirror_selfie", "young adult with curly black hair, medium brown skin, yellow sweater", (221, 172, 54), skin=(141, 91, 62), hair=(31, 25, 22)), subject("person_green_cardigan", True, "adult with straight auburn bob, fair skin, green cardigan, seated and smiling softly", [0.55, 0.25, 0.82, 0.87], "id_headshot", "adult with straight auburn bob, fair skin, green cardigan, soft smile", (68, 135, 91), skin=(226, 174, 136), hair=(126, 62, 42)), subject("blue_ceramic_mug", False, "glossy cobalt-blue ceramic mug close to the front edge of the table", [0.35, 0.58, 0.51, 0.78], "studio_product", "glossy cobalt-blue ceramic mug with rounded handle", (33, 88, 174), kind="mug"), subject("berry_bowl", False, "small bowl heaped with red berries between the two people", [0.45, 0.48, 0.63, 0.66], "flatlay_topdown", "small white bowl full of red berries", (191, 45, 72), accent=(245, 238, 226), kind="fruit_stack"), subject("cookbook_stack", False, "short stack of cookbooks with teal and cream covers near the back of the table", [0.18, 0.46, 0.39, 0.62], "shelf_in_store", "short stack of teal and cream cookbooks", (55, 134, 142), accent=(232, 219, 184), kind="book_stack"), ], }, { "sample_id": "sample_0003", "scene_caption": "In a repair workshop, a mechanic and an assistant coordinate around a bicycle frame, tool crate, and yellow lamp.", "story": "The team is close to solving the repair. Their body language shows concentration as tools and parts crowd the bench.", "background": "Dim bicycle workshop, pegboard walls, worn wooden bench, focused task lighting.", "style": "photorealistic", "canvas_size": list(CANVAS_SIZE), "background_kind": "workshop", "bg_top": (92, 98, 96), "bg_bottom": (89, 75, 57), "subjects": [ subject("mechanic_gray_overalls", True, "stocky adult with shaved head, deep brown skin, gray overalls, crouched toward the bicycle", [0.07, 0.24, 0.34, 0.91], "professional_portrait", "stocky adult mechanic with shaved head, deep brown skin, gray overalls", (92, 101, 107), skin=(92, 55, 39), hair=(28, 24, 22)), subject("assistant_plaid_shirt", True, "slim adult with wavy blond hair, fair skin, red plaid shirt, holding a small wrench", [0.68, 0.19, 0.91, 0.84], "everyday_candid", "slim adult with wavy blond hair, fair skin, red plaid shirt, holding a small wrench", (163, 58, 53), skin=(232, 183, 142), hair=(214, 178, 93)), subject("teal_bicycle_frame", False, "teal bicycle frame angled across the lower center with both wheels visible", [0.29, 0.49, 0.76, 0.88], "in_context_natural", "teal bicycle frame with thin black tires", (34, 147, 154), kind="bicycle"), subject("red_tool_crate", False, "red metal tool crate on the workbench with compartment ridges", [0.31, 0.38, 0.54, 0.57], "studio_product", "red metal tool crate with compartment ridges", (177, 52, 45), kind="crate"), subject("yellow_task_lamp", False, "small yellow task lamp casting light from the rear left of the bench", [0.10, 0.06, 0.30, 0.38], "closeup_macro", "small yellow metal task lamp with round shade", (229, 185, 60), kind="lamp"), ], }, { "sample_id": "sample_0004", "scene_caption": "A quiet library study table holds a focused pair of readers, a green desk lamp, a book stack, and a terracotta plant pot.", "story": "The moment is hushed and intent. One reader marks a page while the other leans in to compare notes.", "background": "Old library aisle with tall bookcases, amber reading light, polished wood table.", "style": "photorealistic", "canvas_size": list(CANVAS_SIZE), "background_kind": "library", "bg_top": (122, 83, 55), "bg_bottom": (142, 108, 73), "subjects": [ subject("reader_blue_jacket", True, "adult with short coiled hair, dark brown skin, blue jacket, leaning over an open page", [0.14, 0.20, 0.40, 0.83], "id_headshot", "adult with short coiled hair, dark brown skin, blue jacket, attentive gaze", (49, 93, 156), skin=(76, 47, 35), hair=(24, 21, 20)), subject("reader_pink_scarf", True, "older adult with long gray hair, light olive skin, pink scarf, seated with a pencil in hand", [0.48, 0.22, 0.75, 0.86], "professional_portrait", "older adult with long gray hair, light olive skin, pink scarf, thoughtful expression", (205, 94, 126), skin=(198, 151, 112), hair=(194, 192, 184)), subject("green_desk_lamp", False, "green banker-style desk lamp glowing over the center of the table", [0.35, 0.40, 0.57, 0.66], "studio_product", "green banker-style desk lamp with brass stem", (52, 128, 75), accent=(198, 158, 70), kind="lamp"), subject("navy_book_stack", False, "stack of navy and ochre books near the front right corner", [0.61, 0.58, 0.86, 0.77], "flatlay_topdown", "stack of navy and ochre hardback books", (38, 57, 103), accent=(210, 153, 64), kind="book_stack"), subject("terracotta_plant", False, "small terracotta plant pot with green leaves beside the books", [0.75, 0.42, 0.92, 0.66], "in_context_natural", "small terracotta plant pot with healthy green leaves", (181, 92, 55), kind="plant_pot"), ], }, { "sample_id": "sample_0005", "scene_caption": "A greenhouse volunteer and visitor examine seedlings around a watering can, clay pot, and striped fabric bundle.", "story": "The air is humid and bright after watering. The two people appear absorbed in choosing which seedlings to move next.", "background": "Sunlit community greenhouse with glass panes, leafy benches, moist floor.", "style": "photorealistic", "canvas_size": list(CANVAS_SIZE), "background_kind": "greenhouse", "bg_top": (184, 216, 205), "bg_bottom": (95, 130, 85), "subjects": [ subject("volunteer_orange_vest", True, "adult with cropped black hair, medium tan skin, orange vest, kneeling with careful hands", [0.08, 0.27, 0.35, 0.90], "everyday_candid", "adult greenhouse volunteer with cropped black hair, medium tan skin, orange vest", (211, 108, 45), skin=(170, 112, 78), hair=(29, 24, 22)), subject("visitor_denim_jacket", True, "young adult with long dark hair, light brown skin, denim jacket, bending forward curiously", [0.58, 0.18, 0.84, 0.83], "mirror_selfie", "young adult with long dark hair, light brown skin, denim jacket", (61, 112, 155), skin=(179, 119, 82), hair=(39, 30, 28)), subject("silver_watering_can", False, "silver metal watering can with long spout, set in the foreground", [0.33, 0.58, 0.57, 0.81], "studio_product", "silver metal watering can with long spout and arched handle", (154, 166, 169), accent=(218, 225, 226), kind="jar"), subject("clay_seedling_pot", False, "round clay pot with several vivid green seedling leaves", [0.48, 0.43, 0.66, 0.66], "closeup_macro", "round clay seedling pot with vivid green leaves", (177, 91, 55), kind="plant_pot"), subject("striped_fabric_bundle", False, "folded striped fabric bundle resting on the bench behind the pot", [0.19, 0.48, 0.43, 0.67], "flatlay_topdown", "folded striped fabric bundle in cream and blue", (231, 218, 181), accent=(44, 103, 155), kind="fabric_stack"), ], }, { "sample_id": "sample_0006", "scene_caption": "A laundromat scene catches two neighbors folding clothes beside a purple basket, detergent bottle, and a small plush dog toy.", "story": "The dryers are humming while the neighbors trade a quick laugh. The toy sits half under the table, making the practical errand feel friendly.", "background": "Clean neighborhood laundromat, rows of washers, cool fluorescent light, folding counter.", "style": "photorealistic", "canvas_size": list(CANVAS_SIZE), "background_kind": "laundromat", "bg_top": (207, 219, 225), "bg_bottom": (126, 145, 153), "subjects": [ subject("neighbor_teal_hoodie", True, "adult with shaved sides and black curls, brown skin, teal hoodie, folding a towel", [0.12, 0.21, 0.38, 0.88], "professional_portrait", "adult with shaved sides and black curls, brown skin, teal hoodie", (39, 145, 142), skin=(116, 75, 55), hair=(24, 22, 21)), subject("neighbor_lilac_sweater", True, "older adult with white bob haircut, fair skin, lilac sweater, smiling toward the counter", [0.54, 0.18, 0.80, 0.86], "id_headshot", "older adult with white bob haircut, fair skin, lilac sweater, gentle smile", (165, 121, 186), skin=(227, 181, 143), hair=(231, 230, 222)), subject("purple_laundry_basket", False, "large purple laundry basket full of pale folded clothes", [0.32, 0.52, 0.59, 0.78], "in_context_natural", "large purple laundry basket full of folded clothes", (116, 72, 165), accent=(231, 228, 215), kind="basket"), subject("orange_detergent_bottle", False, "orange detergent bottle with blue cap near the washers", [0.74, 0.47, 0.88, 0.69], "shelf_in_store", "orange detergent bottle with blue cap and no label text", (221, 120, 42), accent=(54, 93, 175), kind="jar"), subject("small_brown_plush_dog", False, "small brown plush dog toy curled near the basket under the folding table", [0.60, 0.69, 0.86, 0.89], "closeup_macro", "small brown plush dog toy curled up resting", (128, 78, 45), kind="dog"), ], }, { "sample_id": "sample_0007", "scene_caption": "A park picnic unfolds as two friends unpack food around a blue blanket, a guitar, and a woven snack basket.", "story": "The friends are settling into an easy afternoon. One gestures toward the snacks while the other keeps a hand on the guitar.", "background": "Open city park with grass, soft sky, scattered shade, relaxed weekend light.", "style": "photorealistic", "canvas_size": list(CANVAS_SIZE), "background_kind": "picnic", "bg_top": (148, 192, 220), "bg_bottom": (88, 141, 78), "subjects": [ subject("friend_white_hat", True, "young adult with dark skin, white brimmed hat, navy shirt, seated cross-legged", [0.10, 0.32, 0.36, 0.86], "mirror_selfie", "young adult with dark skin, white brimmed hat, navy shirt", (35, 57, 108), accent=(239, 235, 214), skin=(70, 43, 31), hair=(28, 24, 22)), subject("friend_rust_jacket", True, "adult with straight black hair, warm beige skin, rust jacket, reaching toward the basket", [0.60, 0.28, 0.87, 0.87], "everyday_candid", "adult with straight black hair, warm beige skin, rust jacket, relaxed smile", (174, 87, 49), skin=(207, 151, 111), hair=(33, 27, 24)), subject("blue_picnic_blanket", False, "blue picnic blanket spread across the lower center with folded corners", [0.24, 0.62, 0.78, 0.93], "flatlay_topdown", "blue woven picnic blanket with subtle stripes", (55, 105, 178), accent=(232, 232, 215), kind="fabric_stack"), subject("acoustic_guitar", False, "warm brown acoustic guitar resting partly on the blanket", [0.39, 0.45, 0.65, 0.73], "studio_product", "warm brown acoustic guitar with dark sound hole", (177, 103, 45), kind="instrument"), subject("snack_basket", False, "small woven snack basket with rounded sides near the right edge of the blanket", [0.68, 0.56, 0.91, 0.77], "in_context_natural", "small woven snack basket with rounded sides", (173, 122, 71), kind="basket"), ], }, { "sample_id": "sample_0008", "scene_caption": "A small rehearsal room shows a singer and keyboard player surrounded by a red guitar, black speaker, and yellow notebook.", "story": "The group is between takes, listening for the next cue. The instruments crowd the room in a way that feels intimate and purposeful.", "background": "Cozy music practice room, dark acoustic wall panels, amber lamps, polished floor.", "style": "photorealistic", "canvas_size": list(CANVAS_SIZE), "background_kind": "music_room", "bg_top": (86, 76, 86), "bg_bottom": (103, 82, 65), "subjects": [ subject("singer_green_shirt", True, "adult with short brown hair, light skin, green shirt, standing near a microphone", [0.14, 0.16, 0.38, 0.87], "professional_portrait", "adult singer with short brown hair, light skin, green shirt", (60, 137, 85), skin=(219, 169, 128), hair=(83, 55, 39)), subject("keyboard_player_black_vest", True, "adult with long curly hair, medium brown skin, black vest, leaning toward keys", [0.57, 0.22, 0.85, 0.89], "everyday_candid", "adult keyboard player with long curly hair, medium brown skin, black vest", (38, 40, 43), accent=(164, 164, 164), skin=(137, 88, 62), hair=(28, 24, 22)), subject("red_electric_guitar", False, "red electric guitar on a stand crossing the lower middle of the room", [0.35, 0.42, 0.60, 0.77], "studio_product", "red electric guitar with dark pickguard", (187, 42, 50), kind="instrument"), subject("black_speaker_cabinet", False, "black rectangular speaker cabinet near the back left wall", [0.05, 0.49, 0.24, 0.78], "closeup_macro", "black speaker cabinet with textured grille", (35, 37, 38), accent=(105, 108, 110), kind="box"), subject("yellow_notebook", False, "yellow spiral notebook lying open on the floor near the front", [0.58, 0.68, 0.80, 0.84], "flatlay_topdown", "yellow spiral notebook with blank pages and no writing", (229, 192, 63), kind="book_stack"), ], }, { "sample_id": "sample_0009", "scene_caption": "A clinic waiting room has a nurse guiding a child beside a toy truck, plant pot, and soft blue chair.", "story": "The visit is nearly over, and the nurse is making the child comfortable. The toys and bright chair soften the clinical setting.", "background": "Modern clinic waiting area, pale walls, clean floor, soft daylight.", "style": "photorealistic", "canvas_size": list(CANVAS_SIZE), "background_kind": "clinic", "bg_top": (224, 234, 230), "bg_bottom": (169, 190, 188), "subjects": [ subject("nurse_blue_scrubs", True, "adult with dark braided hair, brown skin, blue scrubs, kneeling with one hand extended", [0.10, 0.19, 0.38, 0.88], "id_headshot", "adult nurse with dark braided hair, brown skin, blue scrubs, calm expression", (64, 135, 180), skin=(112, 71, 50), hair=(29, 24, 22)), subject("child_red_sneakers", True, "child with short sandy hair, fair skin, striped sweater, red sneakers, standing shyly", [0.45, 0.29, 0.64, 0.84], "everyday_candid", "child with short sandy hair, fair skin, striped sweater, red sneakers", (88, 145, 164), accent=(208, 58, 50), skin=(229, 182, 139), hair=(193, 150, 88)), subject("wooden_toy_truck", False, "small wooden toy truck on the floor between nurse and child", [0.34, 0.68, 0.55, 0.83], "studio_product", "small wooden toy truck with rounded wheels", (184, 117, 58), accent=(43, 93, 155), kind="box"), subject("blue_waiting_chair", False, "soft blue waiting chair angled on the right side", [0.66, 0.41, 0.92, 0.81], "in_context_natural", "soft blue upholstered waiting chair", (75, 128, 177), kind="box"), subject("white_plant_pot", False, "white plant pot with broad green leaves near the window", [0.75, 0.14, 0.93, 0.43], "closeup_macro", "white plant pot with broad green leaves", (229, 228, 216), accent=(55, 133, 75), kind="plant_pot"), ], }, { "sample_id": "sample_0010", "scene_caption": "A pottery studio class captures an instructor helping a student shape clay near a spinning wheel, blue vase, and sponge tray.", "story": "Wet clay is on the table and the lesson is hands-on. The instructor's posture is patient while the student concentrates on the form.", "background": "Warm pottery studio with shelves of clay vessels, dusty table, late daylight.", "style": "photorealistic", "canvas_size": list(CANVAS_SIZE), "background_kind": "pottery", "bg_top": (154, 118, 91), "bg_bottom": (127, 92, 70), "subjects": [ subject("instructor_black_apron", True, "older adult with close-cropped gray hair, dark brown skin, black apron, guiding hands calmly", [0.09, 0.20, 0.37, 0.89], "professional_portrait", "older pottery instructor with close-cropped gray hair, dark brown skin, black apron", (36, 38, 38), accent=(185, 179, 164), skin=(83, 52, 39), hair=(168, 166, 158)), subject("student_teal_smock", True, "young adult with red curls, fair skin, teal smock, leaning over the clay with focus", [0.48, 0.24, 0.76, 0.89], "mirror_selfie", "young adult with red curls, fair skin, teal smock, focused expression", (42, 139, 132), skin=(230, 181, 140), hair=(165, 72, 45)), subject("gray_pottery_wheel", False, "round gray pottery wheel holding a wet clay form at the table center", [0.31, 0.55, 0.57, 0.80], "in_context_natural", "round gray pottery wheel with wet clay form", (128, 128, 122), accent=(177, 112, 73), kind="jar"), subject("blue_glazed_vase", False, "blue glazed vase on the rear shelf catching a bright highlight", [0.68, 0.33, 0.85, 0.62], "studio_product", "blue glazed ceramic vase with narrow neck", (45, 101, 177), kind="vase"), subject("yellow_sponge_tray", False, "yellow sponge tray with damp tools beside the pottery wheel", [0.55, 0.63, 0.79, 0.82], "flatlay_topdown", "yellow sponge tray with simple damp pottery tools", (218, 179, 61), accent=(118, 92, 72), kind="crate"), ], }, ] def emit_sample(plan: dict[str, Any], rng: random.Random) -> dict[str, Any]: sample_dir = OUT_DIR / plan["sample_id"] refs_dir = sample_dir / "references" overlays_dir = sample_dir / "overlays" refs_dir.mkdir(parents=True, exist_ok=True) overlays_dir.mkdir(parents=True, exist_ok=True) layout_path = sample_dir / "layout_sketch.png" main_path = sample_dir / "main_image.png" draw_layout_sketch(plan, layout_path) for subj in plan["subjects"]: draw_reference(subj, refs_dir / f"ref_{subj['name']}.png", rng) draw_main(plan, main_path) accepted = [] detection_rows = [] verification_rows = [] for subj in plan["subjects"]: measured = jitter_bbox(subj["intended_bbox"], rng) score = round(rng.uniform(0.82, 0.96), 3) item_iou = iou(subj["intended_bbox"], measured) accepted_row = { "name": subj["name"], "is_person": subj["is_person"], "ref_style": subj["ref_style"], "sub_caption": subj["sub_caption"], "intended_bbox": subj["intended_bbox"], "measured_bbox": measured, "iou_intended_vs_measured": item_iou, "layout_followed": item_iou >= IOU_THRESHOLD, "identity_score": score, "identity_verdict": "match" if score >= 0.70 else "weak_match", "ref_image": f"references/ref_{subj['name']}.png", } accepted.append(accepted_row) detection_rows.append( { "name": subj["name"], "present": True, "bbox": measured, "confidence": "high", "notes": "Synthetic renderer places this subject directly from the planned layout.", } ) verification_rows.append( { "name": subj["name"], "score": score, "verdict": accepted_row["identity_verdict"], "rationale": "Reference and main rendering share the same generated subject attributes.", } ) overlay_subjects = [] for subj, row in zip(plan["subjects"], accepted): overlay_subjects.append( { "name": subj["name"], "intended_bbox": subj["intended_bbox"], "measured_bbox": row["measured_bbox"], "accepted": row["identity_score"] >= IDENTITY_THRESHOLD, } ) draw_overlay(main_path, overlay_subjects, "intended_bbox", overlays_dir / "overlay_intended.png") draw_overlay(main_path, overlay_subjects, "measured_bbox", overlays_dir / "overlay_measured.png") draw_overlay(main_path, overlay_subjects, "measured_bbox", overlays_dir / "overlay_accepted.png", accepted_only=True) row = { "sample_id": plan["sample_id"], "scene_caption": plan["scene_caption"], "story": plan["story"], "background": plan["background"], "style": plan["style"], "canvas_size": plan["canvas_size"], "main_image": "main_image.png", "layout_sketch": "layout_sketch.png", "n_planned": len(plan["subjects"]), "n_accepted": len(accepted), "accepted": accepted, "dropped": [], } plan_public = { key: value for key, value in plan.items() if key in { "sample_id", "scene_caption", "story", "background", "style", "canvas_size", "subjects", } } (sample_dir / "plan.json").write_text(json.dumps(plan_public, indent=2) + "\n") (sample_dir / "detections.json").write_text(json.dumps(detection_rows, indent=2) + "\n") (sample_dir / "identity_verification.json").write_text(json.dumps(verification_rows, indent=2) + "\n") (sample_dir / "row.json").write_text(json.dumps(row, indent=2) + "\n") return row def main() -> None: random.seed(20260605) OUT_DIR.mkdir(exist_ok=True) rows = [] for i, plan in enumerate(build_plans(), start=1): rows.append(emit_sample(plan, random.Random(20260605 + i))) (OUT_DIR / "dataset.json").write_text(json.dumps(rows, indent=2) + "\n") with (OUT_DIR / "dataset.jsonl").open("w") as handle: for row in rows: handle.write(json.dumps(row, separators=(",", ":")) + "\n") (OUT_DIR / "README.md").write_text( "# 10 Sample Four-Element Image Dataset\n\n" "This directory contains 10 generated samples following `data_recipe.md`.\n" "Each `sample_XXXX` folder includes a composed `main_image.png`, independent " "subject references in `references/`, a `layout_sketch.png`, overlay images, " "and the emitted dataset row in `row.json`.\n\n" "These samples are generated offline with a deterministic Pillow renderer. " "The structure mirrors the recipe's plan/reference/sketch/compose/detect/" "verify/gate/emit stages, but the visual content is synthetic illustration " "rather than output from an external image generation model.\n" ) print(f"generated {len(rows)} samples in {OUT_DIR}") if __name__ == "__main__": main()