| from __future__ import annotations |
|
|
| import json |
| import math |
| import random |
| from pathlib import Path |
| from typing import Any |
|
|
| from PIL import Image, ImageDraw, ImageFilter, ImageFont |
|
|
|
|
# Pixel size (width, height) used for every canvas created in this file; it is
# also the default `size` of the bbox conversion helpers.
CANVAS_SIZE = (1024, 1024)
# NOTE(review): not referenced in the visible part of the file — presumably the
# directory the generated samples are written to; confirm against the caller.
OUT_DIR = Path("10samples")
# NOTE(review): neither threshold is used in the visible part of the file —
# presumably acceptance cut-offs for an identity score and a bbox IoU; confirm.
IDENTITY_THRESHOLD = 0.55
IOU_THRESHOLD = 0.10
|
|
|
|
def font(size: int, bold: bool = False) -> ImageFont.ImageFont:
    """Load DejaVu Sans at *size*, falling back to Pillow's default font.

    Two install locations are probed in order. A location that cannot be
    opened (``OSError``) is skipped; if neither works, Pillow's built-in
    default font is returned instead.

    Args:
        size: Requested font size passed to ``ImageFont.truetype``.
        bold: Pick the bold face instead of the regular one.

    Returns:
        The first font that loads, or ``ImageFont.load_default()``.
    """
    # (regular, bold) file pairs, in lookup order.
    font_pairs = (
        (
            "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
            "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
        ),
        (
            "/usr/share/fonts/dejavu/DejaVuSans.ttf",
            "/usr/share/fonts/dejavu/DejaVuSans-Bold.ttf",
        ),
    )
    for regular_path, bold_path in font_pairs:
        chosen = bold_path if bold else regular_path
        try:
            return ImageFont.truetype(chosen, size)
        except OSError:
            # Font file missing or unreadable here; try the next location.
            continue
    return ImageFont.load_default()
|
|
|
|
# Fonts are resolved once, when the module is imported.
# FONT_18 labels the overlay boxes and FONT_24 labels the layout sketch.
# NOTE(review): FONT_32 is not used in the visible part of the file.
FONT_18 = font(18)
FONT_24 = font(24)
FONT_32 = font(32, bold=True)
|
|
|
|
def clamp(value: float, low: float = 0.0, high: float = 1.0) -> float:
    """Limit *value* to the range ``[low, high]``.

    Args:
        value: Number to restrict.
        low: Lower bound (default ``0.0``).
        high: Upper bound (default ``1.0``).

    Returns:
        ``high`` when *value* is not below it, ``low`` when the capped value
        is not above it, otherwise *value* itself.
    """
    # Apply the upper bound first, then the lower bound, so the lower bound
    # wins if the two bounds are ever inverted.
    capped = value if value < high else high
    return capped if capped > low else low
|
|
|
|
def bbox_px(bbox: list[float], size: tuple[int, int] = CANVAS_SIZE) -> tuple[int, int, int, int]:
    """Convert a normalized ``[x1, y1, x2, y2]`` box to integer pixel coordinates.

    Each coordinate is clamped to ``[0, 1]``, scaled by the matching canvas
    dimension and rounded to the nearest integer.

    Args:
        bbox: Box as fractions of the canvas; the first four items are read.
        size: Canvas ``(width, height)`` in pixels.

    Returns:
        ``(x1, y1, x2, y2)`` in pixels.
    """
    canvas_w, canvas_h = size
    # x coordinates scale with the width, y coordinates with the height.
    scales = (canvas_w, canvas_h, canvas_w, canvas_h)
    left, top, right, bottom = (
        int(round(clamp(bbox[axis]) * scales[axis])) for axis in range(4)
    )
    return left, top, right, bottom
|
|
|
|
def normalize_bbox(box: tuple[int, int, int, int], size: tuple[int, int] = CANVAS_SIZE) -> list[float]:
    """Convert a pixel ``(x1, y1, x2, y2)`` box to canvas fractions.

    Args:
        box: Box in pixel coordinates.
        size: Canvas ``(width, height)`` in pixels.

    Returns:
        ``[x1, y1, x2, y2]`` as fractions of the canvas, each clamped to
        ``[0, 1]`` and rounded to four decimal places.
    """
    canvas_w, canvas_h = size
    left, top, right, bottom = box
    fractions = (
        left / canvas_w,
        top / canvas_h,
        right / canvas_w,
        bottom / canvas_h,
    )
    normalized = []
    for fraction in fractions:
        normalized.append(round(clamp(fraction), 4))
    return normalized
|
|
|
|
def jitter_bbox(bbox: list[float], rng: random.Random) -> list[float]:
    """Return a slightly shifted and resized copy of a normalized box.

    The box is translated by up to ±1.8 % of its own width/height and each
    side is grown (or shrunk) by between -1.8 % and +2.4 % of that extent.
    All coordinates are clamped to ``[0, 1]`` and a minimum extent of 0.01 is
    enforced on both axes.

    Four values are drawn from *rng*, always in the order: x shift, y shift,
    x growth, y growth.

    Args:
        bbox: Normalized ``[x1, y1, x2, y2]`` box.
        rng: Random generator supplying the jitter.

    Returns:
        The jittered box, each coordinate rounded to four decimal places.
    """
    min_extent = 0.01
    x1, y1, x2, y2 = bbox
    width = x2 - x1
    height = y2 - y1
    dx = rng.uniform(-0.018, 0.018) * width
    dy = rng.uniform(-0.018, 0.018) * height
    grow_x = rng.uniform(-0.018, 0.024) * width
    grow_y = rng.uniform(-0.018, 0.024) * height
    measured = [
        clamp(x1 + dx - grow_x),
        clamp(y1 + dy - grow_y),
        clamp(x2 + dx + grow_x),
        clamp(y2 + dy + grow_y),
    ]
    # Enforce the minimum extent per axis: (near index, far index).
    for near, far in ((0, 2), (1, 3)):
        if measured[far] <= measured[near] + min_extent:
            if measured[near] + min_extent > 1.0:
                # No room left before the canvas edge: pushing the far side
                # out would be clamped back to 1.0 and leave the box thinner
                # than the minimum, so pull the near side back instead.
                measured[far] = 1.0
                measured[near] = 1.0 - min_extent
            else:
                measured[far] = measured[near] + min_extent
    return [round(v, 4) for v in measured]
|
|
|
|
def iou(a: list[float], b: list[float]) -> float:
    """Compute the intersection-over-union of two ``[x1, y1, x2, y2]`` boxes.

    Negative extents (inverted or disjoint boxes) count as zero.

    Args:
        a: First box.
        b: Second box.

    Returns:
        The IoU rounded to four decimal places, or ``0.0`` when the union
        area is zero.
    """

    def _extent(lo: float, hi: float) -> float:
        # Length of the span lo..hi, never negative.
        return max(0.0, hi - lo)

    ax1, ay1, ax2, ay2 = a
    bx1, by1, bx2, by2 = b

    overlap_w = _extent(max(ax1, bx1), min(ax2, bx2))
    overlap_h = _extent(max(ay1, by1), min(ay2, by2))
    overlap = overlap_w * overlap_h

    first_area = _extent(ax1, ax2) * _extent(ay1, ay2)
    second_area = _extent(bx1, bx2) * _extent(by1, by2)
    union = first_area + second_area - overlap

    if union == 0:
        return 0.0
    return round(overlap / union, 4)
|
|
|
|
def lighten(color: tuple[int, int, int], amount: float) -> tuple[int, int, int]:
    """Blend every channel of *color* toward 255 by the fraction *amount*.

    Args:
        color: Channel values to lighten.
        amount: Blend fraction; ``0`` returns the colour unchanged and ``1``
            yields 255 on every channel.

    Returns:
        A tuple with one truncated-to-int value per input channel.
    """

    def _toward_white(channel: int) -> int:
        return int(channel + (255 - channel) * amount)

    return tuple(map(_toward_white, color))
|
|
|
|
def darken(color: tuple[int, int, int], amount: float) -> tuple[int, int, int]:
    """Scale every channel of *color* toward 0 by the fraction *amount*.

    Args:
        color: Channel values to darken.
        amount: Fraction of brightness to remove; ``0`` returns the colour
            unchanged and ``1`` yields 0 on every channel.

    Returns:
        A tuple with one truncated-to-int value per input channel.
    """
    retained = 1 - amount
    shaded = [int(channel * retained) for channel in color]
    return tuple(shaded)
|
|
|
|
def gradient(size: tuple[int, int], top: tuple[int, int, int], bottom: tuple[int, int, int]) -> Image.Image:
    """Create an RGB image filled with a vertical linear gradient.

    Args:
        size: Image ``(width, height)`` in pixels.
        top: RGB colour of the first row.
        bottom: RGB colour of the last row.

    Returns:
        A new ``"RGB"`` image whose rows fade from *top* to *bottom*.
    """
    width, height = size
    canvas = Image.new("RGB", size, top)
    pen = ImageDraw.Draw(canvas)
    # Guard against division by zero for a one-pixel-high image.
    last_row = max(1, height - 1)
    for row in range(height):
        mix = row / last_row
        shade = tuple(int(top[ch] * (1 - mix) + bottom[ch] * mix) for ch in range(3))
        pen.line([(0, row), (width, row)], fill=shade)
    return canvas
|
|
|
|
def paste_shadow(base: Image.Image, layer: Image.Image, offset: tuple[int, int] = (8, 10), blur: int = 12) -> None:
    """Composite *layer* onto *base* in place, with a blurred black shadow under it.

    The shadow is the last band of *layer* (its alpha for an RGBA layer),
    Gaussian-blurred and used as the alpha of a black image. It is composited
    first, shifted by *offset*, and the layer itself is composited on top.

    Args:
        base: Destination image, modified in place.
        layer: Image to paste; its last band defines the shadow shape.
        offset: ``(x, y)`` position at which the shadow is composited.
        blur: Gaussian blur radius applied to the shadow.
    """
    shift_x, shift_y = offset
    *_, coverage = layer.split()
    softened = coverage.filter(ImageFilter.GaussianBlur(blur))
    shadow = Image.new("RGBA", layer.size, (0, 0, 0, 0))
    shadow.putalpha(softened)
    base.alpha_composite(shadow, (shift_x, shift_y))
    base.alpha_composite(layer)
|
|
|
|
def rounded_rect(draw: ImageDraw.ImageDraw, box: tuple[int, int, int, int], radius: int, fill: Any, outline: Any = None, width: int = 1) -> None:
    """Draw a rounded rectangle through ``draw.rounded_rectangle``.

    Args:
        draw: Drawing context to render on.
        box: ``(x1, y1, x2, y2)`` bounds of the rectangle.
        radius: Corner radius.
        fill: Fill colour.
        outline: Outline colour, or ``None``.
        width: Outline width.
    """
    options = {
        "radius": radius,
        "fill": fill,
        "outline": outline,
        "width": width,
    }
    draw.rounded_rectangle(box, **options)
|
|
|
|
def draw_background(draw: ImageDraw.ImageDraw, scene: dict[str, Any]) -> None:
    """Paint the scenery selected by ``scene["background_kind"]`` onto *draw*.

    Supported kinds are ``market``, ``kitchen``, ``workshop``, ``library``,
    ``greenhouse``, ``laundromat``, ``picnic``, ``music_room``, ``clinic`` and
    ``pottery``. Any other kind draws nothing, leaving the existing canvas
    untouched. All coordinates are hard-coded for a 1024x1024 canvas.

    Randomised details (library book spines, greenhouse foliage, picnic grass)
    come from a locally seeded generator, so repeated runs render identical
    backgrounds and the module-level ``random`` state is never consumed.

    Args:
        draw: Drawing context of the target image.
        scene: Mapping providing at least ``"background_kind"``.

    Raises:
        KeyError: If *scene* has no ``"background_kind"`` entry.
    """
    kind = scene["background_kind"]
    # Fixed seed keeps every randomised backdrop reproducible.
    rng = random.Random(12)
    if kind == "market":
        # Alternating two-colour trapezoids along the top edge.
        for x in range(0, 1024, 96):
            color = (188, 87, 70) if (x // 96) % 2 else (240, 197, 113)
            draw.polygon([(x, 0), (x + 96, 0), (x + 70, 110), (x + 26, 110)], fill=color)
        # Floor with diagonal seams, then a horizontal band above it.
        draw.rectangle((0, 672, 1024, 1024), fill=(126, 113, 91))
        for x in range(-80, 1024, 160):
            draw.line((x, 720, x + 260, 1024), fill=(100, 91, 76), width=5)
        draw.rectangle((0, 545, 1024, 650), fill=(99, 73, 48))
    elif kind == "kitchen":
        # Upper wall with outlined tiles.
        draw.rectangle((0, 0, 1024, 460), fill=(206, 222, 224))
        for x in range(0, 1024, 128):
            draw.rectangle((x, 0, x + 126, 458), outline=(178, 198, 201), width=2)
        # Lower surface with diagonal lines, then a large framed panel on the wall.
        draw.rectangle((0, 560, 1024, 1024), fill=(189, 155, 105))
        for x in range(-150, 1200, 150):
            draw.line((x, 560, x + 210, 1024), fill=(159, 128, 84), width=4)
        draw.rectangle((55, 90, 970, 380), fill=(232, 238, 236), outline=(178, 196, 196), width=4)
    elif kind == "workshop":
        # Dark wall with horizontal lines.
        draw.rectangle((0, 0, 1024, 1024), fill=(88, 91, 87))
        for y in range(110, 640, 110):
            draw.line((0, y, 1024, y), fill=(67, 70, 68), width=7)
        # Floor, vertical bars on the wall, then a wide horizontal slab.
        draw.rectangle((0, 640, 1024, 1024), fill=(107, 94, 75))
        for x in range(70, 1000, 170):
            draw.rectangle((x, 80, x + 24, 520), fill=(54, 57, 55))
        draw.rectangle((50, 560, 974, 700), fill=(73, 58, 43))
    elif kind == "library":
        # Wall and four horizontal shelf boards.
        draw.rectangle((0, 0, 1024, 1024), fill=(119, 82, 55))
        for y in (84, 250, 416, 582):
            draw.rectangle((0, y, 1024, y + 18), fill=(83, 55, 37))
        # Three rows of book spines with random height, width and colour.
        for x in range(18, 1010, 30):
            for y in (105, 271, 437):
                h = rng.randint(88, 132)
                draw.rectangle((x, y, x + rng.randint(14, 24), y + h), fill=rng.choice([(156, 53, 58), (61, 97, 130), (205, 160, 74), (84, 123, 92)]))
        draw.rectangle((0, 650, 1024, 1024), fill=(156, 127, 86))
    elif kind == "greenhouse":
        # Pale backdrop with slanted lines.
        draw.rectangle((0, 0, 1024, 1024), fill=(176, 209, 199))
        for x in range(0, 1025, 128):
            draw.line((x, 0, x + 180, 650), fill=(123, 153, 146), width=5)
        # Ground plus 70 randomly scattered green ellipses.
        draw.rectangle((0, 615, 1024, 1024), fill=(104, 126, 87))
        for _ in range(70):
            x = rng.randint(0, 1024)
            y = rng.randint(490, 1024)
            draw.ellipse((x - 18, y - 8, x + 18, y + 8), fill=rng.choice([(45, 117, 72), (62, 145, 82), (86, 121, 69)]))
    elif kind == "laundromat":
        # Wall with a row of boxes, each with a round inset.
        draw.rectangle((0, 0, 1024, 1024), fill=(202, 213, 218))
        for x in range(55, 985, 155):
            draw.rectangle((x, 100, x + 116, 270), fill=(164, 178, 187), outline=(100, 117, 128), width=5)
            draw.ellipse((x + 22, 122, x + 94, 194), fill=(107, 139, 162), outline=(230, 238, 241), width=5)
        # Floor and a horizontal band above it.
        draw.rectangle((0, 560, 1024, 1024), fill=(132, 149, 156))
        draw.rectangle((0, 440, 1024, 540), fill=(176, 188, 194))
    elif kind == "picnic":
        # Sky, a large green ellipse and a green foreground strip.
        draw.rectangle((0, 0, 1024, 490), fill=(139, 185, 216))
        draw.ellipse((-120, 170, 1140, 870), fill=(90, 153, 86))
        draw.rectangle((0, 735, 1024, 1024), fill=(79, 133, 74))
        # 90 short slanted strokes at random positions.
        for _ in range(90):
            x = rng.randint(0, 1024)
            y = rng.randint(500, 1010)
            draw.line((x, y, x + 16, y - 20), fill=(55, 110, 58), width=2)
    elif kind == "music_room":
        # Dark wall with a darker upper band.
        draw.rectangle((0, 0, 1024, 1024), fill=(84, 74, 82))
        draw.rectangle((0, 0, 1024, 260), fill=(62, 53, 63))
        # Row of yellow discs, each with a line running down from it.
        for x in range(80, 950, 180):
            draw.ellipse((x, 58, x + 70, 128), fill=(235, 196, 105))
            draw.line((x + 35, 128, x + 20, 260), fill=(235, 196, 105), width=5)
        draw.rectangle((0, 660, 1024, 1024), fill=(115, 89, 70))
    elif kind == "clinic":
        # Light wall with vertical seams, floor and a large framed panel.
        draw.rectangle((0, 0, 1024, 1024), fill=(219, 229, 225))
        for x in range(0, 1024, 128):
            draw.line((x, 0, x, 600), fill=(195, 209, 205), width=2)
        draw.rectangle((0, 610, 1024, 1024), fill=(174, 193, 190))
        draw.rectangle((70, 120, 954, 500), fill=(238, 242, 240), outline=(190, 204, 201), width=4)
    elif kind == "pottery":
        # Wall with horizontal boards, floor and a row of outlined ellipses.
        draw.rectangle((0, 0, 1024, 1024), fill=(153, 117, 91))
        for y in range(90, 560, 145):
            draw.rectangle((0, y, 1024, y + 18), fill=(107, 79, 60))
        draw.rectangle((0, 650, 1024, 1024), fill=(128, 91, 68))
        for x in range(20, 1024, 140):
            draw.ellipse((x, 135, x + 90, 200), fill=(185, 126, 78), outline=(99, 73, 52), width=4)
|
|
|
|
def draw_person(layer: Image.Image, box: tuple[int, int, int, int], subj: dict[str, Any], full_body: bool = True) -> None:
    """Draw a simple figure inside *box* on *layer*.

    The figure consists of hair, a face with eyes and a mouth arc, a
    trapezoid torso, two arms, an accent stripe across the upper torso and,
    when *full_body* is true, two legs. With *full_body* false the head and
    torso use larger proportions and the legs are omitted.

    Args:
        layer: Image drawn on in place.
        box: Pixel ``(x1, y1, x2, y2)`` bounds for the figure.
        subj: Subject mapping; reads ``"skin"``, ``"hair"``, ``"color"`` and
            the optional ``"accent"`` (defaults to a lightened ``"color"``).
        full_body: Draw the whole figure rather than a head-and-torso crop.

    Raises:
        KeyError: If *subj* lacks ``"skin"``, ``"hair"`` or ``"color"``.
    """
    pen = ImageDraw.Draw(layer)
    left, top, right, bottom = box
    width = max(1, right - left)
    height = max(1, bottom - top)

    skin_tone = tuple(subj["skin"])
    hair_tone = tuple(subj["hair"])
    outfit = tuple(subj["color"])
    trim = tuple(subj.get("accent", lighten(outfit, 0.35)))

    # Proportions differ between a full figure and a head-and-torso crop.
    if full_body:
        head_scale, head_drop, shoulder_scale, hip_scale = 0.14, 0.18, 0.34, 0.24
    else:
        head_scale, head_drop, shoulder_scale, hip_scale = 0.22, 0.32, 0.48, 0.36

    mid_x = left + width // 2
    radius = max(14, int(min(width, height) * head_scale))
    head_y = top + int(height * head_drop)
    torso_top = head_y + radius - 4
    torso_bottom = bottom - int(height * 0.10)
    half_shoulder = int(width * shoulder_scale)
    half_hip = int(width * hip_scale)

    # Hair is a slightly larger ellipse behind the face.
    pen.ellipse(
        (mid_x - radius - 5, head_y - radius - 9, mid_x + radius + 5, head_y + radius + 5),
        fill=hair_tone,
    )
    pen.ellipse(
        (mid_x - radius, head_y - radius, mid_x + radius, head_y + radius),
        fill=skin_tone,
        outline=darken(skin_tone, 0.25),
        width=2,
    )
    # Mouth arc.
    pen.arc(
        (mid_x - radius // 2, head_y - 2, mid_x + radius // 2, head_y + radius // 2),
        10,
        170,
        fill=darken(skin_tone, 0.45),
        width=2,
    )
    # Eyes: left first, then right, mirrored about the centre line.
    eye_top = head_y - radius // 4
    eye_bottom = head_y - radius // 9
    eye_spans = (
        (mid_x - radius // 3, mid_x - radius // 5),
        (mid_x + radius // 5, mid_x + radius // 3),
    )
    for eye_left, eye_right in eye_spans:
        pen.ellipse((eye_left, eye_top, eye_right, eye_bottom), fill=(38, 38, 36))

    # Torso: wide at the shoulders, narrower at the hips.
    torso = [
        (mid_x - half_shoulder, torso_top),
        (mid_x + half_shoulder, torso_top),
        (mid_x + half_hip, torso_bottom),
        (mid_x - half_hip, torso_bottom),
    ]
    pen.polygon(torso, fill=outfit, outline=darken(outfit, 0.25))

    # Arms run from just inside each shoulder toward the box edges.
    arm_width = max(5, width // 14)
    arm_start_y = torso_top + 12
    hand_y = top + int(height * 0.58)
    hand_inset = int(width * 0.13)
    pen.line((mid_x - half_shoulder + 4, arm_start_y, left + hand_inset, hand_y), fill=skin_tone, width=arm_width)
    pen.line((mid_x + half_shoulder - 4, arm_start_y, right - hand_inset, hand_y), fill=skin_tone, width=arm_width)

    if full_body:
        leg_start_y = torso_bottom - 2
        foot_y = bottom - 4
        foot_spread = int(width * 0.18)
        leg_fill = darken(outfit, 0.42)
        leg_width = max(7, width // 12)
        pen.line((mid_x - half_hip // 2, leg_start_y, mid_x - foot_spread, foot_y), fill=leg_fill, width=leg_width)
        pen.line((mid_x + half_hip // 2, leg_start_y, mid_x + foot_spread, foot_y), fill=leg_fill, width=leg_width)

    # Accent stripe across the upper torso.
    stripe_y = torso_top + int(height * 0.12)
    pen.line(
        (mid_x - half_shoulder + 5, stripe_y, mid_x + half_shoulder - 5, stripe_y),
        fill=trim,
        width=max(3, height // 42),
    )
|
|
|
|
def draw_object(layer: Image.Image, box: tuple[int, int, int, int], subj: dict[str, Any]) -> None:
    """Draw a stylised object inside *box* on *layer*, chosen by ``subj["kind"]``.

    Recognised kinds: ``basket``/``crate``, ``fruit_stack``/``oranges``/
    ``tomatoes``, ``umbrella``, ``mug``/``jar``/``vase``/``plant_pot``,
    ``book_stack``/``fabric_stack``, ``lamp``, ``instrument``, ``bicycle`` and
    ``dog``. Any other kind (including the default ``"box"``) is drawn as a
    rounded rectangle with an accent line.

    Args:
        layer: Image drawn on in place.
        box: Pixel ``(x1, y1, x2, y2)`` bounds for the object.
        subj: Subject mapping; reads ``"color"`` and the optional ``"accent"``
            (defaults to a lightened ``"color"``) and ``"kind"``.

    Raises:
        KeyError: If *subj* has no ``"color"`` entry.
    """
    draw = ImageDraw.Draw(layer)
    x1, y1, x2, y2 = box
    # Extents are floored at 1 so the proportional maths never sees zero or negative sizes.
    w, h = max(1, x2 - x1), max(1, y2 - y1)
    color = tuple(subj["color"])
    accent = tuple(subj.get("accent", lighten(color, 0.35)))
    kind = subj.get("kind", "box")
    if kind in {"basket", "crate"}:
        # Rounded body, then 4 darker horizontal lines and 5 lighter vertical lines.
        rounded_rect(draw, (x1, y1 + h // 6, x2, y2 - h // 12), max(8, w // 16), fill=color, outline=darken(color, 0.35), width=max(3, w // 30))
        for i in range(4):
            yy = y1 + h // 4 + i * h // 7
            draw.line((x1 + 8, yy, x2 - 8, yy), fill=darken(color, 0.28), width=3)
        for i in range(5):
            xx = x1 + w // 7 + i * w // 7
            draw.line((xx, y1 + h // 5, xx, y2 - h // 8), fill=lighten(color, 0.25), width=3)
    elif kind in {"fruit_stack", "oranges", "tomatoes"}:
        # Eight circles at fixed relative positions, each with a lighter highlight arc.
        radius = max(12, min(w, h) // 8)
        positions = [
            (0.25, 0.68), (0.40, 0.52), (0.56, 0.66), (0.69, 0.48), (0.48, 0.32),
            (0.22, 0.38), (0.74, 0.72), (0.58, 0.22),
        ]
        for px, py in positions:
            cx, cy = x1 + int(px * w), y1 + int(py * h)
            draw.ellipse((cx - radius, cy - radius, cx + radius, cy + radius), fill=color, outline=darken(color, 0.28), width=2)
            draw.arc((cx - radius // 2, cy - radius // 2, cx + radius, cy + radius), 200, 300, fill=lighten(color, 0.45), width=2)
    elif kind == "umbrella":
        # Half-disc canopy, four rib lines from the centre, then the vertical shaft.
        draw.pieslice((x1, y1, x2, y1 + int(h * 0.9)), 180, 360, fill=color, outline=darken(color, 0.35), width=max(3, w // 40))
        for i in range(1, 5):
            xx = x1 + i * w // 5
            draw.line((x1 + w // 2, y1 + h // 4, xx, y1 + int(h * 0.45)), fill=darken(color, 0.25), width=2)
        draw.line((x1 + w // 2, y1 + h // 3, x1 + w // 2, y2), fill=darken(color, 0.5), width=max(4, w // 35))
    elif kind in {"mug", "jar", "vase", "plant_pot"}:
        # Shared vessel: rounded body with a lighter ellipse for the opening.
        rounded_rect(draw, (x1 + w // 5, y1 + h // 5, x2 - w // 5, y2 - h // 8), max(10, w // 14), fill=color, outline=darken(color, 0.3), width=max(3, w // 35))
        draw.ellipse((x1 + w // 5, y1 + h // 8, x2 - w // 5, y1 + h // 3), fill=lighten(color, 0.2), outline=darken(color, 0.25), width=2)
        if kind == "mug":
            # Handle arc on the right-hand side.
            draw.arc((x2 - w // 3, y1 + h // 3, x2 - w // 15, y1 + h * 2 // 3), -80, 95, fill=darken(color, 0.25), width=max(4, w // 25))
        if kind == "plant_pot":
            # Six stems fanning out from the opening, each ending in a leaf ellipse.
            for i in range(6):
                lx = x1 + w // 2
                ly = y1 + h // 4
                ex = x1 + int(w * (0.15 + 0.14 * i))
                ey = y1 + int(h * (0.05 + 0.07 * (i % 2)))
                draw.line((lx, ly, ex, ey), fill=(44, 116, 65), width=max(3, w // 36))
                draw.ellipse((ex - 18, ey - 9, ex + 18, ey + 9), fill=(58, 139, 79))
    elif kind in {"book_stack", "fabric_stack"}:
        # Five slabs stacked bottom-up, alternating main and accent colours and narrowing upward.
        for i in range(5):
            yy = y2 - (i + 1) * h // 7
            fill = color if i % 2 == 0 else accent
            rounded_rect(draw, (x1 + i * w // 24, yy, x2 - i * w // 24, yy + h // 9), 6, fill=fill, outline=darken(fill, 0.3), width=2)
    elif kind == "lamp":
        # Trapezoid shade, vertical stem and an elliptical base.
        draw.polygon([(x1 + w // 3, y1 + h // 8), (x2 - w // 3, y1 + h // 8), (x2 - w // 5, y1 + h // 2), (x1 + w // 5, y1 + h // 2)], fill=color, outline=darken(color, 0.25))
        draw.line((x1 + w // 2, y1 + h // 2, x1 + w // 2, y2 - h // 8), fill=darken(color, 0.55), width=max(5, w // 30))
        draw.ellipse((x1 + w // 4, y2 - h // 5, x2 - w // 4, y2 - h // 12), fill=darken(color, 0.25))
    elif kind == "instrument":
        # Elliptical body, a darker inner ellipse, a small darker rectangle and four thin diagonal lines.
        draw.ellipse((x1 + w // 4, y1 + h // 3, x2 - w // 6, y2 - h // 8), fill=color, outline=darken(color, 0.35), width=max(3, w // 35))
        draw.ellipse((x1 + w // 3, y1 + h // 2, x1 + w // 2, y1 + h * 2 // 3), fill=darken(color, 0.45))
        draw.rectangle((x1 + w // 12, y1 + h // 6, x1 + w // 3, y1 + h // 4), fill=darken(color, 0.35))
        for i in range(4):
            y = y1 + h // 5 + i * h // 35
            draw.line((x1 + w // 10, y, x2 - w // 5, y + h // 3), fill=(235, 228, 193), width=1)
    elif kind == "bicycle":
        # Two outlined wheels in the lower half and a three-line triangular frame.
        draw.ellipse((x1, y1 + h // 2, x1 + w // 3, y2), outline=darken(color, 0.35), width=max(5, w // 35))
        draw.ellipse((x2 - w // 3, y1 + h // 2, x2, y2), outline=darken(color, 0.35), width=max(5, w // 35))
        draw.line((x1 + w // 6, y1 + h * 3 // 4, x1 + w // 2, y1 + h // 3), fill=color, width=max(5, w // 35))
        draw.line((x1 + w // 2, y1 + h // 3, x2 - w // 6, y1 + h * 3 // 4), fill=color, width=max(5, w // 35))
        draw.line((x1 + w // 6, y1 + h * 3 // 4, x2 - w // 6, y1 + h * 3 // 4), fill=color, width=max(5, w // 35))
    elif kind == "dog":
        # Body ellipse, head ellipse on the left, ear triangle, tail line on the right and two legs.
        draw.ellipse((x1 + w // 5, y1 + h // 3, x2 - w // 8, y2 - h // 6), fill=color, outline=darken(color, 0.3), width=3)
        draw.ellipse((x1, y1 + h // 5, x1 + w // 3, y1 + h // 2), fill=color, outline=darken(color, 0.3), width=3)
        draw.polygon([(x1 + w // 10, y1 + h // 5), (x1 + w // 4, y1 + h // 9), (x1 + w // 5, y1 + h // 3)], fill=darken(color, 0.18))
        draw.line((x2 - w // 7, y1 + h // 2, x2, y1 + h // 4), fill=color, width=max(5, w // 20))
        for lx in (x1 + w // 3, x1 + w * 2 // 3):
            draw.line((lx, y2 - h // 4, lx - w // 18, y2), fill=darken(color, 0.18), width=max(4, w // 28))
    else:
        # Fallback for unrecognised kinds: a rounded box filling the bounds, with an accent line a quarter of the way down.
        rounded_rect(draw, (x1, y1, x2, y2), max(8, min(w, h) // 8), fill=color, outline=darken(color, 0.28), width=max(3, w // 35))
        draw.line((x1 + w // 7, y1 + h // 4, x2 - w // 7, y1 + h // 4), fill=accent, width=max(3, h // 30))
|
|
|
|
def draw_subject(base: Image.Image, bbox: list[float], subj: dict[str, Any]) -> None:
    """Render one subject onto *base* at its normalized bounding box.

    The subject is drawn on a transparent canvas-sized layer, inset 8 px from
    each side of the pixel box, and then composited onto *base* together with
    a blurred, un-offset shadow.

    Args:
        base: Destination image, modified in place.
        bbox: Normalized ``[x1, y1, x2, y2]`` box of the subject.
        subj: Subject mapping; ``"is_person"`` selects the person or object
            renderer.
    """
    inset = 8
    left, top, right, bottom = bbox_px(bbox)
    inner_box = (left + inset, top + inset, right - inset, bottom - inset)
    sprite = Image.new("RGBA", CANVAS_SIZE, (0, 0, 0, 0))
    if not subj["is_person"]:
        draw_object(sprite, inner_box, subj)
    else:
        draw_person(sprite, inner_box, subj, full_body=True)
    paste_shadow(base, sprite, offset=(0, 0), blur=10)
|
|
|
|
def draw_reference(subj: dict[str, Any], path: Path, rng: random.Random) -> None:
    """Render a standalone reference image of one subject and save it to *path*.

    ``subj["ref_style"]`` selects the backdrop gradient, the backdrop props
    and the box the subject is drawn in. Recognised styles are
    ``id_headshot``, ``professional_portrait``, ``mirror_selfie``,
    ``shelf_in_store``, ``closeup_macro``, ``flatlay_topdown``,
    ``in_context_natural`` and ``everyday_candid``. Any other style gets a
    neutral gradient with a faint white disc behind the subject.

    The canvas is RGB and drawn through an ``"RGBA"``-mode ``ImageDraw``, so
    fills that carry an alpha value (macro speckles, portrait and default
    discs) are blended into the backdrop. Drawing them on an RGBA canvas would
    overwrite the pixels and lose the alpha when the image is converted to
    RGB for saving, making them fully opaque.

    Args:
        subj: Subject mapping; reads ``"ref_style"``, ``"is_person"``,
            ``"color"`` and whatever the person/object renderer needs.
        path: Destination file for the image.
        rng: Random generator for the ``shelf_in_store`` tints and the
            ``closeup_macro`` speckles.
    """
    style = subj["ref_style"]

    # Backdrop gradient (top colour, bottom colour) per style.
    fixed_backdrops = {
        "id_headshot": ((226, 229, 230), (197, 204, 208)),
        "professional_portrait": ((198, 207, 200), (121, 139, 126)),
        "mirror_selfie": ((212, 216, 218), (160, 168, 172)),
        "shelf_in_store": ((196, 196, 180), (137, 130, 108)),
        "flatlay_topdown": ((224, 222, 213), (199, 196, 186)),
        "in_context_natural": ((190, 206, 196), (126, 143, 128)),
    }
    if style == "closeup_macro":
        # The macro backdrop is derived from the subject's own colour.
        bg_top, bg_bottom = lighten(tuple(subj["color"]), 0.65), darken(tuple(subj["color"]), 0.20)
    else:
        bg_top, bg_bottom = fixed_backdrops.get(style, ((235, 235, 232), (210, 211, 207)))

    img = gradient(CANVAS_SIZE, bg_top, bg_bottom)
    # "RGBA" mode on an RGB image makes alpha-bearing fills blend.
    d = ImageDraw.Draw(img, "RGBA")

    if style == "mirror_selfie":
        # Nested rounded frames plus a small dark rectangle.
        rounded_rect(d, (176, 80, 848, 944), 28, fill=(227, 229, 229), outline=(98, 106, 112), width=16)
        rounded_rect(d, (230, 138, 794, 890), 18, fill=(198, 205, 207), outline=(170, 176, 180), width=5)
        d.rectangle((692, 395, 770, 530), fill=(38, 42, 45))
        box = (326, 250, 690, 888)
    elif style == "shelf_in_store":
        # Three shelf boards and a row of randomly tinted items.
        for y in (205, 445, 695):
            d.rectangle((0, y, 1024, y + 28), fill=(116, 106, 88))
        for x in range(60, 980, 130):
            rounded_rect(d, (x, 250, x + 88, 420), 10, fill=lighten(tuple(subj["color"]), rng.uniform(0.1, 0.55)), outline=(120, 108, 88), width=2)
        box = (270, 245, 754, 760)
    elif style == "closeup_macro":
        # 90 translucent speckles in lightened shades of the subject colour.
        for _ in range(90):
            cx, cy = rng.randint(0, 1024), rng.randint(0, 1024)
            r = rng.randint(7, 28)
            fill = lighten(tuple(subj["color"]), rng.uniform(0.05, 0.45)) + (70,)
            d.ellipse((cx - r, cy - r, cx + r, cy + r), fill=fill)
        box = (185, 150, 839, 865)
    elif style == "flatlay_topdown":
        # 64 px grid of thin lines.
        for x in range(0, 1024, 64):
            d.line((x, 0, x, 1024), fill=(204, 201, 192), width=1)
        for y in range(0, 1024, 64):
            d.line((0, y, 1024, y), fill=(204, 201, 192), width=1)
        box = (235, 205, 789, 819)
    elif style == "everyday_candid":
        # Two flat bands that together cover the whole canvas.
        d.rectangle((0, 630, 1024, 1024), fill=(139, 145, 134))
        d.rectangle((0, 0, 1024, 630), fill=(182, 197, 203))
        box = (305, 130, 730, 910)
    elif style == "professional_portrait":
        # Floor band and a translucent disc in the upper left.
        d.rectangle((0, 680, 1024, 1024), fill=(98, 99, 88))
        d.ellipse((120, 40, 420, 340), fill=(116, 140, 105, 80))
        box = (308, 122, 722, 900)
    elif style == "id_headshot":
        box = (310, 182, 714, 880)
    else:
        # Faint white disc behind the subject.
        d.ellipse((120, 90, 904, 870), fill=(255, 255, 255, 72))
        box = (245, 185, 779, 825)

    if subj["is_person"]:
        # Headshot and portrait styles use the cropped head-and-torso figure.
        draw_person(img, box, subj, full_body=style not in {"id_headshot", "professional_portrait"})
    else:
        draw_object(img, box, subj)

    img.save(path)
|
|
|
|
def draw_layout_sketch(plan: dict[str, Any], path: Path) -> None:
    """Save a schematic of the planned layout: a grid plus one labelled box per subject.

    Person boxes are blue and object boxes are orange. Box fills, outlines and
    label plates carry alpha values, so the canvas is RGB and drawn through an
    ``"RGBA"``-mode ``ImageDraw``, which blends them. Overlapping boxes
    therefore stay visible through each other. Drawing them on an RGBA canvas
    would overwrite the pixels and lose the alpha when saving as RGB.

    Args:
        plan: Scene plan; reads ``"subjects"`` and, for each subject,
            ``"intended_bbox"``, ``"is_person"`` and ``"name"``.
        path: Destination file for the image.
    """
    img = Image.new("RGB", CANVAS_SIZE, (248, 248, 245))
    # "RGBA" mode on an RGB image makes alpha-bearing fills blend.
    d = ImageDraw.Draw(img, "RGBA")
    # 128 px reference grid.
    for x in range(0, 1025, 128):
        d.line((x, 0, x, 1024), fill=(220, 220, 216), width=1)
    for y in range(0, 1025, 128):
        d.line((0, y, 1024, y), fill=(220, 220, 216), width=1)
    for subj in plan["subjects"]:
        box = bbox_px(subj["intended_bbox"])
        if subj["is_person"]:
            fill, outline = (70, 140, 220, 72), (35, 98, 180, 240)
        else:
            fill, outline = (235, 145, 55, 72), (190, 94, 18, 240)
        d.rectangle(box, fill=fill, outline=outline, width=4)
        label = subj["name"]
        # Size the label plate from the measured text width.
        tb = d.textbbox((0, 0), label, font=FONT_24)
        d.rectangle((box[0] + 6, box[1] + 6, box[0] + 18 + tb[2], box[1] + 40), fill=(255, 255, 255, 220))
        d.text((box[0] + 12, box[1] + 9), label, fill=(32, 32, 32), font=FONT_24)
    img.save(path)
|
|
|
|
def draw_overlay(main_image: Path, subjects: list[dict[str, Any]], key: str, path: Path, accepted_only: bool = False) -> None:
    """Save a copy of *main_image* with labelled bounding boxes drawn over it.

    Box colour is green when *accepted_only* is set, blue when *key* is
    ``"intended_bbox"`` and red otherwise. Each box gets a name label on a
    semi-transparent black plate placed just above it (kept on-canvas near
    the top edge).

    The source file is opened in a ``with`` block so its handle is released as
    soon as the pixels are copied. The working image is RGB and drawn through
    an ``"RGBA"``-mode ``ImageDraw`` so the label plate's alpha is blended
    rather than lost when saving as RGB.

    Args:
        main_image: Image file to annotate.
        subjects: Subject mappings; each provides ``"name"`` and the box
            stored under *key*.
        key: Name of the entry holding the normalized box to draw.
        path: Destination file for the annotated image.
        accepted_only: Skip subjects whose ``"accepted"`` entry is falsy
            (a missing entry counts as accepted).
    """
    with Image.open(main_image) as source:
        img = source.convert("RGB")
    # "RGBA" mode on an RGB image makes alpha-bearing fills blend.
    d = ImageDraw.Draw(img, "RGBA")

    # The box colour depends only on the call arguments, not on the subject.
    if accepted_only:
        color = (35, 210, 115, 255)
    elif key == "intended_bbox":
        color = (51, 132, 232, 255)
    else:
        color = (238, 72, 66, 255)

    for subj in subjects:
        if accepted_only and not subj.get("accepted", True):
            continue
        box = bbox_px(subj[key])
        d.rectangle(box, outline=color, width=5)
        label = subj["name"]
        tb = d.textbbox((0, 0), label, font=FONT_18)
        # The max() calls keep the label plate on-canvas for boxes near the top.
        d.rectangle((box[0], max(0, box[1] - 28), box[0] + tb[2] + 12, max(24, box[1] - 2)), fill=(0, 0, 0, 170))
        d.text((box[0] + 6, max(0, box[1] - 27)), label, fill=(255, 255, 255), font=FONT_18)
    img.save(path)
|
|
|
|
def draw_main(plan: dict[str, Any], path: Path) -> None:
    """Render the full scene described by *plan* and save it to *path*.

    A gradient backdrop is painted first, then the scenery, then every
    subject in ascending order of its box's bottom edge so that subjects
    lower on the canvas are drawn over those higher up.

    Args:
        plan: Scene plan; reads ``"bg_top"``, ``"bg_bottom"``,
            ``"subjects"`` and whatever ``draw_background`` needs.
        path: Destination file for the image.
    """
    top_color = tuple(plan["bg_top"])
    bottom_color = tuple(plan["bg_bottom"])
    canvas = gradient(CANVAS_SIZE, top_color, bottom_color).convert("RGBA")
    draw_background(ImageDraw.Draw(canvas), plan)

    back_to_front = list(plan["subjects"])
    back_to_front.sort(key=lambda entry: entry["intended_bbox"][3])
    for entry in back_to_front:
        draw_subject(canvas, entry["intended_bbox"], entry)

    canvas.convert("RGB").save(path)
|
|
|
|
def subject(
    name: str,
    is_person: bool,
    sub_caption: str,
    bbox: list[float],
    ref_style: str,
    ref_prompt: str,
    color: tuple[int, int, int],
    *,
    accent: tuple[int, int, int] | None = None,
    skin: tuple[int, int, int] = (171, 119, 82),
    hair: tuple[int, int, int] = (50, 38, 32),
    kind: str = "box",
) -> dict[str, Any]:
    """Build the dictionary describing one subject of a scene plan.

    Args:
        name: Identifier of the subject.
        is_person: Whether the subject is a person.
        sub_caption: Caption text for the subject.
        bbox: Normalized box, stored under ``"intended_bbox"``.
        ref_style: Reference-image style name.
        ref_prompt: Reference-image prompt text.
        color: Main RGB colour.
        accent: Accent RGB colour; a falsy value selects ``color`` lightened
            by 0.35.
        skin: Skin colour, stored only for people.
        hair: Hair colour, stored only for people.
        kind: Object kind.

    Returns:
        The subject mapping; ``"skin"`` and ``"hair"`` are present only when
        *is_person* is true.
    """
    # The fallback is computed only when no accent was supplied.
    resolved_accent = accent if accent else lighten(color, 0.35)
    record: dict[str, Any] = dict(
        name=name,
        is_person=is_person,
        sub_caption=sub_caption,
        intended_bbox=bbox,
        ref_style=ref_style,
        ref_prompt=ref_prompt,
        color=color,
        accent=resolved_accent,
        kind=kind,
    )
    if not is_person:
        return record
    record.update(skin=skin, hair=hair)
    return record
|
|
|
|
| def build_plans() -> list[dict[str, Any]]: |
| return [ |
| { |
| "sample_id": "sample_0001", |
| "scene_caption": "A rainy market stall bustles as a vendor steadies a display while a shopper reaches for fruit under a green umbrella.", |
| "story": "The rain has just eased, and the stall is busy again. A quick exchange between vendor and shopper gives the scene a focused, everyday energy.", |
| "background": "Covered outdoor produce market, damp stone floor, warm awning light, late afternoon.", |
| "style": "photorealistic", |
| "canvas_size": list(CANVAS_SIZE), |
| "background_kind": "market", |
| "bg_top": (186, 197, 198), |
| "bg_bottom": (128, 118, 96), |
| "subjects": [ |
| subject("vendor_in_apron", True, "middle-aged person with short dark hair, tan skin, blue apron, leaning forward with a concentrated expression", [0.08, 0.24, 0.31, 0.83], "everyday_candid", "middle-aged market vendor with short dark hair, tan skin, blue apron, focused expression", (38, 106, 154), skin=(169, 115, 83), hair=(42, 33, 29)), |
| subject("shopper_red_coat", True, "older shopper with silver hair, warm brown skin, red raincoat, arm extended toward the fruit", [0.26, 0.22, 0.50, 0.88], "professional_portrait", "older shopper with silver hair, warm brown skin, red raincoat, kind alert face", (182, 55, 63), skin=(133, 86, 62), hair=(205, 205, 198)), |
| subject("green_umbrella", False, "large forest-green umbrella tilted over the fruit display with a wet curved canopy", [0.37, 0.04, 0.82, 0.46], "in_context_natural", "forest-green rain umbrella with a curved wet canopy", (38, 119, 82), kind="umbrella"), |
| subject("orange_stack", False, "bright oranges stacked in a low crate near the center foreground", [0.44, 0.49, 0.71, 0.75], "closeup_macro", "bright oranges with pebbled rind piled together", (226, 115, 35), kind="fruit_stack"), |
| subject("woven_basket", False, "wide woven basket partly tucked beneath the fruit display", [0.61, 0.62, 0.88, 0.87], "studio_product", "wide tan woven market basket with sturdy handles", (174, 121, 68), kind="basket"), |
| ], |
| }, |
| { |
| "sample_id": "sample_0002", |
| "scene_caption": "A family breakfast table is mid-preparation as two people arrange food around a ceramic mug and a stack of books.", |
| "story": "The morning is calm but active. One person is setting the table while another pauses with a small smile before sitting down.", |
| "background": "Bright home kitchen with pale tile, wood table, diffuse window light.", |
| "style": "photorealistic", |
| "canvas_size": list(CANVAS_SIZE), |
| "background_kind": "kitchen", |
| "bg_top": (225, 235, 235), |
| "bg_bottom": (176, 147, 105), |
| "subjects": [ |
| subject("person_yellow_sweater", True, "young adult with curly black hair, medium brown skin, yellow sweater, holding a plate near the table", [0.11, 0.18, 0.37, 0.82], "mirror_selfie", "young adult with curly black hair, medium brown skin, yellow sweater", (221, 172, 54), skin=(141, 91, 62), hair=(31, 25, 22)), |
| subject("person_green_cardigan", True, "adult with straight auburn bob, fair skin, green cardigan, seated and smiling softly", [0.55, 0.25, 0.82, 0.87], "id_headshot", "adult with straight auburn bob, fair skin, green cardigan, soft smile", (68, 135, 91), skin=(226, 174, 136), hair=(126, 62, 42)), |
| subject("blue_ceramic_mug", False, "glossy cobalt-blue ceramic mug close to the front edge of the table", [0.35, 0.58, 0.51, 0.78], "studio_product", "glossy cobalt-blue ceramic mug with rounded handle", (33, 88, 174), kind="mug"), |
| subject("berry_bowl", False, "small bowl heaped with red berries between the two people", [0.45, 0.48, 0.63, 0.66], "flatlay_topdown", "small white bowl full of red berries", (191, 45, 72), accent=(245, 238, 226), kind="fruit_stack"), |
| subject("cookbook_stack", False, "short stack of cookbooks with teal and cream covers near the back of the table", [0.18, 0.46, 0.39, 0.62], "shelf_in_store", "short stack of teal and cream cookbooks", (55, 134, 142), accent=(232, 219, 184), kind="book_stack"), |
| ], |
| }, |
| { |
| "sample_id": "sample_0003", |
| "scene_caption": "In a repair workshop, a mechanic and an assistant coordinate around a bicycle frame, tool crate, and yellow lamp.", |
| "story": "The team is close to solving the repair. Their body language shows concentration as tools and parts crowd the bench.", |
| "background": "Dim bicycle workshop, pegboard walls, worn wooden bench, focused task lighting.", |
| "style": "photorealistic", |
| "canvas_size": list(CANVAS_SIZE), |
| "background_kind": "workshop", |
| "bg_top": (92, 98, 96), |
| "bg_bottom": (89, 75, 57), |
| "subjects": [ |
| subject("mechanic_gray_overalls", True, "stocky adult with shaved head, deep brown skin, gray overalls, crouched toward the bicycle", [0.07, 0.24, 0.34, 0.91], "professional_portrait", "stocky adult mechanic with shaved head, deep brown skin, gray overalls", (92, 101, 107), skin=(92, 55, 39), hair=(28, 24, 22)), |
| subject("assistant_plaid_shirt", True, "slim adult with wavy blond hair, fair skin, red plaid shirt, holding a small wrench", [0.68, 0.19, 0.91, 0.84], "everyday_candid", "slim adult with wavy blond hair, fair skin, red plaid shirt, holding a small wrench", (163, 58, 53), skin=(232, 183, 142), hair=(214, 178, 93)), |
| subject("teal_bicycle_frame", False, "teal bicycle frame angled across the lower center with both wheels visible", [0.29, 0.49, 0.76, 0.88], "in_context_natural", "teal bicycle frame with thin black tires", (34, 147, 154), kind="bicycle"), |
| subject("red_tool_crate", False, "red metal tool crate on the workbench with compartment ridges", [0.31, 0.38, 0.54, 0.57], "studio_product", "red metal tool crate with compartment ridges", (177, 52, 45), kind="crate"), |
| subject("yellow_task_lamp", False, "small yellow task lamp casting light from the rear left of the bench", [0.10, 0.06, 0.30, 0.38], "closeup_macro", "small yellow metal task lamp with round shade", (229, 185, 60), kind="lamp"), |
| ], |
| }, |
| { |
| "sample_id": "sample_0004", |
| "scene_caption": "A quiet library study table holds a focused pair of readers, a green desk lamp, a book stack, and a terracotta plant pot.", |
| "story": "The moment is hushed and intent. One reader marks a page while the other leans in to compare notes.", |
| "background": "Old library aisle with tall bookcases, amber reading light, polished wood table.", |
| "style": "photorealistic", |
| "canvas_size": list(CANVAS_SIZE), |
| "background_kind": "library", |
| "bg_top": (122, 83, 55), |
| "bg_bottom": (142, 108, 73), |
| "subjects": [ |
| subject("reader_blue_jacket", True, "adult with short coiled hair, dark brown skin, blue jacket, leaning over an open page", [0.14, 0.20, 0.40, 0.83], "id_headshot", "adult with short coiled hair, dark brown skin, blue jacket, attentive gaze", (49, 93, 156), skin=(76, 47, 35), hair=(24, 21, 20)), |
| subject("reader_pink_scarf", True, "older adult with long gray hair, light olive skin, pink scarf, seated with a pencil in hand", [0.48, 0.22, 0.75, 0.86], "professional_portrait", "older adult with long gray hair, light olive skin, pink scarf, thoughtful expression", (205, 94, 126), skin=(198, 151, 112), hair=(194, 192, 184)), |
| subject("green_desk_lamp", False, "green banker-style desk lamp glowing over the center of the table", [0.35, 0.40, 0.57, 0.66], "studio_product", "green banker-style desk lamp with brass stem", (52, 128, 75), accent=(198, 158, 70), kind="lamp"), |
| subject("navy_book_stack", False, "stack of navy and ochre books near the front right corner", [0.61, 0.58, 0.86, 0.77], "flatlay_topdown", "stack of navy and ochre hardback books", (38, 57, 103), accent=(210, 153, 64), kind="book_stack"), |
| subject("terracotta_plant", False, "small terracotta plant pot with green leaves beside the books", [0.75, 0.42, 0.92, 0.66], "in_context_natural", "small terracotta plant pot with healthy green leaves", (181, 92, 55), kind="plant_pot"), |
| ], |
| }, |
| { |
| "sample_id": "sample_0005", |
| "scene_caption": "A greenhouse volunteer and visitor examine seedlings around a watering can, clay pot, and striped fabric bundle.", |
| "story": "The air is humid and bright after watering. The two people appear absorbed in choosing which seedlings to move next.", |
| "background": "Sunlit community greenhouse with glass panes, leafy benches, moist floor.", |
| "style": "photorealistic", |
| "canvas_size": list(CANVAS_SIZE), |
| "background_kind": "greenhouse", |
| "bg_top": (184, 216, 205), |
| "bg_bottom": (95, 130, 85), |
| "subjects": [ |
| subject("volunteer_orange_vest", True, "adult with cropped black hair, medium tan skin, orange vest, kneeling with careful hands", [0.08, 0.27, 0.35, 0.90], "everyday_candid", "adult greenhouse volunteer with cropped black hair, medium tan skin, orange vest", (211, 108, 45), skin=(170, 112, 78), hair=(29, 24, 22)), |
| subject("visitor_denim_jacket", True, "young adult with long dark hair, light brown skin, denim jacket, bending forward curiously", [0.58, 0.18, 0.84, 0.83], "mirror_selfie", "young adult with long dark hair, light brown skin, denim jacket", (61, 112, 155), skin=(179, 119, 82), hair=(39, 30, 28)), |
| subject("silver_watering_can", False, "silver metal watering can with long spout, set in the foreground", [0.33, 0.58, 0.57, 0.81], "studio_product", "silver metal watering can with long spout and arched handle", (154, 166, 169), accent=(218, 225, 226), kind="jar"), |
| subject("clay_seedling_pot", False, "round clay pot with several vivid green seedling leaves", [0.48, 0.43, 0.66, 0.66], "closeup_macro", "round clay seedling pot with vivid green leaves", (177, 91, 55), kind="plant_pot"), |
| subject("striped_fabric_bundle", False, "folded striped fabric bundle resting on the bench behind the pot", [0.19, 0.48, 0.43, 0.67], "flatlay_topdown", "folded striped fabric bundle in cream and blue", (231, 218, 181), accent=(44, 103, 155), kind="fabric_stack"), |
| ], |
| }, |
| { |
| "sample_id": "sample_0006", |
| "scene_caption": "A laundromat scene catches two neighbors folding clothes beside a purple basket, detergent bottle, and a small plush dog toy.", |
| "story": "The dryers are humming while the neighbors trade a quick laugh. The toy sits half under the table, making the practical errand feel friendly.", |
| "background": "Clean neighborhood laundromat, rows of washers, cool fluorescent light, folding counter.", |
| "style": "photorealistic", |
| "canvas_size": list(CANVAS_SIZE), |
| "background_kind": "laundromat", |
| "bg_top": (207, 219, 225), |
| "bg_bottom": (126, 145, 153), |
| "subjects": [ |
| subject("neighbor_teal_hoodie", True, "adult with shaved sides and black curls, brown skin, teal hoodie, folding a towel", [0.12, 0.21, 0.38, 0.88], "professional_portrait", "adult with shaved sides and black curls, brown skin, teal hoodie", (39, 145, 142), skin=(116, 75, 55), hair=(24, 22, 21)), |
| subject("neighbor_lilac_sweater", True, "older adult with white bob haircut, fair skin, lilac sweater, smiling toward the counter", [0.54, 0.18, 0.80, 0.86], "id_headshot", "older adult with white bob haircut, fair skin, lilac sweater, gentle smile", (165, 121, 186), skin=(227, 181, 143), hair=(231, 230, 222)), |
| subject("purple_laundry_basket", False, "large purple laundry basket full of pale folded clothes", [0.32, 0.52, 0.59, 0.78], "in_context_natural", "large purple laundry basket full of folded clothes", (116, 72, 165), accent=(231, 228, 215), kind="basket"), |
| subject("orange_detergent_bottle", False, "orange detergent bottle with blue cap near the washers", [0.74, 0.47, 0.88, 0.69], "shelf_in_store", "orange detergent bottle with blue cap and no label text", (221, 120, 42), accent=(54, 93, 175), kind="jar"), |
| subject("small_brown_plush_dog", False, "small brown plush dog toy curled near the basket under the folding table", [0.60, 0.69, 0.86, 0.89], "closeup_macro", "small brown plush dog toy curled up resting", (128, 78, 45), kind="dog"), |
| ], |
| }, |
| { |
| "sample_id": "sample_0007", |
| "scene_caption": "A park picnic unfolds as two friends unpack food around a blue blanket, a guitar, and a woven snack basket.", |
| "story": "The friends are settling into an easy afternoon. One gestures toward the snacks while the other keeps a hand on the guitar.", |
| "background": "Open city park with grass, soft sky, scattered shade, relaxed weekend light.", |
| "style": "photorealistic", |
| "canvas_size": list(CANVAS_SIZE), |
| "background_kind": "picnic", |
| "bg_top": (148, 192, 220), |
| "bg_bottom": (88, 141, 78), |
| "subjects": [ |
| subject("friend_white_hat", True, "young adult with dark skin, white brimmed hat, navy shirt, seated cross-legged", [0.10, 0.32, 0.36, 0.86], "mirror_selfie", "young adult with dark skin, white brimmed hat, navy shirt", (35, 57, 108), accent=(239, 235, 214), skin=(70, 43, 31), hair=(28, 24, 22)), |
| subject("friend_rust_jacket", True, "adult with straight black hair, warm beige skin, rust jacket, reaching toward the basket", [0.60, 0.28, 0.87, 0.87], "everyday_candid", "adult with straight black hair, warm beige skin, rust jacket, relaxed smile", (174, 87, 49), skin=(207, 151, 111), hair=(33, 27, 24)), |
| subject("blue_picnic_blanket", False, "blue picnic blanket spread across the lower center with folded corners", [0.24, 0.62, 0.78, 0.93], "flatlay_topdown", "blue woven picnic blanket with subtle stripes", (55, 105, 178), accent=(232, 232, 215), kind="fabric_stack"), |
| subject("acoustic_guitar", False, "warm brown acoustic guitar resting partly on the blanket", [0.39, 0.45, 0.65, 0.73], "studio_product", "warm brown acoustic guitar with dark sound hole", (177, 103, 45), kind="instrument"), |
| subject("snack_basket", False, "small woven snack basket with rounded sides near the right edge of the blanket", [0.68, 0.56, 0.91, 0.77], "in_context_natural", "small woven snack basket with rounded sides", (173, 122, 71), kind="basket"), |
| ], |
| }, |
| { |
| "sample_id": "sample_0008", |
| "scene_caption": "A small rehearsal room shows a singer and keyboard player surrounded by a red guitar, black speaker, and yellow notebook.", |
| "story": "The group is between takes, listening for the next cue. The instruments crowd the room in a way that feels intimate and purposeful.", |
| "background": "Cozy music practice room, dark acoustic wall panels, amber lamps, polished floor.", |
| "style": "photorealistic", |
| "canvas_size": list(CANVAS_SIZE), |
| "background_kind": "music_room", |
| "bg_top": (86, 76, 86), |
| "bg_bottom": (103, 82, 65), |
| "subjects": [ |
| subject("singer_green_shirt", True, "adult with short brown hair, light skin, green shirt, standing near a microphone", [0.14, 0.16, 0.38, 0.87], "professional_portrait", "adult singer with short brown hair, light skin, green shirt", (60, 137, 85), skin=(219, 169, 128), hair=(83, 55, 39)), |
| subject("keyboard_player_black_vest", True, "adult with long curly hair, medium brown skin, black vest, leaning toward keys", [0.57, 0.22, 0.85, 0.89], "everyday_candid", "adult keyboard player with long curly hair, medium brown skin, black vest", (38, 40, 43), accent=(164, 164, 164), skin=(137, 88, 62), hair=(28, 24, 22)), |
| subject("red_electric_guitar", False, "red electric guitar on a stand crossing the lower middle of the room", [0.35, 0.42, 0.60, 0.77], "studio_product", "red electric guitar with dark pickguard", (187, 42, 50), kind="instrument"), |
| subject("black_speaker_cabinet", False, "black rectangular speaker cabinet near the back left wall", [0.05, 0.49, 0.24, 0.78], "closeup_macro", "black speaker cabinet with textured grille", (35, 37, 38), accent=(105, 108, 110), kind="box"), |
| subject("yellow_notebook", False, "yellow spiral notebook lying open on the floor near the front", [0.58, 0.68, 0.80, 0.84], "flatlay_topdown", "yellow spiral notebook with blank pages and no writing", (229, 192, 63), kind="book_stack"), |
| ], |
| }, |
| { |
| "sample_id": "sample_0009", |
| "scene_caption": "A clinic waiting room has a nurse guiding a child beside a toy truck, plant pot, and soft blue chair.", |
| "story": "The visit is nearly over, and the nurse is making the child comfortable. The toys and bright chair soften the clinical setting.", |
| "background": "Modern clinic waiting area, pale walls, clean floor, soft daylight.", |
| "style": "photorealistic", |
| "canvas_size": list(CANVAS_SIZE), |
| "background_kind": "clinic", |
| "bg_top": (224, 234, 230), |
| "bg_bottom": (169, 190, 188), |
| "subjects": [ |
| subject("nurse_blue_scrubs", True, "adult with dark braided hair, brown skin, blue scrubs, kneeling with one hand extended", [0.10, 0.19, 0.38, 0.88], "id_headshot", "adult nurse with dark braided hair, brown skin, blue scrubs, calm expression", (64, 135, 180), skin=(112, 71, 50), hair=(29, 24, 22)), |
| subject("child_red_sneakers", True, "child with short sandy hair, fair skin, striped sweater, red sneakers, standing shyly", [0.45, 0.29, 0.64, 0.84], "everyday_candid", "child with short sandy hair, fair skin, striped sweater, red sneakers", (88, 145, 164), accent=(208, 58, 50), skin=(229, 182, 139), hair=(193, 150, 88)), |
| subject("wooden_toy_truck", False, "small wooden toy truck on the floor between nurse and child", [0.34, 0.68, 0.55, 0.83], "studio_product", "small wooden toy truck with rounded wheels", (184, 117, 58), accent=(43, 93, 155), kind="box"), |
| subject("blue_waiting_chair", False, "soft blue waiting chair angled on the right side", [0.66, 0.41, 0.92, 0.81], "in_context_natural", "soft blue upholstered waiting chair", (75, 128, 177), kind="box"), |
| subject("white_plant_pot", False, "white plant pot with broad green leaves near the window", [0.75, 0.14, 0.93, 0.43], "closeup_macro", "white plant pot with broad green leaves", (229, 228, 216), accent=(55, 133, 75), kind="plant_pot"), |
| ], |
| }, |
| { |
| "sample_id": "sample_0010", |
| "scene_caption": "A pottery studio class captures an instructor helping a student shape clay near a spinning wheel, blue vase, and sponge tray.", |
| "story": "Wet clay is on the table and the lesson is hands-on. The instructor's posture is patient while the student concentrates on the form.", |
| "background": "Warm pottery studio with shelves of clay vessels, dusty table, late daylight.", |
| "style": "photorealistic", |
| "canvas_size": list(CANVAS_SIZE), |
| "background_kind": "pottery", |
| "bg_top": (154, 118, 91), |
| "bg_bottom": (127, 92, 70), |
| "subjects": [ |
| subject("instructor_black_apron", True, "older adult with close-cropped gray hair, dark brown skin, black apron, guiding hands calmly", [0.09, 0.20, 0.37, 0.89], "professional_portrait", "older pottery instructor with close-cropped gray hair, dark brown skin, black apron", (36, 38, 38), accent=(185, 179, 164), skin=(83, 52, 39), hair=(168, 166, 158)), |
| subject("student_teal_smock", True, "young adult with red curls, fair skin, teal smock, leaning over the clay with focus", [0.48, 0.24, 0.76, 0.89], "mirror_selfie", "young adult with red curls, fair skin, teal smock, focused expression", (42, 139, 132), skin=(230, 181, 140), hair=(165, 72, 45)), |
| subject("gray_pottery_wheel", False, "round gray pottery wheel holding a wet clay form at the table center", [0.31, 0.55, 0.57, 0.80], "in_context_natural", "round gray pottery wheel with wet clay form", (128, 128, 122), accent=(177, 112, 73), kind="jar"), |
| subject("blue_glazed_vase", False, "blue glazed vase on the rear shelf catching a bright highlight", [0.68, 0.33, 0.85, 0.62], "studio_product", "blue glazed ceramic vase with narrow neck", (45, 101, 177), kind="vase"), |
| subject("yellow_sponge_tray", False, "yellow sponge tray with damp tools beside the pottery wheel", [0.55, 0.63, 0.79, 0.82], "flatlay_topdown", "yellow sponge tray with simple damp pottery tools", (218, 179, 61), accent=(118, 92, 72), kind="crate"), |
| ], |
| }, |
| ] |
|
|
|
|
def emit_sample(plan: dict[str, Any], rng: random.Random) -> dict[str, Any]:
    """Render one planned sample to disk and return its dataset row.

    Creates ``OUT_DIR / plan["sample_id"]`` with ``references/`` and
    ``overlays/`` sub-folders, draws the layout sketch, one reference image
    per subject and the main image, then writes ``plan.json``,
    ``detections.json``, ``identity_verification.json`` and ``row.json``.

    Args:
        plan: Scene plan; the keys read here are ``sample_id``,
            ``scene_caption``, ``story``, ``background``, ``style``,
            ``canvas_size`` and ``subjects``.
        rng: Random source handed to ``draw_reference`` and ``jitter_bbox``
            and used to draw the identity scores.

    Returns:
        The row dictionary that is also serialized to ``row.json``.
    """
    subjects = plan["subjects"]
    sample_dir = OUT_DIR / plan["sample_id"]
    refs_dir = sample_dir / "references"
    overlays_dir = sample_dir / "overlays"
    for folder in (refs_dir, overlays_dir):
        folder.mkdir(parents=True, exist_ok=True)

    def write_json(filename: str, payload: Any) -> None:
        # Every JSON artifact uses 2-space indentation and a trailing newline.
        (sample_dir / filename).write_text(json.dumps(payload, indent=2) + "\n")

    # Rendering order matters: the reference drawings consume values from
    # ``rng`` before the per-subject measurements below do.
    main_path = sample_dir / "main_image.png"
    draw_layout_sketch(plan, sample_dir / "layout_sketch.png")
    for subj in subjects:
        draw_reference(subj, refs_dir / f"ref_{subj['name']}.png", rng)
    draw_main(plan, main_path)

    accepted: list[dict[str, Any]] = []
    detection_rows: list[dict[str, Any]] = []
    verification_rows: list[dict[str, Any]] = []
    for subj in subjects:
        name = subj["name"]
        intended = subj["intended_bbox"]
        # Simulated measurement: perturb the planned box, then draw a score.
        measured = jitter_bbox(intended, rng)
        score = round(rng.uniform(0.82, 0.96), 3)
        overlap = iou(intended, measured)
        verdict = "match" if score >= 0.70 else "weak_match"

        accepted.append(
            {
                "name": name,
                "is_person": subj["is_person"],
                "ref_style": subj["ref_style"],
                "sub_caption": subj["sub_caption"],
                "intended_bbox": intended,
                "measured_bbox": measured,
                "iou_intended_vs_measured": overlap,
                "layout_followed": overlap >= IOU_THRESHOLD,
                "identity_score": score,
                "identity_verdict": verdict,
                "ref_image": f"references/ref_{name}.png",
            }
        )
        detection_rows.append(
            {
                "name": name,
                "present": True,
                "bbox": measured,
                "confidence": "high",
                "notes": "Synthetic renderer places this subject directly from the planned layout.",
            }
        )
        verification_rows.append(
            {
                "name": name,
                "score": score,
                "verdict": verdict,
                "rationale": "Reference and main rendering share the same generated subject attributes.",
            }
        )

    # Each accepted entry already carries the subject's name and planned box.
    overlay_subjects = [
        {
            "name": entry["name"],
            "intended_bbox": entry["intended_bbox"],
            "measured_bbox": entry["measured_bbox"],
            "accepted": entry["identity_score"] >= IDENTITY_THRESHOLD,
        }
        for entry in accepted
    ]
    draw_overlay(main_path, overlay_subjects, "intended_bbox", overlays_dir / "overlay_intended.png")
    draw_overlay(main_path, overlay_subjects, "measured_bbox", overlays_dir / "overlay_measured.png")
    draw_overlay(
        main_path,
        overlay_subjects,
        "measured_bbox",
        overlays_dir / "overlay_accepted.png",
        accepted_only=True,
    )

    row = {
        "sample_id": plan["sample_id"],
        "scene_caption": plan["scene_caption"],
        "story": plan["story"],
        "background": plan["background"],
        "style": plan["style"],
        "canvas_size": plan["canvas_size"],
        "main_image": "main_image.png",
        "layout_sketch": "layout_sketch.png",
        "n_planned": len(subjects),
        "n_accepted": len(accepted),
        "accepted": accepted,
        "dropped": [],
    }

    # Only these plan keys are written to plan.json; the rest (e.g. the
    # background colours) stay out of the published plan.
    public_keys = frozenset(
        (
            "sample_id",
            "scene_caption",
            "story",
            "background",
            "style",
            "canvas_size",
            "subjects",
        )
    )
    plan_public = {key: plan[key] for key in plan if key in public_keys}

    write_json("plan.json", plan_public)
    write_json("detections.json", detection_rows)
    write_json("identity_verification.json", verification_rows)
    write_json("row.json", row)
    return row
|
|
|
|
def main() -> None:
    """Generate every planned sample and write the dataset-level files.

    Seeds the global ``random`` module, emits each plan from
    ``build_plans()`` with its own ``random.Random`` seeded by the base seed
    plus the 1-based plan index, then writes ``dataset.json``,
    ``dataset.jsonl`` and ``README.md`` into ``OUT_DIR`` and prints a summary.
    """
    base_seed = 20260605
    random.seed(base_seed)
    OUT_DIR.mkdir(exist_ok=True)

    rows = [
        emit_sample(plan, random.Random(base_seed + index))
        for index, plan in enumerate(build_plans(), start=1)
    ]

    (OUT_DIR / "dataset.json").write_text(json.dumps(rows, indent=2) + "\n")

    # JSON Lines variant: one compact record per line.
    with (OUT_DIR / "dataset.jsonl").open("w") as handle:
        handle.writelines(
            json.dumps(row, separators=(",", ":")) + "\n" for row in rows
        )

    readme_text = (
        "# 10 Sample Four-Element Image Dataset\n\n"
        "This directory contains 10 generated samples following `data_recipe.md`.\n"
        "Each `sample_XXXX` folder includes a composed `main_image.png`, independent "
        "subject references in `references/`, a `layout_sketch.png`, overlay images, "
        "and the emitted dataset row in `row.json`.\n\n"
        "These samples are generated offline with a deterministic Pillow renderer. "
        "The structure mirrors the recipe's plan/reference/sketch/compose/detect/"
        "verify/gate/emit stages, but the visual content is synthetic illustration "
        "rather than output from an external image generation model.\n"
    )
    (OUT_DIR / "README.md").write_text(readme_text)

    print(f"generated {len(rows)} samples in {OUT_DIR}")
|
|
|
|
# Run the generator only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|