Spaces:

LeafCat79
/

Image_Generator_for_HTML_Games

Sleeping

File size: 62,638 Bytes

import base64
import hashlib
import io
import json
import math
import os
import random
import re
import tempfile
import time
from dataclasses import dataclass
from html import escape
from urllib.parse import quote
from urllib.error import HTTPError, URLError
from urllib.request import Request, urlopen

import gradio as gr
from PIL import Image, ImageDraw, ImageEnhance, ImageFilter, ImageFont

try:
    from huggingface_hub import InferenceClient
except Exception:
    InferenceClient = None


# Sample single-file HTML canvas game. It loads "sprite_background.png" and
# "sprite_player.png" and moves the player with WASD / arrow keys; those
# filenames follow the "sprite_<role>.png" pattern that parse_assets produces.
# NOTE(review): presumably shown as the default game code in the UI - the
# consumer is outside this view.
STARTER_HTML = """<!DOCTYPE html>
<html>
<head>
  <meta charset="UTF-8" />
  <title>Mini Forest Game</title>
  <style>
    body { margin: 0; background: #111; display: grid; place-items: center; min-height: 100vh; }
    canvas { border: 2px solid #222; background: #222; image-rendering: pixelated; }
  </style>
</head>
<body>
  <canvas id="gameCanvas" width="800" height="450"></canvas>
  <script>
    const canvas = document.getElementById("gameCanvas");
    const ctx = canvas.getContext("2d");

    const background = new Image();
    background.src = "sprite_background.png";
    const playerImg = new Image();
    playerImg.src = "sprite_player.png";

    const keys = new Set();
    const player = { x: 380, y: 205, w: 48, h: 48, speed: 4 };

    window.addEventListener("keydown", e => keys.add(e.key));
    window.addEventListener("keyup", e => keys.delete(e.key));

    function update() {
      if (keys.has("a") || keys.has("ArrowLeft")) player.x -= player.speed;
      if (keys.has("d") || keys.has("ArrowRight")) player.x += player.speed;
      if (keys.has("w") || keys.has("ArrowUp")) player.y -= player.speed;
      if (keys.has("s") || keys.has("ArrowDown")) player.y += player.speed;
      player.x = Math.max(0, Math.min(canvas.width - player.w, player.x));
      player.y = Math.max(0, Math.min(canvas.height - player.h, player.y));
    }

    function draw() {
      ctx.drawImage(background, 0, 0, canvas.width, canvas.height);
      ctx.drawImage(playerImg, player.x, player.y, player.w, player.h);
      ctx.fillStyle = "white";
      ctx.font = "18px sans-serif";
      ctx.fillText("Use WASD or arrow keys", 18, 28);
    }

    function loop() {
      update();
      draw();
      requestAnimationFrame(loop);
    }
    loop();
  </script>
</body>
</html>"""


# Example role list in the "role: prompt" line format understood by
# parse_role_lines (one asset per line).
DEFAULT_ROLES = """player: top-down pixel-art adventurer hero, transparent background, bright readable silhouette
background: enchanted forest clearing game background, top-down view, soft moonlight, detailed but not too busy"""

# Shorter example in the same format.
# NOTE(review): presumably used as placeholder text for the roles input - confirm.
ROLE_PLACEHOLDER = """player: blue robot hero sprite
background: empty space station floor map"""


@dataclass
class AssetSpec:
    """Description of one image asset to generate."""

    # Role name exactly as written by the user (e.g. "player").
    role: str
    # Full text prompt used to generate or draw the asset.
    prompt: str
    # Output filename; parse_assets builds it as "sprite_<slug>.png".
    filename: str
    # Target size in pixels.
    width: int
    height: int


@dataclass
class StylePlan:
    """Structured art direction produced by interpret_style_hint."""

    # Short phrases that build_asset_prompt joins into the final prompt text.
    medium: str
    palette: str
    texture: str
    lighting: str
    linework: str
    camera: str
    # Style family tags chosen by interpret_style_hint (e.g. ("vector", "neon")).
    tags: tuple[str, ...]


# Runtime configuration, read once at import time from environment variables.

# Hugging Face API token; empty string when the variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN", "")
# Model id and step count for the "free" image path.
# NOTE(review): presumably a local diffusion pipeline - the consumer is not in view.
FREE_IMAGE_MODEL = os.environ.get("FREE_IMAGE_MODEL", "segmind/tiny-sd")
# NOTE: int() raises ValueError at import time if the value is not an integer string.
FREE_IMAGE_STEPS = int(os.environ.get("FREE_IMAGE_STEPS", "5"))
# Feature flags: each is True only when its variable is exactly "1".
USE_HF_PROMPT_PROVIDER = os.environ.get("USE_HF_PROMPT_PROVIDER", "0") == "1"
USE_HF_IMAGE_PROVIDER = os.environ.get("USE_HF_IMAGE_PROVIDER", "0") == "1"
USE_DIFFUSION_FOR_SPRITES = os.environ.get("USE_DIFFUSION_FOR_SPRITES", "0") == "1"
USE_DIFFUSION_FOR_BACKGROUNDS = os.environ.get("USE_DIFFUSION_FOR_BACKGROUNDS", "0") == "1"
# Hosted model ids and the chat-completions endpoint used by hf_prompt_json.
HF_IMAGE_MODEL = os.environ.get("HF_IMAGE_MODEL", "black-forest-labs/FLUX.1-schnell")
HF_PROMPT_MODEL = os.environ.get("HF_PROMPT_MODEL", "Qwen/Qwen2.5-Coder-7B-Instruct:fastest")
HF_PROMPT_ENDPOINT = os.environ.get("HF_PROMPT_ENDPOINT", "https://router.huggingface.co/v1/chat/completions")
# Module-level slots, both initially None.
# NOTE(review): presumably a lazily created pipeline and its load error - confirm against the code that sets them.
FREE_DIFFUSION_PIPE = None
FREE_DIFFUSION_ERROR = None


def slugify(value: str) -> str:
    """Return a lowercase, underscore-separated identifier for *value*.

    Runs of characters other than ASCII letters and digits collapse into a
    single underscore, and leading/trailing underscores are removed. When
    nothing is left, the literal ``"asset"`` is returned.
    """
    lowered = value.strip().lower()
    collapsed = re.sub(r"[^a-zA-Z0-9]+", "_", lowered)
    trimmed = collapsed.strip("_")
    if trimmed:
        return trimmed
    return "asset"


def interpret_style_hint(style_hint: str) -> StylePlan:
    """Translate a free-form style hint into a structured ``StylePlan``.

    The hint is lower-cased and scanned for keyword substrings. The first
    style family in ``style_rules`` with a matching keyword decides the
    medium, palette, texture, lighting, linework and tags; a generic
    illustration profile is used when nothing matches. The camera phrase is
    chosen by a second, independent keyword scan.

    Args:
        style_hint: User-supplied theme/style text. ``None`` or an empty
            string is treated as "no hint".

    Returns:
        The populated ``StylePlan``.
    """
    text = (style_hint or "").lower()

    def has(*words: str) -> bool:
        return any(word in text for word in words)

    # Each row: (trigger keywords, tags, medium, palette, texture, lighting,
    # linework). Order matters: the first matching row wins.
    style_rules = (
        (
            ("watercolor", "ink wash", "gouache", "paper"),
            ("watercolor",),
            "watercolor game illustration on textured paper",
            "soft layered washes, muted pigments, gentle color bleed",
            "visible paper grain, translucent edges, organic brush pooling",
            "soft ambient light with low contrast",
            "loose ink accents, imperfect hand-painted contour lines",
        ),
        (
            ("vector", "flat", "icon", "logo"),
            ("vector",),
            "clean vector game art",
            "flat separated color fields with bold accent colors",
            # Deliberately avoids the word "painterly": palette_for and
            # style_tags_for treat it as an oil-paint keyword, which would
            # give vector assets the oil palette and oil brush strokes.
            "smooth fills, no brushwork or grain",
            "graphic cel shading with crisp highlights",
            "thick precise outlines and hard-edged silhouettes",
        ),
        (
            ("clay", "claymation", "stop motion", "plasticine"),
            ("clay",),
            "claymation stop-motion game asset",
            "chunky colored clay with warm handmade tones",
            "fingerprint-like clay bumps, rounded sculpted forms",
            "soft studio light with small specular highlights",
            "no ink outlines, shape defined by soft shadows",
        ),
        (
            ("oil", "painterly", "impasto", "canvas"),
            ("oil",),
            "oil-painted fantasy game art",
            "rich blended colors with deep shadows",
            "visible bristle strokes and canvas-like texture",
            "dramatic directional light",
            "painted edges instead of hard outlines",
        ),
        (
            ("pixel", "8-bit", "16-bit", "retro"),
            ("pixel",),
            "retro pixel-art game asset",
            "limited palette with readable color ramps",
            "sharp square pixels and no anti-aliasing",
            "simple two-tone pixel shading",
            "one-pixel dark outline and blocky silhouette",
        ),
        (
            ("anime", "manga"),
            ("anime",),
            "anime game illustration",
            "clean saturated colors with expressive accents",
            "smooth cel-shaded surfaces",
            "bright rim light and glossy highlights",
            "confident manga-style outlines",
        ),
        (
            ("cyber", "neon", "synthwave"),
            ("vector", "neon"),
            "neon cyberpunk arcade game art",
            "electric cyan, magenta, violet, and black",
            "glowing edges, glossy panels, light bloom",
            "high contrast neon rim lighting",
            "hard sci-fi outlines with luminous accents",
        ),
    )
    fallback_profile = (
        ("illustration",),
        "cohesive 2D game illustration",
        "distinct theme-driven colors with clear value contrast",
        "clean readable game asset finish",
        "balanced game lighting",
        "clear readable silhouette and controlled edges",
    )

    tags, medium, palette, texture, lighting, linework = next(
        (rule[1:] for rule in style_rules if has(*rule[0])),
        fallback_profile,
    )

    if has("top-down", "top down", "shooter", "rpg", "arena"):
        camera = "top-down readable game camera"
    elif has("platformer", "side-scroller", "side scroller"):
        camera = "side-view platformer camera"
    else:
        camera = "game-ready camera angle matching the role"

    return StylePlan(medium, palette, texture, lighting, linework, camera, tuple(tags))


def build_asset_prompt(role: str, prompt: str, style_hint: str) -> str:
    """Build the full text-to-image prompt for one asset role.

    Combines the user's role and prompt with the style plan derived from
    *style_hint* and a fixed instruction block. The instruction differs for
    backgrounds (role slug contains "background", "backdrop", "scene", "map"
    or "level") and for sprites (everything else).

    The boilerplate wording is chosen so that it does not contain words the
    keyword heuristics elsewhere in this file react to. Those heuristics match
    by substring on the finished prompt: "canvas" is an oil-paint keyword in
    palette_for/style_tags_for, "vehicle" selects the car sprite in
    draw_sprite, and "watermark" contains "water", which selects the ocean
    theme. With those words in the boilerplate, every locally built prompt
    triggered them regardless of what the user asked for.

    Args:
        role: Asset role name, e.g. "player" or "background".
        prompt: The user's description of the asset.
        style_hint: Shared theme/style text passed to interpret_style_hint.

    Returns:
        A single prompt string.
    """
    plan = interpret_style_hint(style_hint)
    slug = slugify(role)
    is_background = any(word in slug for word in ("background", "backdrop", "scene", "map", "level"))
    if is_background:
        asset_instruction = (
            "Create one complete 2D game background scene, not a texture tile, not a material sample, "
            "not a UV map. Full scene composition for a browser game. Empty environment only: no player, "
            "no character, no creature, no vehicle, no mascot, no foreground subject."
        )
    else:
        asset_instruction = (
            "Create one complete standalone 2D sprite of the whole subject, centered, entire subject visible, "
            "single object, transparent or plain background, readable silhouette. Not a texture map, not a tiled "
            "pattern, not a material swatch, not a UV unwrap, not a 3D model skin."
        )
    return (
        f"{role} asset: {prompt}. Style interpretation: {plan.medium}; {plan.palette}; "
        f"{plan.texture}; {plan.lighting}; {plan.linework}; {plan.camera}. "
        f"{asset_instruction} "
        "Game asset, readable at small size, no text, no signature."
    )


def parse_role_lines(raw_roles: str) -> list[tuple[str, str]]:
    """Parse the multi-line role text into ``(role, prompt)`` pairs.

    Each non-empty line that does not start with ``#`` describes one asset.
    A line is split on the first ``:`` (or, if there is none, the first
    ``=``); a line with neither separator is used as both role and prompt.
    A missing prompt falls back to the role, and a missing role falls back
    to the prompt, so neither element of a pair is ever empty. Lines that
    contain only a separator are skipped.

    Args:
        raw_roles: The raw text; ``None`` or empty yields an empty list.

    Returns:
        Pairs in input order.
    """
    parsed: list[tuple[str, str]] = []
    for raw_line in (raw_roles or "").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue

        if ":" in line:
            role, _, prompt = line.partition(":")
        elif "=" in line:
            role, _, prompt = line.partition("=")
        else:
            role, prompt = line, line
        role = role.strip()
        prompt = prompt.strip()
        if not role and not prompt:
            # e.g. a lone ":" - nothing usable on this line.
            continue
        parsed.append((role or prompt, prompt or role))
    return parsed


def infer_code_context(html_code: str) -> str:
    """Summarize the game HTML as a short text hint for prompt generation.

    Only the first 12000 characters are inspected. The summary lists quoted
    image filenames (png/jpg/jpeg/webp/gif), game mechanics guessed from
    keyword substrings, and the size of each ``<canvas>`` tag formatted as
    ``WIDTHxHEIGHT`` (``?`` for a missing dimension).

    Args:
        html_code: The game's HTML/JS source; ``None`` is treated as empty.

    Returns:
        A three-sentence summary string.
    """
    text = (html_code or "")[:12000]
    filenames = sorted(set(re.findall(r"['\"]([^'\"]+?\.(?:png|jpg|jpeg|webp|gif))['\"]", text, flags=re.I)))

    # Read width and height separately per tag so that both are captured
    # whatever order the attributes appear in.
    canvas_sizes = []
    for tag in re.findall(r"<canvas\b[^>]*>", text, flags=re.I):
        width = re.search(r"\bwidth\s*=\s*['\"]?(\d+)", tag, flags=re.I)
        height = re.search(r"\bheight\s*=\s*['\"]?(\d+)", tag, flags=re.I)
        if width or height:
            canvas_sizes.append(
                f"{width.group(1) if width else '?'}x{height.group(1) if height else '?'}"
            )

    lowered = text.lower()
    mechanic_keywords = {
        "top-down movement": ("arrowup", "arrowdown", "keys.has(\"w\")", "keys.has('w')"),
        "platformer": ("gravity", "grounded", "platform"),
        "shooting": ("bullet", "shoot", "projectile", "laser"),
        "enemies": ("enemy", "monster", "spawn"),
    }
    controls = [
        label
        for label, words in mechanic_keywords.items()
        if any(word in lowered for word in words)
    ]
    return (
        f"Referenced asset filenames: {', '.join(filenames[:24]) or 'none found'}. "
        f"Detected game mechanics: {', '.join(controls) or 'not obvious'}. "
        f"Canvas hints found: {', '.join(canvas_sizes[:4]) or 'none'}."
    )


def local_prompt_map(role_lines: list[tuple[str, str]], style_hint: str) -> dict[str, str]:
    """Build a role -> prompt mapping with the local prompt builder.

    Each ``(role, prompt)`` pair is passed through build_asset_prompt; if a
    role appears more than once, the last occurrence wins.
    """
    prompts: dict[str, str] = {}
    for role, prompt in role_lines:
        prompts[role] = build_asset_prompt(role, prompt, style_hint)
    return prompts


def extract_json_object(text: str) -> dict | None:
    match = re.search(r"\{.*\}", text, flags=re.S)
    if not match:
        return None
    try:
        value = json.loads(match.group(0))
    except Exception:
        return None
    return value if isinstance(value, dict) else None


def short_error(exc: Exception) -> str:
    """Render *exc* as a short, single-string error description.

    HTTP errors report the status code and up to 500 characters of the
    response body, URL errors report their reason, and anything else reports
    the exception class name plus up to 500 characters of its message.
    """
    # HTTPError is a subclass of URLError, so it has to be checked first.
    if isinstance(exc, HTTPError):
        payload = exc.read()
        snippet = payload.decode("utf-8", errors="replace")[:500]
        return f"HTTP {exc.code}: {snippet}"
    if isinstance(exc, URLError):
        return f"URL error: {exc.reason}"
    kind = type(exc).__name__
    detail = str(exc)[:500]
    return f"{kind}: {detail}"


def hf_prompt_json(html_code: str, role_lines: list[tuple[str, str]], style_hint: str) -> tuple[dict[str, str] | None, str | None]:
    """Ask the hosted chat model for one image prompt per role.

    Returns ``(prompts, error)``. When the provider is disabled the result is
    ``(None, None)``; when it is enabled but no token is configured, or the
    request or parsing fails, ``prompts`` is ``None`` and ``error`` explains
    why. On success ``prompts`` maps each role that received a non-empty
    value in the model's JSON reply to that value, and ``error`` is ``None``.
    """
    if not USE_HF_PROMPT_PROVIDER:
        return None, None
    if not HF_TOKEN:
        return None, "HF_TOKEN is not visible to the Space runtime"

    bullet_lines = [f"- {role}: {prompt}" for role, prompt in role_lines]
    role_block = "\n".join(bullet_lines)
    system_prompt = (
        "You are a senior game art director and prompt engineer. Read the HTML game context, "
        "the requested asset roles, and the shared theme/style. Return ONLY a JSON object where "
        "each key is the exact role name and each value is one concise text-to-image prompt. "
        "Each prompt must specify: subject silhouette/shape, camera angle, art style, palette, "
        "transparent background for sprites/items, full scene for backgrounds, no text, no watermark. "
        "Make different roles visually distinct and suitable for embedding in an HTML game."
    )
    user_prompt = (
        f"HTML/game context summary: {infer_code_context(html_code)}\n\n"
        f"Shared theme/style: {style_hint}\n\n"
        f"Asset roles:\n{role_block}\n\n"
        "Return JSON only."
    )
    body = json.dumps(
        {
            "model": HF_PROMPT_MODEL,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            "max_tokens": 900,
            "temperature": 0.55,
            "top_p": 0.9,
            "stream": False,
        }
    ).encode("utf-8")
    auth_headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json",
    }
    request = Request(HF_PROMPT_ENDPOINT, data=body, headers=auth_headers, method="POST")

    try:
        with urlopen(request, timeout=90) as response:
            raw = response.read().decode("utf-8", errors="replace")
        reply = json.loads(raw)

        # Accept the chat-completions shape as well as the older
        # "generated_text" list/dict shapes.
        if isinstance(reply, dict) and reply.get("choices"):
            message = reply["choices"][0].get("message", {})
            if isinstance(message, dict):
                text = message.get("content", "")
            else:
                text = str(message)
        elif isinstance(reply, list) and reply:
            first = reply[0]
            if isinstance(first, dict):
                text = first.get("generated_text", "")
            else:
                text = str(first)
        elif isinstance(reply, dict):
            text = reply.get("generated_text", reply.get("text", ""))
        else:
            text = str(reply)

        obj = extract_json_object(text)
        if not obj:
            return None, f"{HF_PROMPT_MODEL} returned non-JSON text: {text[:300]}"

        prompts: dict[str, str] = {}
        for role, _ in role_lines:
            candidate = str(obj.get(role, "")).strip()
            if candidate:
                prompts[role] = candidate
        return prompts, None
    except Exception as exc:
        return None, short_error(exc)


def build_prompt_map(html_code: str, raw_roles: str, style_hint: str) -> tuple[list[tuple[str, str]], dict[str, str], str, str | None]:
    """Choose between model-written and locally built prompts.

    Returns ``(role_lines, prompt_map, source_label, error)``. The model's
    prompts are used only when it supplied one for every requested role;
    otherwise the locally built prompts are returned together with any error
    reported by the provider.
    """
    role_lines = parse_role_lines(raw_roles)
    fallback_prompts = local_prompt_map(role_lines, style_hint)
    ai_prompts, provider_error = hf_prompt_json(html_code, role_lines, style_hint)

    ai_is_complete = bool(ai_prompts) and all(role in ai_prompts for role, _ in role_lines)
    if not ai_is_complete:
        return role_lines, fallback_prompts, "local prompt interpreter", provider_error
    return role_lines, ai_prompts, HF_PROMPT_MODEL, None


def parse_assets(raw_roles: str, style_hint: str, prompt_map: dict[str, str] | None = None) -> list[AssetSpec]:
    """Turn the role text into a list of ``AssetSpec`` entries.

    Roles whose slug contains a background keyword get an 800x450 canvas;
    all other roles get a 128x128 sprite. The prompt comes from *prompt_map*
    when it has a non-empty entry for the role, otherwise it is built
    locally with build_asset_prompt.
    """
    background_words = ("background", "backdrop", "scene", "map", "level")
    overrides = prompt_map or {}

    specs: list[AssetSpec] = []
    for role, prompt in parse_role_lines(raw_roles):
        slug = slugify(role)
        if any(word in slug for word in background_words):
            width, height = 800, 450
        else:
            width, height = 128, 128

        full_prompt = overrides.get(role)
        if not full_prompt:
            full_prompt = build_asset_prompt(role, prompt, style_hint)

        specs.append(
            AssetSpec(
                role=role,
                prompt=full_prompt,
                filename=f"sprite_{slug}.png",
                width=width,
                height=height,
            )
        )
    return specs


def is_background_spec(spec: AssetSpec) -> bool:
    """Report whether *spec* describes a background rather than a sprite.

    True when the asset is more than twice as wide as it is tall, or when
    the role slug contains one of the background keywords.
    """
    slug = slugify(spec.role)
    if spec.width > spec.height * 2:
        return True
    for word in ("background", "backdrop", "scene", "map", "level"):
        if word in slug:
            return True
    return False


def image_to_png_bytes(content: bytes, width: int, height: int) -> bytes:
    """Decode *content*, resize it to ``width`` x ``height`` and re-encode as PNG.

    The image is converted to RGBA first and resized with Lanczos resampling.
    """
    source = io.BytesIO(content)
    rgba = Image.open(source).convert("RGBA")
    resized = rgba.resize((width, height), Image.LANCZOS)

    buffer = io.BytesIO()
    resized.save(buffer, format="PNG")
    return buffer.getvalue()


def png_bytes_to_data_uri(content: bytes) -> str:
    """Return *content* as a base64 ``data:image/png`` URI."""
    encoded = base64.b64encode(content).decode("ascii")
    return f"data:image/png;base64,{encoded}"


def write_gallery_image(content: bytes, role: str) -> str:
    """Write *content* to a new temporary ``.png`` file and return its path.

    The filename is prefixed with the slugified role. The file is created
    with ``delete=False`` so that it outlives this call; the caller owns
    cleanup. The context manager guarantees the handle is closed even when
    the write fails.

    Args:
        content: Encoded image bytes to store.
        role: Asset role, used only for the filename prefix.

    Returns:
        Absolute path of the written file.
    """
    with tempfile.NamedTemporaryFile(
        prefix=f"{slugify(role)}_",
        suffix=".png",
        delete=False,
    ) as handle:
        handle.write(content)
        return handle.name


def palette_for(text: str) -> list[tuple[int, int, int]]:
    """Pick a four-colour RGB palette for *text*.

    The lower-cased text is checked against an ordered keyword table: art
    style keywords first, then theme keywords. The first row with a keyword
    that occurs as a substring supplies the palette. When nothing matches,
    a palette is derived deterministically from the SHA-256 digest of the
    text, using four hue offsets at fixed saturation.
    """
    haystack = text.lower()

    # Ordered (keywords, palette) rows; earlier rows take priority.
    keyword_palettes = (
        (("watercolor", "ink wash", "gouache", "paper grain"), [(113, 151, 168), (207, 180, 158), (241, 232, 214), (154, 118, 148)]),
        (("clay", "claymation", "stop-motion", "plasticine"), [(204, 105, 83), (238, 175, 104), (107, 72, 61), (242, 213, 151)]),
        (("oil", "painterly", "impasto", "canvas"), [(98, 45, 122), (213, 154, 70), (38, 28, 42), (230, 212, 156)]),
        (("vector", "flat", "logo"), [(42, 125, 246), (255, 81, 128), (18, 24, 38), (255, 221, 64)]),
        (("anime", "manga"), [(78, 151, 236), (255, 139, 173), (42, 45, 76), (255, 238, 166)]),
        (("lava", "fire", "volcano", "hell"), [(139, 28, 18), (239, 84, 34), (42, 18, 16), (255, 198, 71)]),
        (("ice", "snow", "frost", "arctic"), [(75, 151, 191), (187, 235, 255), (24, 63, 96), (240, 252, 255)]),
        (("desert", "sand", "dune", "western"), [(194, 139, 72), (236, 198, 112), (102, 66, 36), (255, 226, 151)]),
        (("ocean", "sea", "water", "underwater"), [(25, 103, 155), (57, 191, 205), (8, 43, 85), (177, 239, 224)]),
        (("forest", "garden", "grass", "jungle"), [(51, 132, 72), (126, 196, 81), (24, 72, 43), (242, 201, 84)]),
        (("dungeon", "castle", "stone", "crypt"), [(78, 82, 96), (133, 140, 154), (34, 36, 48), (181, 154, 94)]),
        (("city", "street", "cyber", "neon"), [(40, 52, 87), (39, 230, 226), (19, 21, 38), (239, 67, 154)]),
        (("sci-fi", "scifi", "space", "arena", "metal", "shooter"), [(30, 216, 236), (235, 64, 96), (18, 22, 31), (245, 192, 51)]),
    )
    for keywords, swatch in keyword_palettes:
        for keyword in keywords:
            if keyword in haystack:
                return swatch

    def shade(hue: float, saturation: float, lightness: float) -> tuple[int, int, int]:
        # Standard HSL -> RGB conversion; the hue picks one of six 60-degree sectors.
        chroma = (1 - abs(2 * lightness - 1)) * saturation
        second = chroma * (1 - abs((hue / 60) % 2 - 1))
        lift = lightness - chroma / 2
        sectors = (
            (chroma, second, 0),
            (second, chroma, 0),
            (0, chroma, second),
            (0, second, chroma),
            (second, 0, chroma),
            (chroma, 0, second),
        )
        red, green, blue = sectors[min(int(hue // 60), 5)]
        return (int((red + lift) * 255), int((green + lift) * 255), int((blue + lift) * 255))

    seed_hue = hashlib.sha256(text.encode("utf-8")).digest()[0] % 360
    hue_steps = ((0, 0.46), (45, 0.56), (145, 0.38), (220, 0.64))
    generated = []
    for offset, lightness in hue_steps:
        generated.append(shade((seed_hue + offset) % 360, 0.68, lightness))
    return generated


def style_tags_for(prompt: str) -> set[str]:
    """Return the set of style tags whose keywords occur in *prompt*.

    Matching is a case-insensitive substring test; a prompt may receive
    several tags or none.
    """
    haystack = prompt.lower()
    keyword_groups = {
        "watercolor": ("watercolor", "ink wash", "gouache", "paper grain"),
        "vector": ("vector", "flat", "logo", "hard-edged", "graphic"),
        "clay": ("clay", "claymation", "stop-motion", "plasticine", "sculpted"),
        "oil": ("oil", "painterly", "impasto", "canvas", "bristle"),
        "pixel": ("pixel", "8-bit", "16-bit", "retro"),
        "anime": ("anime", "manga", "cel-shaded"),
        "neon": ("neon", "cyberpunk", "synthwave", "glowing"),
    }
    return {
        tag
        for tag, keywords in keyword_groups.items()
        if any(keyword in haystack for keyword in keywords)
    }


def apply_style_finish(content: bytes, spec: AssetSpec, rng: random.Random) -> bytes:
    """Apply style-specific post-processing to an encoded image.

    The tags detected in ``spec.prompt`` select zero or more finishing
    passes (watercolor, oil, clay, vector, anime, neon, pixel), applied in
    that fixed order. Backgrounds get more and larger decorations than
    sprites.

    Every pass that draws creates a fresh ``ImageDraw`` for the current
    image. Filters and enhancers return new image objects, so a drawing
    context made before such a step would paint onto a discarded image.

    Args:
        content: Encoded source image bytes.
        spec: Asset description; its prompt selects the passes and palette.
        rng: Random source for decoration placement. The call order is fixed,
            so a seeded generator gives reproducible output.

    Returns:
        PNG bytes of the finished image, or *content* unchanged when no
        style tag matches.
    """
    tags = style_tags_for(spec.prompt)
    if not tags:
        return content

    image = Image.open(io.BytesIO(content)).convert("RGBA")
    is_bg = is_background_spec(spec)
    # Loop-invariant: computed once instead of once per brush stroke.
    palette = palette_for(spec.prompt)

    if "watercolor" in tags:
        wash = Image.new("RGBA", image.size, (245, 238, 220, 34 if is_bg else 12))
        image = Image.alpha_composite(image, wash)
        draw = ImageDraw.Draw(image, "RGBA")
        for _ in range(60 if is_bg else 18):
            x = rng.randint(-20, image.width)
            y = rng.randint(-20, image.height)
            r = rng.randint(10, 70 if is_bg else 24)
            color = rng.choice(palette) + (rng.randint(18, 55),)
            draw.ellipse((x - r, y - r, x + r, y + r), fill=color)
        image = image.filter(ImageFilter.GaussianBlur(0.45))

    if "oil" in tags:
        # Re-bind to the current image: the watercolor pass may have replaced it.
        draw = ImageDraw.Draw(image, "RGBA")
        for _ in range(120 if is_bg else 34):
            x = rng.randint(0, image.width)
            y = rng.randint(0, image.height)
            length = rng.randint(8, 38 if is_bg else 16)
            color = rng.choice(palette) + (rng.randint(75, 145),)
            draw.line((x, y, x + rng.randint(-length, length), y + rng.randint(-length, length)), fill=color, width=rng.randint(2, 5))
        image = ImageEnhance.Contrast(image).enhance(1.12)

    if "clay" in tags:
        image = ImageEnhance.Color(image).enhance(0.82)
        image = ImageEnhance.Contrast(image).enhance(1.18)
        draw = ImageDraw.Draw(image, "RGBA")
        for _ in range(45 if is_bg else 14):
            x = rng.randint(0, image.width)
            y = rng.randint(0, image.height)
            r = rng.randint(3, 18 if is_bg else 8)
            draw.ellipse((x - r, y - r, x + r, y + r), outline=(255, 238, 205, rng.randint(25, 70)), width=2)
        image = image.filter(ImageFilter.SMOOTH_MORE)

    if "vector" in tags:
        image = ImageEnhance.Contrast(image).enhance(1.35)
        image = ImageEnhance.Color(image).enhance(1.25)
        if not is_bg:
            # Dilate the alpha mask and fill it with a dark colour to get a
            # thick outline behind the sprite.
            alpha = image.getchannel("A")
            outline = alpha.filter(ImageFilter.MaxFilter(7)).filter(ImageFilter.GaussianBlur(0.2))
            outlined = Image.new("RGBA", image.size, (12, 16, 28, 255))
            outlined.putalpha(outline)
            image = Image.alpha_composite(outlined, image)

    if "anime" in tags:
        image = ImageEnhance.Color(image).enhance(1.35)
        image = ImageEnhance.Contrast(image).enhance(1.18)
        draw = ImageDraw.Draw(image, "RGBA")
        for _ in range(8 if is_bg else 4):
            x = rng.randint(0, image.width)
            y = rng.randint(0, image.height)
            draw.line((x, y, x + rng.randint(20, 80), y - rng.randint(10, 50)), fill=(255, 255, 255, 80), width=2)

    if "neon" in tags:
        glow = image.filter(ImageFilter.GaussianBlur(4 if is_bg else 3))
        image = Image.blend(glow, image, 0.72)
        image = ImageEnhance.Color(image).enhance(1.45)

    if "pixel" in tags:
        # Downscale to a quarter and back with nearest-neighbour for blocky pixels.
        small = image.resize((max(1, image.width // 4), max(1, image.height // 4)), Image.Resampling.NEAREST)
        image = small.resize(image.size, Image.Resampling.NEAREST)

    out = io.BytesIO()
    image.save(out, format="PNG")
    return out.getvalue()


def draw_background(spec: AssetSpec, rng: random.Random) -> bytes:
    """Procedurally draw a background image for *spec* and return PNG bytes.

    The lower-cased prompt is checked against keyword groups in a fixed
    order (sci-fi arena, space, garden/forest, lava, dungeon, ocean); the
    first group with a matching substring selects a themed scene. When no
    group matches, a generic gradient with random blobs and specks is drawn
    from the palette returned by palette_for.

    Matching is by substring, so a word such as "watermark" matches "water".

    Args:
        spec: Asset description; supplies the prompt and the image size.
        rng: Random source for element placement. The order of calls on it
            determines the output, so a seeded generator is reproducible.

    Returns:
        The encoded PNG image.
    """
    colors = palette_for(spec.prompt)
    image = Image.new("RGBA", (spec.width, spec.height), colors[2] + (255,))
    draw = ImageDraw.Draw(image, "RGBA")
    prompt = spec.prompt.lower()

    # Sci-fi arena: checkerboard floor panels, cyan grid lines, hazard-striped
    # plates and red ring markers.
    if any(word in prompt for word in ("sci-fi", "scifi", "arena", "metal", "hazard", "grid", "shooter")):
        image = Image.new("RGBA", (spec.width, spec.height), (18, 22, 31, 255))
        draw = ImageDraw.Draw(image, "RGBA")
        panel = 64
        for y in range(0, spec.height, panel):
            for x in range(0, spec.width, panel):
                # Alternate two shades to form a checkerboard.
                shade = 24 + ((x // panel + y // panel) % 2) * 12
                draw.rectangle(
                    (x, y, x + panel - 2, y + panel - 2),
                    fill=(shade, shade + 4, shade + 14, 255),
                    outline=(58, 74, 92, 170),
                )
        for x in range(0, spec.width, 96):
            draw.line((x, 0, x, spec.height), fill=(34, 215, 236, 95), width=2)
        for y in range(0, spec.height, 96):
            draw.line((0, y, spec.width, y), fill=(34, 215, 236, 95), width=2)
        # Eight 120x34 plates with diagonal yellow stripes.
        # NOTE: randint raises ValueError if the image is narrower than 120 px
        # or shorter than 34 px.
        for _ in range(8):
            x = rng.randint(0, spec.width - 120)
            y = rng.randint(0, spec.height - 34)
            draw.rectangle((x, y, x + 120, y + 34), fill=(36, 42, 53, 235), outline=(110, 125, 142, 210))
            for stripe in range(0, 120, 24):
                draw.polygon(
                    [(x + stripe, y), (x + stripe + 12, y), (x + stripe - 12, y + 34), (x + stripe - 24, y + 34)],
                    fill=(245, 192, 51, 210),
                )
        for _ in range(18):
            x = rng.randint(0, spec.width)
            y = rng.randint(0, spec.height)
            r = rng.randint(10, 32)
            draw.ellipse((x - r, y - r, x + r, y + r), outline=(224, 54, 85, 100), width=2)
        out = io.BytesIO()
        image.save(out, format="PNG")
        return out.getvalue()

    # Space: dark vertical gradient, translucent palette-coloured ellipses and
    # small bright star squares.
    if any(word in prompt for word in ("space", "star", "nebula", "galaxy", "cosmic")):
        image = Image.new("RGBA", (spec.width, spec.height), (3, 4, 18, 255))
        draw = ImageDraw.Draw(image, "RGBA")
        for y in range(spec.height):
            # mix runs from 0 at the top row to 1 at the bottom row.
            mix = y / max(1, spec.height - 1)
            color = (
                int(4 + 24 * mix),
                int(6 + 10 * mix),
                int(24 + 45 * mix),
                255,
            )
            draw.line((0, y, spec.width, y), fill=color)
        for _ in range(7):
            x = rng.randint(-120, spec.width)
            y = rng.randint(-90, spec.height)
            w = rng.randint(180, 420)
            h = rng.randint(90, 230)
            fill = rng.choice(colors) + (rng.randint(32, 70),)
            draw.ellipse((x, y, x + w, y + h), fill=fill)
        for _ in range(140):
            x = rng.randint(0, spec.width - 1)
            y = rng.randint(0, spec.height - 1)
            size = rng.choice((1, 1, 2, 3))
            shade = rng.randint(180, 255)
            draw.rectangle((x, y, x + size, y + size), fill=(shade, shade, 255, rng.randint(130, 240)))
        out = io.BytesIO()
        image.save(out, format="PNG")
        return out.getvalue()

    # Garden / forest: sky gradient down to a horizon at 56% of the height,
    # grass below it, a tapering path, flowers and bush ellipses.
    if any(word in prompt for word in ("garden", "flower", "grass", "meadow", "forest", "path")):
        sky = (184, 224, 239)
        grass = (80, 151, 82)
        image = Image.new("RGBA", (spec.width, spec.height), sky + (255,))
        draw = ImageDraw.Draw(image, "RGBA")
        horizon = int(spec.height * 0.56)
        for y in range(horizon):
            mix = y / max(1, horizon - 1)
            color = (
                int(230 * (1 - mix) + sky[0] * mix),
                int(246 * (1 - mix) + sky[1] * mix),
                int(255 * (1 - mix) + sky[2] * mix),
                255,
            )
            draw.line((0, y, spec.width, y), fill=color)
        draw.rectangle((0, horizon, spec.width, spec.height), fill=grass + (255,))
        path_color = (198, 169, 112, 255)
        # Trapezoid: narrow at the horizon, wide at the bottom edge.
        draw.polygon(
            [
                (int(spec.width * 0.39), horizon),
                (int(spec.width * 0.61), horizon),
                (int(spec.width * 0.83), spec.height),
                (int(spec.width * 0.18), spec.height),
            ],
            fill=path_color,
        )
        # Flowers: a short stem line topped by a palette-coloured petal.
        # NOTE: randint raises ValueError when horizon + 48 > height - 18
        # (i.e. for short images).
        for _ in range(18):
            x = rng.randint(0, spec.width)
            y = rng.randint(horizon + 48, spec.height - 18)
            stem = rng.randint(5, 12)
            draw.line((x, y + stem, x, y), fill=(43, 109, 53, 210), width=2)
            petal = rng.choice(colors) + (230,)
            draw.ellipse((x - 4, y - 3, x + 4, y + 5), fill=petal)
        # Bushes scattered around the horizon line.
        for _ in range(10):
            x = rng.randint(-40, spec.width)
            y = rng.randint(horizon - 35, horizon + 20)
            draw.ellipse((x, y, x + rng.randint(65, 140), y + rng.randint(36, 72)), fill=(42, 121, 64, 185))
        out = io.BytesIO()
        image.save(out, format="PNG")
        return out.getvalue()

    # Lava: dark rock bands, orange streaks and translucent yellow blobs.
    if any(word in prompt for word in ("lava", "fire", "volcano")):
        image = Image.new("RGBA", (spec.width, spec.height), (52, 23, 18, 255))
        draw = ImageDraw.Draw(image, "RGBA")
        for y in range(0, spec.height, 58):
            draw.rectangle((0, y, spec.width, y + 30), fill=(70, 32, 25, 255))
        for _ in range(18):
            x = rng.randint(-80, spec.width)
            y = rng.randint(0, spec.height)
            draw.line((x, y, x + rng.randint(120, 320), y + rng.randint(-20, 20)), fill=(255, 111, 43, 220), width=rng.randint(5, 12))
        for _ in range(45):
            x = rng.randint(0, spec.width)
            y = rng.randint(0, spec.height)
            r = rng.randint(5, 20)
            draw.ellipse((x - r, y - r, x + r, y + r), fill=(255, 192, 53, rng.randint(70, 140)))
        out = io.BytesIO()
        image.save(out, format="PNG")
        return out.getvalue()

    # Dungeon: grid of 54 px stone tiles with a random shade each, plus
    # translucent gold dots.
    if any(word in prompt for word in ("dungeon", "castle", "stone", "crypt")):
        image = Image.new("RGBA", (spec.width, spec.height), (48, 50, 62, 255))
        draw = ImageDraw.Draw(image, "RGBA")
        tile = 54
        for y in range(0, spec.height, tile):
            for x in range(0, spec.width, tile):
                shade = rng.randint(48, 78)
                draw.rectangle((x, y, x + tile - 2, y + tile - 2), fill=(shade, shade + 2, shade + 8, 255), outline=(27, 29, 38, 190))
        for _ in range(22):
            x = rng.randint(0, spec.width)
            y = rng.randint(0, spec.height)
            draw.ellipse((x - 8, y - 8, x + 8, y + 8), fill=(185, 146, 80, 110))
        out = io.BytesIO()
        image.save(out, format="PNG")
        return out.getvalue()

    # Ocean: blue vertical gradient, wave arcs and bubble outlines.
    if any(word in prompt for word in ("ocean", "sea", "water", "underwater")):
        image = Image.new("RGBA", (spec.width, spec.height), (8, 54, 92, 255))
        draw = ImageDraw.Draw(image, "RGBA")
        for y in range(spec.height):
            mix = y / max(1, spec.height - 1)
            draw.line((0, y, spec.width, y), fill=(int(16 + 6 * mix), int(99 + 62 * mix), int(151 + 58 * mix), 255))
        for _ in range(26):
            x = rng.randint(-100, spec.width)
            y = rng.randint(0, spec.height)
            draw.arc((x, y, x + 180, y + 70), 0, 180, fill=(165, 232, 225, 90), width=3)
        for _ in range(38):
            x = rng.randint(0, spec.width)
            y = rng.randint(0, spec.height)
            r = rng.randint(2, 8)
            draw.ellipse((x - r, y - r, x + r, y + r), outline=(205, 247, 245, 120), width=2)
        out = io.BytesIO()
        image.save(out, format="PNG")
        return out.getvalue()

    # Generic fallback: vertical gradient from palette colour 2 (top) to
    # palette colour 0 (bottom), drawn on the image created at the top.
    for y in range(spec.height):
        mix = y / max(1, spec.height - 1)
        color = tuple(int(colors[2][i] * (1 - mix) + colors[0][i] * mix) for i in range(3))
        draw.line((0, y, spec.width, y), fill=color + (255,))

    # Large translucent blobs in palette colours.
    for _ in range(34):
        x = rng.randint(-80, spec.width)
        y = rng.randint(-40, spec.height)
        w = rng.randint(80, 260)
        h = rng.randint(36, 150)
        fill = rng.choice(colors) + (rng.randint(45, 110),)
        draw.ellipse((x, y, x + w, y + h), fill=fill)

    # Small square specks in palette colours.
    for _ in range(80):
        x = rng.randint(0, spec.width)
        y = rng.randint(0, spec.height)
        size = rng.randint(2, 7)
        fill = rng.choice(colors) + (rng.randint(90, 190),)
        draw.rectangle((x, y, x + size, y + size), fill=fill)

    out = io.BytesIO()
    image.save(out, format="PNG")
    return out.getvalue()


def draw_sprite(spec: AssetSpec, rng: random.Random) -> bytes:
    """Draw a procedural sprite for ``spec`` and return it as PNG bytes.

    The sprite is painted on a fully transparent RGBA canvas of
    ``spec.width`` x ``spec.height``. The shape is chosen by testing the role
    and prompt against keyword groups in a fixed order (vehicle, turret,
    collectible, slime, bug, projectile, enemy, player); the first group that
    matches draws its shape and returns immediately. If nothing matches, a
    generic figure with a body, head, eyes, arms and legs is drawn.

    Args:
        spec: Asset description; ``role``, ``prompt``, ``width`` and ``height``
            are read here.
        rng: Random source used for the per-sprite variations (barrel angle,
            wing span, limb positions, speckles). The sequence of draws from it
            determines the result, so statement order matters.

    Returns:
        The encoded PNG image.
    """
    colors = palette_for(f"{spec.role} {spec.prompt}")
    image = Image.new("RGBA", (spec.width, spec.height), (0, 0, 0, 0))
    draw = ImageDraw.Draw(image, "RGBA")
    cx, cy = spec.width // 2, spec.height // 2
    # Offsets below are written for a 128px canvas and multiplied by this factor.
    scale = min(spec.width, spec.height) / 128
    # slugify is defined elsewhere; presumably it normalises the role so the
    # exact membership tests below work -- TODO confirm.
    role = slugify(spec.role)
    role_prompt = f"{spec.role} {spec.prompt}".lower()
    outline = (18, 24, 31, 255)
    body = colors[0] + (255,)
    accent = colors[1] + (255,)
    trim = colors[3] + (255,)

    # NOTE(review): every `word in role_prompt` test below is a substring test,
    # so short keywords such as "car" or "key" also match inside longer words.

    # Vehicle: shadow ellipse, outlined rounded body, two panels and four dark
    # rounded rectangles (two per side, presumably wheels).
    if any(word in role_prompt for word in ("car", "racing", "racer", "buggy", "vehicle", "truck")):
        shadow = (int(cx - 43 * scale), int(cy + 28 * scale), int(cx + 43 * scale), int(cy + 42 * scale))
        draw.ellipse(shadow, fill=(0, 0, 0, 58))
        draw.rounded_rectangle(
            (cx - 34 * scale, cy - 42 * scale, cx + 34 * scale, cy + 42 * scale),
            radius=int(18 * scale),
            fill=outline,
        )
        draw.rounded_rectangle(
            (cx - 26 * scale, cy - 34 * scale, cx + 26 * scale, cy + 34 * scale),
            radius=int(13 * scale),
            fill=body,
        )
        draw.rectangle((cx - 15 * scale, cy - 24 * scale, cx + 15 * scale, cy - 5 * scale), fill=accent)
        draw.rectangle((cx - 18 * scale, cy + 8 * scale, cx + 18 * scale, cy + 27 * scale), fill=trim)
        for side in (-1, 1):
            draw.rounded_rectangle(
                (cx + side * 27 * scale - 9 * scale, cy - 30 * scale, cx + side * 27 * scale + 9 * scale, cy - 13 * scale),
                radius=int(5 * scale),
                fill=(20, 22, 27, 255),
            )
            draw.rounded_rectangle(
                (cx + side * 27 * scale - 9 * scale, cy + 15 * scale, cx + side * 27 * scale + 9 * scale, cy + 32 * scale),
                radius=int(5 * scale),
                fill=(20, 22, 27, 255),
            )
        out = io.BytesIO()
        image.save(out, format="PNG")
        return out.getvalue()

    # Turret: round base plus a barrel drawn as two stacked lines (outline, then
    # a thinner trim line on top).
    if any(word in role_prompt for word in ("turret", "tower", "cannon", "launcher")):
        draw.ellipse((cx - 38 * scale, cy + 22 * scale, cx + 38 * scale, cy + 45 * scale), fill=(0, 0, 0, 60))
        draw.ellipse((cx - 36 * scale, cy - 12 * scale, cx + 36 * scale, cy + 42 * scale), fill=outline)
        draw.ellipse((cx - 28 * scale, cy - 5 * scale, cx + 28 * scale, cy + 34 * scale), fill=body)
        barrel_angle = rng.choice((-0.45, -0.2, 0, 0.2, 0.45))
        length = 52 * scale
        # Only the horizontal position of the barrel tip depends on the angle;
        # its height is fixed.
        bx = cx + math.sin(barrel_angle) * length
        by = cy - 38 * scale
        draw.line((cx, cy - 5 * scale, bx, by), fill=outline, width=max(10, int(17 * scale)))
        draw.line((cx, cy - 5 * scale, bx, by), fill=trim, width=max(5, int(9 * scale)))
        draw.rectangle((cx - 19 * scale, cy + 15 * scale, cx + 19 * scale, cy + 39 * scale), fill=accent)
        out = io.BytesIO()
        image.save(out, format="PNG")
        return out.getvalue()

    # Collectible: a dark disc backdrop, then a gem, key or coin on top of it.
    if role in ("coin", "gem", "key", "orb", "pickup", "collectible") or any(
        word in role_prompt for word in ("coin", "gem", "key", "pickup", "collectible", "powerup", "power-up", "treasure")
    ):
        draw.ellipse((cx - 33 * scale, cy - 33 * scale, cx + 33 * scale, cy + 33 * scale), fill=outline)
        if any(word in role_prompt for word in ("gem", "crystal", "diamond")):
            points = [
                (cx, cy - 43 * scale),
                (cx + 36 * scale, cy - 8 * scale),
                (cx + 20 * scale, cy + 40 * scale),
                (cx - 20 * scale, cy + 40 * scale),
                (cx - 36 * scale, cy - 8 * scale),
            ]
            draw.polygon(points, fill=body)
            draw.polygon([(cx, cy - 34 * scale), (cx + 18 * scale, cy - 4 * scale), (cx, cy + 28 * scale), (cx - 18 * scale, cy - 4 * scale)], fill=accent)
        elif "key" in role_prompt:
            draw.ellipse((cx - 30 * scale, cy - 20 * scale, cx + 8 * scale, cy + 18 * scale), fill=body)
            # Zero-alpha fill: intended as the hole in the key's bow.
            draw.ellipse((cx - 17 * scale, cy - 8 * scale, cx - 3 * scale, cy + 6 * scale), fill=(0, 0, 0, 0))
            draw.rounded_rectangle((cx + 2 * scale, cy - 5 * scale, cx + 43 * scale, cy + 7 * scale), radius=int(5 * scale), fill=accent)
            draw.rectangle((cx + 30 * scale, cy + 4 * scale, cx + 43 * scale, cy + 17 * scale), fill=trim)
        else:
            draw.ellipse((cx - 27 * scale, cy - 27 * scale, cx + 27 * scale, cy + 27 * scale), fill=body)
            draw.ellipse((cx - 17 * scale, cy - 17 * scale, cx + 17 * scale, cy + 17 * scale), outline=trim, width=max(3, int(7 * scale)))
            draw.arc((cx - 12 * scale, cy - 20 * scale, cx + 20 * scale, cy + 13 * scale), 195, 300, fill=(255, 255, 255, 180), width=max(2, int(4 * scale)))
        out = io.BytesIO()
        image.save(out, format="PNG")
        return out.getvalue()

    # Slime: half-disc body (pie slice from 180 to 360 degrees) with two eyes
    # and a mouth arc.
    if any(word in role_prompt for word in ("slime", "blob", "gel", "jelly")):
        draw.ellipse((cx - 40 * scale, cy + 24 * scale, cx + 40 * scale, cy + 42 * scale), fill=(0, 0, 0, 55))
        draw.pieslice((cx - 40 * scale, cy - 28 * scale, cx + 40 * scale, cy + 52 * scale), 180, 360, fill=outline)
        draw.pieslice((cx - 32 * scale, cy - 20 * scale, cx + 32 * scale, cy + 42 * scale), 180, 360, fill=body)
        for side in (-1, 1):
            draw.ellipse((cx + side * 11 * scale - 5 * scale, cy + 3 * scale, cx + side * 11 * scale + 5 * scale, cy + 13 * scale), fill=(255, 255, 255, 230))
        draw.arc((cx - 12 * scale, cy + 12 * scale, cx + 12 * scale, cy + 27 * scale), 10, 170, fill=trim, width=max(2, int(4 * scale)))
        out = io.BytesIO()
        image.save(out, format="PNG")
        return out.getvalue()

    # Bug: oval body with a centre line and six legs, each tilted up or down at
    # random.
    if any(word in role_prompt for word in ("beetle", "bug", "spider", "insect", "crawler", "creep")):
        draw.ellipse((cx - 28 * scale, cy - 34 * scale, cx + 28 * scale, cy + 34 * scale), fill=outline)
        draw.ellipse((cx - 21 * scale, cy - 27 * scale, cx + 21 * scale, cy + 29 * scale), fill=body)
        draw.line((cx, cy - 25 * scale, cx, cy + 30 * scale), fill=trim, width=max(2, int(4 * scale)))
        for y in (-18, 0, 18):
            for side in (-1, 1):
                draw.line((cx + side * 18 * scale, cy + y * scale, cx + side * 45 * scale, cy + (y + rng.choice((-10, 10))) * scale), fill=outline, width=max(3, int(5 * scale)))
        out = io.BytesIO()
        image.save(out, format="PNG")
        return out.getvalue()

    # Projectile: low-alpha glow ellipse, outlined core, inner accent core and
    # five short random streaks.
    if role in ("bullet", "projectile", "laser", "shot") or any(word in role_prompt for word in ("bullet", "projectile", "laser", "energy")):
        glow = colors[1] + (85,)
        core_width = rng.randint(7, 13) * scale
        core_height = rng.randint(30, 44) * scale
        glow_width = core_width + rng.randint(18, 30) * scale
        glow_height = core_height + rng.randint(12, 25) * scale
        draw.ellipse((cx - glow_width, cy - glow_height, cx + glow_width, cy + glow_height), fill=glow)
        draw.rounded_rectangle(
            (cx - core_width, cy - core_height, cx + core_width, cy + core_height),
            radius=int(9 * scale),
            fill=outline,
        )
        draw.rounded_rectangle(
            (cx - max(2, core_width - 4 * scale), cy - max(8, core_height - 6 * scale), cx + max(2, core_width - 4 * scale), cy + max(8, core_height - 6 * scale)),
            radius=int(5 * scale),
            fill=accent,
        )
        for _ in range(5):
            px = cx + rng.randint(-24, 24) * scale
            py = cy + rng.randint(-38, 38) * scale
            draw.line(
                (px, py, px + rng.randint(-10, 10) * scale, py + rng.randint(-8, 8) * scale),
                fill=trim,
                width=max(1, int(2 * scale)),
            )
        out = io.BytesIO()
        image.save(out, format="PNG")
        return out.getvalue()

    # Enemy: oval hull with an accent core, a triangular fin and a trim line on
    # each side, plus five dots at random polar positions around the centre.
    if role in ("enemy", "monster", "alien", "drone", "foe", "boss") or any(word in role_prompt for word in ("enemy", "drone", "alien", "monster")):
        draw.ellipse((cx - 38 * scale, cy - 28 * scale, cx + 38 * scale, cy + 28 * scale), fill=outline)
        draw.ellipse((cx - 32 * scale, cy - 22 * scale, cx + 32 * scale, cy + 22 * scale), fill=body)
        draw.ellipse((cx - 16 * scale, cy - 16 * scale, cx + 16 * scale, cy + 16 * scale), fill=accent)
        for side in (-1, 1):
            draw.polygon(
                [
                    (cx + side * 28 * scale, cy - 8 * scale),
                    (cx + side * 56 * scale, cy - 28 * scale),
                    (cx + side * 48 * scale, cy + 20 * scale),
                ],
                fill=outline,
            )
            draw.line((cx + side * 14 * scale, cy, cx + side * 58 * scale, cy), fill=trim, width=max(2, int(5 * scale)))
        for _ in range(5):
            # 6.283 approximates 2*pi, i.e. a full turn in radians.
            angle = rng.random() * 6.283
            r = rng.randint(int(12 * scale), int(30 * scale))
            px = cx + int(r * math.cos(angle))
            py = cy + int(r * math.sin(angle))
            draw.ellipse((px - 3 * scale, py - 3 * scale, px + 3 * scale, py + 3 * scale), fill=trim)
        out = io.BytesIO()
        image.save(out, format="PNG")
        return out.getvalue()

    # Player: arrowhead hull (outline polygon with a smaller body polygon
    # inside), an accent ellipse, a line extending above the nose and one trim
    # rectangle per side. Wing span and nose length are randomised.
    if role in ("player", "hero", "ship", "tank", "avatar") or any(word in role_prompt for word in ("shooter", "ship", "tank", "hero", "player", "armored")):
        shadow = (int(cx - 36 * scale), int(cy + 34 * scale), int(cx + 36 * scale), int(cy + 48 * scale))
        draw.ellipse(shadow, fill=(0, 0, 0, 55))
        wing = rng.randint(28, 39)
        nose = rng.randint(45, 56)
        draw.polygon(
            [
                (cx, cy - nose * scale),
                (cx - wing * scale, cy + 34 * scale),
                (cx, cy + 20 * scale),
                (cx + wing * scale, cy + 34 * scale),
            ],
            fill=outline,
        )
        draw.polygon(
            [
                (cx, cy - (nose - 8) * scale),
                (cx - (wing - 10) * scale, cy + 23 * scale),
                (cx, cy + 12 * scale),
                (cx + (wing - 10) * scale, cy + 23 * scale),
            ],
            fill=body,
        )
        draw.ellipse((cx - 11 * scale, cy - 14 * scale, cx + 11 * scale, cy + 10 * scale), fill=accent)
        draw.line((cx, cy - 42 * scale, cx, cy - 62 * scale), fill=trim, width=max(3, int(6 * scale)))
        for side in (-1, 1):
            x1 = cx + side * rng.randint(14, 23) * scale
            x2 = cx + side * rng.randint(24, 32) * scale
            y1 = cy + rng.randint(8, 18) * scale
            y2 = cy + rng.randint(20, 30) * scale
            # min/max keeps the rectangle's x-coordinates ordered on the left side too.
            draw.rectangle((min(x1, x2), y1, max(x1, x2), y2), fill=trim)
        out = io.BytesIO()
        image.save(out, format="PNG")
        return out.getvalue()

    # Fallback when no keyword group matched: a generic figure.
    shadow = (int(cx - 36 * scale), int(cy + 34 * scale), int(cx + 36 * scale), int(cy + 48 * scale))
    draw.ellipse(shadow, fill=(0, 0, 0, 65))

    body_box = (cx - 26 * scale, cy - 18 * scale, cx + 26 * scale, cy + 34 * scale)
    head_box = (cx - 20 * scale, cy - 52 * scale, cx + 20 * scale, cy - 12 * scale)
    draw.rounded_rectangle(body_box, radius=int(12 * scale), fill=outline)
    # Inset each box by 4 scaled pixels so the outline colour shows as a border.
    draw.rounded_rectangle(
        tuple(v + d * scale for v, d in zip(body_box, (4, 4, -4, -4))),
        radius=int(9 * scale),
        fill=body,
    )
    draw.ellipse(head_box, fill=outline)
    draw.ellipse(tuple(v + d * scale for v, d in zip(head_box, (4, 4, -4, -4))), fill=accent)

    eye_y = cy - int(33 * scale)
    eye_dx = int(8 * scale)
    eye_size = max(2, int(4 * scale))
    draw.ellipse((cx - eye_dx - eye_size, eye_y, cx - eye_dx + eye_size, eye_y + eye_size * 2), fill=(255, 255, 255, 245))
    draw.ellipse((cx + eye_dx - eye_size, eye_y, cx + eye_dx + eye_size, eye_y + eye_size * 2), fill=(255, 255, 255, 245))

    # One arm and one leg per side; each is an outline line with a thinner
    # coloured line drawn over it. End points are randomised.
    for side in (-1, 1):
        arm = (
            cx + side * 22 * scale,
            cy - 8 * scale,
            cx + side * rng.randint(38, 48) * scale,
            cy + rng.randint(8, 22) * scale,
        )
        draw.line(arm, fill=outline, width=max(4, int(9 * scale)))
        draw.line(arm, fill=trim, width=max(2, int(5 * scale)))
        leg = (
            cx + side * 13 * scale,
            cy + 30 * scale,
            cx + side * rng.randint(16, 28) * scale,
            cy + 53 * scale,
        )
        draw.line(leg, fill=outline, width=max(4, int(10 * scale)))
        draw.line(leg, fill=body, width=max(2, int(6 * scale)))

    # Ten small trim-coloured squares scattered over the torso area.
    for _ in range(10):
        px = rng.randint(int(cx - 22 * scale), int(cx + 22 * scale))
        py = rng.randint(int(cy - 10 * scale), int(cy + 26 * scale))
        draw.rectangle((px, py, px + max(2, int(4 * scale)), py + max(2, int(4 * scale))), fill=trim)

    out = io.BytesIO()
    image.save(out, format="PNG")
    return out.getvalue()


def local_asset_png(spec: AssetSpec, index: int, run_id: int) -> bytes:
    """Render ``spec`` with the local procedural painters and return PNG bytes.

    A ``random.Random`` seeded from the role, prompt, index and run id is
    shared by the painter and the finishing pass, so the same four values
    always produce the same random sequence.
    """
    rng = random.Random(f"{spec.role}|{spec.prompt}|{index}|{run_id}")
    # Backgrounds and sprites use different painters; both take (spec, rng).
    painter = draw_background if is_background_spec(spec) else draw_sprite
    return apply_style_finish(painter(spec, rng), spec, rng)


def placeholder_png_bytes(role: str, width: int, height: int) -> bytes:
    """Return a simple labelled placeholder image as PNG bytes.

    The image is a dark canvas with a blue rounded panel and the first 18
    characters of ``role`` centred in white using Pillow's default font.

    Args:
        role: Asset role used as the label text.
        width: Image width in pixels.
        height: Image height in pixels.

    Returns:
        The encoded PNG image.
    """
    # The label is drawn onto a bitmap, not emitted as markup, so it must not be
    # HTML-escaped: escaping would render "a&b" as "a&amp;b" and could push the
    # text past the 18-character cap.
    label = role[:18]
    image = Image.new("RGBA", (width, height), "#222222")
    draw = ImageDraw.Draw(image)
    # The panel is inset 4px on every side, so it only has valid (non-inverted)
    # coordinates when the canvas is at least 8px in each direction.
    if width >= 8 and height >= 8:
        draw.rounded_rectangle((4, 4, width - 4, height - 4), radius=12, fill="#3b82f6")
    font = ImageFont.load_default()
    bbox = draw.textbbox((0, 0), label, font=font)
    x = (width - (bbox[2] - bbox[0])) / 2
    y = (height - (bbox[3] - bbox[1])) / 2
    draw.text((x, y), label, fill="white", font=font)
    out = io.BytesIO()
    image.save(out, format="PNG")
    return out.getvalue()


def diffusion_dimensions(spec: AssetSpec) -> tuple[int, int]:
    """Return the (width, height) to request from the diffusion pipeline.

    Backgrounds are generated at 384x216; everything else at 256x256.
    """
    return (384, 216) if is_background_spec(spec) else (256, 256)


def polish_diffusion_asset(image: Image.Image, spec: AssetSpec) -> bytes:
    """Resize a generated image to the spec size and return it as PNG bytes.

    Backgrounds are only resized. Sprites are first reduced to 128x128 and
    keyed against the average colour of the four corner pixels: pixels close
    to that colour, or within three pixels of the border, become fully
    transparent, and pixels in a middle distance band get a capped alpha.
    The keyed image is then resized to ``spec.width`` x ``spec.height``.
    """
    rgba = image.convert("RGBA")
    target_size = (spec.width, spec.height)

    if is_background_spec(spec):
        finished = rgba.resize(target_size, Image.LANCZOS)
    else:
        # Text-to-image models do not produce real transparency, so colours
        # similar to the generated corner background are faded out instead.
        thumb = rgba.resize((128, 128), Image.LANCZOS)
        corner_pixels = [thumb.getpixel(point) for point in ((0, 0), (127, 0), (0, 127), (127, 127))]
        backdrop = tuple(
            sum(corner[channel] for corner in corner_pixels) // len(corner_pixels)
            for channel in range(3)
        )
        grid = thumb.load()
        last_col = thumb.width - 1
        last_row = thumb.height - 1
        for row in range(thumb.height):
            for col in range(thumb.width):
                red, green, blue, alpha = grid[col, row]
                # Manhattan distance in RGB from the estimated background colour.
                distance = abs(red - backdrop[0]) + abs(green - backdrop[1]) + abs(blue - backdrop[2])
                border_gap = min(col, row, last_col - col, last_row - row)
                if border_gap < 3 or distance < 64:
                    alpha = 0
                elif distance < 125:
                    alpha = max(0, min(alpha, (distance - 64) * 4))
                grid[col, row] = (red, green, blue, alpha)
        finished = thumb.resize(target_size, Image.LANCZOS)

    buffer = io.BytesIO()
    finished.save(buffer, format="PNG")
    return buffer.getvalue()


def free_diffusion_png(spec: AssetSpec, index: int, run_id: int) -> tuple[bytes | None, str | None]:
    """Generate an asset with the local diffusion pipeline.

    The pipeline is loaded on first use into the module-level
    ``FREE_DIFFUSION_PIPE`` and moved to CPU in float32. Any exception is
    stored in ``FREE_DIFFUSION_ERROR``; once that is set, every later call
    returns it immediately without trying again.

    Args:
        spec: Asset description; its prompt is sent to the pipeline.
        index: Position of the asset in the current run (part of the seed).
        run_id: Identifier of the current run (part of the seed).

    Returns:
        ``(png_bytes, None)`` on success, or ``(None, error_text)`` on failure.
    """
    global FREE_DIFFUSION_PIPE, FREE_DIFFUSION_ERROR
    if FREE_DIFFUSION_ERROR:
        return None, FREE_DIFFUSION_ERROR
    try:
        import torch
        from diffusers import DiffusionPipeline

        if FREE_DIFFUSION_PIPE is None:
            FREE_DIFFUSION_PIPE = DiffusionPipeline.from_pretrained(FREE_IMAGE_MODEL, torch_dtype=torch.float32)
            FREE_DIFFUSION_PIPE = FREE_DIFFUSION_PIPE.to("cpu")
            for component_name in ("unet", "vae", "text_encoder"):
                component = getattr(FREE_DIFFUSION_PIPE, component_name, None)
                if component is not None and hasattr(component, "to"):
                    component.to(device="cpu", dtype=torch.float32)
            if hasattr(FREE_DIFFUSION_PIPE, "enable_attention_slicing"):
                FREE_DIFFUSION_PIPE.enable_attention_slicing()

        width, height = diffusion_dimensions(spec)
        # NOTE: str hashes vary between processes unless PYTHONHASHSEED is fixed,
        # so this seed is only stable within one process.
        seed = abs(hash(f"{spec.role}|{spec.prompt}|{index}|{run_id}")) % 2147483647
        generator = torch.Generator(device="cpu").manual_seed(seed)
        negative_prompt = (
            "texture map, material texture, seamless texture, tiled pattern, uv map, uv unwrap, "
            "skin texture, 3d model texture, normal map, roughness map, diffuse map, sprite sheet, "
            "atlas, multiple objects, cropped subject, close-up surface, fabric swatch, text, watermark"
        )
        if is_background_spec(spec):
            # Subject-suppressing terms belong on backgrounds only. Sprite
            # prompts ask for exactly these subjects (player, monster,
            # vehicle, ...), so listing them as negatives there works against
            # the requested sprite.
            negative_prompt += (
                ", person, character, player, hero, creature, monster, vehicle, mascot, foreground subject"
            )
        image = FREE_DIFFUSION_PIPE(
            spec.prompt,
            negative_prompt=negative_prompt,
            width=width,
            height=height,
            num_inference_steps=FREE_IMAGE_STEPS,
            guidance_scale=6.0,
            generator=generator,
        ).images[0]
        return polish_diffusion_asset(image, spec), None
    except Exception as exc:
        # Deliberately broad: missing packages, download failures and runtime
        # errors all disable this provider so the caller can fall back.
        FREE_DIFFUSION_ERROR = short_error(exc)
        return None, FREE_DIFFUSION_ERROR


def hf_image_png(spec: AssetSpec, index: int, run_id: int) -> tuple[bytes | None, str | None]:
    """Generate an asset through the Hugging Face inference client.

    Returns ``(None, None)`` when the provider is switched off,
    ``(None, reason)`` when a prerequisite is missing or the request fails,
    and ``(png_bytes, None)`` on success.
    """
    if not USE_HF_IMAGE_PROVIDER:
        return None, None

    prerequisites = (
        (bool(HF_TOKEN), "HF_TOKEN is not visible to the Space runtime"),
        (InferenceClient is not None, "huggingface_hub is not installed in this Space"),
    )
    for satisfied, reason in prerequisites:
        if not satisfied:
            return None, reason

    try:
        client = InferenceClient(api_key=HF_TOKEN)
        request = {
            "prompt": spec.prompt,
            "model": HF_IMAGE_MODEL,
            "width": spec.width,
            "height": spec.height,
            "num_inference_steps": 4,
            "guidance_scale": 0.0,
            "seed": abs(hash(f"{spec.role}|{spec.prompt}|{index}|{run_id}")) % 2147483647,
        }
        rendered = client.text_to_image(**request)
        buffer = io.BytesIO()
        rendered.save(buffer, format="PNG")
        return image_to_png_bytes(buffer.getvalue(), spec.width, spec.height), None
    except Exception as exc:
        return None, short_error(exc)


def generate_asset(spec: AssetSpec, index: int, run_id: int) -> tuple[str, str, str | None, str]:
    """Produce one asset and report where it came from.

    When diffusion is disabled for this kind of asset, the local procedural
    generator is used directly. Otherwise the providers are tried in order:
    local diffusion pipeline, Hugging Face inference, local procedural
    fallback.

    Returns:
        ``(data_uri, gallery_path, error, source)``. ``error`` is only set
        when the procedural fallback was used after the image providers
        produced nothing.
    """
    background = is_background_spec(spec)
    diffusion_enabled = USE_DIFFUSION_FOR_BACKGROUNDS if background else USE_DIFFUSION_FOR_SPRITES

    if not diffusion_enabled:
        source = "scene-safe 2D background generator" if background else "shape-aware 2D sprite generator"
        png_content = local_asset_png(spec, index, run_id)
        return (
            png_bytes_to_data_uri(png_content),
            write_gallery_image(png_content, spec.role),
            None,
            source,
        )

    png_content, free_error = free_diffusion_png(spec, index, run_id)
    source, error = FREE_IMAGE_MODEL, free_error

    if png_content is None:
        png_content, hf_error = hf_image_png(spec, index, run_id)
        source, error = HF_IMAGE_MODEL, hf_error or free_error

    if png_content is None:
        png_content = local_asset_png(spec, index, run_id)
        source = "local procedural fallback"

    reported_error = error if source == "local procedural fallback" else None
    return (
        png_bytes_to_data_uri(png_content),
        write_gallery_image(png_content, spec.role),
        reported_error,
        source,
    )


def replacement_names(spec: AssetSpec) -> set[str]:
    """Return the file names and placeholders in the HTML that refer to ``spec``.

    The set always holds the spec's own filename, several slug-based file
    names, and the ``{{slug}}`` / ``{slug}`` placeholders. The background,
    player, enemy and bullet slugs add a few extra conventional names.
    """
    slug = slugify(spec.role)
    extras_by_slug = {
        "background": {"background.png", "sprite_background.jpg", "background.jpg"},
        "player": {"player.png", "sprite_player.jpg", "hero.png"},
        "enemy": {"enemy.png", "monster.png", "sprite_enemy.jpg"},
        "bullet": {"bullet.png", "projectile.png", "laser.png", "shot.png", "sprite_bullet.jpg"},
    }

    names = {spec.filename}
    names |= {f"{slug}.png", f"{slug}.jpg", f"{slug}.jpeg", f"{slug}.webp"}
    names |= {f"asset_{slug}.png", f"{spec.role.strip()}.png"}
    names |= {f"{{{{{slug}}}}}", f"{{{slug}}}"}
    names |= extras_by_slug.get(slug, set())
    return names


def asset_aliases(spec: AssetSpec) -> list[str]:
    """Return the sorted, non-empty aliases under which ``spec`` may be referenced.

    The slug, lower-cased role and lower-cased filename are always included.
    A whole vocabulary group is added when its group name occurs in the slug
    or any of its words occurs in the lower-cased prompt (substring tests).
    """
    slug = slugify(spec.role)
    prompt_text = spec.prompt.lower()
    vocabularies = {
        "player": ("player", "hero", "character", "avatar", "ship", "shooter", "tank", "knight", "wizard"),
        "enemy": ("enemy", "monster", "alien", "drone", "foe", "zombie", "boss", "hazard"),
        "bullet": ("bullet", "projectile", "laser", "shot", "missile", "beam", "ammo"),
        "coin": ("coin", "gem", "seed", "star", "pickup", "collectible", "key", "orb"),
        "background": ("background", "backdrop", "bg", "map", "level", "arena", "scene", "world", "floor"),
    }

    matched_groups = [
        vocabulary
        for group_name, vocabulary in vocabularies.items()
        if group_name in slug or any(term in prompt_text for term in vocabulary)
    ]

    collected = {slug, spec.role.strip().lower(), spec.filename.lower()}
    collected.update(*matched_groups)
    return sorted(filter(None, collected))


def embed_assets(html_code: str, assets: dict[str, str], specs: list[AssetSpec]) -> str:
    """Rewrite ``html_code`` so that it uses the generated assets inline.

    Three things are done:

    1. Every known file name or placeholder for each spec (see
       ``replacement_names``) is replaced in the HTML text by that spec's
       data URI.
    2. A helper ``<script>`` is injected before ``</head>`` (or before
       ``<body``, or at the very start). At runtime it remaps image sources
       that still look like an asset alias, skips ``drawImage`` calls for
       images that have not loaded, and sets the first background asset as
       the CSS background of every canvas.
    3. An HTML comment listing ``role: filename`` pairs is prepended.

    Args:
        html_code: Original HTML document.
        assets: Mapping from ``spec.role`` to its data URI.
        specs: Asset specs in generation order.

    Returns:
        The manifest comment followed by the rewritten HTML.

    Raises:
        KeyError: If ``assets`` has no entry for one of the spec roles.
    """

    def script_json(value) -> str:
        # "</" inside an inline script could terminate the <script> element
        # early; "<\/" is the same character sequence to JavaScript.
        return json.dumps(value).replace("</", "<\\/")

    output = html_code
    manifest_lines = ["<!-- Embedded game assets generated by Image Generator for HTML Games"]
    background_uri = None
    asset_map: dict[str, str] = {}
    alias_map: dict[str, list[str]] = {}

    for spec in specs:
        data_uri = assets[spec.role]
        slug = slugify(spec.role)
        asset_map[slug] = data_uri
        alias_map[slug] = asset_aliases(spec)
        manifest_lines.append(f"{spec.role}: {spec.filename}")
        if is_background_spec(spec) and background_uri is None:
            background_uri = data_uri
        # Replace longer names first. The names come from an unordered set and
        # some are substrings of others ("player.png" inside
        # "sprite_player.png"); replacing the short one first would leave a
        # broken "sprite_data:image/..." URL behind. Empty names are skipped
        # because replacing "" would insert the URI between every character.
        ordered_names = sorted(
            (name for name in replacement_names(spec) if name),
            key=lambda name: (-len(name), name),
        )
        for name in ordered_names:
            # A bare replace also covers the quoted forms "name" and 'name'.
            output = output.replace(name, data_uri)

    manifest_lines.append("-->")
    manifest = "\n".join(manifest_lines) + "\n"
    asset_json = script_json(asset_map)
    alias_json = script_json(alias_map)
    background_json = script_json(background_uri)
    helper_script = f"""<script>
(function () {{
  var ASSETS = {asset_json};
  var ALIASES = {alias_json};
  window.GENERATED_GAME_ASSETS = ASSETS;

  function basename(value) {{
    return String(value || "").split("?")[0].split("#")[0].split("/").pop().toLowerCase();
  }}

  function pickAsset(value) {{
    var text = String(value || "").toLowerCase();
    if (!text || text.indexOf("data:image/") === 0) return value;
    var file = basename(text);
    for (var role in ASSETS) {{
      var aliases = ALIASES[role] || [role];
      for (var i = 0; i < aliases.length; i++) {{
        var alias = String(aliases[i]).toLowerCase();
        if (!alias) continue;
        if (file === alias || file === alias + ".png" || file === "sprite_" + alias + ".png") return ASSETS[role];
        if (file.indexOf(alias) !== -1 || text.indexOf("/" + alias) !== -1 || text.indexOf("_" + alias) !== -1) return ASSETS[role];
      }}
    }}
    return value;
  }}

  var descriptor = Object.getOwnPropertyDescriptor(HTMLImageElement.prototype, "src");
  if (descriptor && descriptor.set && !HTMLImageElement.prototype.__generatedAssetMapper) {{
    Object.defineProperty(HTMLImageElement.prototype, "src", {{
      get: function () {{ return descriptor.get.call(this); }},
      set: function (value) {{ descriptor.set.call(this, pickAsset(value)); }},
      configurable: true,
      enumerable: descriptor.enumerable
    }});
    HTMLImageElement.prototype.__generatedAssetMapper = true;
  }}

  var originalDrawImage = CanvasRenderingContext2D.prototype.drawImage;
  if (!CanvasRenderingContext2D.prototype.__generatedAssetDrawGuard) {{
    CanvasRenderingContext2D.prototype.drawImage = function (image) {{
      try {{
        if (image instanceof HTMLImageElement) {{
          var current = image.getAttribute("src") || image.src || "";
          var mapped = pickAsset(current);
          if (mapped && mapped !== current) image.src = mapped;
          if (!image.complete || image.naturalWidth === 0 || image.naturalHeight === 0) {{
            var looksLikeBackground = /background|backdrop|scene|map|level|bg/i.test(current);
            var coversCanvas = arguments.length >= 5 && arguments[1] === 0 && arguments[2] === 0 &&
              arguments[3] >= this.canvas.width * 0.8 && arguments[4] >= this.canvas.height * 0.8;
            if (looksLikeBackground || coversCanvas) {{
              this.clearRect(0, 0, this.canvas.width, this.canvas.height);
            }}
            return;
          }}
        }}
        return originalDrawImage.apply(this, arguments);
      }} catch (error) {{
        return;
      }}
    }};
    CanvasRenderingContext2D.prototype.__generatedAssetDrawGuard = true;
  }}

  window.addEventListener("DOMContentLoaded", function () {{
    document.querySelectorAll("img").forEach(function (img) {{
      var mapped = pickAsset(img.getAttribute("src") || img.src);
      if (mapped !== (img.getAttribute("src") || img.src)) img.src = mapped;
    }});
    var background = {background_json};
    if (!background) return;
    document.querySelectorAll("canvas").forEach(function (canvas) {{
      canvas.style.backgroundImage = "url(" + background + ")";
      canvas.style.backgroundSize = "cover";
      canvas.style.backgroundPosition = "center";
    }});
  }});
}})();
</script>"""

    # Inject the helper as early as possible so it runs before the game's own
    # scripts create images or draw to a canvas.
    if "</head>" in output:
        output = output.replace("</head>", helper_script + "\n</head>", 1)
    elif "<body" in output:
        output = output.replace("<body", helper_script + "\n<body", 1)
    else:
        output = helper_script + "\n" + output

    return manifest + output


def build_preview(html_code: str) -> str:
    """Return an ``<iframe>`` tag that shows ``html_code`` as a base64 data URL.

    The frame is sandboxed with ``allow-scripts`` only.
    """
    payload = base64.b64encode(html_code.encode("utf-8")).decode("ascii")
    attributes = (
        f'src="data:text/html;base64,{payload}"',
        'style="width:100%;height:560px;border:1px solid #333;border-radius:8px;background:#000;"',
        'sandbox="allow-scripts"',
        'title="Game preview"',
    )
    return "<iframe " + " ".join(attributes) + "></iframe>"


def build_prompt_preview(specs: list[AssetSpec]) -> str:
    """Return one ``role:`` / prompt section per spec, separated by blank lines."""
    sections = []
    for spec in specs:
        sections.append(f"{spec.role}:\n{spec.prompt}")
    return "\n\n".join(sections)


def build_model_report(rows: list[tuple[str, str, str]]) -> str:
    """Return one line per ``(role, prompt_model, image_model)`` row."""
    lines = []
    for role, prompt_model, image_model in rows:
        lines.append(f"{role}: prompt={prompt_model}; image={image_model}")
    return "\n".join(lines)


def summarize_model_sources(rows: list[tuple[str, str, str]]) -> str:
    """Return the distinct prompt and image sources in ``rows``, each sorted."""
    prompt_names: set[str] = set()
    image_names: set[str] = set()
    for _role, prompt_model, image_model in rows:
        prompt_names.add(prompt_model)
        image_names.add(image_model)
    prompt_part = ", ".join(sorted(prompt_names))
    image_part = ", ".join(sorted(image_names))
    return f"prompt={prompt_part}; image={image_part}"


def generate_images_and_game(html_code: str, roles: str, style_hint: str):
    """Generate every requested asset and embed the results into the game HTML.

    Returns a 6-tuple matching the UI outputs: rewritten HTML, status text,
    gallery items, prompt preview, model report and preview iframe markup.
    A blank ``style_hint`` falls back to ``"pixel art style"``.
    """
    if not html_code.strip():
        return "", "Paste HTML game code first.", [], "", "", ""

    effective_style = style_hint or "pixel art style"
    _role_lines, prompt_map, prompt_model, prompt_error = build_prompt_map(html_code, roles, effective_style)
    specs = parse_assets(roles, effective_style, prompt_map)
    if not specs:
        return (
            html_code,
            "Add at least one asset role, like `player: brave knight`.",
            [],
            "",
            "",
            build_preview(html_code),
        )

    # A fresh run id per click feeds the seeds, so each run gives new images.
    run_id = time.time_ns()
    problems = [f"prompt model: {prompt_error}"] if prompt_error else []
    embedded: dict[str, str] = {}
    gallery_items = []
    provenance = []

    for position, spec in enumerate(specs):
        data_uri, gallery_path, failure, image_model = generate_asset(spec, position, run_id)
        embedded[spec.role] = data_uri
        gallery_items.append((gallery_path, f"{spec.role} -> {spec.filename}"))
        provenance.append((spec.role, prompt_model, image_model))
        if failure:
            problems.append(f"{spec.role}: image model failed ({failure}); used local procedural fallback")

    rewritten = embed_assets(html_code, embedded, specs)
    status_text = (
        f"Generated and embedded {len(specs)} fresh asset(s) using "
        f"{summarize_model_sources(provenance)}. Run {str(run_id)[-6:]}."
    )
    if problems:
        status_text = status_text + "\n\n" + "\n".join(problems)

    return (
        rewritten,
        status_text,
        gallery_items,
        build_prompt_preview(specs),
        build_model_report(provenance),
        build_preview(rewritten),
    )


def check_hf_token() -> str:
    """Check ``HF_TOKEN`` against the Hugging Face whoami endpoint.

    Returns a human-readable status line. Request, decoding and parsing
    failures are reported in the returned text rather than raised.
    """
    if not HF_TOKEN:
        return "HF_TOKEN is missing or not visible to the Space runtime."

    whoami = Request(
        "https://huggingface.co/api/whoami-v2",
        headers={"Authorization": f"Bearer {HF_TOKEN}"},
        method="GET",
    )
    try:
        with urlopen(whoami, timeout=30) as response:
            payload = response.read()
        account = json.loads(payload.decode("utf-8", errors="replace"))
        display_name = account.get("name") or account.get("fullname") or "authenticated account"
        return f"HF_TOKEN is visible and valid for {display_name}."
    except Exception as exc:
        return f"HF_TOKEN check failed: {short_error(exc)}"


# Gradio UI definition. Components are created inside nested context managers,
# so the order of the statements below defines the page layout.
with gr.Blocks(title="Image Generator for HTML Games") as demo:
    gr.Markdown(
        "# Image Generator for HTML Games\n"
        "Paste an HTML canvas game, list the image roles you want, and generate a rewritten version "
        "with the images embedded directly into the code."
    )

    with gr.Row():
        # First column: role list, style hint, action buttons, status and gallery.
        with gr.Column(scale=1):
            roles = gr.Textbox(
                label="Image roles to generate",
                lines=8,
                placeholder=ROLE_PLACEHOLDER,
                info="One per line: role: image description. Example: player: blue robot hero",
            )
            style = gr.Textbox(
                label="Shared visual style",
                lines=2,
                placeholder="Optional: describe the art style/theme, or leave blank.",
            )
            generate_btn = gr.Button("Generate Images + Embed Game", variant="primary")
            status = gr.Markdown("Ready.")
            token_btn = gr.Button("Check HF Token")
            token_status = gr.Markdown("")
            gallery = gr.Gallery(label="Generated assets", columns=2, height=300)

        # Second column: the original HTML input and the generated outputs.
        with gr.Column(scale=2):
            html_input = gr.Textbox(
                label="Original HTML game code",
                lines=18,
                placeholder="Paste your full HTML game code here.",
            )
            output_code = gr.Code(
                label="Rewritten HTML with embedded images",
                language="html",
                lines=18,
            )
            prompt_preview = gr.Textbox(
                label="Interpreted image prompts",
                lines=8,
                interactive=False,
            )
            model_report = gr.Textbox(
                label="Model/source used by role",
                lines=5,
                interactive=False,
            )

    gr.Markdown("## Game preview")
    preview = gr.HTML("")

    # The outputs list has six entries, matching the 6-tuple returned by
    # generate_images_and_game, in the same order.
    generate_btn.click(
        fn=generate_images_and_game,
        inputs=[html_input, roles, style],
        outputs=[output_code, status, gallery, prompt_preview, model_report, preview],
    )
    token_btn.click(fn=check_hf_token, inputs=None, outputs=token_status)


# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()