import base64 import hashlib import io import json import math import os import random import re import tempfile import time from dataclasses import dataclass from html import escape from urllib.parse import quote from urllib.error import HTTPError, URLError from urllib.request import Request, urlopen import gradio as gr from PIL import Image, ImageDraw, ImageEnhance, ImageFilter, ImageFont try: from huggingface_hub import InferenceClient except Exception: InferenceClient = None STARTER_HTML = """ Mini Forest Game """ DEFAULT_ROLES = """player: top-down pixel-art adventurer hero, transparent background, bright readable silhouette background: enchanted forest clearing game background, top-down view, soft moonlight, detailed but not too busy""" ROLE_PLACEHOLDER = """player: blue robot hero sprite background: empty space station floor map""" @dataclass class AssetSpec: role: str prompt: str filename: str width: int height: int @dataclass class StylePlan: medium: str palette: str texture: str lighting: str linework: str camera: str tags: tuple[str, ...] HF_TOKEN = os.environ.get("HF_TOKEN", "") FREE_IMAGE_MODEL = os.environ.get("FREE_IMAGE_MODEL", "segmind/tiny-sd") FREE_IMAGE_STEPS = int(os.environ.get("FREE_IMAGE_STEPS", "5")) USE_HF_PROMPT_PROVIDER = os.environ.get("USE_HF_PROMPT_PROVIDER", "0") == "1" USE_HF_IMAGE_PROVIDER = os.environ.get("USE_HF_IMAGE_PROVIDER", "0") == "1" USE_DIFFUSION_FOR_SPRITES = os.environ.get("USE_DIFFUSION_FOR_SPRITES", "0") == "1" USE_DIFFUSION_FOR_BACKGROUNDS = os.environ.get("USE_DIFFUSION_FOR_BACKGROUNDS", "0") == "1" HF_IMAGE_MODEL = os.environ.get("HF_IMAGE_MODEL", "black-forest-labs/FLUX.1-schnell") HF_PROMPT_MODEL = os.environ.get("HF_PROMPT_MODEL", "Qwen/Qwen2.5-Coder-7B-Instruct:fastest") HF_PROMPT_ENDPOINT = os.environ.get("HF_PROMPT_ENDPOINT", "https://router.huggingface.co/v1/chat/completions") FREE_DIFFUSION_PIPE = None FREE_DIFFUSION_ERROR = None def slugify(value: str) -> str: value = re.sub(r"[^a-zA-Z0-9]+", "_", value.strip().lower()).strip("_") return value or "asset" def interpret_style_hint(style_hint: str) -> StylePlan: text = (style_hint or "").lower() tags: list[str] = [] def has(*words: str) -> bool: return any(word in text for word in words) if has("watercolor", "ink wash", "gouache", "paper"): tags.append("watercolor") medium = "watercolor game illustration on textured paper" palette = "soft layered washes, muted pigments, gentle color bleed" texture = "visible paper grain, translucent edges, organic brush pooling" lighting = "soft ambient light with low contrast" linework = "loose ink accents, imperfect hand-painted contour lines" elif has("vector", "flat", "icon", "logo"): tags.append("vector") medium = "clean vector game art" palette = "flat separated color fields with bold accent colors" texture = "smooth fills, no painterly grain" lighting = "graphic cel shading with crisp highlights" linework = "thick precise outlines and hard-edged silhouettes" elif has("clay", "claymation", "stop motion", "plasticine"): tags.append("clay") medium = "claymation stop-motion game asset" palette = "chunky colored clay with warm handmade tones" texture = "fingerprint-like clay bumps, rounded sculpted forms" lighting = "soft studio light with small specular highlights" linework = "no ink outlines, shape defined by soft shadows" elif has("oil", "painterly", "impasto", "canvas"): tags.append("oil") medium = "oil-painted fantasy game art" palette = "rich blended colors with deep shadows" texture = "visible bristle strokes and canvas-like texture" lighting = "dramatic directional light" linework = "painted edges instead of hard outlines" elif has("pixel", "8-bit", "16-bit", "retro"): tags.append("pixel") medium = "retro pixel-art game asset" palette = "limited palette with readable color ramps" texture = "sharp square pixels and no anti-aliasing" lighting = "simple two-tone pixel shading" linework = "one-pixel dark outline and blocky silhouette" elif has("anime", "manga"): tags.append("anime") medium = "anime game illustration" palette = "clean saturated colors with expressive accents" texture = "smooth cel-shaded surfaces" lighting = "bright rim light and glossy highlights" linework = "confident manga-style outlines" elif has("cyber", "neon", "synthwave"): tags.append("vector") tags.append("neon") medium = "neon cyberpunk arcade game art" palette = "electric cyan, magenta, violet, and black" texture = "glowing edges, glossy panels, light bloom" lighting = "high contrast neon rim lighting" linework = "hard sci-fi outlines with luminous accents" else: tags.append("illustration") medium = "cohesive 2D game illustration" palette = "distinct theme-driven colors with clear value contrast" texture = "clean readable game asset finish" lighting = "balanced game lighting" linework = "clear readable silhouette and controlled edges" if has("top-down", "top down", "shooter", "rpg", "arena"): camera = "top-down readable game camera" elif has("platformer", "side-scroller", "side scroller"): camera = "side-view platformer camera" else: camera = "game-ready camera angle matching the role" return StylePlan(medium, palette, texture, lighting, linework, camera, tuple(tags)) def build_asset_prompt(role: str, prompt: str, style_hint: str) -> str: plan = interpret_style_hint(style_hint) slug = slugify(role) is_background = any(word in slug for word in ("background", "backdrop", "scene", "map", "level")) if is_background: asset_instruction = ( "Create one complete 2D game background scene, not a texture tile, not a material sample, " "not a UV map. Full scene composition for a canvas game. Empty environment only: no player, " "no character, no creature, no vehicle, no mascot, no foreground subject." ) else: asset_instruction = ( "Create one complete standalone 2D sprite of the whole subject, centered, full body or full vehicle, " "single object, transparent or plain background, readable silhouette. Not a texture map, not a tiled " "pattern, not a material swatch, not a UV unwrap, not a 3D model skin." ) return ( f"{role} asset: {prompt}. Style interpretation: {plan.medium}; {plan.palette}; " f"{plan.texture}; {plan.lighting}; {plan.linework}; {plan.camera}. " f"{asset_instruction} " "Game asset, readable at small size, no text, no watermark." ) def parse_role_lines(raw_roles: str) -> list[tuple[str, str]]: parsed: list[tuple[str, str]] = [] for line in raw_roles.splitlines(): line = line.strip() if not line or line.startswith("#"): continue if ":" in line: role, prompt = line.split(":", 1) elif "=" in line: role, prompt = line.split("=", 1) else: role, prompt = line, line role = role.strip() prompt = prompt.strip() or role parsed.append((role, prompt)) return parsed def infer_code_context(html_code: str) -> str: text = html_code[:12000] filenames = sorted(set(re.findall(r"['\"]([^'\"]+?\.(?:png|jpg|jpeg|webp|gif))['\"]", text, flags=re.I))) canvas = re.findall(r"]*?(?:width=['\"]?(\d+)|height=['\"]?(\d+))", text, flags=re.I) controls = [] lowered = text.lower() for label, words in { "top-down movement": ("arrowup", "arrowdown", "keys.has(\"w\")", "keys.has('w')"), "platformer": ("gravity", "grounded", "platform"), "shooting": ("bullet", "shoot", "projectile", "laser"), "enemies": ("enemy", "monster", "spawn"), }.items(): if any(word in lowered for word in words): controls.append(label) return ( f"Referenced asset filenames: {', '.join(filenames[:24]) or 'none found'}. " f"Detected game mechanics: {', '.join(controls) or 'not obvious'}. " f"Canvas hints found: {canvas[:4] or 'none'}." ) def local_prompt_map(role_lines: list[tuple[str, str]], style_hint: str) -> dict[str, str]: return {role: build_asset_prompt(role, prompt, style_hint) for role, prompt in role_lines} def extract_json_object(text: str) -> dict | None: match = re.search(r"\{.*\}", text, flags=re.S) if not match: return None try: value = json.loads(match.group(0)) except Exception: return None return value if isinstance(value, dict) else None def short_error(exc: Exception) -> str: if isinstance(exc, HTTPError): body = exc.read().decode("utf-8", errors="replace")[:500] return f"HTTP {exc.code}: {body}" if isinstance(exc, URLError): return f"URL error: {exc.reason}" return f"{type(exc).__name__}: {str(exc)[:500]}" def hf_prompt_json(html_code: str, role_lines: list[tuple[str, str]], style_hint: str) -> tuple[dict[str, str] | None, str | None]: if not USE_HF_PROMPT_PROVIDER: return None, None if not HF_TOKEN: return None, "HF_TOKEN is not visible to the Space runtime" role_block = "\n".join(f"- {role}: {prompt}" for role, prompt in role_lines) instruction = ( "You are a senior game art director and prompt engineer. Read the HTML game context, " "the requested asset roles, and the shared theme/style. Return ONLY a JSON object where " "each key is the exact role name and each value is one concise text-to-image prompt. " "Each prompt must specify: subject silhouette/shape, camera angle, art style, palette, " "transparent background for sprites/items, full scene for backgrounds, no text, no watermark. " "Make different roles visually distinct and suitable for embedding in an HTML game." ) user_text = ( f"HTML/game context summary: {infer_code_context(html_code)}\n\n" f"Shared theme/style: {style_hint}\n\n" f"Asset roles:\n{role_block}\n\n" "Return JSON only." ) payload = { "model": HF_PROMPT_MODEL, "messages": [ {"role": "system", "content": instruction}, {"role": "user", "content": user_text}, ], "max_tokens": 900, "temperature": 0.55, "top_p": 0.9, "stream": False, } request = Request( HF_PROMPT_ENDPOINT, data=json.dumps(payload).encode("utf-8"), headers={ "Authorization": f"Bearer {HF_TOKEN}", "Content-Type": "application/json", }, method="POST", ) try: with urlopen(request, timeout=90) as response: raw = response.read().decode("utf-8", errors="replace") parsed = json.loads(raw) if isinstance(parsed, dict) and parsed.get("choices"): message = parsed["choices"][0].get("message", {}) text = message.get("content", "") if isinstance(message, dict) else str(message) elif isinstance(parsed, list) and parsed: text = parsed[0].get("generated_text", "") if isinstance(parsed[0], dict) else str(parsed[0]) elif isinstance(parsed, dict): text = parsed.get("generated_text", parsed.get("text", "")) else: text = str(parsed) obj = extract_json_object(text) if not obj: return None, f"{HF_PROMPT_MODEL} returned non-JSON text: {text[:300]}" return { role: str(obj.get(role, "")).strip() for role, _ in role_lines if str(obj.get(role, "")).strip() }, None except Exception as exc: return None, short_error(exc) def build_prompt_map(html_code: str, raw_roles: str, style_hint: str) -> tuple[list[tuple[str, str]], dict[str, str], str, str | None]: role_lines = parse_role_lines(raw_roles) local_map = local_prompt_map(role_lines, style_hint) ai_map, prompt_error = hf_prompt_json(html_code, role_lines, style_hint) if ai_map and all(role in ai_map for role, _ in role_lines): return role_lines, ai_map, HF_PROMPT_MODEL, None return role_lines, local_map, "local prompt interpreter", prompt_error def parse_assets(raw_roles: str, style_hint: str, prompt_map: dict[str, str] | None = None) -> list[AssetSpec]: specs: list[AssetSpec] = [] for role, prompt in parse_role_lines(raw_roles): slug = slugify(role) is_background = any(word in slug for word in ("background", "backdrop", "scene", "map", "level")) width, height = (800, 450) if is_background else (128, 128) filename = f"sprite_{slug}.png" full_prompt = (prompt_map or {}).get(role) or build_asset_prompt(role, prompt, style_hint) specs.append(AssetSpec(role=role, prompt=full_prompt, filename=filename, width=width, height=height)) return specs def is_background_spec(spec: AssetSpec) -> bool: slug = slugify(spec.role) return spec.width > spec.height * 2 or any( word in slug for word in ("background", "backdrop", "scene", "map", "level") ) def image_to_png_bytes(content: bytes, width: int, height: int) -> bytes: image = Image.open(io.BytesIO(content)).convert("RGBA") image = image.resize((width, height), Image.LANCZOS) out = io.BytesIO() image.save(out, format="PNG") return out.getvalue() def png_bytes_to_data_uri(content: bytes) -> str: return "data:image/png;base64," + base64.b64encode(content).decode("ascii") def write_gallery_image(content: bytes, role: str) -> str: handle = tempfile.NamedTemporaryFile( prefix=f"{slugify(role)}_", suffix=".png", delete=False, ) handle.write(content) handle.close() return handle.name def palette_for(text: str) -> list[tuple[int, int, int]]: lowered = text.lower() if any(word in lowered for word in ("watercolor", "ink wash", "gouache", "paper grain")): return [(113, 151, 168), (207, 180, 158), (241, 232, 214), (154, 118, 148)] if any(word in lowered for word in ("clay", "claymation", "stop-motion", "plasticine")): return [(204, 105, 83), (238, 175, 104), (107, 72, 61), (242, 213, 151)] if any(word in lowered for word in ("oil", "painterly", "impasto", "canvas")): return [(98, 45, 122), (213, 154, 70), (38, 28, 42), (230, 212, 156)] if any(word in lowered for word in ("vector", "flat", "logo")): return [(42, 125, 246), (255, 81, 128), (18, 24, 38), (255, 221, 64)] if any(word in lowered for word in ("anime", "manga")): return [(78, 151, 236), (255, 139, 173), (42, 45, 76), (255, 238, 166)] themed = [ (("lava", "fire", "volcano", "hell"), [(139, 28, 18), (239, 84, 34), (42, 18, 16), (255, 198, 71)]), (("ice", "snow", "frost", "arctic"), [(75, 151, 191), (187, 235, 255), (24, 63, 96), (240, 252, 255)]), (("desert", "sand", "dune", "western"), [(194, 139, 72), (236, 198, 112), (102, 66, 36), (255, 226, 151)]), (("ocean", "sea", "water", "underwater"), [(25, 103, 155), (57, 191, 205), (8, 43, 85), (177, 239, 224)]), (("forest", "garden", "grass", "jungle"), [(51, 132, 72), (126, 196, 81), (24, 72, 43), (242, 201, 84)]), (("dungeon", "castle", "stone", "crypt"), [(78, 82, 96), (133, 140, 154), (34, 36, 48), (181, 154, 94)]), (("city", "street", "cyber", "neon"), [(40, 52, 87), (39, 230, 226), (19, 21, 38), (239, 67, 154)]), (("sci-fi", "scifi", "space", "arena", "metal", "shooter"), [(30, 216, 236), (235, 64, 96), (18, 22, 31), (245, 192, 51)]), ] for words, colors in themed: if any(word in lowered for word in words): return colors digest = hashlib.sha256(text.encode("utf-8")).digest() base = digest[0] % 360 def hsl_to_rgb(h: float, s: float, l: float) -> tuple[int, int, int]: c = (1 - abs(2 * l - 1)) * s x = c * (1 - abs((h / 60) % 2 - 1)) m = l - c / 2 if h < 60: r, g, b = c, x, 0 elif h < 120: r, g, b = x, c, 0 elif h < 180: r, g, b = 0, c, x elif h < 240: r, g, b = 0, x, c elif h < 300: r, g, b = x, 0, c else: r, g, b = c, 0, x return (int((r + m) * 255), int((g + m) * 255), int((b + m) * 255)) return [ hsl_to_rgb((base + offset) % 360, 0.68, lightness) for offset, lightness in ((0, 0.46), (45, 0.56), (145, 0.38), (220, 0.64)) ] def style_tags_for(prompt: str) -> set[str]: lowered = prompt.lower() tags = set() groups = { "watercolor": ("watercolor", "ink wash", "gouache", "paper grain"), "vector": ("vector", "flat", "logo", "hard-edged", "graphic"), "clay": ("clay", "claymation", "stop-motion", "plasticine", "sculpted"), "oil": ("oil", "painterly", "impasto", "canvas", "bristle"), "pixel": ("pixel", "8-bit", "16-bit", "retro"), "anime": ("anime", "manga", "cel-shaded"), "neon": ("neon", "cyberpunk", "synthwave", "glowing"), } for tag, words in groups.items(): if any(word in lowered for word in words): tags.add(tag) return tags def apply_style_finish(content: bytes, spec: AssetSpec, rng: random.Random) -> bytes: tags = style_tags_for(spec.prompt) if not tags: return content image = Image.open(io.BytesIO(content)).convert("RGBA") draw = ImageDraw.Draw(image, "RGBA") is_bg = is_background_spec(spec) if "watercolor" in tags: wash = Image.new("RGBA", image.size, (245, 238, 220, 34 if is_bg else 12)) image = Image.alpha_composite(image, wash) draw = ImageDraw.Draw(image, "RGBA") for _ in range(60 if is_bg else 18): x = rng.randint(-20, image.width) y = rng.randint(-20, image.height) r = rng.randint(10, 70 if is_bg else 24) color = rng.choice(palette_for(spec.prompt)) + (rng.randint(18, 55),) draw.ellipse((x - r, y - r, x + r, y + r), fill=color) image = image.filter(ImageFilter.GaussianBlur(0.45)) if "oil" in tags: for _ in range(120 if is_bg else 34): x = rng.randint(0, image.width) y = rng.randint(0, image.height) length = rng.randint(8, 38 if is_bg else 16) color = rng.choice(palette_for(spec.prompt)) + (rng.randint(75, 145),) draw.line((x, y, x + rng.randint(-length, length), y + rng.randint(-length, length)), fill=color, width=rng.randint(2, 5)) image = ImageEnhance.Contrast(image).enhance(1.12) if "clay" in tags: image = ImageEnhance.Color(image).enhance(0.82) image = ImageEnhance.Contrast(image).enhance(1.18) draw = ImageDraw.Draw(image, "RGBA") for _ in range(45 if is_bg else 14): x = rng.randint(0, image.width) y = rng.randint(0, image.height) r = rng.randint(3, 18 if is_bg else 8) draw.ellipse((x - r, y - r, x + r, y + r), outline=(255, 238, 205, rng.randint(25, 70)), width=2) image = image.filter(ImageFilter.SMOOTH_MORE) if "vector" in tags: image = ImageEnhance.Contrast(image).enhance(1.35) image = ImageEnhance.Color(image).enhance(1.25) if not is_bg: alpha = image.getchannel("A") outline = alpha.filter(ImageFilter.MaxFilter(7)).filter(ImageFilter.GaussianBlur(0.2)) outlined = Image.new("RGBA", image.size, (12, 16, 28, 255)) outlined.putalpha(outline) image = Image.alpha_composite(outlined, image) if "anime" in tags: image = ImageEnhance.Color(image).enhance(1.35) image = ImageEnhance.Contrast(image).enhance(1.18) draw = ImageDraw.Draw(image, "RGBA") for _ in range(8 if is_bg else 4): x = rng.randint(0, image.width) y = rng.randint(0, image.height) draw.line((x, y, x + rng.randint(20, 80), y - rng.randint(10, 50)), fill=(255, 255, 255, 80), width=2) if "neon" in tags: glow = image.filter(ImageFilter.GaussianBlur(4 if is_bg else 3)) image = Image.blend(glow, image, 0.72) image = ImageEnhance.Color(image).enhance(1.45) if "pixel" in tags: small = image.resize((max(1, image.width // 4), max(1, image.height // 4)), Image.Resampling.NEAREST) image = small.resize(image.size, Image.Resampling.NEAREST) out = io.BytesIO() image.save(out, format="PNG") return out.getvalue() def draw_background(spec: AssetSpec, rng: random.Random) -> bytes: colors = palette_for(spec.prompt) image = Image.new("RGBA", (spec.width, spec.height), colors[2] + (255,)) draw = ImageDraw.Draw(image, "RGBA") prompt = spec.prompt.lower() if any(word in prompt for word in ("sci-fi", "scifi", "arena", "metal", "hazard", "grid", "shooter")): image = Image.new("RGBA", (spec.width, spec.height), (18, 22, 31, 255)) draw = ImageDraw.Draw(image, "RGBA") panel = 64 for y in range(0, spec.height, panel): for x in range(0, spec.width, panel): shade = 24 + ((x // panel + y // panel) % 2) * 12 draw.rectangle( (x, y, x + panel - 2, y + panel - 2), fill=(shade, shade + 4, shade + 14, 255), outline=(58, 74, 92, 170), ) for x in range(0, spec.width, 96): draw.line((x, 0, x, spec.height), fill=(34, 215, 236, 95), width=2) for y in range(0, spec.height, 96): draw.line((0, y, spec.width, y), fill=(34, 215, 236, 95), width=2) for _ in range(8): x = rng.randint(0, spec.width - 120) y = rng.randint(0, spec.height - 34) draw.rectangle((x, y, x + 120, y + 34), fill=(36, 42, 53, 235), outline=(110, 125, 142, 210)) for stripe in range(0, 120, 24): draw.polygon( [(x + stripe, y), (x + stripe + 12, y), (x + stripe - 12, y + 34), (x + stripe - 24, y + 34)], fill=(245, 192, 51, 210), ) for _ in range(18): x = rng.randint(0, spec.width) y = rng.randint(0, spec.height) r = rng.randint(10, 32) draw.ellipse((x - r, y - r, x + r, y + r), outline=(224, 54, 85, 100), width=2) out = io.BytesIO() image.save(out, format="PNG") return out.getvalue() if any(word in prompt for word in ("space", "star", "nebula", "galaxy", "cosmic")): image = Image.new("RGBA", (spec.width, spec.height), (3, 4, 18, 255)) draw = ImageDraw.Draw(image, "RGBA") for y in range(spec.height): mix = y / max(1, spec.height - 1) color = ( int(4 + 24 * mix), int(6 + 10 * mix), int(24 + 45 * mix), 255, ) draw.line((0, y, spec.width, y), fill=color) for _ in range(7): x = rng.randint(-120, spec.width) y = rng.randint(-90, spec.height) w = rng.randint(180, 420) h = rng.randint(90, 230) fill = rng.choice(colors) + (rng.randint(32, 70),) draw.ellipse((x, y, x + w, y + h), fill=fill) for _ in range(140): x = rng.randint(0, spec.width - 1) y = rng.randint(0, spec.height - 1) size = rng.choice((1, 1, 2, 3)) shade = rng.randint(180, 255) draw.rectangle((x, y, x + size, y + size), fill=(shade, shade, 255, rng.randint(130, 240))) out = io.BytesIO() image.save(out, format="PNG") return out.getvalue() if any(word in prompt for word in ("garden", "flower", "grass", "meadow", "forest", "path")): sky = (184, 224, 239) grass = (80, 151, 82) image = Image.new("RGBA", (spec.width, spec.height), sky + (255,)) draw = ImageDraw.Draw(image, "RGBA") horizon = int(spec.height * 0.56) for y in range(horizon): mix = y / max(1, horizon - 1) color = ( int(230 * (1 - mix) + sky[0] * mix), int(246 * (1 - mix) + sky[1] * mix), int(255 * (1 - mix) + sky[2] * mix), 255, ) draw.line((0, y, spec.width, y), fill=color) draw.rectangle((0, horizon, spec.width, spec.height), fill=grass + (255,)) path_color = (198, 169, 112, 255) draw.polygon( [ (int(spec.width * 0.39), horizon), (int(spec.width * 0.61), horizon), (int(spec.width * 0.83), spec.height), (int(spec.width * 0.18), spec.height), ], fill=path_color, ) for _ in range(18): x = rng.randint(0, spec.width) y = rng.randint(horizon + 48, spec.height - 18) stem = rng.randint(5, 12) draw.line((x, y + stem, x, y), fill=(43, 109, 53, 210), width=2) petal = rng.choice(colors) + (230,) draw.ellipse((x - 4, y - 3, x + 4, y + 5), fill=petal) for _ in range(10): x = rng.randint(-40, spec.width) y = rng.randint(horizon - 35, horizon + 20) draw.ellipse((x, y, x + rng.randint(65, 140), y + rng.randint(36, 72)), fill=(42, 121, 64, 185)) out = io.BytesIO() image.save(out, format="PNG") return out.getvalue() if any(word in prompt for word in ("lava", "fire", "volcano")): image = Image.new("RGBA", (spec.width, spec.height), (52, 23, 18, 255)) draw = ImageDraw.Draw(image, "RGBA") for y in range(0, spec.height, 58): draw.rectangle((0, y, spec.width, y + 30), fill=(70, 32, 25, 255)) for _ in range(18): x = rng.randint(-80, spec.width) y = rng.randint(0, spec.height) draw.line((x, y, x + rng.randint(120, 320), y + rng.randint(-20, 20)), fill=(255, 111, 43, 220), width=rng.randint(5, 12)) for _ in range(45): x = rng.randint(0, spec.width) y = rng.randint(0, spec.height) r = rng.randint(5, 20) draw.ellipse((x - r, y - r, x + r, y + r), fill=(255, 192, 53, rng.randint(70, 140))) out = io.BytesIO() image.save(out, format="PNG") return out.getvalue() if any(word in prompt for word in ("dungeon", "castle", "stone", "crypt")): image = Image.new("RGBA", (spec.width, spec.height), (48, 50, 62, 255)) draw = ImageDraw.Draw(image, "RGBA") tile = 54 for y in range(0, spec.height, tile): for x in range(0, spec.width, tile): shade = rng.randint(48, 78) draw.rectangle((x, y, x + tile - 2, y + tile - 2), fill=(shade, shade + 2, shade + 8, 255), outline=(27, 29, 38, 190)) for _ in range(22): x = rng.randint(0, spec.width) y = rng.randint(0, spec.height) draw.ellipse((x - 8, y - 8, x + 8, y + 8), fill=(185, 146, 80, 110)) out = io.BytesIO() image.save(out, format="PNG") return out.getvalue() if any(word in prompt for word in ("ocean", "sea", "water", "underwater")): image = Image.new("RGBA", (spec.width, spec.height), (8, 54, 92, 255)) draw = ImageDraw.Draw(image, "RGBA") for y in range(spec.height): mix = y / max(1, spec.height - 1) draw.line((0, y, spec.width, y), fill=(int(16 + 6 * mix), int(99 + 62 * mix), int(151 + 58 * mix), 255)) for _ in range(26): x = rng.randint(-100, spec.width) y = rng.randint(0, spec.height) draw.arc((x, y, x + 180, y + 70), 0, 180, fill=(165, 232, 225, 90), width=3) for _ in range(38): x = rng.randint(0, spec.width) y = rng.randint(0, spec.height) r = rng.randint(2, 8) draw.ellipse((x - r, y - r, x + r, y + r), outline=(205, 247, 245, 120), width=2) out = io.BytesIO() image.save(out, format="PNG") return out.getvalue() for y in range(spec.height): mix = y / max(1, spec.height - 1) color = tuple(int(colors[2][i] * (1 - mix) + colors[0][i] * mix) for i in range(3)) draw.line((0, y, spec.width, y), fill=color + (255,)) for _ in range(34): x = rng.randint(-80, spec.width) y = rng.randint(-40, spec.height) w = rng.randint(80, 260) h = rng.randint(36, 150) fill = rng.choice(colors) + (rng.randint(45, 110),) draw.ellipse((x, y, x + w, y + h), fill=fill) for _ in range(80): x = rng.randint(0, spec.width) y = rng.randint(0, spec.height) size = rng.randint(2, 7) fill = rng.choice(colors) + (rng.randint(90, 190),) draw.rectangle((x, y, x + size, y + size), fill=fill) out = io.BytesIO() image.save(out, format="PNG") return out.getvalue() def draw_sprite(spec: AssetSpec, rng: random.Random) -> bytes: colors = palette_for(f"{spec.role} {spec.prompt}") image = Image.new("RGBA", (spec.width, spec.height), (0, 0, 0, 0)) draw = ImageDraw.Draw(image, "RGBA") cx, cy = spec.width // 2, spec.height // 2 scale = min(spec.width, spec.height) / 128 role = slugify(spec.role) role_prompt = f"{spec.role} {spec.prompt}".lower() outline = (18, 24, 31, 255) body = colors[0] + (255,) accent = colors[1] + (255,) trim = colors[3] + (255,) if any(word in role_prompt for word in ("car", "racing", "racer", "buggy", "vehicle", "truck")): shadow = (int(cx - 43 * scale), int(cy + 28 * scale), int(cx + 43 * scale), int(cy + 42 * scale)) draw.ellipse(shadow, fill=(0, 0, 0, 58)) draw.rounded_rectangle( (cx - 34 * scale, cy - 42 * scale, cx + 34 * scale, cy + 42 * scale), radius=int(18 * scale), fill=outline, ) draw.rounded_rectangle( (cx - 26 * scale, cy - 34 * scale, cx + 26 * scale, cy + 34 * scale), radius=int(13 * scale), fill=body, ) draw.rectangle((cx - 15 * scale, cy - 24 * scale, cx + 15 * scale, cy - 5 * scale), fill=accent) draw.rectangle((cx - 18 * scale, cy + 8 * scale, cx + 18 * scale, cy + 27 * scale), fill=trim) for side in (-1, 1): draw.rounded_rectangle( (cx + side * 27 * scale - 9 * scale, cy - 30 * scale, cx + side * 27 * scale + 9 * scale, cy - 13 * scale), radius=int(5 * scale), fill=(20, 22, 27, 255), ) draw.rounded_rectangle( (cx + side * 27 * scale - 9 * scale, cy + 15 * scale, cx + side * 27 * scale + 9 * scale, cy + 32 * scale), radius=int(5 * scale), fill=(20, 22, 27, 255), ) out = io.BytesIO() image.save(out, format="PNG") return out.getvalue() if any(word in role_prompt for word in ("turret", "tower", "cannon", "launcher")): draw.ellipse((cx - 38 * scale, cy + 22 * scale, cx + 38 * scale, cy + 45 * scale), fill=(0, 0, 0, 60)) draw.ellipse((cx - 36 * scale, cy - 12 * scale, cx + 36 * scale, cy + 42 * scale), fill=outline) draw.ellipse((cx - 28 * scale, cy - 5 * scale, cx + 28 * scale, cy + 34 * scale), fill=body) barrel_angle = rng.choice((-0.45, -0.2, 0, 0.2, 0.45)) length = 52 * scale bx = cx + math.sin(barrel_angle) * length by = cy - 38 * scale draw.line((cx, cy - 5 * scale, bx, by), fill=outline, width=max(10, int(17 * scale))) draw.line((cx, cy - 5 * scale, bx, by), fill=trim, width=max(5, int(9 * scale))) draw.rectangle((cx - 19 * scale, cy + 15 * scale, cx + 19 * scale, cy + 39 * scale), fill=accent) out = io.BytesIO() image.save(out, format="PNG") return out.getvalue() if role in ("coin", "gem", "key", "orb", "pickup", "collectible") or any( word in role_prompt for word in ("coin", "gem", "key", "pickup", "collectible", "powerup", "power-up", "treasure") ): draw.ellipse((cx - 33 * scale, cy - 33 * scale, cx + 33 * scale, cy + 33 * scale), fill=outline) if any(word in role_prompt for word in ("gem", "crystal", "diamond")): points = [ (cx, cy - 43 * scale), (cx + 36 * scale, cy - 8 * scale), (cx + 20 * scale, cy + 40 * scale), (cx - 20 * scale, cy + 40 * scale), (cx - 36 * scale, cy - 8 * scale), ] draw.polygon(points, fill=body) draw.polygon([(cx, cy - 34 * scale), (cx + 18 * scale, cy - 4 * scale), (cx, cy + 28 * scale), (cx - 18 * scale, cy - 4 * scale)], fill=accent) elif "key" in role_prompt: draw.ellipse((cx - 30 * scale, cy - 20 * scale, cx + 8 * scale, cy + 18 * scale), fill=body) draw.ellipse((cx - 17 * scale, cy - 8 * scale, cx - 3 * scale, cy + 6 * scale), fill=(0, 0, 0, 0)) draw.rounded_rectangle((cx + 2 * scale, cy - 5 * scale, cx + 43 * scale, cy + 7 * scale), radius=int(5 * scale), fill=accent) draw.rectangle((cx + 30 * scale, cy + 4 * scale, cx + 43 * scale, cy + 17 * scale), fill=trim) else: draw.ellipse((cx - 27 * scale, cy - 27 * scale, cx + 27 * scale, cy + 27 * scale), fill=body) draw.ellipse((cx - 17 * scale, cy - 17 * scale, cx + 17 * scale, cy + 17 * scale), outline=trim, width=max(3, int(7 * scale))) draw.arc((cx - 12 * scale, cy - 20 * scale, cx + 20 * scale, cy + 13 * scale), 195, 300, fill=(255, 255, 255, 180), width=max(2, int(4 * scale))) out = io.BytesIO() image.save(out, format="PNG") return out.getvalue() if any(word in role_prompt for word in ("slime", "blob", "gel", "jelly")): draw.ellipse((cx - 40 * scale, cy + 24 * scale, cx + 40 * scale, cy + 42 * scale), fill=(0, 0, 0, 55)) draw.pieslice((cx - 40 * scale, cy - 28 * scale, cx + 40 * scale, cy + 52 * scale), 180, 360, fill=outline) draw.pieslice((cx - 32 * scale, cy - 20 * scale, cx + 32 * scale, cy + 42 * scale), 180, 360, fill=body) for side in (-1, 1): draw.ellipse((cx + side * 11 * scale - 5 * scale, cy + 3 * scale, cx + side * 11 * scale + 5 * scale, cy + 13 * scale), fill=(255, 255, 255, 230)) draw.arc((cx - 12 * scale, cy + 12 * scale, cx + 12 * scale, cy + 27 * scale), 10, 170, fill=trim, width=max(2, int(4 * scale))) out = io.BytesIO() image.save(out, format="PNG") return out.getvalue() if any(word in role_prompt for word in ("beetle", "bug", "spider", "insect", "crawler", "creep")): draw.ellipse((cx - 28 * scale, cy - 34 * scale, cx + 28 * scale, cy + 34 * scale), fill=outline) draw.ellipse((cx - 21 * scale, cy - 27 * scale, cx + 21 * scale, cy + 29 * scale), fill=body) draw.line((cx, cy - 25 * scale, cx, cy + 30 * scale), fill=trim, width=max(2, int(4 * scale))) for y in (-18, 0, 18): for side in (-1, 1): draw.line((cx + side * 18 * scale, cy + y * scale, cx + side * 45 * scale, cy + (y + rng.choice((-10, 10))) * scale), fill=outline, width=max(3, int(5 * scale))) out = io.BytesIO() image.save(out, format="PNG") return out.getvalue() if role in ("bullet", "projectile", "laser", "shot") or any(word in role_prompt for word in ("bullet", "projectile", "laser", "energy")): glow = colors[1] + (85,) core_width = rng.randint(7, 13) * scale core_height = rng.randint(30, 44) * scale glow_width = core_width + rng.randint(18, 30) * scale glow_height = core_height + rng.randint(12, 25) * scale draw.ellipse((cx - glow_width, cy - glow_height, cx + glow_width, cy + glow_height), fill=glow) draw.rounded_rectangle( (cx - core_width, cy - core_height, cx + core_width, cy + core_height), radius=int(9 * scale), fill=outline, ) draw.rounded_rectangle( (cx - max(2, core_width - 4 * scale), cy - max(8, core_height - 6 * scale), cx + max(2, core_width - 4 * scale), cy + max(8, core_height - 6 * scale)), radius=int(5 * scale), fill=accent, ) for _ in range(5): px = cx + rng.randint(-24, 24) * scale py = cy + rng.randint(-38, 38) * scale draw.line( (px, py, px + rng.randint(-10, 10) * scale, py + rng.randint(-8, 8) * scale), fill=trim, width=max(1, int(2 * scale)), ) out = io.BytesIO() image.save(out, format="PNG") return out.getvalue() if role in ("enemy", "monster", "alien", "drone", "foe", "boss") or any(word in role_prompt for word in ("enemy", "drone", "alien", "monster")): draw.ellipse((cx - 38 * scale, cy - 28 * scale, cx + 38 * scale, cy + 28 * scale), fill=outline) draw.ellipse((cx - 32 * scale, cy - 22 * scale, cx + 32 * scale, cy + 22 * scale), fill=body) draw.ellipse((cx - 16 * scale, cy - 16 * scale, cx + 16 * scale, cy + 16 * scale), fill=accent) for side in (-1, 1): draw.polygon( [ (cx + side * 28 * scale, cy - 8 * scale), (cx + side * 56 * scale, cy - 28 * scale), (cx + side * 48 * scale, cy + 20 * scale), ], fill=outline, ) draw.line((cx + side * 14 * scale, cy, cx + side * 58 * scale, cy), fill=trim, width=max(2, int(5 * scale))) for _ in range(5): angle = rng.random() * 6.283 r = rng.randint(int(12 * scale), int(30 * scale)) px = cx + int(r * math.cos(angle)) py = cy + int(r * math.sin(angle)) draw.ellipse((px - 3 * scale, py - 3 * scale, px + 3 * scale, py + 3 * scale), fill=trim) out = io.BytesIO() image.save(out, format="PNG") return out.getvalue() if role in ("player", "hero", "ship", "tank", "avatar") or any(word in role_prompt for word in ("shooter", "ship", "tank", "hero", "player", "armored")): shadow = (int(cx - 36 * scale), int(cy + 34 * scale), int(cx + 36 * scale), int(cy + 48 * scale)) draw.ellipse(shadow, fill=(0, 0, 0, 55)) wing = rng.randint(28, 39) nose = rng.randint(45, 56) draw.polygon( [ (cx, cy - nose * scale), (cx - wing * scale, cy + 34 * scale), (cx, cy + 20 * scale), (cx + wing * scale, cy + 34 * scale), ], fill=outline, ) draw.polygon( [ (cx, cy - (nose - 8) * scale), (cx - (wing - 10) * scale, cy + 23 * scale), (cx, cy + 12 * scale), (cx + (wing - 10) * scale, cy + 23 * scale), ], fill=body, ) draw.ellipse((cx - 11 * scale, cy - 14 * scale, cx + 11 * scale, cy + 10 * scale), fill=accent) draw.line((cx, cy - 42 * scale, cx, cy - 62 * scale), fill=trim, width=max(3, int(6 * scale))) for side in (-1, 1): x1 = cx + side * rng.randint(14, 23) * scale x2 = cx + side * rng.randint(24, 32) * scale y1 = cy + rng.randint(8, 18) * scale y2 = cy + rng.randint(20, 30) * scale draw.rectangle((min(x1, x2), y1, max(x1, x2), y2), fill=trim) out = io.BytesIO() image.save(out, format="PNG") return out.getvalue() shadow = (int(cx - 36 * scale), int(cy + 34 * scale), int(cx + 36 * scale), int(cy + 48 * scale)) draw.ellipse(shadow, fill=(0, 0, 0, 65)) body_box = (cx - 26 * scale, cy - 18 * scale, cx + 26 * scale, cy + 34 * scale) head_box = (cx - 20 * scale, cy - 52 * scale, cx + 20 * scale, cy - 12 * scale) draw.rounded_rectangle(body_box, radius=int(12 * scale), fill=outline) draw.rounded_rectangle( tuple(v + d * scale for v, d in zip(body_box, (4, 4, -4, -4))), radius=int(9 * scale), fill=body, ) draw.ellipse(head_box, fill=outline) draw.ellipse(tuple(v + d * scale for v, d in zip(head_box, (4, 4, -4, -4))), fill=accent) eye_y = cy - int(33 * scale) eye_dx = int(8 * scale) eye_size = max(2, int(4 * scale)) draw.ellipse((cx - eye_dx - eye_size, eye_y, cx - eye_dx + eye_size, eye_y + eye_size * 2), fill=(255, 255, 255, 245)) draw.ellipse((cx + eye_dx - eye_size, eye_y, cx + eye_dx + eye_size, eye_y + eye_size * 2), fill=(255, 255, 255, 245)) for side in (-1, 1): arm = ( cx + side * 22 * scale, cy - 8 * scale, cx + side * rng.randint(38, 48) * scale, cy + rng.randint(8, 22) * scale, ) draw.line(arm, fill=outline, width=max(4, int(9 * scale))) draw.line(arm, fill=trim, width=max(2, int(5 * scale))) leg = ( cx + side * 13 * scale, cy + 30 * scale, cx + side * rng.randint(16, 28) * scale, cy + 53 * scale, ) draw.line(leg, fill=outline, width=max(4, int(10 * scale))) draw.line(leg, fill=body, width=max(2, int(6 * scale))) for _ in range(10): px = rng.randint(int(cx - 22 * scale), int(cx + 22 * scale)) py = rng.randint(int(cy - 10 * scale), int(cy + 26 * scale)) draw.rectangle((px, py, px + max(2, int(4 * scale)), py + max(2, int(4 * scale))), fill=trim) out = io.BytesIO() image.save(out, format="PNG") return out.getvalue() def local_asset_png(spec: AssetSpec, index: int, run_id: int) -> bytes: seed_text = f"{spec.role}|{spec.prompt}|{index}|{run_id}" rng = random.Random(seed_text) if is_background_spec(spec): content = draw_background(spec, rng) else: content = draw_sprite(spec, rng) return apply_style_finish(content, spec, rng) def placeholder_png_bytes(role: str, width: int, height: int) -> bytes: label = escape(role[:18]) image = Image.new("RGBA", (width, height), "#222222") draw = ImageDraw.Draw(image) draw.rounded_rectangle((4, 4, width - 4, height - 4), radius=12, fill="#3b82f6") font = ImageFont.load_default() bbox = draw.textbbox((0, 0), label, font=font) x = (width - (bbox[2] - bbox[0])) / 2 y = (height - (bbox[3] - bbox[1])) / 2 draw.text((x, y), label, fill="white", font=font) out = io.BytesIO() image.save(out, format="PNG") return out.getvalue() def diffusion_dimensions(spec: AssetSpec) -> tuple[int, int]: if is_background_spec(spec): return 384, 216 return 256, 256 def polish_diffusion_asset(image: Image.Image, spec: AssetSpec) -> bytes: image = image.convert("RGBA") if not is_background_spec(spec): # Text-to-image models do not produce real transparency. This makes sprites # usable by fading out colors similar to the generated corner background. small = image.resize((128, 128), Image.LANCZOS) corners = [ small.getpixel((0, 0)), small.getpixel((127, 0)), small.getpixel((0, 127)), small.getpixel((127, 127)), ] bg = tuple(sum(pixel[i] for pixel in corners) // len(corners) for i in range(3)) pixels = small.load() for y in range(small.height): for x in range(small.width): r, g, b, a = pixels[x, y] dist = abs(r - bg[0]) + abs(g - bg[1]) + abs(b - bg[2]) edge = min(x, y, small.width - 1 - x, small.height - 1 - y) if dist < 64 or edge < 3: a = 0 elif dist < 125: a = max(0, min(a, (dist - 64) * 4)) pixels[x, y] = (r, g, b, a) image = small.resize((spec.width, spec.height), Image.LANCZOS) else: image = image.resize((spec.width, spec.height), Image.LANCZOS) out = io.BytesIO() image.save(out, format="PNG") return out.getvalue() def free_diffusion_png(spec: AssetSpec, index: int, run_id: int) -> tuple[bytes | None, str | None]: global FREE_DIFFUSION_PIPE, FREE_DIFFUSION_ERROR if FREE_DIFFUSION_ERROR: return None, FREE_DIFFUSION_ERROR try: import torch from diffusers import DiffusionPipeline if FREE_DIFFUSION_PIPE is None: FREE_DIFFUSION_PIPE = DiffusionPipeline.from_pretrained(FREE_IMAGE_MODEL, torch_dtype=torch.float32) FREE_DIFFUSION_PIPE = FREE_DIFFUSION_PIPE.to("cpu") for component_name in ("unet", "vae", "text_encoder"): component = getattr(FREE_DIFFUSION_PIPE, component_name, None) if component is not None and hasattr(component, "to"): component.to(device="cpu", dtype=torch.float32) if hasattr(FREE_DIFFUSION_PIPE, "enable_attention_slicing"): FREE_DIFFUSION_PIPE.enable_attention_slicing() width, height = diffusion_dimensions(spec) seed = abs(hash(f"{spec.role}|{spec.prompt}|{index}|{run_id}")) % 2147483647 generator = torch.Generator(device="cpu").manual_seed(seed) negative_prompt = ( "texture map, material texture, seamless texture, tiled pattern, uv map, uv unwrap, " "skin texture, 3d model texture, normal map, roughness map, diffuse map, sprite sheet, " "atlas, multiple objects, cropped subject, close-up surface, fabric swatch, text, watermark, " "person, character, player, hero, creature, monster, vehicle, mascot, foreground subject" ) image = FREE_DIFFUSION_PIPE( spec.prompt, negative_prompt=negative_prompt, width=width, height=height, num_inference_steps=FREE_IMAGE_STEPS, guidance_scale=6.0, generator=generator, ).images[0] return polish_diffusion_asset(image, spec), None except Exception as exc: FREE_DIFFUSION_ERROR = short_error(exc) return None, FREE_DIFFUSION_ERROR def hf_image_png(spec: AssetSpec, index: int, run_id: int) -> tuple[bytes | None, str | None]: if not USE_HF_IMAGE_PROVIDER: return None, None if not HF_TOKEN: return None, "HF_TOKEN is not visible to the Space runtime" if InferenceClient is None: return None, "huggingface_hub is not installed in this Space" try: client = InferenceClient(api_key=HF_TOKEN) image = client.text_to_image( prompt=spec.prompt, model=HF_IMAGE_MODEL, width=spec.width, height=spec.height, num_inference_steps=4, guidance_scale=0.0, seed=abs(hash(f"{spec.role}|{spec.prompt}|{index}|{run_id}")) % 2147483647, ) out = io.BytesIO() image.save(out, format="PNG") return image_to_png_bytes(out.getvalue(), spec.width, spec.height), None except Exception as exc: return None, short_error(exc) def generate_asset(spec: AssetSpec, index: int, run_id: int) -> tuple[str, str, str | None, str]: if not is_background_spec(spec) and not USE_DIFFUSION_FOR_SPRITES: png_content = local_asset_png(spec, index, run_id) return ( png_bytes_to_data_uri(png_content), write_gallery_image(png_content, spec.role), None, "shape-aware 2D sprite generator", ) if is_background_spec(spec) and not USE_DIFFUSION_FOR_BACKGROUNDS: png_content = local_asset_png(spec, index, run_id) return ( png_bytes_to_data_uri(png_content), write_gallery_image(png_content, spec.role), None, "scene-safe 2D background generator", ) png_content, free_error = free_diffusion_png(spec, index, run_id) source = FREE_IMAGE_MODEL error = free_error if png_content is None: png_content, hf_error = hf_image_png(spec, index, run_id) source = HF_IMAGE_MODEL error = hf_error or free_error if png_content is None: png_content = local_asset_png(spec, index, run_id) source = "local procedural fallback" return ( png_bytes_to_data_uri(png_content), write_gallery_image(png_content, spec.role), error if source == "local procedural fallback" else None, source, ) def replacement_names(spec: AssetSpec) -> set[str]: slug = slugify(spec.role) names = { spec.filename, f"{slug}.png", f"{slug}.jpg", f"{slug}.jpeg", f"{slug}.webp", f"asset_{slug}.png", f"{spec.role.strip()}.png", f"{{{{{slug}}}}}", f"{{{slug}}}", } if slug == "background": names.update({"background.png", "sprite_background.jpg", "background.jpg"}) if slug == "player": names.update({"player.png", "sprite_player.jpg", "hero.png"}) if slug == "enemy": names.update({"enemy.png", "monster.png", "sprite_enemy.jpg"}) if slug == "bullet": names.update({"bullet.png", "projectile.png", "laser.png", "shot.png", "sprite_bullet.jpg"}) return names def asset_aliases(spec: AssetSpec) -> list[str]: slug = slugify(spec.role) aliases = {slug, spec.role.strip().lower(), spec.filename.lower()} prompt = spec.prompt.lower() role_groups = { "player": ("player", "hero", "character", "avatar", "ship", "shooter", "tank", "knight", "wizard"), "enemy": ("enemy", "monster", "alien", "drone", "foe", "zombie", "boss", "hazard"), "bullet": ("bullet", "projectile", "laser", "shot", "missile", "beam", "ammo"), "coin": ("coin", "gem", "seed", "star", "pickup", "collectible", "key", "orb"), "background": ("background", "backdrop", "bg", "map", "level", "arena", "scene", "world", "floor"), } for group, words in role_groups.items(): if group in slug or any(word in prompt for word in words): aliases.update(words) return sorted(alias for alias in aliases if alias) def embed_assets(html_code: str, assets: dict[str, str], specs: list[AssetSpec]) -> str: output = html_code manifest_lines = ["") manifest = "\n".join(manifest_lines) + "\n" asset_json = json.dumps(asset_map) alias_json = json.dumps(alias_map) background_json = json.dumps(background_uri) helper_script = f"""""" if "" in output: output = output.replace("", helper_script + "\n", 1) elif " str: encoded = base64.b64encode(html_code.encode("utf-8")).decode("ascii") return ( f'' ) def build_prompt_preview(specs: list[AssetSpec]) -> str: return "\n\n".join(f"{spec.role}:\n{spec.prompt}" for spec in specs) def build_model_report(rows: list[tuple[str, str, str]]) -> str: return "\n".join(f"{role}: prompt={prompt_model}; image={image_model}" for role, prompt_model, image_model in rows) def summarize_model_sources(rows: list[tuple[str, str, str]]) -> str: prompt_sources = sorted({prompt_model for _, prompt_model, _ in rows}) image_sources = sorted({image_model for _, _, image_model in rows}) return f"prompt={', '.join(prompt_sources)}; image={', '.join(image_sources)}" def generate_images_and_game(html_code: str, roles: str, style_hint: str): if not html_code.strip(): return "", "Paste HTML game code first.", [], "", "", "" role_lines, prompt_map, prompt_model, prompt_error = build_prompt_map(html_code, roles, style_hint or "pixel art style") specs = parse_assets(roles, style_hint or "pixel art style", prompt_map) if not specs: return html_code, "Add at least one asset role, like `player: brave knight`.", [], "", "", build_preview(html_code) assets: dict[str, str] = {} gallery = [] errors = [] model_rows = [] run_id = time.time_ns() if prompt_error: errors.append(f"prompt model: {prompt_error}") for index, spec in enumerate(specs): data_uri, gallery_path, error, image_model = generate_asset(spec, index, run_id) assets[spec.role] = data_uri gallery.append((gallery_path, f"{spec.role} -> {spec.filename}")) model_rows.append((spec.role, prompt_model, image_model)) if error: errors.append(f"{spec.role}: image model failed ({error}); used local procedural fallback") rewritten = embed_assets(html_code, assets, specs) status = ( f"Generated and embedded {len(specs)} fresh asset(s) using " f"{summarize_model_sources(model_rows)}. Run {str(run_id)[-6:]}." ) if errors: status += "\n\n" + "\n".join(errors) return rewritten, status, gallery, build_prompt_preview(specs), build_model_report(model_rows), build_preview(rewritten) def check_hf_token() -> str: if not HF_TOKEN: return "HF_TOKEN is missing or not visible to the Space runtime." request = Request( "https://huggingface.co/api/whoami-v2", headers={"Authorization": f"Bearer {HF_TOKEN}"}, method="GET", ) try: with urlopen(request, timeout=30) as response: raw = response.read().decode("utf-8", errors="replace") data = json.loads(raw) name = data.get("name") or data.get("fullname") or "authenticated account" return f"HF_TOKEN is visible and valid for {name}." except Exception as exc: return f"HF_TOKEN check failed: {short_error(exc)}" with gr.Blocks(title="Image Generator for HTML Games") as demo: gr.Markdown( "# Image Generator for HTML Games\n" "Paste an HTML canvas game, list the image roles you want, and generate a rewritten version " "with the images embedded directly into the code." ) with gr.Row(): with gr.Column(scale=1): roles = gr.Textbox( label="Image roles to generate", lines=8, placeholder=ROLE_PLACEHOLDER, info="One per line: role: image description. Example: player: blue robot hero", ) style = gr.Textbox( label="Shared visual style", lines=2, placeholder="Optional: describe the art style/theme, or leave blank.", ) generate_btn = gr.Button("Generate Images + Embed Game", variant="primary") status = gr.Markdown("Ready.") token_btn = gr.Button("Check HF Token") token_status = gr.Markdown("") gallery = gr.Gallery(label="Generated assets", columns=2, height=300) with gr.Column(scale=2): html_input = gr.Textbox( label="Original HTML game code", lines=18, placeholder="Paste your full HTML game code here.", ) output_code = gr.Code( label="Rewritten HTML with embedded images", language="html", lines=18, ) prompt_preview = gr.Textbox( label="Interpreted image prompts", lines=8, interactive=False, ) model_report = gr.Textbox( label="Model/source used by role", lines=5, interactive=False, ) gr.Markdown("## Game preview") preview = gr.HTML("") generate_btn.click( fn=generate_images_and_game, inputs=[html_input, roles, style], outputs=[output_code, status, gallery, prompt_preview, model_report, preview], ) token_btn.click(fn=check_hf_token, inputs=None, outputs=token_status) if __name__ == "__main__": demo.launch()