"""Prototype of the actual app engine: pixel-perfect editing of real PNG textures via grammar-constrained decoding on the local Gemma server. Pipeline: PNG -> token-stable wire format (palette quantized to <=26 keys, alpha thresholded, grid cells separated by spaces) -> per-file GBNF grammar locking the footprint (exact or 2x upscale) -> local llama.cpp generation -> parse -> true-alpha PNG out + checkerboard preview. Supports non-square textures (Minecraft mob atlases are 64x32 etc.). Usage: python pixel_editor.py "" [--upscale] [--out DIR] """ from __future__ import annotations import argparse import asyncio import sys import time from collections import Counter from pathlib import Path import httpx from PIL import Image sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) import config import render import validate # The app allows richer palettes than the benchmark's 12-entry cap. # validate.py defines its own module-level constant, so patch it there. config.MAX_PALETTE = 64 validate.MAX_PALETTE = 64 URL = "http://localhost:8080/v1/chat/completions" KEY_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" # up to 26 opaque colors MAX_COLORS = len(KEY_ALPHABET) APP_SYSTEM = """You are an expert pixel artist editing real game textures \ (Minecraft resource pack style). You receive a texture as a palette-indexed \ character grid and an instruction. Recolor and restyle it with conviction: \ full color ramps (dark, mid, light per material), hue-shifted shading (shadows \ toward purple/blue, highlights warm), consistent upper-left light, no flat \ single-color fills, no lazy tint shifts. The pixel layout is locked; you choose \ only the colors. Output the PALETTE block then the GRID block in the exact \ format of the input. Grid rows use one space between single-character cells; \ preserve that row format exactly. Write nothing else.""" def _format_rows(rows: list[str], spaced: bool) -> list[str]: if not spaced: return list(rows) return [" ".join(row) for row in rows] def sprite_to_wire(sprite: validate.Sprite, spaced: bool = False) -> str: lines = ["PALETTE"] for key, rgb in sprite.palette.items(): if rgb is None: lines.append("{} transparent".format(key)) else: lines.append("{} {},{},{}".format(key, *rgb)) lines.append("GRID {}x{}".format(sprite.width, sprite.height)) lines.extend(_format_rows(sprite.rows, spaced)) return "\n".join(lines) def png_to_wire(path: Path, spaced: bool = False) -> tuple[str, int, int]: """Convert a PNG to wire format. Returns (wire_text, width, height).""" im = Image.open(path).convert("RGBA") w, h = im.size pixels = list(im.get_flattened_data() if hasattr(im, "get_flattened_data") else im.getdata()) opaque = [(r, g, b) for r, g, b, a in pixels if a >= 128] counts = Counter(opaque) if len(counts) > MAX_COLORS: # Quantize opaque colors down to MAX_COLORS with Pillow's median cut. tmp = Image.new("RGB", (len(opaque), 1)) tmp.putdata(opaque) quant = tmp.quantize(colors=MAX_COLORS) qpal = quant.getpalette()[: MAX_COLORS * 3] centers = [tuple(qpal[i * 3 : i * 3 + 3]) for i in range(MAX_COLORS)] def nearest(c: tuple[int, int, int]) -> tuple[int, int, int]: return min(centers, key=lambda k: sum((a - b) ** 2 for a, b in zip(k, c))) mapping = {c: nearest(c) for c in counts} counts = Counter(mapping[c] for c in opaque) else: mapping = {c: c for c in counts} keys: dict[tuple[int, int, int], str] = {} for i, (color, _n) in enumerate(counts.most_common()): keys[color] = KEY_ALPHABET[i] rows = [] idx = 0 for y in range(h): row = [] for x in range(w): r, g, b, a = pixels[idx] idx += 1 row.append("." if a < 128 else keys[mapping[(r, g, b)]]) rows.append("".join(row)) palette: dict[str, tuple[int, int, int] | None] = {".": None} for color, key in sorted(keys.items(), key=lambda kv: kv[1]): palette[key] = color sprite = validate.Sprite(palette=palette, width=w, height=h, rows=rows) return sprite_to_wire(sprite, spaced=spaced), w, h def wire_to_png(sprite: validate.Sprite, out_path: Path) -> None: """Write the true-alpha 1:1 PNG (the actual resource-pack artifact).""" im = Image.new("RGBA", (sprite.width, sprite.height)) data = [] for row in sprite.rows: for ch in row: rgb = sprite.palette[ch] data.append((0, 0, 0, 0) if rgb is None else (*rgb, 255)) im.putdata(data) out_path.parent.mkdir(parents=True, exist_ok=True) im.save(out_path) def _row_grammar(row: str, spaced: bool) -> str: if spaced: parts: list[str] = [] for i, ch in enumerate(row): parts.append('"."' if ch == "." else "ckey") if i != len(row) - 1: parts.append('" "') return " ".join(parts) parts = ['"."' if ch == "." else "ckey" for ch in row] # Collapse consecutive '.' literals into one string literal for a smaller # grammar (matters at 128x128). merged: list[str] = [] dots = 0 for part in parts: if part == '"."': dots += 1 else: if dots: merged.append('"{}"'.format("." * dots)) dots = 0 merged.append(part) if dots: merged.append('"{}"'.format("." * dots)) return " ".join(merged) def build_grammar( rows: list[str], n_keys: int, upscale: bool, spaced: bool = False ) -> str: """Footprint-locked GBNF. '.' cells are literal; colored cells sample ckey.""" if upscale: rows = ["".join(ch * 2 for ch in r) for r in rows for _ in (0, 1)] w, h = len(rows[0]), len(rows) key_class = KEY_ALPHABET[:n_keys] palette_part = "".join( '"{}" " " rgb "\\n" '.format(k) for k in key_class ) row_refs = " ".join("r{}".format(y) for y in range(h)) lines = [ 'root ::= "PALETTE\\n. transparent\\n" {} "GRID {}x{}\\n" {}'.format( palette_part, w, h, row_refs ), 'rgb ::= num "," num "," num', 'num ::= ("25" [0-5]) | ("2" [0-4] [0-9]) | ("1" [0-9] [0-9]) | ([1-9] [0-9]) | [0-9]', "ckey ::= [{}]".format(key_class), ] for y, row in enumerate(rows): lines.append('r{} ::= {} "\\n"'.format(y, _row_grammar(row, spaced))) return "\n".join(lines) async def edit_file(path: Path, instruction: str, upscale: bool, out_dir: Path) -> dict: wire, w, h = png_to_wire(path, spaced=True) in_sprite, perr = validate.parse_sprite(wire) assert in_sprite is not None, perr n_keys = len([k for k in in_sprite.palette if k != "."]) grammar = build_grammar(in_sprite.rows, n_keys, upscale, spaced=True) if upscale: contract = ( "Redraw this texture at {}x{} (2x). Every input pixel becomes a 2x2 " "block: transparent stays transparent, colored stays colored. Add " "finer shading and detail within that constraint." ).format(w * 2, h * 2) else: contract = ( "Edit this texture. The grid stays {}x{} and every transparent cell " "stays transparent; change only the colors of non-transparent cells." ).format(w, h) user_msg = "{}\n\nInstruction: {}\n\nHere is the input texture:\n{}".format( contract, instruction, wire ) out_cells = (w * 2) * (h * 2) if upscale else w * h max_tokens = min(int(out_cells * 1.6) + 800, 40000) payload = { "model": "gemma-4-12b", "messages": [ {"role": "system", "content": APP_SYSTEM}, {"role": "user", "content": user_msg}, ], "max_tokens": max_tokens, "temperature": 0.7, "chat_template_kwargs": {"enable_thinking": False}, "grammar": grammar, } started = time.perf_counter() async with httpx.AsyncClient(timeout=1800.0) as http: resp = await http.post(URL, json=payload) latency = time.perf_counter() - started resp.raise_for_status() data = resp.json() text = data["choices"][0]["message"]["content"] or "" finish = data["choices"][0].get("finish_reason") sprite, perr = validate.parse_sprite(text) result = { "file": path.name, "mode": "upscale2x" if upscale else "exact", "latency_s": round(latency, 1), "completion_tokens": data.get("usage", {}).get("completion_tokens"), "finish_reason": finish, "parsed": sprite is not None, "error": perr, } if sprite is not None: stem = "{}__{}".format(path.stem, "2x" if upscale else "edit") wire_to_png(sprite, out_dir / (stem + ".png")) scale = max(2, 512 // max(sprite.width, sprite.height)) render.save_render(sprite, out_dir / (stem + "_preview.png"), scale=scale) # Footprint check (the pixel-perfect guarantee, verified not assumed) in_fp = {(x, y) for y, r in enumerate(in_sprite.rows) for x, ch in enumerate(r) if ch != "."} if upscale: in_fp = {(2 * x + dx, 2 * y + dy) for x, y in in_fp for dx in (0, 1) for dy in (0, 1)} out_fp = {(x, y) for y, r in enumerate(sprite.rows) for x, ch in enumerate(r) if ch != "."} result["footprint_perfect"] = in_fp == out_fp return result async def main() -> None: ap = argparse.ArgumentParser() ap.add_argument("input") ap.add_argument("instruction") ap.add_argument("--upscale", action="store_true") ap.add_argument("--out", default=str(Path(__file__).parent / "edited")) args = ap.parse_args() result = await edit_file(Path(args.input), args.instruction, args.upscale, Path(args.out)) for k, v in result.items(): print("{}: {}".format(k, v)) if __name__ == "__main__": asyncio.run(main())