# PixelMAE / app.py
# Source: Hugging Face Space by MidFord327 (commit 2feaac2, verified)
"""
PixelMAE v10 — Neural Sprite Engine
Hugging Face Spaces · Production UI
"""
import os
import math
import warnings
from typing import Optional, Dict, List, Tuple
from dataclasses import dataclass, field
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image, ImageDraw
import gradio as gr
from huggingface_hub import hf_hub_download
warnings.filterwarnings("ignore")
# ============================================================================
# REPO CONFIG β€” edit these two lines
# ============================================================================
REPO_ID = "MidFord327/PixelArt-MAE-v7"  # Hub repo that hosts the checkpoints
MODEL_FILES = {"best": "best.pth", "latest": "latest.pth"}  # key -> filename on the Hub
# ============================================================================
# 1. CONFIGURATION
# ============================================================================
@dataclass
class Config:
    """Model/runtime hyper-parameters — must match the trained checkpoint."""
    IMAGE_SIZE: int = 16            # sprite side length in pixels
    CHANNELS: int = 4               # RGBA
    NUM_PIXELS: int = 256           # IMAGE_SIZE ** 2 — one token per pixel
    LATENT_DIM: int = 192           # transformer width
    ENCODER_LAYERS: int = 6
    DECODER_LAYERS: int = 4         # asymmetric: decoder is shallower
    NUM_HEADS: int = 6
    FFN_RATIO: int = 4              # MLP hidden size = LATENT_DIM * FFN_RATIO
    DROPOUT: float = 0.0
    PALETTE_SIZE: int = 16          # colors emitted by the palette head
    USE_EMA: bool = True
    EMA_DECAY: float = 0.9995
    EMA_UPDATE_EVERY: int = 1
    # CPU-only here (Spaces free tier); factory so the dataclass stays picklable.
    DEVICE: torch.device = field(
        default_factory=lambda: torch.device("cpu"))
# ============================================================================
# 2. ARCHITECTURE
# ============================================================================
def get_2d_sincos_pos_embed(embed_dim: int, grid_size: int) -> torch.Tensor:
    """Build fixed 2-D sine/cosine positional embeddings.

    Interleaves sin/cos of the row coordinate (channels 0,1 mod 4) and of the
    column coordinate (channels 2,3 mod 4). Assumes embed_dim % 4 == 0.

    Returns a (1, grid_size**2, embed_dim) tensor.
    """
    coords = torch.arange(grid_size, dtype=torch.float32)
    yy, xx = torch.meshgrid(coords, coords, indexing="ij")
    flat = torch.stack((yy, xx), dim=0).reshape(2, -1)
    quarter = embed_dim // 4
    # Log-spaced inverse frequencies, as in the original Transformer paper.
    freqs = 1.0 / (10000 ** (torch.arange(quarter, dtype=torch.float32) / quarter))
    angles_y = flat[0][:, None] * freqs[None, :]
    angles_x = flat[1][:, None] * freqs[None, :]
    pe = torch.zeros(grid_size * grid_size, embed_dim)
    pe[:, 0::4] = torch.sin(angles_y)
    pe[:, 1::4] = torch.cos(angles_y)
    pe[:, 2::4] = torch.sin(angles_x)
    pe[:, 3::4] = torch.cos(angles_x)
    return pe.unsqueeze(0)
class EMA:
    """Exponential-moving-average weight tracker (inference-side subset).

    Holds a `shadow` copy of every trainable parameter; `apply_shadow` swaps
    the shadow weights into the model (stashing the live weights in `backup`)
    and `restore` swaps them back.
    """
    def __init__(self, model, decay=0.9995, update_every=1):
        self.model = model
        self.decay = decay
        self.update_every = update_every
        self.step = 0
        self.shadow = {}
        for name, param in model.named_parameters():
            if param.requires_grad:
                self.shadow[name] = param.data.clone()
        self.backup = {}

    def apply_shadow(self):
        """Copy EMA weights into the model, backing up the current weights."""
        for name, param in self.model.named_parameters():
            if not param.requires_grad or name not in self.shadow:
                continue
            self.backup[name] = param.data.clone()
            param.data.copy_(self.shadow[name])

    def restore(self):
        """Undo `apply_shadow`: put the backed-up weights back."""
        for name, param in self.model.named_parameters():
            if name in self.backup:
                param.data.copy_(self.backup[name])
        self.backup = {}

    def load_state_dict(self, state, device):
        """Load shadow weights/step counter from a checkpoint dict."""
        self.shadow = {key: tensor.to(device) for key, tensor in state["shadow"].items()}
        self.step = state["step"]
class MHSA(nn.Module):
    """Multi-head self-attention using PyTorch's fused SDPA kernel."""
    def __init__(self, dim, heads, dropout=0.0):
        super().__init__()
        self.heads = heads
        self.head_dim = dim // heads
        self.qkv = nn.Linear(dim, dim * 3)
        self.proj = nn.Linear(dim, dim)
        self.drop = dropout  # kept for interface parity; SDPA is called with p=0

    def forward(self, x):
        batch, seq, width = x.shape
        # Single projection -> (3, B, heads, seq, head_dim)
        projected = self.qkv(x).view(batch, seq, 3, self.heads, self.head_dim)
        projected = projected.permute(2, 0, 3, 1, 4)
        q, k, v = projected[0], projected[1], projected[2]
        attended = F.scaled_dot_product_attention(q, k, v, dropout_p=0.0)
        merged = attended.transpose(1, 2).reshape(batch, seq, width)
        return self.proj(merged)
class TransformerBlock(nn.Module):
    """Pre-norm transformer block: attention + MLP, each with a residual."""
    def __init__(self, dim, heads, ffn_ratio=4, dropout=0.0):
        super().__init__()
        self.norm1 = nn.LayerNorm(dim)
        self.attn = MHSA(dim, heads, dropout)
        self.norm2 = nn.LayerNorm(dim)
        self.ffn = nn.Sequential(
            nn.Linear(dim, dim * ffn_ratio),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(dim * ffn_ratio, dim),
            nn.Dropout(dropout),
        )

    def forward(self, x):
        attended = x + self.attn(self.norm1(x))
        return attended + self.ffn(self.norm2(attended))
class AsymmetricPixelMAE(nn.Module):
    """Asymmetric masked autoencoder over 16x16 RGBA sprites.

    The encoder only sees visible pixel tokens (plus a CLS token); the
    shallower decoder re-inserts mask tokens and predicts RGB + alpha for
    every pixel. A palette head reads the CLS token.
    """
    def __init__(self, config: Config):
        super().__init__()
        self.config = config
        D, N = config.LATENT_DIM, config.NUM_PIXELS
        # Per-pixel tokenization: each RGBA pixel -> one D-dim token.
        self.pixel_embed = nn.Linear(config.CHANNELS, D)
        self.embed_norm = nn.LayerNorm(D)
        # Fixed (non-trainable) 2-D sin/cos positional embedding.
        self.pos_embed = nn.Parameter(
            get_2d_sincos_pos_embed(D, config.IMAGE_SIZE), requires_grad=False)
        self.cls_token = nn.Parameter(torch.zeros(1, 1, D))
        self.mask_token = nn.Parameter(torch.zeros(1, 1, D))
        nn.init.normal_(self.cls_token, std=0.02)
        nn.init.normal_(self.mask_token, std=0.02)
        self.encoder = nn.ModuleList([
            TransformerBlock(D, config.NUM_HEADS, config.FFN_RATIO, config.DROPOUT)
            for _ in range(config.ENCODER_LAYERS)])
        self.encoder_norm = nn.LayerNorm(D)
        self.decoder_embed = nn.Linear(D, D)
        # Decoder positions: slot 0 is CLS (left at zeros), slots 1..N get sin/cos.
        self.decoder_pos_embed = nn.Parameter(torch.zeros(1, N + 1, D), requires_grad=False)
        dp = torch.zeros(1, N + 1, D)
        dp[:, 1:, :] = get_2d_sincos_pos_embed(D, config.IMAGE_SIZE)
        self.decoder_pos_embed.data.copy_(dp)
        self.decoder = nn.ModuleList([
            TransformerBlock(D, config.NUM_HEADS, config.FFN_RATIO, config.DROPOUT)
            for _ in range(config.DECODER_LAYERS)])
        self.decoder_norm = nn.LayerNorm(D)
        # Separate RGB / alpha heads; sigmoid is applied in forward().
        self.pixel_head_rgb = nn.Sequential(
            nn.LayerNorm(D), nn.Linear(D, D), nn.GELU(), nn.Linear(D, 3))
        self.pixel_head_alpha = nn.Sequential(
            nn.LayerNorm(D), nn.Linear(D, D), nn.GELU(), nn.Linear(D, 1))
        # Palette head: token -> PALETTE_SIZE RGBA colors in [0, 1].
        self.palette_head = nn.Sequential(
            nn.LayerNorm(D), nn.Linear(D, D), nn.GELU(),
            nn.Linear(D, config.PALETTE_SIZE * config.CHANNELS), nn.Sigmoid())

    def _tokenize(self, x):
        # (B, C, H, W) -> (B, N, D): flatten pixels, embed, normalize.
        return self.embed_norm(self.pixel_embed(x.flatten(2).transpose(1, 2)))

    def forward_encoder(self, x, mask=None):
        """Encode visible tokens; mask==1 marks pixels to drop/re-predict.

        Returns (encoded sequence incl. CLS at index 0, un-shuffle indices,
        number of visible tokens kept).
        """
        B, N, D = x.shape[0], self.config.NUM_PIXELS, self.config.LATENT_DIM
        tokens = self._tokenize(x) + self.pos_embed
        if mask is not None:
            # Coerce the mask to shape (B, N) regardless of input layout.
            mf = mask.flatten(1)
            if mf.shape[1] != N: mf = mask.reshape(B, -1)
            if mf.dim() == 3: mf = mf.squeeze(1)
            # Random shuffle that pushes masked tokens to the end (the +1e6
            # bias guarantees masked positions sort after all visible ones).
            noise = torch.rand(B, N, device=x.device)
            ids_shuf = torch.argsort(noise + mf.float() * 1e6, dim=1)
            ids_rest = torch.argsort(ids_shuf, dim=1)  # inverse permutation
            # Keep the per-batch minimum visible count so the batch stays rectangular.
            n_vis = max(1, int((1 - mf.float()).sum(dim=1).min().item()))
            ids_keep = ids_shuf[:, :n_vis]
            tokens = torch.gather(tokens, 1, ids_keep.unsqueeze(-1).expand(-1, -1, D))
        else:
            n_vis = N
            ids_rest = torch.arange(N, device=x.device).unsqueeze(0).expand(B, -1)
        cls = self.cls_token.expand(B, -1, -1)
        x_enc = torch.cat([cls, tokens], dim=1)
        for blk in self.encoder: x_enc = blk(x_enc)
        return self.encoder_norm(x_enc), ids_rest, n_vis

    def forward_decoder(self, x_enc, ids_rest, n_vis):
        """Re-insert mask tokens, un-shuffle, decode; returns (B, N, D) features."""
        B, N, D = x_enc.shape[0], self.config.NUM_PIXELS, self.config.LATENT_DIM
        cls_enc, vis_enc = x_enc[:, :1], x_enc[:, 1:]
        vis_dec = self.decoder_embed(vis_enc)
        n_masked = N - n_vis
        if n_masked > 0:
            mask_tok = self.mask_token.expand(B, n_masked, -1)
            full_seq = torch.cat([vis_dec, mask_tok], dim=1)
        else:
            full_seq = vis_dec
        # Undo the encoder's shuffle so tokens line up with pixel positions.
        full_seq = torch.gather(full_seq, 1, ids_rest.unsqueeze(-1).expand(-1, -1, D))
        full_seq = torch.cat([self.decoder_embed(cls_enc), full_seq], dim=1) + self.decoder_pos_embed
        for blk in self.decoder: full_seq = blk(full_seq)
        return self.decoder_norm(full_seq)[:, 1:]  # drop CLS

    def forward(self, x, mask=None):
        """Full pass; returns {"pixel_pred": (B, 4, H, W) tensor in [0, 1]}."""
        B, C, H, W = x.shape
        enc, ids, n_vis = self.forward_encoder(x, mask)
        dec = self.forward_decoder(enc, ids, n_vis)
        rgb = torch.sigmoid(self.pixel_head_rgb(dec)).transpose(1, 2).reshape(B, 3, H, W)
        alp = torch.sigmoid(self.pixel_head_alpha(dec)).transpose(1, 2).reshape(B, 1, H, W)
        return {"pixel_pred": torch.cat([rgb, alp], dim=1)}
# ============================================================================
# 3. INFERENCE ENGINE (extended)
# ============================================================================
class InferenceEngine:
    """Owns the model (+ optional EMA weights) and implements all inference ops."""
    def __init__(self, config: Config, device: torch.device):
        self.config = config
        self.device = device
        self.model = AsymmetricPixelMAE(config).to(device)
        self.ema = EMA(self.model, config.EMA_DECAY, config.EMA_UPDATE_EVERY) if config.USE_EMA else None
        self.ckpt_meta: Dict = {}

    def load_checkpoint(self, path: str) -> Dict:
        """Load model (and EMA shadow, if present) from a checkpoint file.

        Returns a small metadata dict (epoch, best_loss, has_ema).
        """
        # weights_only=False: checkpoint stores objects beyond raw tensors.
        # NOTE(review): this is pickle-based — only load from the trusted repo.
        ckpt = torch.load(path, map_location=self.device, weights_only=False)
        # Strip DataParallel ("module.") / torch.compile ("_orig_mod.") prefixes.
        state = {k.replace("module.", "").replace("_orig_mod.", ""): v
                 for k, v in ckpt["model"].items()}
        self.model.load_state_dict(state, strict=False)
        if self.ema and ckpt.get("ema"):
            try: self.ema.load_state_dict(ckpt["ema"], self.device)
            except Exception: pass  # EMA weights are best-effort / optional
        self.ckpt_meta = {
            "epoch": ckpt.get("epoch", "?"),
            "best_loss": ckpt.get("best_loss", None),
            "has_ema": bool(self.ema and ckpt.get("ema")),
        }
        return self.ckpt_meta

    # ── Core helpers ──────────────────────────────────────────────────────
    def _apply_ema(self):
        # Swap EMA shadow weights in (no-op when EMA is disabled).
        if self.ema: self.ema.apply_shadow()
    def _restore_ema(self):
        # Put the raw weights back after inference.
        if self.ema: self.ema.restore()

    # ── Inpaint ───────────────────────────────────────────────────────────
    @torch.no_grad()
    def inpaint(self, image: torch.Tensor, mask: torch.Tensor,
                use_ema: bool = True) -> torch.Tensor:
        """Re-predict pixels where mask==1; keep original pixels elsewhere."""
        self.model.eval()
        if image.dim() == 3: image = image.unsqueeze(0)
        if mask.dim() == 3: mask = mask.unsqueeze(0)
        if use_ema: self._apply_ema()
        try:
            pred = self.model(image, mask=mask.float())["pixel_pred"]
            # Only masked pixels take the model's prediction.
            result = torch.where(mask.expand_as(image).bool(), pred, image)
        finally:
            if use_ema: self._restore_ema()  # always restore raw weights
        return result.clamp(0, 1)

    # ── Restore ───────────────────────────────────────────────────────────
    @torch.no_grad()
    def restore(self, image: torch.Tensor, strength: float = 0.5,
                use_ema: bool = True) -> torch.Tensor:
        """Randomly mask 5%-55% of pixels (scaled by strength) and re-predict them."""
        self.model.eval()
        if image.dim() == 3: image = image.unsqueeze(0)
        B, C, H, W = image.shape
        ratio = 0.05 + strength * 0.50
        mask = (torch.rand(B, 1, H, W, device=image.device) < ratio).float()
        return self.inpaint(image, mask, use_ema=use_ema)

    @torch.no_grad()
    def restore_multi_pass(self, image: torch.Tensor, strength: float = 0.5,
                           passes: int = 3, use_ema: bool = True) -> torch.Tensor:
        """Iterative multi-pass restore; each pass uses slightly less strength."""
        result = image.clone()
        for i in range(passes):
            s = strength * (1.0 - i * 0.1)  # decay 10% of base strength per pass
            result = self.restore(result, strength=max(0.05, s), use_ema=use_ema)
        return result

    # ── Generate from scratch ─────────────────────────────────────────────
    @torch.no_grad()
    def generate(self, n_samples: int = 1, n_steps: int = 12,
                 seed_image: Optional[torch.Tensor] = None,
                 seed_ratio: float = 0.0,
                 temperature: float = 1.0,
                 use_ema: bool = True) -> torch.Tensor:
        """MaskGIT-style iterative generation.

        Starts from noise (or from a partially kept seed image) and, over
        n_steps, reveals the most confident predicted pixels following a
        cosine schedule. Returns (n_samples, C, H, W) in [0, 1].
        """
        self.model.eval()
        if use_ema: self._apply_ema()
        try:
            H = W = self.config.IMAGE_SIZE
            C, N = self.config.CHANNELS, self.config.NUM_PIXELS
            dev = self.device
            B = n_samples
            if seed_image is not None:
                if seed_image.dim() == 3: seed_image = seed_image.unsqueeze(0)
                seed_image = seed_image.expand(B, -1, -1, -1)
                canvas = seed_image.clone()
                # Pixels kept from the seed (each kept with prob. seed_ratio).
                keep = (torch.rand(B, 1, H, W, device=dev) < seed_ratio)
            else:
                canvas = torch.rand(B, C, H, W, device=dev) * 0.5
                keep = torch.zeros(B, 1, H, W, device=dev, dtype=torch.bool)
            revealed = keep.float()
            for step in range(n_steps):
                mask = 1.0 - revealed
                n_unrevealed = int(mask.sum().item() / B)
                if n_unrevealed == 0: break
                pred = self.model(canvas, mask=mask)["pixel_pred"]
                # temperature scaling β€” add noise before confidence scoring
                if temperature != 1.0:
                    noise = torch.randn_like(pred) * (temperature - 1.0) * 0.05
                    pred = (pred + noise).clamp(0, 1)
                canvas = torch.where(mask.expand_as(canvas).bool(), pred, canvas)
                if step < n_steps - 1:
                    # Confidence = mean distance from 0.5 over channels;
                    # a small random jitter breaks ties.
                    confidence = torch.abs(pred - 0.5).mean(dim=1, keepdim=True)
                    confidence = confidence * mask + torch.rand_like(confidence) * 0.05
                    progress = (step + 1) / n_steps
                    # Cosine schedule for the fraction of pixels revealed so far.
                    target_frac = 1.0 - 0.5 * (1 + math.cos(math.pi * progress))
                    cur_revealed = int(revealed.sum().item() / B)
                    n_reveal = max(1, int(target_frac * N) - cur_revealed)
                    n_reveal = min(n_reveal, n_unrevealed)
                    # Reveal the top-confidence still-masked pixels.
                    flat_conf = confidence.reshape(B, -1).masked_fill(
                        mask.reshape(B, -1) < 0.5, -float("inf"))
                    _, top = flat_conf.topk(min(n_reveal, N), dim=1)
                    new_rev = torch.zeros(B, N, device=dev).scatter_(1, top, 1.0)
                    revealed = (revealed + new_rev.reshape(B, 1, H, W)).clamp(0, 1)
        finally:
            if use_ema: self._restore_ema()
        return canvas.clamp(0, 1)

    # ── Variation ─────────────────────────────────────────────────────────
    @torch.no_grad()
    def generate_variation(self, image: torch.Tensor,
                           diversity: float = 0.5,
                           n_samples: int = 1,
                           n_steps: int = 16,
                           use_ema: bool = True) -> torch.Tensor:
        """Generate variations of an existing sprite.
        diversity=0 -> near-identical copy; diversity=1 -> fully free generation."""
        seed_ratio = 1.0 - diversity
        return self.generate(
            n_samples=n_samples, n_steps=n_steps,
            seed_image=image, seed_ratio=seed_ratio,
            use_ema=use_ema)

    # ── Palette extraction from CLS token ─────────────────────────────────
    @torch.no_grad()
    def extract_palette(self, image: torch.Tensor,
                        use_ema: bool = True) -> torch.Tensor:
        """Returns (PALETTE_SIZE, 4) RGBA palette from the model's palette head."""
        self.model.eval()
        if image.dim() == 3: image = image.unsqueeze(0)
        if use_ema: self._apply_ema()
        try:
            enc, _, _ = self.model.forward_encoder(image)
            cls_tok = enc[:, :1, :]                     # (1, 1, D)
            palette = self.model.palette_head(cls_tok)  # (1, 1, P*C)
            palette = palette.reshape(self.config.PALETTE_SIZE, self.config.CHANNELS)
        finally:
            if use_ema: self._restore_ema()
        return palette.clamp(0, 1)
# ============================================================================
# 4. INIT ENGINE + MODEL MANAGEMENT
# ============================================================================
print("Initializing PixelMAE Inference Engine …")
# Module-level singletons shared by every Gradio callback.
config = Config()
engine = InferenceEngine(config, config.DEVICE)
# Global state
_loaded_model_key = None               # key of the currently loaded checkpoint ("best"/"latest")
_ckpt_info_text = "No model loaded."   # markdown status shown in the UI
_model_paths: Dict[str, Optional[str]] = {"best": None, "latest": None}  # cached local file paths
def _download_model(key: str) -> Tuple[Optional[str], str]:
    """Download a checkpoint from the Hub.

    key: "best" or "latest" (indexes MODEL_FILES).
    Returns (local_path, status_message); local_path is None on failure.
    """
    filename = MODEL_FILES[key]
    try:
        path = hf_hub_download(repo_id=REPO_ID, filename=filename)
        # Fix: interpolate the actual filename into the status messages
        # (the message previously contained a literal placeholder).
        return path, f"βœ… Downloaded `{filename}` from `{REPO_ID}`."
    except Exception as e:
        # Broad catch is deliberate: download failure must never crash the app;
        # the error text is surfaced in the UI instead.
        return None, f"❌ Could not download `{filename}`: {e}"
def load_model(model_choice: str) -> str:
    """Download (if needed) and load the selected checkpoint.

    Returns a markdown status string, which is also cached in _ckpt_info_text.
    """
    global _loaded_model_key, _ckpt_info_text, _model_paths
    key = "best" if model_choice == "Best (best.pth)" else "latest"
    if _model_paths[key] is None:
        path, msg = _download_model(key)
        if path is None:
            _ckpt_info_text = msg
            return _ckpt_info_text
        _model_paths[key] = path
    try:
        meta = engine.load_checkpoint(_model_paths[key])
        if meta["best_loss"] is not None:
            loss_str = f"{meta['best_loss']:.6f}"
        else:
            loss_str = "N/A"
        ema_str = "βœ… loaded" if meta["has_ema"] else "⚠️ not found"
        _ckpt_info_text = (
            f"**Model:** `{MODEL_FILES[key]}` \n"
            f"**Epoch:** {meta['epoch']} \n"
            f"**Best Val Loss:** {loss_str} \n"
            f"**EMA Weights:** {ema_str} \n"
            f"**Device:** {config.DEVICE}"
        )
        _loaded_model_key = key
    except Exception as e:
        _ckpt_info_text = f"❌ Load error: {e}"
    return _ckpt_info_text
# Pre-load best model at startup (non-fatal)
try:
    _path, _msg = _download_model("best")
    if _path:
        _model_paths["best"] = _path
        load_model("Best (best.pth)")
        # Flatten the multi-line status into one log line (chr(10) == "\n").
        print(f" └─ {_ckpt_info_text.replace(chr(10), ' ')}")
    else:
        print(f" └─ {_msg}")
except Exception as ex:
    # Startup must never crash the Space; the UI can retry via the load button.
    print(f" └─ Startup load failed: {ex}")
# ============================================================================
# 5. IMAGE UTILITIES
# ============================================================================
def preprocess(pil_img: Image.Image) -> torch.Tensor:
    """PIL image -> (1, 4, 16, 16) float tensor in [0, 1] on the app device."""
    resized = pil_img.convert("RGBA").resize(
        (config.IMAGE_SIZE, config.IMAGE_SIZE), Image.Resampling.NEAREST)
    data = np.asarray(resized, np.float32) / 255.0
    chw = torch.from_numpy(data).permute(2, 0, 1)
    return chw.unsqueeze(0).to(config.DEVICE)
def postprocess(tensor: torch.Tensor, upscale: int = 16) -> Image.Image:
    """Tensor -> pixel-perfect (nearest-neighbor) upscaled RGBA PIL image."""
    if tensor is None:
        return None
    if tensor.dim() == 4:
        tensor = tensor.squeeze(0)
    hwc = tensor.permute(1, 2, 0).cpu().clamp(0, 1).numpy()
    img = Image.fromarray((hwc * 255).astype(np.uint8), "RGBA")
    side = config.IMAGE_SIZE * upscale
    return img.resize((side, side), Image.Resampling.NEAREST)
def tensor_to_pils(batch: torch.Tensor, upscale: int = 16) -> List[Image.Image]:
    """Split a batch tensor into a list of upscaled PIL images."""
    if batch.dim() == 3:
        batch = batch.unsqueeze(0)
    return [postprocess(sample, upscale) for sample in batch]
def make_palette_image(palette: torch.Tensor, swatch_size: int = 32) -> Image.Image:
    """Render an (N, 4) palette tensor as a horizontal strip of color swatches."""
    count = palette.shape[0]
    strip = Image.new("RGBA", (count * swatch_size, swatch_size), (0, 0, 0, 0))
    painter = ImageDraw.Draw(strip)
    for idx in range(count):
        rgba = tuple((palette[idx].cpu().numpy() * 255).astype(int).tolist())
        x0 = idx * swatch_size
        painter.rectangle([x0, 0, x0 + swatch_size - 1, swatch_size - 1],
                          fill=rgba)
    return strip
def compare_images(before: Image.Image, after: Image.Image,
                   label_size: int = 10) -> Image.Image:
    """Paste two images side by side on a dark background.

    NOTE(review): label_size is currently unused (kept for interface
    compatibility); no labels are actually drawn.
    """
    width, height = before.size
    canvas = Image.new("RGBA", (width * 2 + 4, height), (20, 20, 20, 255))
    canvas.paste(before, (0, 0))
    canvas.paste(after, (width + 4, 0))
    return canvas
def extract_unique_colors(img: Image.Image, max_colors: int = 32) -> List[Tuple]:
    """Return up to max_colors distinct RGBA tuples, skipping near-transparent pixels."""
    flat = np.array(img.convert("RGBA")).reshape(-1, 4)
    opaque = flat[flat[:, 3] > 10]  # alpha <= 10 treated as transparent
    distinct = np.unique(opaque, axis=0)
    return [tuple(row) for row in distinct[:max_colors]]
# ============================================================================
# 6. GRADIO FUNCTIONS
# ============================================================================
def fn_load_model(choice):
    # Thin Gradio wrapper: returns the markdown status string from load_model.
    return load_model(choice)
def fn_generate(seed_img, seed_ratio, steps, n_samples,
                temperature, use_ema, upscale):
    """Generate a batch of sprites.

    Returns (gallery items, first image, palette strip of the first sample).
    """
    n_samples, steps, upscale = int(n_samples), int(steps), int(upscale)
    seed_t = None if seed_img is None else preprocess(seed_img)
    batch = engine.generate(
        n_samples=n_samples, n_steps=steps,
        seed_image=seed_t, seed_ratio=seed_ratio,
        temperature=temperature, use_ema=use_ema)
    images = tensor_to_pils(batch, upscale)
    pal_img = make_palette_image(
        engine.extract_palette(batch[0:1], use_ema=use_ema), swatch_size=40)
    gallery = [(img, f"Sample {i+1}") for i, img in enumerate(images)]
    return gallery, images[0], pal_img
def fn_generate_single(seed_img, seed_ratio, steps, temperature, use_ema, upscale):
    """Generate one sprite; returns (image, palette strip)."""
    steps, upscale = int(steps), int(upscale)
    seed_t = None if seed_img is None else preprocess(seed_img)
    out = engine.generate(
        n_samples=1, n_steps=steps,
        seed_image=seed_t, seed_ratio=seed_ratio,
        temperature=temperature, use_ema=use_ema)
    sprite = postprocess(out[0], upscale)
    strip = make_palette_image(
        engine.extract_palette(out[0:1], use_ema=use_ema), swatch_size=40)
    return sprite, strip
def fn_restore(image, strength, passes, use_ema, upscale):
    """Restore/refine a sprite; returns (after, before/after strip, palette)."""
    if image is None:
        return None, None, None
    upscale, passes = int(upscale), int(passes)
    in_t = preprocess(image)
    if passes > 1:
        out_t = engine.restore_multi_pass(in_t, strength=strength, passes=passes, use_ema=use_ema)
    else:
        out_t = engine.restore(in_t, strength=strength, use_ema=use_ema)
    before_img = postprocess(in_t[0], upscale)
    after_img = postprocess(out_t[0], upscale)
    side_by_side = compare_images(before_img, after_img)
    pal_img = make_palette_image(
        engine.extract_palette(out_t[0:1], use_ema=use_ema), swatch_size=40)
    return after_img, side_by_side, pal_img
def fn_inpaint(editor_dict, use_ema, upscale):
    """Redraw user-painted regions; returns (result image, palette strip).

    The editor dict comes from gr.ImageEditor: "background" is the sprite,
    "layers" hold the painted mask (any opaque stroke counts as masked).
    """
    if not editor_dict or editor_dict.get("background") is None:
        return None, None
    upscale = int(upscale)
    background = editor_dict["background"].convert("RGBA")
    in_t = preprocess(background)
    mask_t = torch.zeros((1, 1, 16, 16), device=config.DEVICE)
    layers = editor_dict.get("layers", [])
    if layers:
        painted = layers[0].convert("RGBA").resize((16, 16), Image.Resampling.NEAREST)
        coverage = (np.array(painted)[:, :, 3] > 0).astype(np.float32)
        mask_t = torch.from_numpy(coverage).unsqueeze(0).unsqueeze(0).to(config.DEVICE)
    out_t = engine.inpaint(in_t, mask_t, use_ema=use_ema)
    strip = make_palette_image(
        engine.extract_palette(out_t[0:1], use_ema=use_ema), swatch_size=40)
    return postprocess(out_t[0], upscale), strip
def fn_inpaint_alpha(image, use_ema, upscale):
    """Fill transparent pixels using the model; returns (result, palette strip)."""
    if image is None:
        return None, None
    upscale = int(upscale)
    in_t = preprocess(image)
    transparent = (in_t[:, 3:4] < 0.5).float()  # mask = alpha below 0.5
    out_t = engine.inpaint(in_t, transparent, use_ema=use_ema)
    strip = make_palette_image(
        engine.extract_palette(out_t[0:1], use_ema=use_ema), swatch_size=40)
    return postprocess(out_t[0], upscale), strip
def fn_variation(image, diversity, n_var, steps, use_ema, upscale):
    """Produce N variations of a source sprite; returns (gallery, palette strip)."""
    if image is None:
        return [], None
    n_var, steps, upscale = int(n_var), int(steps), int(upscale)
    out_t = engine.generate_variation(
        preprocess(image), diversity=diversity, n_samples=n_var,
        n_steps=steps, use_ema=use_ema)
    images = tensor_to_pils(out_t, upscale)
    strip = make_palette_image(
        engine.extract_palette(out_t[0:1], use_ema=use_ema), swatch_size=40)
    gallery = [(img, f"Variation {i+1}") for i, img in enumerate(images)]
    return gallery, strip
def fn_batch_generate(n_total, steps, temperature, use_ema, upscale):
    """Generate n_total sprites from noise; returns gallery (image, caption) pairs."""
    n_total, steps, upscale = int(n_total), int(steps), int(upscale)
    out_t = engine.generate(
        n_samples=n_total, n_steps=steps,
        temperature=temperature, use_ema=use_ema)
    images = tensor_to_pils(out_t, upscale)
    return [(img, f"#{i+1}") for i, img in enumerate(images)]
# ============================================================================
# 7. GRADIO UI
# ============================================================================
CSS = """
/* ── Pixel-perfect rendering for all output images ── */
.pixel-out img,
.pixel-gallery img,
.pixel-compare img {
image-rendering: pixelated !important;
image-rendering: crisp-edges !important;
}
/* ── Dark card styling ── */
.card { background: #1a1a2e; border-radius: 10px; padding: 12px; }
/* ── Status badge ── */
.status-box textarea { font-family: monospace; font-size: 12px; }
/* ── Palette strip ── */
.pal-out img {
image-rendering: pixelated !important;
border-radius: 4px;
border: 1px solid #444;
}
"""
_UPSCALE_CHOICES = ["4", "8", "16", "24", "32"]
_MODEL_CHOICES = ["Best (best.pth)", "Latest (latest.pth)"]
def _upscale_ctrl(default="16"):
    """Shared preview-scale radio control (one per tab)."""
    return gr.Radio(
        choices=_UPSCALE_CHOICES,
        value=default,
        label="Preview Scale",
        info="Pixel-perfect zoom multiplier (Γ— original 16px)",
    )
def _ema_ctrl():
    """Shared EMA-weights checkbox (one per tab)."""
    return gr.Checkbox(
        value=True,
        label="Use EMA Weights",
        info="Usually better quality",
    )
def _palette_out():
    """Shared palette-strip output image (one per tab)."""
    return gr.Image(
        label="Predicted Palette",
        type="pil",
        elem_classes=["pal-out"],
    )
# Declarative UI layout; all callbacks are the fn_* functions above.
with gr.Blocks(theme=gr.themes.Monochrome(), css=CSS,
               title="PixelMAE v10 β€” Neural Sprite Engine") as app:
    gr.Markdown(
        """
# πŸ‘Ύ PixelMAE β€” Neural Sprite Engine `v10`
Asymmetric Masked Autoencoder Β· 16 Γ— 16 RGBA Β· Generation Β· Restoration Β· Inpainting Β· Variations
""")
    # ── Model loader ─────────────────────────────────────────────────────
    with gr.Accordion("βš™οΈ Model & Checkpoint", open=False):
        with gr.Row():
            model_choice = gr.Radio(
                _MODEL_CHOICES, value="Best (best.pth)",
                label="Checkpoint")
            load_btn = gr.Button("πŸ”„ Load / Reload Model", variant="secondary")
        ckpt_info = gr.Markdown(value=_ckpt_info_text, label="Checkpoint Info")
        load_btn.click(fn_load_model, inputs=model_choice, outputs=ckpt_info)
    gr.Markdown("---")
    # ── TABS ─────────────────────────────────────────────────────────────
    with gr.Tabs():
        # ── TAB 1: GENERATE ──────────────────────────────────────────────
        with gr.TabItem("✨ Generate"):
            gr.Markdown(
                "Generate sprites from noise using iterative MaskGIT decoding. "
                "Optionally seed from an existing image.")
            with gr.Row():
                with gr.Column(scale=1):
                    gen_seed = gr.Image(
                        label="Seed Image (optional)", type="pil",
                        image_mode="RGBA", height=200)
                    gen_ratio = gr.Slider(0.0, 1.0, 0.0, step=0.05,
                                          label="Seed Fidelity",
                                          info="0 = fully random Β· 1 = preserve seed completely")
                    gen_steps = gr.Slider(4, 64, 16, step=1,
                                          label="Decoding Steps",
                                          info="More steps β†’ smoother, slower")
                    gen_temp = gr.Slider(0.5, 2.0, 1.0, step=0.05,
                                         label="Temperature",
                                         info="< 1 sharper/conservative Β· > 1 more chaotic/varied")
                    gen_use_ema = _ema_ctrl()
                    gen_upscale = _upscale_ctrl("16")
                    gen_btn = gr.Button("⚑ Generate", variant="primary")
                with gr.Column(scale=1):
                    gen_out = gr.Image(
                        label="Output Sprite", type="pil", format="png",
                        elem_classes=["pixel-out"])
                    gen_pal = _palette_out()
            gen_btn.click(
                fn_generate_single,
                inputs=[gen_seed, gen_ratio, gen_steps,
                        gen_temp, gen_use_ema, gen_upscale],
                outputs=[gen_out, gen_pal])
        # ── TAB 2: BATCH GENERATE ────────────────────────────────────────
        with gr.TabItem("πŸ—‚οΈ Batch Generate"):
            gr.Markdown(
                "Generate multiple sprites in one shot. "
                "All sprites are rendered pixel-perfect in the gallery.")
            with gr.Row():
                with gr.Column(scale=1):
                    bg_n = gr.Slider(1, 16, 8, step=1,
                                     label="Number of Sprites")
                    bg_steps = gr.Slider(4, 64, 16, step=1,
                                         label="Decoding Steps")
                    bg_temp = gr.Slider(0.5, 2.0, 1.0, step=0.05,
                                        label="Temperature")
                    bg_use_ema = _ema_ctrl()
                    bg_upscale = _upscale_ctrl("16")
                    bg_btn = gr.Button("⚑ Generate Batch", variant="primary")
                with gr.Column(scale=2):
                    bg_gallery = gr.Gallery(
                        label="Generated Sprites", columns=4, rows=4,
                        height="auto", elem_classes=["pixel-gallery"], format="png")
            bg_btn.click(
                fn_batch_generate,
                inputs=[bg_n, bg_steps, bg_temp, bg_use_ema, bg_upscale],
                outputs=bg_gallery)
        # ── TAB 3: RESTORE & REFINE ──────────────────────────────────────
        with gr.TabItem("πŸ”„ Restore & Refine"):
            gr.Markdown(
                "Feed the model an existing sprite. It randomly masks regions and "
                "reconstructs them, improving pixel coherence. Multi-pass iteratively refines.")
            with gr.Row():
                with gr.Column(scale=1):
                    rest_img = gr.Image(
                        label="Input Sprite", type="pil",
                        image_mode="RGBA", height=200)
                    rest_str = gr.Slider(0.0, 1.0, 0.5, step=0.05,
                                         label="Mask Strength",
                                         info="What fraction of pixels are randomly re-predicted")
                    rest_passes = gr.Slider(1, 6, 1, step=1,
                                            label="Passes",
                                            info="Multi-pass: each pass refines further")
                    rest_use_ema = _ema_ctrl()
                    rest_upscale = _upscale_ctrl("16")
                    rest_btn = gr.Button("πŸ”„ Restore", variant="primary")
                with gr.Column(scale=1):
                    rest_out = gr.Image(
                        label="Restored Sprite", type="pil", format="png",
                        elem_classes=["pixel-out"])
                    rest_compare = gr.Image(
                        label="Before Β· After", type="pil",
                        elem_classes=["pixel-compare"])
                    rest_pal = _palette_out()
            rest_btn.click(
                fn_restore,
                inputs=[rest_img, rest_str, rest_passes, rest_use_ema, rest_upscale],
                outputs=[rest_out, rest_compare, rest_pal])
        # ── TAB 4: SMART INPAINT ─────────────────────────────────────────
        with gr.TabItem("πŸ–ŒοΈ Smart Inpaint"):
            gr.Markdown(
                "Upload a sprite and **paint the mask** over pixels you want the AI to redraw. "
                "Use any brush color β€” coverage counts, not color.")
            with gr.Row():
                with gr.Column(scale=1):
                    inp_editor = gr.ImageEditor(
                        label="Draw Mask (paint = redo)",
                        type="pil", image_mode="RGBA",
                        brush=gr.Brush(colors=["#ff0000"], color_mode="fixed"),
                        height=300)
                    inp_use_ema = _ema_ctrl()
                    inp_upscale = _upscale_ctrl("16")
                    inp_btn = gr.Button("πŸ–ŒοΈ Inpaint", variant="primary")
                with gr.Column(scale=1):
                    inp_out = gr.Image(
                        label="Inpainted Output", type="pil", format="png",
                        elem_classes=["pixel-out"])
                    inp_pal = _palette_out()
            inp_btn.click(
                fn_inpaint,
                inputs=[inp_editor, inp_use_ema, inp_upscale],
                outputs=[inp_out, inp_pal])
        # ── TAB 5: FILL TRANSPARENT ──────────────────────────────────────
        with gr.TabItem("πŸ” Fill Transparent"):
            gr.Markdown(
                "Upload a partially transparent sprite (RGBA). "
                "The model will fill all transparent pixels based on the visible context.")
            with gr.Row():
                with gr.Column(scale=1):
                    alp_img = gr.Image(
                        label="Partial Sprite (RGBA)", type="pil",
                        image_mode="RGBA", height=200)
                    alp_use_ema = _ema_ctrl()
                    alp_upscale = _upscale_ctrl("16")
                    alp_btn = gr.Button("✨ Fill Transparent", variant="primary")
                with gr.Column(scale=1):
                    alp_out = gr.Image(
                        label="Completed Sprite", type="pil", format="png",
                        elem_classes=["pixel-out"])
                    alp_pal = _palette_out()
            alp_btn.click(
                fn_inpaint_alpha,
                inputs=[alp_img, alp_use_ema, alp_upscale],
                outputs=[alp_out, alp_pal])
        # ── TAB 6: VARIATIONS ────────────────────────────────────────────
        with gr.TabItem("🎲 Variations"):
            gr.Markdown(
                "Upload a sprite and generate **N creative variations**. "
                "Diversity 0 = almost identical Β· 1 = free improvisation on the theme.")
            with gr.Row():
                with gr.Column(scale=1):
                    var_img = gr.Image(
                        label="Source Sprite", type="pil",
                        image_mode="RGBA", height=200)
                    var_div = gr.Slider(0.0, 1.0, 0.5, step=0.05,
                                        label="Diversity",
                                        info="How far variants can deviate from the source")
                    var_n = gr.Slider(1, 16, 4, step=1,
                                      label="Number of Variations")
                    var_steps = gr.Slider(4, 64, 16, step=1,
                                          label="Decoding Steps")
                    var_use_ema = _ema_ctrl()
                    var_upscale = _upscale_ctrl("16")
                    var_btn = gr.Button("🎲 Generate Variations", variant="primary")
                with gr.Column(scale=2):
                    var_gallery = gr.Gallery(
                        label="Variations", columns=4, rows=4,
                        height="auto", elem_classes=["pixel-gallery"],
                        format="png")
                    var_pal = _palette_out()
            var_btn.click(
                fn_variation,
                inputs=[var_img, var_div, var_n, var_steps, var_use_ema, var_upscale],
                outputs=[var_gallery, var_pal])
    # ── Footer ────────────────────────────────────────────────────────────
    gr.Markdown(
        """
---
**PixelMAE v10** Β· Asymmetric MAE Β· 4.2M params Β· 16Γ—16 RGBA
Model: [`MidFord327/PixelArt-MAE-v7`](https://huggingface.co/MidFord327/PixelArt-MAE-v7)
""")
if __name__ == "__main__":
    # Entry point for local runs; Spaces also executes this module directly.
    app.launch()