ShadeNet / inference_utils.py

revert

39a275f about 7 hours ago

8.23 kB

	import numpy as np
	import torch
	from PIL import Image, ImageDraw, ImageFont
	from torchvision import transforms


	def resize_pad(img_rgb: Image.Image, image_size: int) -> Image.Image:
	    """Return a square ``image_size`` x ``image_size`` RGB copy of *img_rgb*.

	    The image is converted to RGB, scaled with bicubic resampling by the
	    larger of the two per-axis ratios (so the result covers the target
	    square), and then cropped to a ``image_size``-sided window whose offset
	    is half of the excess on each axis (integer division).

	    Note: despite the name, this path crops the overflow; it does not pad.

	    Args:
	        img_rgb: Source image; any mode accepted by ``Image.convert("RGB")``.
	        image_size: Side length of the output square (coerced with ``int``).

	    Returns:
	        The resized and cropped RGB image. If the source has a zero-length
	        side, an all-black square of the requested size is returned instead.
	    """
	    rgb = img_rgb.convert("RGB")
	    src_w, src_h = rgb.size
	    side = int(image_size)

	    # Nothing to scale: hand back a black placeholder of the right size.
	    if not (src_w and src_h):
	        return Image.new("RGB", (side, side), (0, 0, 0))

	    # Use the larger ratio so that neither side ends up shorter than `side`.
	    factor = max(side / float(src_w), side / float(src_h))
	    scaled_w = max(1, int(round(src_w * factor)))
	    scaled_h = max(1, int(round(src_h * factor)))
	    scaled = rgb.resize((scaled_w, scaled_h), resample=Image.BICUBIC)

	    x0 = (scaled_w - side) // 2
	    y0 = (scaled_h - side) // 2
	    return scaled.crop((x0, y0, x0 + side, y0 + side))


	def cond_tensor_from_pil(img_rgb: Image.Image, device: torch.device) -> torch.Tensor:
	    """Turn a PIL image into a batched conditioning tensor on *device*.

	    The image goes through ``transforms.ToTensor()``, gains a leading batch
	    axis, is moved to *device*, and is finally remapped with ``x * 2 - 1``
	    (presumably taking torchvision's [0, 1] range to [-1, 1]; the decoders
	    in this file undo it with ``(x + 1) / 2``).

	    Args:
	        img_rgb: Image to convert.
	        device: Target device for the resulting tensor.

	    Returns:
	        The remapped tensor with a batch axis of size 1 in front.
	    """
	    to_tensor = transforms.ToTensor()
	    batch = to_tensor(img_rgb).unsqueeze(0)
	    batch = batch.to(device)
	    return batch * 2.0 - 1.0


	def full_inference(model, img_rgb: Image.Image, image_size: int, device: torch.device, num_passes: int = 5, noise_std: float = 0.01):
	    """Run multi-pass inference on a square-resized image and decode to PIL.

	    The image is prepared with ``resize_pad`` and ``cond_tensor_from_pil``.
	    The model is then called ``num_passes`` times; on each pass fresh Gaussian
	    noise with standard deviation ``noise_std`` is added to the input (no
	    noise when ``noise_std <= 0``). The per-pass ``'basecolor'``, ``'normal'``
	    and ``'rmd'`` predictions are merged with an element-wise median across
	    passes (for an even number of passes ``torch.median`` returns the lower
	    of the two middle values). The merged maps are concatenated along dim 1
	    and fed to ``model(..., mode=1)``; its ``'rgb'`` entry is added to the
	    result.

	    All model calls run under ``torch.no_grad()``: this is inference only, and
	    otherwise each pass would keep its autograd graph alive through the
	    tensors collected for the median.

	    Args:
	        model: Callable returning a mapping with ``'basecolor'``, ``'normal'``
	            and ``'rmd'`` tensors, and a mapping with ``'rgb'`` when called
	            with ``mode=1``.
	        img_rgb: Input image.
	        image_size: Side length used for ``resize_pad``.
	        device: Device on which the input tensor is created.
	        num_passes: Number of forward passes to merge; values below 1 are
	            treated as 1 so there is always something to merge.
	        noise_std: Standard deviation of the per-pass input noise.

	    Returns:
	        ``(resized_input, outputs)`` where ``resized_input`` is the image
	        actually fed to the model and ``outputs`` maps ``'basecolor'``,
	        ``'normal'``, ``'rmd'`` and ``'rgb'`` to PIL images.
	    """
	    img_rgb = resize_pad(img_rgb, int(image_size))
	    x = cond_tensor_from_pil(img_rgb, device)

	    map_names = ['basecolor', 'normal', 'rmd']
	    stacks = {k: [] for k in map_names}

	    # An empty stack cannot be reduced, so always run at least one pass.
	    passes = max(1, int(num_passes))

	    with torch.no_grad():
	        for _ in range(passes):
	            noise = torch.randn_like(x) * noise_std if noise_std > 0 else 0
	            preds = model(x + noise)
	            for k in map_names:
	                stacks[k].append(preds[k])

	        # Element-wise median over the pass axis suppresses per-pass outliers.
	        merged = {
	            k: torch.median(torch.stack(stacks[k]), dim=0).values
	            for k in map_names
	        }

	        inv_input = torch.cat([merged[k] for k in map_names], dim=1)
	        merged['rgb'] = model(inv_input, mode=1)['rgb']

	    def to_pil(tensor):
	        # Undo the x * 2 - 1 input mapping, clamp, and take batch item 0 as HWC.
	        out = ((tensor + 1.0) / 2.0).clamp(0, 1)
	        out_np = out[0].detach().cpu().permute(1, 2, 0).numpy()
	        # +0.5 before the truncating cast rounds to the nearest 8-bit level.
	        return Image.fromarray((out_np * 255.0 + 0.5).astype("uint8"))

	    outputs = {k: to_pil(v) for k, v in merged.items()}
	    return img_rgb, outputs


	def tiled_inference(model, img_rgb: Image.Image, tile: int, device: torch.device, overlap: int = 16):
	    """Run the model tile by tile over an image and blend the predictions.

	    The image is padded with black on the right/bottom so that square tiles
	    of side ``tile``, placed every ``tile - overlap`` pixels, cover it
	    exactly. Each tile is converted with ``cond_tensor_from_pil`` and passed
	    to the model under ``torch.no_grad()``. Predictions are accumulated with
	    per-tile weights that ramp linearly over ``overlap`` pixels on every edge
	    that has a neighbouring tile, then normalised by the summed weights and
	    cropped back to the original size.

	    Assumes each prediction has the same spatial size as the tile and three
	    channels, since it is added into a ``(tile, tile, 3)`` window.

	    Args:
	        model: Callable returning a mapping with ``'basecolor'``, ``'normal'``
	            and ``'rmd'`` tensors for one tile.
	        img_rgb: Input image of any size.
	        tile: Tile side length in pixels; must be at least 1.
	        device: Device on which tile tensors are created.
	        overlap: Overlap between neighbouring tiles, clamped to
	            ``[0, tile - 1]``.

	    Returns:
	        ``(rgb_input, outputs)`` where ``rgb_input`` is the RGB-converted
	        (unpadded) input and ``outputs`` maps ``'basecolor'``, ``'normal'``
	        and ``'rmd'`` to PIL images of the input's size. No ``'rgb'`` entry
	        is produced here.

	    Raises:
	        ValueError: If ``tile`` is smaller than 1.
	    """
	    img_rgb = img_rgb.convert("RGB")

	    tile = int(tile)
	    if tile < 1:
	        raise ValueError(f"tile must be a positive integer, got {tile}")

	    overlap = min(max(int(overlap), 0), tile - 1)
	    stride = tile - overlap  # always >= 1 after the clamp above

	    def pad_for(length):
	        # Pad so that (length + pad - tile) is a non-negative multiple of
	        # stride. A side no longer than one tile is padded to exactly one
	        # tile; the plain modulo formula would leave it shorter than `tile`
	        # and produce a negative tile origin.
	        if length <= tile:
	            return tile - length
	        return (tile - length) % stride

	    w, h = img_rgb.size
	    pad_w, pad_h = pad_for(w), pad_for(h)

	    if pad_w or pad_h:
	        src_padded = Image.new("RGB", (w + pad_w, h + pad_h), (0, 0, 0))
	        src_padded.paste(img_rgb, (0, 0))
	    else:
	        src_padded = img_rgb

	    pw, ph = src_padded.size

	    map_names = ['basecolor', 'normal', 'rmd']
	    acc = {k: np.zeros((ph, pw, 3), dtype=np.float32) for k in map_names}
	    wsum = np.zeros((ph, pw, 1), dtype=np.float32)

	    # The padding guarantees the last origin lands exactly on pw/ph - tile.
	    xs = range(0, pw - tile + 1, stride)
	    ys = range(0, ph - tile + 1, stride)

	    # Complementary ramps: in an overlap band the outgoing tile's fade-out
	    # and the incoming tile's fade-in sum to one.
	    fade_in = np.linspace(0.0, 1.0, overlap, endpoint=False, dtype=np.float32)
	    fade_out = np.linspace(1.0, 0.0, overlap, endpoint=False, dtype=np.float32)

	    def axis_ramp(start, extent):
	        # 1-D blend weights for a tile starting at `start` on an axis of
	        # length `extent`; edges on the image border keep full weight.
	        ramp = np.ones((tile,), dtype=np.float32)
	        if overlap > 0:
	            if start > 0:
	                ramp[:overlap] = fade_in
	            if start + tile < extent:
	                ramp[-overlap:] = fade_out
	        return ramp

	    def tensor_to_np(t):
	        # Undo the x * 2 - 1 input mapping, clamp, and take batch item 0 as HWC.
	        t = ((t + 1.0) / 2.0).clamp(0, 1)
	        return t[0].detach().cpu().permute(1, 2, 0).numpy()

	    for top in ys:
	        ramp_y = axis_ramp(top, ph)
	        for left in xs:
	            ramp_x = axis_ramp(left, pw)
	            weight = (ramp_y[:, None] * ramp_x[None, :])[:, :, None]

	            patch_img = src_padded.crop((left, top, left + tile, top + tile))
	            cond = cond_tensor_from_pil(patch_img, device)

	            with torch.no_grad():
	                preds = model(cond)

	            for k in map_names:
	                acc[k][top : top + tile, left : left + tile, :] += tensor_to_np(preds[k]) * weight
	            # Weights are shared by all maps, so they are summed once per tile.
	            wsum[top : top + tile, left : left + tile, :] += weight

	    def acc_to_pil(out_np):
	        # Normalise by the accumulated weight; the floor avoids dividing by 0.
	        out_np = out_np / np.maximum(wsum, 1e-8)
	        out_np = np.clip(out_np, 0.0, 1.0)
	        return Image.fromarray((out_np * 255.0 + 0.5).astype("uint8"))

	    outputs = {k: acc_to_pil(acc[k]) for k in map_names}

	    if pad_w or pad_h:
	        # Drop the black padding again so outputs match the input size.
	        outputs = {k: img.crop((0, 0, w, h)) for k, img in outputs.items()}

	    return img_rgb, outputs


	def _draw_label(img: Image.Image, label: str, bar_color=(0, 0, 0)) -> Image.Image:
	    """Draw a filled caption bar with white text along the top of *img*.

	    The bar spans the full width from y=0 to y=24 and is filled with
	    ``bar_color``; ``label`` is drawn at (4, 2). ``arial.ttf`` at size 18 is
	    tried first, falling back to Pillow's default font when it cannot be
	    loaded.

	    Args:
	        img: Image to annotate; it is modified in place.
	        label: Caption text.
	        bar_color: Fill colour of the bar.

	    Returns:
	        The same image object that was passed in.
	    """
	    draw = ImageDraw.Draw(img)
	    try:
	        font = ImageFont.truetype("arial.ttf", 18)
	    except OSError:
	        # Font file not available on this system.
	        font = ImageFont.load_default()
	    draw.rectangle((0, 0, img.width, 24), fill=bar_color)
	    draw.text((4, 2), label, fill=(255, 255, 255), font=font)
	    return img


	def _draw_arrow(img: Image.Image, color=(180, 180, 180)) -> Image.Image:
	    """Draw a right-pointing arrow across *img* at its vertical centre.

	    A 3-pixel-wide shaft runs from the left edge to 8 pixels before the right
	    edge, followed by a filled triangular head whose tip touches the right
	    edge.

	    Args:
	        img: Image to draw on; it is modified in place.
	        color: Colour of both the shaft and the head.

	    Returns:
	        The same image object that was passed in.
	    """
	    draw = ImageDraw.Draw(img)
	    cy = img.height // 2
	    r = 8  # half-height and length of the arrow head
	    draw.line((0, cy, img.width - r, cy), fill=color, width=3)
	    draw.polygon([(img.width - r, cy - r), (img.width - r, cy + r), (img.width, cy)], fill=color)
	    return img


	def make_side_by_side(inp_img: Image.Image, outputs: dict) -> Image.Image:
	    """Compose an overview image: input -> predicted maps -> reconstruction.

	    Layout, left to right: the input (vertically centred), an arrow, a
	    3-column grid holding BASECOLOR, NORMAL, DEPTH on the first row and
	    ROUGHNESS, METALLIC on the second, and, when ``outputs`` has an ``'rgb'``
	    entry, a second arrow followed by the reconstructed RGB image. Every
	    panel is resized to 200x200 and captioned with ``_draw_label``.

	    The ``'rmd'`` image is split into channels: the first is shown as
	    ROUGHNESS, the second as METALLIC and the third as DEPTH.

	    Args:
	        inp_img: The model input image.
	        outputs: Mapping with ``'basecolor'``, ``'normal'`` and ``'rmd'``
	            PIL images, and optionally ``'rgb'``. Without ``'rgb'`` (for
	            example when the mapping comes from ``tiled_inference``) the
	            reconstruction stage and its arrow are omitted and the canvas is
	            correspondingly narrower.

	    Returns:
	        A new RGB canvas containing the composed panels.
	    """
	    inp_img = inp_img.convert("RGB")
	    outputs = {k: v.convert("RGB") for k, v in outputs.items()}

	    r, g, b = outputs['rmd'].split()
	    recon = outputs.get('rgb')

	    cell_size = 200
	    arrow_w = 48
	    gap = 8
	    background = (35, 35, 35)
	    map_color = (50, 160, 80)

	    grid_w = cell_size * 3 + gap * 2
	    total_w = cell_size + arrow_w + grid_w
	    if recon is not None:
	        total_w += arrow_w + cell_size
	    total_h = cell_size * 2 + gap

	    canvas = Image.new("RGB", (total_w, total_h), background)
	    centred_y = (total_h - cell_size) // 2

	    def panel(img, label, color):
	        # Resize returns a fresh image, so labelling never touches the source.
	        return _draw_label(img.resize((cell_size, cell_size), Image.BICUBIC), label, color)

	    def paste_arrow(x):
	        arrow = _draw_arrow(Image.new("RGB", (arrow_w, total_h), background))
	        canvas.paste(arrow, (x, 0))

	    cx = 0

	    # Stage 1: the input image.
	    canvas.paste(panel(inp_img, "INPUT", (50, 100, 200)), (cx, centred_y))
	    cx += cell_size

	    paste_arrow(cx)
	    cx += arrow_w

	    # Stage 2: predicted maps, filled row by row into a 3-column grid.
	    ordered = [
	        ("BASECOLOR", outputs['basecolor']),
	        ("NORMAL", outputs['normal']),
	        ("DEPTH", b.convert("RGB")),
	        ("ROUGHNESS", r.convert("RGB")),
	        ("METALLIC", g.convert("RGB")),
	    ]
	    for i, (label, img) in enumerate(ordered):
	        row, col = divmod(i, 3)
	        px = cx + col * (cell_size + gap)
	        py = row * (cell_size + gap)
	        canvas.paste(panel(img, label, map_color), (px, py))
	    cx += grid_w

	    # Stage 3: reconstruction, only when one was provided.
	    if recon is not None:
	        paste_arrow(cx)
	        cx += arrow_w
	        canvas.paste(panel(recon, "RECON RGB", (200, 120, 50)), (cx, centred_y))

	    return canvas