# coconutiscoding — "Update app.py" (commit 7bd64ab, verified)
import gradio as gr
import torch
import numpy as np
from PIL import Image
from torchvision.models.segmentation import deeplabv3_resnet50, DeepLabV3_ResNet50_Weights
from huggingface_hub import hf_hub_download
import cv2
# ---------------- Download and load the official LaMa weights ----------------
repo_id = "JosephCatrambone/big-lama-torchscript"
# Fetch the TorchScript LaMa checkpoint from the Hugging Face Hub (cached locally).
model_path = hf_hub_download(repo_id=repo_id, filename="lama.pt")
# TorchScript module: loaded once at import time, CPU-only, inference mode.
lama_model = torch.jit.load(model_path, map_location="cpu")
lama_model.eval()
# ---- Load the segmentation model (CPU) ----
device = torch.device("cpu")
weights = DeepLabV3_ResNet50_Weights.COCO_WITH_VOC_LABELS_V1
model = deeplabv3_resnet50(weights=weights).to(device).eval()
# NOTE(review): `preprocess` is built here but never applied in `segment` below,
# even though these weights document an ImageNet-normalization preprocessing
# pipeline — confirm the raw [0,1] input used downstream is intentional.
preprocess = weights.transforms()
MAX_SIDE = 1024  # Cap the longest input side, for speed and memory on CPU
def _resize_if_needed(pil_img: Image.Image, max_side=MAX_SIDE) -> Image.Image:
    """Downscale *pil_img* so that its longest side is at most *max_side*.

    Images already within the limit are returned unchanged. Otherwise both
    dimensions are scaled by the same factor (bilinear resampling), so the
    aspect ratio is preserved.
    """
    width, height = pil_img.size
    longest = max(width, height)
    if longest <= max_side:
        return pil_img
    scale = max_side / float(longest)
    new_size = (int(width * scale), int(height * scale))
    return pil_img.resize(new_size, Image.BILINEAR)
# ImageNet statistics used by the DeepLabV3 COCO/VOC weights' preprocessing.
_IMAGENET_MEAN = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
_IMAGENET_STD = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)


def _foreground_mask(image: Image.Image) -> np.ndarray:
    """Run DeepLabV3 on *image*; return a dilated binary foreground mask.

    Returns an (H, W) uint8 array with values in {0, 1}, where 1 marks
    (dilated) foreground pixels.
    """
    x = torch.from_numpy(np.array(image)).permute(2, 0, 1).float() / 255.0
    # BUGFIX: the pretrained weights expect ImageNet-normalized input
    # (see weights.transforms()); the original fed raw [0,1] pixels,
    # which degrades segmentation quality.
    x = (x - _IMAGENET_MEAN) / _IMAGENET_STD
    x = x.unsqueeze(0).to(device)  # [1,3,H,W]
    with torch.no_grad():
        out = model(x)["out"][0]  # [C,H,W], C=21
    pred = out.argmax(0).cpu().numpy()  # [H,W] class indices
    # Foreground = everything that is not class 0 (background in VOC labels).
    fg = (pred != 0).astype(np.uint8)
    # Dilate so the inpainting mask fully covers object boundaries.
    kernel = np.ones((15, 15), np.uint8)
    return cv2.dilate(fg, kernel, iterations=1)


def _overlay_foreground(image: Image.Image, fg: np.ndarray) -> Image.Image:
    """Blend a semi-transparent red mask over *image* where *fg* == 1."""
    base = image.convert("RGBA")
    overlay = Image.new("RGBA", base.size, (255, 0, 0, 0))
    overlay.putalpha(Image.fromarray((fg * 120).astype(np.uint8)))
    return Image.alpha_composite(base, overlay).convert("RGB")


def _lama_inpaint(img_np: np.ndarray, mask_np: np.ndarray) -> np.ndarray:
    """Inpaint the masked region of *img_np* (HWC uint8) with LaMa.

    *mask_np* is (H, W) uint8 with 0/255 values. LaMa requires spatial
    dimensions divisible by 8, so the inputs are reflect-padded up to the
    next multiple of 8 and the result is cropped back to (H, W).
    """
    H, W = img_np.shape[:2]
    # BUGFIX: the original cropped back to [:H, :W] but never padded, so
    # inputs whose sides are not multiples of 8 could fail or misalign.
    pad_h = (8 - H % 8) % 8
    pad_w = (8 - W % 8) % 8
    img_p = np.pad(img_np, ((0, pad_h), (0, pad_w), (0, 0)), mode="reflect")
    mask_p = np.pad(mask_np, ((0, pad_h), (0, pad_w)), mode="reflect")
    img_t = torch.from_numpy(img_p).permute(2, 0, 1).float().unsqueeze(0) / 255.0
    mask_t = torch.from_numpy(mask_p).unsqueeze(0).unsqueeze(0).float() / 255.0
    with torch.no_grad():
        out = lama_model(img_t, mask_t)  # [1,3,H',W'], presumably in [0,1]
    out_np = out[0].permute(1, 2, 0).numpy() * 255.0
    # BUGFIX: clip before the uint8 cast — values slightly outside [0,1]
    # would otherwise wrap around and produce speckle artifacts.
    out_np = np.clip(out_np, 0, 255).astype(np.uint8)
    return out_np[:H, :W, :]


def segment(image: Image.Image):
    """Segment the foreground of *image* and erase it with LaMa inpainting.

    Accepts a PIL image or a numpy array. Returns a 3-tuple of PIL images:
    (red-overlay visualization, binary foreground mask (white=foreground),
    LaMa inpainting result).
    """
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)
    image = image.convert("RGB")
    image = _resize_if_needed(image)

    fg = _foreground_mask(image)
    mask_img = Image.fromarray((fg * 255).astype(np.uint8), mode="L")
    blended = _overlay_foreground(image, fg)
    inpainted_np = _lama_inpaint(np.array(image), np.array(mask_img))
    inpainted_img = Image.fromarray(inpainted_np)
    return blended, mask_img, inpainted_img
# ---- Gradio UI ----
# Single-function interface: one image in, three images out
# (overlay visualization, binary mask, inpainted result).
demo = gr.Interface(
    fn=segment,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=[
        gr.Image(type="pil", label="Overlay (foreground)"),
        gr.Image(type="pil", label="Binary Mask (foreground=white)"),
        gr.Image(type="pil", label="inpaint result"),
    ],
    title="Semantic Segmentation + LaMa Inpainting",
    description="DeepLabV3 segmentation + LaMa inpainting。",
    examples=[
        # NOTE(review): assumes this example image is shipped in the repo root
        ["./9F27E2C4-5662-4AA7-A14A-2DE6627EBE8E-14319-000010D528167C0B.PNG"]
    ]
)
if __name__ == "__main__":
    demo.launch()