import argparse from pathlib import Path import numpy as np import onnxruntime as ort from PIL import Image IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff"} def parse_args(): parser = argparse.ArgumentParser(description="Run DepthPolyp ONNX inference on images.") parser.add_argument("--onnx", default="checkpoints/DepthPolyp_Kvasir.onnx") parser.add_argument("--input", default="samples/kvasir/images") parser.add_argument("--output", default="samples/kvasir/outputs") parser.add_argument("--image-size", type=int, default=224) parser.add_argument("--threshold", type=float, default=0.3) return parser.parse_args() def list_images(input_path: Path): if input_path.is_file(): return [input_path] return sorted(path for path in input_path.rglob("*") if path.suffix.lower() in IMAGE_EXTENSIONS) def preprocess(image_path: Path, image_size: int): image = Image.open(image_path).convert("RGB") original_size = image.size resized = image.resize((image_size, image_size), Image.BILINEAR) array = np.asarray(resized).astype(np.float32) / 255.0 tensor = np.transpose(array, (2, 0, 1))[None, ...] return image, original_size, tensor def to_grayscale(probability: np.ndarray, size): probability = np.clip(probability, 0.0, 1.0) image = Image.fromarray((probability * 255).astype(np.uint8), mode="L") return image.resize(size, Image.BILINEAR) def colorize_purple_yellow(probability: np.ndarray, size): probability = np.clip(probability, 0.0, 1.0) stops = np.array( [ [38, 5, 84], [86, 33, 132], [141, 48, 140], [203, 71, 119], [245, 135, 48], [252, 231, 37], ], dtype=np.float32, ) scaled = probability * (len(stops) - 1) lower = np.floor(scaled).astype(np.int32) upper = np.clip(lower + 1, 0, len(stops) - 1) alpha = (scaled - lower)[..., None] colored = stops[lower] * (1.0 - alpha) + stops[upper] * alpha image = Image.fromarray(colored.astype(np.uint8), mode="RGB") return image.resize(size, Image.BILINEAR) def make_overlay(image: Image.Image, mask: Image.Image): base = image.convert("RGBA") mask_array = np.asarray(mask).astype(np.float32) / 255.0 color = np.zeros((mask_array.shape[0], mask_array.shape[1], 4), dtype=np.uint8) color[..., 0] = 252 color[..., 1] = 231 color[..., 2] = 37 color[..., 3] = (mask_array * 155).astype(np.uint8) return Image.alpha_composite(base, Image.fromarray(color, mode="RGBA")).convert("RGB") def main(): args = parse_args() input_path = Path(args.input) output_root = Path(args.output) mask_dir = output_root / "masks" depth_dir = output_root / "depth" overlay_dir = output_root / "overlay" for directory in (mask_dir, depth_dir, overlay_dir): directory.mkdir(parents=True, exist_ok=True) session = ort.InferenceSession(args.onnx, providers=["CPUExecutionProvider"]) input_name = session.get_inputs()[0].name images = list_images(input_path) if not images: raise FileNotFoundError(f"No images found under {input_path}") for image_path in images: image, original_size, tensor = preprocess(image_path, args.image_size) segmentation, depth = session.run(None, {input_name: tensor}) seg_prob = segmentation[0, 0] depth_prob = depth[0, 0] seg_image = to_grayscale(seg_prob, original_size) depth_image = colorize_purple_yellow(depth_prob, original_size) binary_mask = seg_image.point(lambda value: 255 if value >= int(args.threshold * 255) else 0) overlay = make_overlay(image, seg_image) stem = image_path.stem binary_mask.save(mask_dir / f"{stem}.png") depth_image.save(depth_dir / f"{stem}.png") overlay.save(overlay_dir / f"{stem}.jpg", quality=95) print(f"Processed {len(images)} image(s). Outputs saved to {output_root}") if __name__ == "__main__": main()