Buckets:

yzhllm
/

PhysicalAI-Spatial-Intelligence-Warehouse-bucket

Files

xet

yzhllm/PhysicalAI-Spatial-Intelligence-Warehouse-bucket/utils/visualize.py

yzhllm

2 days ago

download

raw

3.77 kB

	import pycocotools.mask as mask_utils
	from PIL import Image, ImageDraw, ImageFont
	import json
	import os.path as osp
	import numpy as np
	import random
	import argparse
	import os
	from tqdm import tqdm

	def visualize_masks_and_depth(masks, image_path, depth_path, output_path):
	    """Save a side-by-side view of the mask-annotated RGB image and its depth map.

	    Every mask is decoded with ``pycocotools.mask.decode``, painted onto a
	    transparent overlay in a random half-transparent color and labelled
	    ``Region <index>`` at the center of its bounding box. The overlay is
	    alpha-composited onto the RGB image. The depth image is min-max scaled
	    to the 0-255 range and pasted to the right of the annotated image.

	    Args:
	        masks: Iterable of encoded masks accepted by
	            ``pycocotools.mask.decode``. Each decoded mask is used as a
	            paste mask for a full-size color layer, so it presumably has
	            to match the RGB image size (verify against the annotations).
	        image_path: Path of the RGB image to annotate.
	        depth_path: Path of the depth image shown on the right-hand side.
	        output_path: Destination path of the combined visualization.
	    """
	    # Load the RGB image and prepare a fully transparent overlay of equal size
	    image = Image.open(image_path).convert("RGBA")
	    overlay = Image.new("RGBA", image.size, (0, 0, 0, 0))
	    # A single drawing context serves both text measurement and rendering
	    draw = ImageDraw.Draw(overlay)

	    font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
	    try:
	        font = ImageFont.truetype(font_path, 25)
	    except OSError:
	        # The DejaVu font is not installed everywhere; fall back to the
	        # font bundled with Pillow instead of aborting the visualization.
	        font = ImageFont.load_default()

	    text_infos = []

	    # Paint every mask first; labels are drawn afterwards so that later
	    # masks cannot cover the text of earlier regions.
	    for i, mask in enumerate(masks):
	        mask = mask_utils.decode(mask)
	        mask_image = Image.fromarray((mask * 255).astype(np.uint8), mode='L')
	        color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255), 128)
	        colored_mask = Image.new("RGBA", image.size, color)
	        overlay.paste(colored_mask, (0, 0), mask_image)

	        mask_indices = np.argwhere(mask)
	        if mask_indices.size == 0:
	            # An empty mask has no bounding box to place a label in
	            continue

	        text = f"Region {i}"
	        left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
	        text_width = right - left
	        text_height = bottom - top

	        # Center the label on the bounding box of the mask
	        min_y, min_x = mask_indices.min(axis=0)
	        max_y, max_x = mask_indices.max(axis=0)
	        center_x = (min_x + max_x) // 2
	        center_y = (min_y + max_y) // 2
	        text_position = (center_x - text_width // 2, center_y - text_height // 2)
	        text_infos.append((text, text_position))

	    for text, text_position in text_infos:
	        draw.text(text_position, text, fill=(255, 255, 255, 255), font=font)

	    # Blend RGB image with mask overlay
	    blended_image = Image.alpha_composite(image, overlay)

	    # Load the depth image and normalize it to 0-255 for visualization
	    depth = np.array(Image.open(depth_path)).astype(np.float64)
	    depth_min, depth_max = depth.min(), depth.max()
	    depth_range = depth_max - depth_min
	    if depth_range > 0:
	        depth_normalized = ((depth - depth_min) / depth_range * 255).astype(np.uint8)
	    else:
	        # A constant depth map would divide by zero; show it as black
	        depth_normalized = np.zeros(depth.shape, dtype=np.uint8)
	    depth_image = Image.fromarray(depth_normalized).convert('RGB')

	    # Create side-by-side visualization: annotated RGB left, depth right
	    total_width = blended_image.width + depth_image.width
	    combined_image = Image.new('RGB', (total_width, blended_image.height))
	    combined_image.paste(blended_image, (0, 0))
	    combined_image.paste(depth_image, (blended_image.width, 0))

	    # Save the combined visualization
	    combined_image.save(output_path)


	if __name__ == '__main__':
	    # Command-line entry point: render the first ``--num_samples``
	    # annotations of a JSON annotation file into ``--output_dir``.
	    parser = argparse.ArgumentParser()
	    # The three input locations have no usable default, so they are
	    # required; a missing one is reported by argparse instead of failing
	    # later with a TypeError on ``None``.
	    parser.add_argument('--image_folder', type=str, required=True)
	    parser.add_argument('--depth_folder', type=str, required=True)  # Folder with depth images
	    parser.add_argument('--annotations_file', type=str, required=True)
	    parser.add_argument('--output_dir', type=str, default='visualization')
	    parser.add_argument('--num_samples', type=int, default=10)
	    args = parser.parse_args()

	    image_folder = args.image_folder
	    depth_folder = args.depth_folder
	    annotations_file = args.annotations_file
	    output_dir = args.output_dir
	    os.makedirs(output_dir, exist_ok=True)

	    with open(annotations_file, 'r', encoding='utf-8') as f:
	        annotations = json.load(f)

	    # Output files are prefixed with the annotation file name
	    base_name = osp.basename(annotations_file)

	    for annotation in tqdm(annotations[:args.num_samples]):
	        image_name = annotation['image']
	        # Depth images are assumed to be named "<image stem>_depth.png".
	        # Only a trailing ".png" is rewritten, so a ".png" in the middle
	        # of the name is left untouched; other names are used unchanged.
	        if image_name.endswith('.png'):
	            depth_name = image_name[:-len('.png')] + '_depth.png'
	        else:
	            depth_name = image_name

	        output_path = osp.join(output_dir, f"{base_name}_{image_name}")
	        image_path = osp.join(image_folder, image_name)
	        depth_path = osp.join(depth_folder, depth_name)
	        masks = annotation['rle']
	        visualize_masks_and_depth(masks, image_path, depth_path, output_path)

Xet Storage Details

Size: 3.77 kB
Xet hash: c4dacc2584edda2879bbc20754b63a2cddae58f858a94097a3f273c80e5cd7fb

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.