Spaces:

ColamanAI
/

Map-anything-seg

Sleeping

App Files Files Community

Map-anything-seg / mapanything /utils /viz.py

ColamanAI

Upload 169 files

b74998d verified 2 months ago

raw

history blame contribute delete

8.24 kB

	# Copyright (c) Meta Platforms, Inc. and affiliates.
	#
	# This source code is licensed under the Apache License, Version 2.0
	# found in the LICENSE file in the root directory of this source tree.

	"""
	Utility functions for visualization
	"""

	from argparse import ArgumentParser, Namespace
	from distutils.util import strtobool

	import numpy as np
	import rerun as rr
	import trimesh

	from mapanything.utils.hf_utils.viz import image_mesh


	def log_posed_rgbd_data_to_rerun(
	    image, depthmap, pose, intrinsics, base_name, mask=None
	):
	    """
	    Send one posed RGB-D frame to the Rerun viewer.

	    Everything is logged below the `base_name` entity: the camera transform on
	    `base_name` itself, the pinhole model on `<base_name>/pinhole`, and the
	    image data on `rgb`, `depth` and (optionally) `depth_mask` beneath it.

	    Parameters
	    ----------
	    image : numpy.ndarray
	        RGB image; its first two dimensions are used as height and width
	    depthmap : numpy.ndarray
	        Depth map that belongs to `image`
	    pose : numpy.ndarray
	        4x4 camera pose; `pose[:3, :3]` is logged as the rotation and
	        `pose[:3, 3]` as the translation
	    intrinsics : numpy.ndarray
	        Camera intrinsic matrix, passed to Rerun as `image_from_camera`
	    base_name : str
	        Root entity path for everything logged by this call
	    mask : numpy.ndarray, optional
	        Segmentation mask for the depth image; nothing is logged for it when None
	    """
	    img_height = image.shape[0]
	    img_width = image.shape[1]
	    pinhole_path = f"{base_name}/pinhole"

	    # Camera extrinsics live on the root entity.
	    camera_transform = rr.Transform3D(
	        translation=pose[:3, 3],
	        mat3x3=pose[:3, :3],
	    )
	    rr.log(base_name, camera_transform)

	    # Camera intrinsics, with the camera axes declared as RDF.
	    camera_model = rr.Pinhole(
	        image_from_camera=intrinsics,
	        height=img_height,
	        width=img_width,
	        camera_xyz=rr.ViewCoordinates.RDF,
	    )
	    rr.log(pinhole_path, camera_model)

	    # Image-plane data hangs off the pinhole entity.
	    rr.log(f"{pinhole_path}/rgb", rr.Image(image))
	    rr.log(f"{pinhole_path}/depth", rr.DepthImage(depthmap))

	    if mask is None:
	        return
	    rr.log(f"{pinhole_path}/depth_mask", rr.SegmentationImage(mask))


	def str2bool(v):
	    """
	    Convert a command-line truth string into a bool.

	    Accepts the same spellings as `distutils.util.strtobool`, compared
	    case-insensitively, but is implemented locally because `distutils` was
	    removed from the standard library in Python 3.12 (PEP 632).

	    Parameters
	    ----------
	    v : str or bool
	        One of "y", "yes", "t", "true", "on", "1" (True) or
	        "n", "no", "f", "false", "off", "0" (False). A real bool is
	        returned unchanged.

	    Returns
	    -------
	    bool
	        The parsed truth value.

	    Raises
	    ------
	    ValueError
	        If `v` is a string that is not one of the accepted spellings.
	        argparse reports this as an invalid argument value.
	    """
	    # Already a bool (e.g. a programmatic default): nothing to parse.
	    if isinstance(v, bool):
	        return v

	    normalized = v.lower()
	    if normalized in ("y", "yes", "t", "true", "on", "1"):
	        return True
	    if normalized in ("n", "no", "f", "false", "off", "0"):
	        return False
	    raise ValueError("invalid truth value %r" % (normalized,))


	def script_add_rerun_args(parser: ArgumentParser) -> None:
	    """
	    Register the common Rerun command-line options on `parser`.

	    Adapted from https://github.com/rerun-io/rerun/blob/29eb8954b08e59ff96943dc0677f46f7ea4ea734/rerun_py/rerun_sdk/rerun/script_helpers.py#L65
	    with these changes:
	    - Added default portforwarding url for ease of use
	    - Update parser types

	    Parameters
	    ----------
	    parser : ArgumentParser
	        The parser that receives the options; it is modified in place.

	    Returns
	    -------
	    None
	    """

	    def bool_option(default, help_text, **extra):
	        # Boolean options take an optional value ("--serve" or "--serve false");
	        # a bare flag resolves to True via `const`.
	        return dict(
	            extra,
	            type=str2bool,
	            nargs="?",
	            const=True,
	            default=default,
	            help=help_text,
	        )

	    # (option strings, add_argument keyword arguments), in registration order.
	    option_table = (
	        (("--headless",), bool_option(True, "Don't show GUI")),
	        (
	            ("--connect",),
	            bool_option(True, "Connect to an external viewer", dest="connect"),
	        ),
	        (
	            ("--serve",),
	            bool_option(
	                False,
	                "Serve a web viewer (WARNING: experimental feature)",
	                dest="serve",
	            ),
	        ),
	        (
	            ("--url",),
	            dict(
	                type=str,
	                default="rerun+http://127.0.0.1:2004/proxy",
	                help="Connect to this HTTP(S) URL",
	            ),
	        ),
	        (
	            ("--save",),
	            dict(
	                type=str,
	                default=None,
	                help="Save data to a .rrd file at this path",
	            ),
	        ),
	        (
	            ("-o", "--stdout"),
	            dict(
	                dest="stdout",
	                action="store_true",
	                help="Log data to standard output, to be piped into a Rerun Viewer",
	            ),
	        ),
	    )

	    for option_strings, options in option_table:
	        parser.add_argument(*option_strings, **options)


	def init_rerun_args(
	    headless=True,
	    connect=True,
	    serve=False,
	    url="rerun+http://127.0.0.1:2004/proxy",
	    save=None,
	    stdout=False,
	) -> Namespace:
	    """
	    Build a Namespace holding the common Rerun options without parsing a command line.

	    Parameters
	    ----------
	    headless : bool, optional
	        Don't show GUI, by default True
	    connect : bool, optional
	        Connect to an external viewer, by default True
	    serve : bool, optional
	        Serve a web viewer (WARNING: experimental feature), by default False
	    url : str, optional
	        Connect to this HTTP(S) URL, by default rerun+http://127.0.0.1:2004/proxy
	    save : str, optional
	        Save data to a .rrd file at this path, by default None
	    stdout : bool, optional
	        Log data to standard output, to be piped into a Rerun Viewer, by default False

	    Returns
	    -------
	    Namespace
	        A namespace with one attribute per parameter, carrying the given values.
	    """
	    return Namespace(
	        headless=headless,
	        connect=connect,
	        serve=serve,
	        url=url,
	        save=save,
	        stdout=stdout,
	    )


	def predictions_to_glb(
	    predictions,
	    as_mesh=True,
	) -> trimesh.Scene:
	    """
	    Converts predictions to a 3D scene that can be exported as a GLB file.

	    The arrays inside `predictions` are only read; they are never modified.

	    Args:
	        predictions (dict): Dictionary containing model predictions with keys:
	            - world_points: 3D point coordinates (V, H, W, 3)
	            - images: Input images, either (V, H, W, 3) or (V, 3, H, W);
	              a 4D array whose second dimension is 3 is treated as
	              channels-first. Values are multiplied by 255 to obtain uint8
	              colors, so they are presumably expected in [0, 1].
	            - final_masks: Validity masks (V, H, W)
	        as_mesh (bool): Represent the data as one mesh per view instead of a
	            single point cloud (default: True)

	    Returns:
	        trimesh.Scene: Scene holding one Trimesh per view (as_mesh=True) or a
	            single PointCloud of the valid points (as_mesh=False), rotated
	            180 degrees around the X-axis.

	    Raises:
	        ValueError: If `predictions` is not a dictionary
	        KeyError: If one of the required keys is missing from `predictions`
	    """
	    if not isinstance(predictions, dict):
	        raise ValueError("predictions must be a dictionary")

	    # Look up every required entry first so a missing key fails immediately.
	    pred_world_points = predictions["world_points"]
	    images = predictions["images"]
	    final_masks = predictions["final_masks"]

	    # Bring the images to channels-last layout. np.transpose returns a view,
	    # so this costs nothing and must not be written to.
	    if images.ndim == 4 and images.shape[1] == 3:  # NCHW format
	        images_nhwc = np.transpose(images, (0, 2, 3, 1))
	    else:  # Assume already in NHWC format
	        images_nhwc = images

	    # Initialize a 3D scene
	    scene_3d = trimesh.Scene()

	    if as_mesh:
	        # The Y axis is flipped before meshing and flipped back afterwards.
	        # NOTE(review): presumably image_mesh expects Y-up input - confirm.
	        flip_y = np.array([1, -1, 1], dtype=np.float32)

	        # Create a separate mesh for each view
	        for frame_idx in range(pred_world_points.shape[0]):
	            frame_points = pred_world_points[frame_idx]
	            frame_final_mask = final_masks[frame_idx]

	            # Copy the frame instead of scaling it in place: the slice is a
	            # view into the caller's array, and an in-place `*= 255` would
	            # corrupt predictions["images"] for every later use.
	            frame_image = images_nhwc[frame_idx].copy()

	            faces, vertices, vertex_colors = image_mesh(
	                frame_points * flip_y,
	                frame_image,
	                mask=frame_final_mask,
	                tri=True,
	                return_indices=False,
	            )
	            vertices = vertices * flip_y

	            # process=False keeps the vertices and faces exactly as generated
	            frame_mesh = trimesh.Trimesh(
	                vertices=vertices,
	                faces=faces,
	                vertex_colors=(vertex_colors * 255).astype(np.uint8),
	                process=False,
	            )
	            scene_3d.add_geometry(frame_mesh)
	    else:
	        # Cast to bool so a 0/1 integer mask is used as a mask rather than as
	        # row indices. Boolean indexing already returns fresh arrays.
	        valid = final_masks.reshape(-1).astype(bool)
	        vertices_3d = pred_world_points.reshape(-1, 3)[valid]
	        colors_rgb = (images_nhwc.reshape(-1, 3)[valid] * 255).astype(np.uint8)
	        point_cloud_data = trimesh.PointCloud(vertices=vertices_3d, colors=colors_rgb)
	        scene_3d.add_geometry(point_cloud_data)

	    # Apply 180° rotation around X-axis to fix orientation (upside-down issue)
	    rotation_matrix_x = trimesh.transformations.rotation_matrix(np.pi, [1, 0, 0])
	    scene_3d.apply_transform(rotation_matrix_x)

	    return scene_3d