# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the Apache License, Version 2.0
# found in the LICENSE file in the root directory of this source tree.

"""
Utility functions for visualization
"""

from argparse import ArgumentParser, Namespace
from typing import Optional

try:
    from distutils.util import strtobool
except ImportError:
    # distutils was removed from the standard library in Python 3.12
    # (PEP 632). Provide a drop-in replacement that accepts the same
    # spellings and raises the same exception type.
    def strtobool(val):
        """Convert a truth-value string to 1 or 0; raise ValueError otherwise."""
        val = val.lower()
        if val in ("y", "yes", "t", "true", "on", "1"):
            return 1
        if val in ("n", "no", "f", "false", "off", "0"):
            return 0
        raise ValueError("invalid truth value %r" % (val,))


import numpy as np
import rerun as rr
import trimesh

from mapanything.utils.hf_utils.viz import image_mesh

# Default viewer URL shared by the CLI parser and `init_rerun_args`.
_DEFAULT_RERUN_URL = "rerun+http://127.0.0.1:2004/proxy"


def log_posed_rgbd_data_to_rerun(
    image, depthmap, pose, intrinsics, base_name, mask=None
):
    """
    Log a posed RGB-D frame (camera transform, pinhole, image, depth) to Rerun.

    Parameters
    ----------
    image : numpy.ndarray
        RGB image to be logged. Its first two dimensions are used as the
        pinhole height and width.
    depthmap : numpy.ndarray
        Depth map corresponding to the image
    pose : numpy.ndarray
        4x4 camera pose matrix with rotation (3x3) and translation (3x1)
    intrinsics : numpy.ndarray
        Camera intrinsic matrix
    base_name : str
        Base name for the logged entities in Rerun
    mask : numpy.ndarray, optional
        Optional segmentation mask for the depth image. Nothing is logged
        for it when it is None.
    """
    height, width = image.shape[:2]

    # Camera pose: rotation is the upper-left 3x3, translation the last column.
    rr.log(
        base_name,
        rr.Transform3D(
            translation=pose[:3, 3],
            mat3x3=pose[:3, :3],
        ),
    )

    pinhole_path = f"{base_name}/pinhole"
    rr.log(
        pinhole_path,
        rr.Pinhole(
            image_from_camera=intrinsics,
            height=height,
            width=width,
            camera_xyz=rr.ViewCoordinates.RDF,
        ),
    )

    # Image-space entities live underneath the pinhole entity.
    rr.log(f"{pinhole_path}/rgb", rr.Image(image))
    rr.log(f"{pinhole_path}/depth", rr.DepthImage(depthmap))
    if mask is not None:
        rr.log(f"{pinhole_path}/depth_mask", rr.SegmentationImage(mask))


def str2bool(v) -> bool:
    """
    Convert a command-line truth value to a bool.

    Values that are already ``bool`` are returned unchanged; strings are
    interpreted by ``strtobool`` (which raises ValueError for unknown
    spellings, reported by argparse as an invalid argument value).
    """
    if isinstance(v, bool):
        return v
    return bool(strtobool(v))


def script_add_rerun_args(parser: ArgumentParser) -> None:
    """
    Add common Rerun script arguments to `parser`.

    Change Log from https://github.com/rerun-io/rerun/blob/29eb8954b08e59ff96943dc0677f46f7ea4ea734/rerun_py/rerun_sdk/rerun/script_helpers.py#L65:
        - Added default portforwarding url for ease of use
        - Update parser types

    Parameters
    ----------
    parser : ArgumentParser
        The parser to add arguments to.

    Returns
    -------
    None
    """
    # Boolean options accept an optional value: a bare `--flag` means True
    # (via `const`), while `--flag false` is parsed through `str2bool`.
    parser.add_argument(
        "--headless",
        type=str2bool,
        nargs="?",
        const=True,
        default=True,
        help="Don't show GUI",
    )
    parser.add_argument(
        "--connect",
        dest="connect",
        type=str2bool,
        nargs="?",
        const=True,
        default=True,
        help="Connect to an external viewer",
    )
    parser.add_argument(
        "--serve",
        dest="serve",
        type=str2bool,
        nargs="?",
        const=True,
        default=False,
        help="Serve a web viewer (WARNING: experimental feature)",
    )
    parser.add_argument(
        "--url",
        type=str,
        default=_DEFAULT_RERUN_URL,
        help="Connect to this HTTP(S) URL",
    )
    parser.add_argument(
        "--save", type=str, default=None, help="Save data to a .rrd file at this path"
    )
    parser.add_argument(
        "-o",
        "--stdout",
        dest="stdout",
        action="store_true",
        help="Log data to standard output, to be piped into a Rerun Viewer",
    )


def init_rerun_args(
    headless: bool = True,
    connect: bool = True,
    serve: bool = False,
    url: str = _DEFAULT_RERUN_URL,
    save: Optional[str] = None,
    stdout: bool = False,
) -> Namespace:
    """
    Build a Namespace of common Rerun script arguments without a parser.

    The attributes mirror the options registered by `script_add_rerun_args`.

    Parameters
    ----------
    headless : bool, optional
        Don't show GUI, by default True
    connect : bool, optional
        Connect to an external viewer, by default True
    serve : bool, optional
        Serve a web viewer (WARNING: experimental feature), by default False
    url : str, optional
        Connect to this HTTP(S) URL, by default rerun+http://127.0.0.1:2004/proxy
    save : str, optional
        Save data to a .rrd file at this path, by default None
    stdout : bool, optional
        Log data to standard output, to be piped into a Rerun Viewer, by default False

    Returns
    -------
    Namespace
        Namespace carrying the given values as attributes.
    """
    return Namespace(
        headless=headless,
        connect=connect,
        serve=serve,
        url=url,
        save=save,
        stdout=stdout,
    )


def predictions_to_glb(
    predictions,
    as_mesh=True,
) -> trimesh.Scene:
    """
    Converts predictions to a 3D scene that can be exported as a GLB file.

    The input arrays in `predictions` are not modified.

    Args:
        predictions (dict): Dictionary containing model predictions with keys:
            - world_points: 3D point coordinates (V, H, W, 3)
            - images: Input images, (V, H, W, 3) or channels-first (V, 3, H, W);
              values are scaled by 255 to produce 8-bit colors
            - final_masks: Validity masks (V, H, W)
        as_mesh (bool): Represent the data as a mesh instead of point cloud (default: True)

    Returns:
        trimesh.Scene: 3D scene containing one mesh per frame (as_mesh=True)
        or a single point cloud (as_mesh=False), rotated 180° around the X-axis

    Raises:
        ValueError: If predictions is not a dictionary
        KeyError: If a required key is missing from predictions
    """
    if not isinstance(predictions, dict):
        raise ValueError("predictions must be a dictionary")

    # Get the world frame points and images from the predictions
    pred_world_points = predictions["world_points"]
    images = predictions["images"]

    # Handle different image formats - a 4D array whose second axis has
    # length 3 is treated as NCHW and viewed as NHWC; anything else is
    # assumed to be NHWC already.
    if images.ndim == 4 and images.shape[1] == 3:
        images_nhwc = np.transpose(images, (0, 2, 3, 1))
    else:
        images_nhwc = images

    # Initialize a 3D scene
    scene_3d = trimesh.Scene()

    if as_mesh:
        # The Y axis is flipped before meshing and flipped back afterwards.
        flip_y = np.array([1, -1, 1], dtype=np.float32)

        # Create a separate mesh for each frame
        for frame_idx in range(pred_world_points.shape[0]):
            # Pass a private copy of the frame image so that neither this
            # function nor the meshing helper can alter the caller's array.
            frame_image = np.array(images_nhwc[frame_idx])

            faces, vertices, vertex_colors = image_mesh(
                pred_world_points[frame_idx] * flip_y,
                frame_image,
                mask=predictions["final_masks"][frame_idx],
                tri=True,
                return_indices=False,
            )
            vertices = vertices * flip_y

            # Create trimesh object for this frame
            frame_mesh = trimesh.Trimesh(
                vertices=vertices,
                faces=faces,
                vertex_colors=(vertex_colors * 255).astype(np.uint8),
                process=False,
            )
            scene_3d.add_geometry(frame_mesh)
    else:
        # Flatten all frames into one list of points with matching colors
        vertices_3d = pred_world_points.reshape(-1, 3)
        colors_rgb = (images_nhwc.reshape(-1, 3) * 255).astype(np.uint8)

        # Cast to bool so that 0/1 integer masks select by mask rather than
        # being interpreted as integer row indices.
        valid = np.asarray(predictions["final_masks"]).reshape(-1).astype(bool)

        # Boolean indexing already returns new arrays, no extra copy needed
        point_cloud_data = trimesh.PointCloud(
            vertices=vertices_3d[valid], colors=colors_rgb[valid]
        )
        scene_3d.add_geometry(point_cloud_data)

    # Apply 180° rotation around X-axis to fix orientation (upside-down issue)
    rotation_matrix_x = trimesh.transformations.rotation_matrix(np.pi, [1, 0, 0])
    scene_3d.apply_transform(rotation_matrix_x)

    return scene_3d