Spaces:

ColamanAI
/

Map-anything-seg

Sleeping

File size: 8,239 Bytes

b74998d

# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the Apache License, Version 2.0
# found in the LICENSE file in the root directory of this source tree.

"""

Utility functions for visualization

"""

from argparse import ArgumentParser, Namespace
from distutils.util import strtobool

import numpy as np
import rerun as rr
import trimesh

from mapanything.utils.hf_utils.viz import image_mesh


def log_posed_rgbd_data_to_rerun(
    image, depthmap, pose, intrinsics, base_name, mask=None
):
    """
    Send one posed RGB-D frame to the Rerun viewer.

    Entities are logged under ``base_name``: the camera transform at
    ``base_name`` itself, the pinhole model at ``base_name/pinhole`` and the
    image data (rgb, depth and, if given, depth_mask) beneath the pinhole.

    Parameters
    ----------
    image : numpy.ndarray
        RGB image; its first two dimensions are used as height and width
    depthmap : numpy.ndarray
        Depth map logged as a Rerun depth image
    pose : numpy.ndarray
        4x4 camera pose matrix; the upper-left 3x3 block is used as rotation
        and the first three rows of the last column as translation
    intrinsics : numpy.ndarray
        Camera intrinsic matrix passed to the pinhole model
    base_name : str
        Root entity path for everything logged by this call
    mask : numpy.ndarray, optional
        Segmentation mask logged next to the depth image when provided
    """
    img_height = image.shape[0]
    img_width = image.shape[1]

    # Camera extrinsics live on the root entity
    camera_transform = rr.Transform3D(
        translation=pose[:3, 3],
        mat3x3=pose[:3, :3],
    )
    rr.log(base_name, camera_transform)

    # Camera intrinsics; the camera frame is declared as Right-Down-Forward
    pinhole_path = f"{base_name}/pinhole"
    camera_model = rr.Pinhole(
        image_from_camera=intrinsics,
        height=img_height,
        width=img_width,
        camera_xyz=rr.ViewCoordinates.RDF,
    )
    rr.log(pinhole_path, camera_model)

    # Per-pixel data is attached below the pinhole entity
    rr.log(f"{pinhole_path}/rgb", rr.Image(image))
    rr.log(f"{pinhole_path}/depth", rr.DepthImage(depthmap))

    if mask is None:
        return
    rr.log(f"{pinhole_path}/depth_mask", rr.SegmentationImage(mask))


def str2bool(v):
    """
    Convert a command-line truth value to a bool.

    Accepts the same case-insensitive spellings as
    ``distutils.util.strtobool``: ``y, yes, t, true, on, 1`` map to True and
    ``n, no, f, false, off, 0`` map to False. The parsing is done locally so
    that this function does not call into ``distutils``, which was removed
    from the standard library in Python 3.12.

    Parameters
    ----------
    v : str or bool
        Value to interpret. Real booleans are returned unchanged.

    Returns
    -------
    bool
        The parsed truth value.

    Raises
    ------
    ValueError
        If `v` is a string that is not one of the recognised spellings
        (argparse turns this into a normal usage error).
    """
    if isinstance(v, bool):
        return v

    value = v.lower()
    if value in ("y", "yes", "t", "true", "on", "1"):
        return True
    if value in ("n", "no", "f", "false", "off", "0"):
        return False
    raise ValueError("invalid truth value %r" % (value,))


def script_add_rerun_args(parser: ArgumentParser) -> None:
    """
    Register the common Rerun command-line options on `parser`.

    Change Log from https://github.com/rerun-io/rerun/blob/29eb8954b08e59ff96943dc0677f46f7ea4ea734/rerun_py/rerun_sdk/rerun/script_helpers.py#L65:
        - Added default portforwarding url for ease of use
        - Update parser types

    Parameters
    ----------
    parser : ArgumentParser
        The parser that receives the ``--headless``, ``--connect``,
        ``--serve``, ``--url``, ``--save`` and ``-o/--stdout`` options.

    Returns
    -------
    None
    """
    # The three boolean toggles take an optional value: a bare flag means
    # True (const), an explicit value is parsed by str2bool.
    toggle_options = {"type": str2bool, "nargs": "?", "const": True}

    parser.add_argument(
        "--headless",
        default=True,
        help="Don't show GUI",
        **toggle_options,
    )
    parser.add_argument(
        "--connect",
        dest="connect",
        default=True,
        help="Connect to an external viewer",
        **toggle_options,
    )
    parser.add_argument(
        "--serve",
        dest="serve",
        default=False,
        help="Serve a web viewer (WARNING: experimental feature)",
        **toggle_options,
    )

    # Plain string options
    parser.add_argument(
        "--url",
        type=str,
        default="rerun+http://127.0.0.1:2004/proxy",
        help="Connect to this HTTP(S) URL",
    )
    parser.add_argument(
        "--save",
        type=str,
        default=None,
        help="Save data to a .rrd file at this path",
    )

    # Simple on/off switch without a value
    parser.add_argument(
        "-o",
        "--stdout",
        dest="stdout",
        action="store_true",
        help="Log data to standard output, to be piped into a Rerun Viewer",
    )


def init_rerun_args(
    headless=True,
    connect=True,
    serve=False,
    url="rerun+http://127.0.0.1:2004/proxy",
    save=None,
    stdout=False,
) -> Namespace:
    """
    Build the Rerun argument namespace without going through a parser.

    Parameters
    ----------
    headless : bool, optional
        Don't show GUI, by default True
    connect : bool, optional
        Connect to an external viewer, by default True
    serve : bool, optional
        Serve a web viewer (WARNING: experimental feature), by default False
    url : str, optional
        Connect to this HTTP(S) URL, by default rerun+http://127.0.0.1:2004/proxy
    save : str, optional
        Save data to a .rrd file at this path, by default None
    stdout : bool, optional
        Log data to standard output, to be piped into a Rerun Viewer, by default False

    Returns
    -------
    Namespace
        Namespace carrying the given values under the attributes
        ``headless``, ``connect``, ``serve``, ``url``, ``save`` and ``stdout``.
    """
    return Namespace(
        headless=headless,
        connect=connect,
        serve=serve,
        url=url,
        save=save,
        stdout=stdout,
    )


def predictions_to_glb(
    predictions,
    as_mesh=True,
) -> trimesh.Scene:
    """
    Converts predictions to a 3D scene represented as a GLB file.

    The input arrays in `predictions` are not modified.

    Args:
        predictions (dict): Dictionary containing model predictions with keys:
            - world_points: 3D point coordinates (V, H, W, 3)
            - images: Input images, either (V, H, W, 3) or (V, 3, H, W).
              Values are scaled by 255 to obtain 8-bit colors, so they are
              presumably expected in [0, 1] - confirm against the caller.
            - final_masks: Validity masks (V, H, W)
        as_mesh (bool): Represent the data as a mesh instead of point cloud (default: True)

    Returns:
        trimesh.Scene: Scene holding one mesh per frame (as_mesh=True) or a
            single point cloud (as_mesh=False), rotated 180 degrees around
            the X-axis.

    Raises:
        ValueError: If `predictions` is not a dictionary
        KeyError: If a required key is missing from `predictions`
    """
    if not isinstance(predictions, dict):
        raise ValueError("predictions must be a dictionary")

    # Get the world frame points and images from the predictions
    pred_world_points = predictions["world_points"]
    images = predictions["images"]

    # A 4D array whose second axis has length 3 is treated as NCHW,
    # anything else is assumed to be NHWC already
    is_channels_first = images.ndim == 4 and images.shape[1] == 3

    # Initialize a 3D scene
    scene_3d = trimesh.Scene()

    if as_mesh:
        # image_mesh is fed points with the Y axis negated; the returned
        # vertices are flipped back afterwards
        flip_y = np.array([1, -1, 1], dtype=np.float32)

        # Create a separate mesh for each frame
        for frame_idx in range(pred_world_points.shape[0]):
            frame_points = pred_world_points[frame_idx]
            frame_final_mask = predictions["final_masks"][frame_idx]

            if is_channels_first:
                frame_image = np.transpose(images[frame_idx], (1, 2, 0))
            else:
                frame_image = images[frame_idx]

            # frame_image is a view into predictions["images"]; take a float
            # copy so the caller's data is never modified (an in-place
            # rescale here would corrupt the images for any later use).
            frame_colors = np.array(
                frame_image,
                dtype=np.result_type(frame_image.dtype, np.float32),
                copy=True,
            )

            faces, vertices, vertex_colors = image_mesh(
                frame_points * flip_y,
                frame_colors,
                mask=frame_final_mask,
                tri=True,
                return_indices=False,
            )
            vertices = vertices * flip_y

            # process=False keeps the vertices/faces exactly as produced
            frame_mesh = trimesh.Trimesh(
                vertices=vertices,
                faces=faces,
                vertex_colors=(vertex_colors * 255).astype(np.uint8),
                process=False,
            )
            scene_3d.add_geometry(frame_mesh)
    else:
        # Flatten all frames into one list of points and 8-bit colors
        vertices_3d = pred_world_points.reshape(-1, 3)
        if is_channels_first:
            colors_rgb = np.transpose(images, (0, 2, 3, 1))
        else:
            colors_rgb = images
        colors_rgb = (colors_rgb.reshape(-1, 3) * 255).astype(np.uint8)

        # Keep only the points selected by the masks
        final_masks = predictions["final_masks"].reshape(-1)
        vertices_3d = vertices_3d[final_masks].copy()
        colors_rgb = colors_rgb[final_masks].copy()
        point_cloud_data = trimesh.PointCloud(vertices=vertices_3d, colors=colors_rgb)
        scene_3d.add_geometry(point_cloud_data)

    # Apply 180° rotation around X-axis to fix orientation (upside-down issue)
    rotation_matrix_x = trimesh.transformations.rotation_matrix(np.pi, [1, 0, 0])
    scene_3d.apply_transform(rotation_matrix_x)

    return scene_3d