# File size: 12,784 Bytes

# 663494c

import os
import numpy as np
import cv2
import mmcv
from mmdet.datasets.builder import PIPELINES
from mmdet3d.datasets.pipelines import LoadAnnotations3D

@PIPELINES.register_module()
class LoadMultiViewImageFromFilesInCeph(object):
    """Load multi-view images from a list of separate image files.

    Expects results['img_filename'] to be a list of filenames. Images are
    read with ``mmcv.imread`` when the ``disk`` backend is configured and
    through ``mmcv.FileClient.get`` + ``mmcv.imfrombytes`` otherwise.

    Args:
        to_float32 (bool): Whether to convert the img to float32.
            Defaults to False.
        color_type (str): Color type of the file. Only applied when reading
            with the ``disk`` backend. Defaults to 'unchanged'.
        file_client_args (dict): Keyword arguments used to build the
            ``mmcv.FileClient``. Defaults to ``dict(backend="disk")``.
        img_root (str): Directory joined in front of filenames that are
            neither absolute nor already contain ``img_root``.
            Defaults to "".
    """

    def __init__(
        self,
        to_float32=False,
        color_type="unchanged",
        file_client_args=dict(backend="disk"),
        img_root="",
    ):
        self.to_float32 = to_float32
        self.color_type = color_type
        # Keep a private copy of the client configuration.
        self.file_client_args = file_client_args.copy()
        self.file_client = mmcv.FileClient(**self.file_client_args)
        self.img_root = img_root

    def _read_image(self, img_path):
        """Read a single image from ``img_path`` using the configured backend.

        Args:
            img_path (str): Fully resolved path (or key) of the image.

        Returns:
            np.ndarray: The decoded image.
        """
        if self.file_client_args.get("backend") == "disk":
            return mmcv.imread(img_path, self.color_type)
        # Every non-disk backend (petrel, ceph, ...) is served by the file
        # client, so `img` can never be left unassigned for a valid client.
        img_bytes = self.file_client.get(img_path)
        return mmcv.imfrombytes(img_bytes)

    def __call__(self, results):
        """Call function to load multi-view image from files.

        Args:
            results (dict): Result dict containing multi-view image filenames
                under the key ``img_filename``.

        Returns:
            dict: The result dict containing the multi-view image data. \
                Added keys and values are described below.

                - filename (list of str): Multi-view image filenames.
                - img (list of np.ndarray): One image array per view.
                - img_shape (tuple[int]): Shape of the stacked image array.
                - ori_shape (tuple[int]): Shape of the stacked image array.
                - pad_shape (tuple[int]): Shape of the stacked image array.
                - scale_factor (float): Scale factor, always 1.0.
                - img_norm_cfg (dict): Normalization configuration of images.
        """
        filename = results["img_filename"]
        images_multiView = []
        for img_path in filename:
            # Absolute paths, and paths that already contain img_root, are
            # used as they are; everything else is placed under img_root.
            if not str(img_path).startswith('/') and (self.img_root not in str(img_path)):
                img_path = os.path.join(self.img_root, img_path)
            images_multiView.append(self._read_image(img_path))

        # Views are stacked on a new last axis, i.e. (h, w, c, num_views)
        # for color images.
        img = np.stack(images_multiView, axis=-1)
        if self.to_float32:
            img = img.astype(np.float32)
        results["filename"] = filename
        # unravel to list, see `DefaultFormatBundle` in formating.py
        # which will transpose each image separately and then stack into array
        results["img"] = [img[..., i] for i in range(img.shape[-1])]
        results["img_shape"] = img.shape
        results["ori_shape"] = img.shape
        # Set initial values for default meta_keys
        results["pad_shape"] = img.shape
        results["scale_factor"] = 1.0
        num_channels = 1 if len(img.shape) < 3 else img.shape[2]
        results["img_norm_cfg"] = dict(
            mean=np.zeros(num_channels, dtype=np.float32),
            std=np.ones(num_channels, dtype=np.float32),
            to_rgb=False,
        )
        return results

    def __repr__(self):
        """str: Return a string that describes the module."""
        repr_str = self.__class__.__name__
        repr_str += f"(to_float32={self.to_float32}, "
        repr_str += f"color_type='{self.color_type}')"
        return repr_str

@PIPELINES.register_module()
class LoadMultiViewImageFromFilesWithDownsample(object):
    """Load multi-view images from separate files, downsampling at decode time.

    Expects results['img_filename'] to be a list of filenames. The
    downsampling is done by OpenCV while decoding (``IMREAD_REDUCED_COLOR_*``
    flags), and the projection matrices in ``results`` are rescaled to match.

    Args:
        to_float32 (bool): Whether to convert the img to float32.
            Defaults to False.
        img_root (str): The root directory of the images. Joined in front of
            filenames that are neither absolute nor already contain it.
            Defaults to "".
        downsample_factor (int): The factor to downsample the image.
            Must be 1, 2 or 4. Defaults to 1.

    Raises:
        ValueError: If ``downsample_factor`` is not one of 1, 2 or 4.
    """

    def __init__(
        self,
        to_float32=False,
        img_root="",
        downsample_factor=1,
    ):
        self.to_float32 = to_float32
        self.img_root = img_root
        self.downsample_factor = downsample_factor
        # Map the factor onto the OpenCV decode flag that performs it.
        if downsample_factor == 1:
            self.flag = cv2.IMREAD_UNCHANGED
        elif downsample_factor == 2:
            self.flag = cv2.IMREAD_REDUCED_COLOR_2
        elif downsample_factor == 4:
            self.flag = cv2.IMREAD_REDUCED_COLOR_4
        else:
            raise ValueError(f"Invalid downsample factor: {downsample_factor}")

    def imread(self, img_path) -> np.ndarray:
        """Read the file at ``img_path`` and decode it with ``self.flag``.

        Args:
            img_path (str): Path of the image file on local disk.

        Returns:
            np.ndarray: The decoded image, as returned by ``cv2.imdecode``.
        """
        with open(img_path, 'rb') as f:
            value_buf = f.read()
        img_np = np.frombuffer(value_buf, np.uint8)
        img = cv2.imdecode(img_np, self.flag)
        return img

    def __call__(self, results):
        """Call function to load multi-view image from files.

        Args:
            results (dict): Result dict containing multi-view image filenames
                under ``img_filename``. When ``downsample_factor != 1`` it
                must also contain ``lidar2img`` and ``cam_intrinsic``.

        Returns:
            dict: The result dict containing the multi-view image data. \
                Added keys and values are described below.

                - filename (list of str): Multi-view image filenames.
                - img (list of np.ndarray): One image array per view.
                - img_shape (tuple[int]): Shape of the stacked image array.
                - ori_shape (tuple[int]): ``img_shape`` with height and
                  width multiplied by ``downsample_factor``.
                - pad_shape (tuple[int]): Shape of the stacked image array.
                - scale_factor (float): ``1 / downsample_factor``.
                - img_norm_cfg (dict): Normalization configuration of images.

        Raises:
            IOError: If an image file cannot be decoded.
        """
        images_multiView = []
        filenames = results["img_filename"]

        for img_path in filenames:
            # Absolute paths, and paths that already contain img_root, are
            # used as they are; everything else is placed under img_root.
            if not str(img_path).startswith('/') and (self.img_root not in str(img_path)):
                img_path = os.path.join(self.img_root, img_path)

            img = self.imread(img_path)
            if img is None:
                # cv2.imdecode signals failure by returning None; fail here
                # with the offending path instead of deep inside np.stack.
                raise IOError(f"Failed to decode image: {img_path}")
            images_multiView.append(img)

        # Views are stacked on a new last axis, i.e. (h, w, c, num_views)
        # for color images.
        img = np.stack(
            images_multiView,
            axis=-1,
        )
        if self.to_float32:
            img = img.astype(np.float32)
        results["filename"] = filenames
        # unravel to list, see `DefaultFormatBundle` in formating.py
        # which will transpose each image separately and then stack into array
        results["img"] = [img[..., i] for i in range(img.shape[-1])]
        results["img_shape"] = img.shape
        # Scale height/width back up and keep whatever trailing dims exist,
        # so a stack without a channel axis does not raise an IndexError.
        results["ori_shape"] = (
            int(img.shape[0] * self.downsample_factor),
            int(img.shape[1] * self.downsample_factor),
        ) + tuple(img.shape[2:])
        # Set initial values for default meta_keys
        results["pad_shape"] = img.shape
        scale = 1.0 / self.downsample_factor
        results["scale_factor"] = scale

        if self.downsample_factor != 1:
            # Scale the first two rows of the projection matrices so that
            # projected pixel coordinates match the downsampled images.
            # NOTE(review): assumes lidar2img / cam_intrinsic entries are 4x4
            # (required by the matmul with a 4x4 matrix) - confirm upstream.
            scale_matrix = np.eye(4)
            scale_matrix[0, 0] *= scale
            scale_matrix[1, 1] *= scale
            results["lidar2img"] = [scale_matrix @ l2i for l2i in results["lidar2img"]]
            results["cam_intrinsic"] = [scale_matrix @ cam_intrinsic for cam_intrinsic in results["cam_intrinsic"]]

        num_channels = 1 if len(img.shape) < 3 else img.shape[2]
        results["img_norm_cfg"] = dict(
            mean=np.zeros(num_channels, dtype=np.float32),
            std=np.ones(num_channels, dtype=np.float32),
            to_rgb=False,
        )
        return results

    def __repr__(self):
        """str: Return a string that describes the module."""
        # Only report attributes this class actually defines (it has no
        # `color_type`, so referencing it would raise AttributeError).
        repr_str = self.__class__.__name__
        repr_str += f"(to_float32={self.to_float32}, "
        repr_str += f"img_root='{self.img_root}', "
        repr_str += f"downsample_factor={self.downsample_factor})"
        return repr_str


@PIPELINES.register_module()
class LoadAnnotations3D_E2E(LoadAnnotations3D):
    """Load Annotations3D, plus optional future-frame and instance-id fields.

    Extends :class:`LoadAnnotations3D`: after the parent has loaded its
    fields, this transform can additionally copy future-frame annotations
    and per-instance indices into ``results``.

    Args:
        with_future_anns (bool, optional): Whether to load the annotations
            stored in ``results['occ_future_ann_infos']``.
            Defaults to False.
        with_ins_inds_3d (bool, optional): Whether to move
            ``results['ann_info']['gt_inds']`` to ``results['gt_inds']``.
            Defaults to False.
        ins_inds_add_1 (bool, optional): Whether to add 1 to the instance
            indices so that they start from 1 instead of 0.
            Defaults to False.
        with_bbox_3d (bool, optional): Whether to load 3D boxes.
            Defaults to True.
        with_label_3d (bool, optional): Whether to load 3D labels.
            Defaults to True.
        with_attr_label (bool, optional): Whether to load attribute label.
            Defaults to False.
        with_mask_3d (bool, optional): Whether to load 3D instance masks.
            for points. Defaults to False.
        with_seg_3d (bool, optional): Whether to load 3D semantic masks.
            for points. Defaults to False.
        with_bbox (bool, optional): Whether to load 2D boxes.
            Defaults to False.
        with_label (bool, optional): Whether to load 2D labels.
            Defaults to False.
        with_mask (bool, optional): Whether to load 2D instance masks.
            Defaults to False.
        with_seg (bool, optional): Whether to load 2D semantic masks.
            Defaults to False.
        with_bbox_depth (bool, optional): Whether to load 2.5D boxes.
            Defaults to False.
        poly2mask (bool, optional): Whether to convert polygon annotations
            to bitmasks. Defaults to True.
        seg_3d_dtype (dtype, optional): Dtype of 3D semantic masks.
            Defaults to int64
        file_client_args (dict): Config dict of file clients, refer to
            https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
            for more details.
    """

    def __init__(
        self,
        with_future_anns=False,
        with_ins_inds_3d=False,
        ins_inds_add_1=False,  # NOTE: make ins_inds start from 1, not 0
        **kwargs,
    ):
        # All remaining keyword arguments are forwarded to LoadAnnotations3D.
        super().__init__(**kwargs)
        self.with_future_anns = with_future_anns
        self.with_ins_inds_3d = with_ins_inds_3d

        self.ins_inds_add_1 = ins_inds_add_1

    def _load_future_anns(self, results):
        """Private function to load future-frame annotations.

        Reads ``results['occ_future_ann_infos']`` and writes one entry per
        frame into the ``future_gt_*`` lists; frames whose annotation info
        is ``None`` get ``None`` in every list.

        Args:
            results (dict): Result dict from :obj:`mmdet3d.CustomDataset`.

        Returns:
            dict: The dict with ``future_gt_bboxes_3d``,
                ``future_gt_labels_3d``, ``future_gt_inds`` and
                ``future_gt_vis_tokens`` added.
        """

        gt_bboxes_3d = []
        gt_labels_3d = []
        gt_inds_3d = []
        gt_vis_tokens = []

        for ann_info in results["occ_future_ann_infos"]:
            if ann_info is None:
                # invalid frame: keep the lists aligned with placeholders
                gt_bboxes_3d.append(None)
                gt_labels_3d.append(None)
                gt_inds_3d.append(None)
                gt_vis_tokens.append(None)
                continue

            gt_bboxes_3d.append(ann_info["gt_bboxes_3d"])
            gt_labels_3d.append(ann_info["gt_labels_3d"])

            ann_gt_inds = ann_info["gt_inds"]
            if self.ins_inds_add_1:
                # Build a new array instead of `+=` so the caller's
                # ann_info["gt_inds"] is not shifted in place.
                # NOTE: sdc query is changed from -10 -> -9
                ann_gt_inds = ann_gt_inds + 1
            gt_inds_3d.append(ann_gt_inds)

            gt_vis_tokens.append(ann_info["gt_vis_tokens"])

        results["future_gt_bboxes_3d"] = gt_bboxes_3d
        # 'future_bbox3d_fields' is only used for augmentations, so it is
        # intentionally not populated here.
        results["future_gt_labels_3d"] = gt_labels_3d
        results["future_gt_inds"] = gt_inds_3d
        results["future_gt_vis_tokens"] = gt_vis_tokens

        return results

    def _load_ins_inds_3d(self, results):
        """Private function to move instance indices to the top level.

        Args:
            results (dict): Result dict whose ``ann_info`` holds ``gt_inds``.

        Returns:
            dict: The dict with ``gt_inds`` added and removed from
                ``ann_info``.
        """
        # NOTE: pop to avoid gt_inds being generated twice; copy so the
        # optional `+= 1` below does not touch the array held by the caller.
        ann_gt_inds = results["ann_info"].pop("gt_inds").copy()

        if self.ins_inds_add_1:
            ann_gt_inds += 1
        results["gt_inds"] = ann_gt_inds
        return results

    def __call__(self, results):
        """Run the parent loader, then the optional extra loaders.

        Args:
            results (dict): Result dict from :obj:`mmdet3d.CustomDataset`.

        Returns:
            dict: The dict containing the loaded annotations.
        """
        results = super().__call__(results)

        if self.with_future_anns:
            results = self._load_future_anns(results)
        if self.with_ins_inds_3d:
            results = self._load_ins_inds_3d(results)

        # Generate ann for plan
        # NOTE(review): `_load_future_anns_plan` is not defined in the part
        # of this class visible here; confirm it is provided elsewhere,
        # otherwise this branch raises AttributeError.
        if "occ_future_ann_infos_for_plan" in results:
            results = self._load_future_anns_plan(results)

        return results

    def __repr__(self):
        """str: Return a string that describes the module."""
        repr_str = super().__repr__()
        indent_str = "    "
        repr_str += f"{indent_str}with_future_anns={self.with_future_anns}, "
        repr_str += f"{indent_str}with_ins_inds_3d={self.with_ins_inds_3d}, "

        return repr_str