| """ |
| Preprocessing Script for ScanNet 20/200 |
| |
| Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) |
| Please cite our work if the code is helpful to you. |
| """ |
|
|
| import warnings |
|
|
| warnings.filterwarnings("ignore", category=DeprecationWarning) |
|
|
| import os |
|
|
| os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" |
| os.environ["CUDA_VISIBLE_DEVICES"] = "-1" |
|
|
| import argparse |
| import numpy as np |
| import tensorflow.compat.v1 as tf |
| from pathlib import Path |
| from waymo_open_dataset.utils import frame_utils |
| from waymo_open_dataset.utils import transform_utils |
| from waymo_open_dataset.utils import range_image_utils |
| from waymo_open_dataset import dataset_pb2 as open_dataset |
| import glob |
| import multiprocessing as mp |
| from concurrent.futures import ProcessPoolExecutor |
| from itertools import repeat |
|
|
|
|
| def create_lidar(frame): |
| """Parse and save the lidar data in psd format. |
| Args: |
| frame (:obj:`Frame`): Open dataset frame proto. |
| """ |
| ( |
| range_images, |
| camera_projections, |
| segmentation_labels, |
| range_image_top_pose, |
| ) = frame_utils.parse_range_image_and_camera_projection(frame) |
|
|
| points, cp_points, valid_masks = convert_range_image_to_point_cloud( |
| frame, |
| range_images, |
| camera_projections, |
| range_image_top_pose, |
| keep_polar_features=True, |
| ) |
| points_ri2, cp_points_ri2, valid_masks_ri2 = convert_range_image_to_point_cloud( |
| frame, |
| range_images, |
| camera_projections, |
| range_image_top_pose, |
| ri_index=1, |
| keep_polar_features=True, |
| ) |
|
|
| |
| points_all = np.concatenate(points, axis=0) |
| points_all_ri2 = np.concatenate(points_ri2, axis=0) |
| |
|
|
| points_all = np.concatenate([points_all, points_all_ri2], axis=0) |
|
|
| velodyne = np.c_[points_all[:, 3:6], points_all[:, 1]] |
| velodyne = velodyne.reshape((velodyne.shape[0] * velodyne.shape[1])) |
|
|
| valid_masks = [valid_masks, valid_masks_ri2] |
| return velodyne, valid_masks |
|
|
|
|
| def create_label(frame): |
| ( |
| range_images, |
| camera_projections, |
| segmentation_labels, |
| range_image_top_pose, |
| ) = frame_utils.parse_range_image_and_camera_projection(frame) |
|
|
| point_labels = convert_range_image_to_point_cloud_labels( |
| frame, range_images, segmentation_labels |
| ) |
| point_labels_ri2 = convert_range_image_to_point_cloud_labels( |
| frame, range_images, segmentation_labels, ri_index=1 |
| ) |
|
|
| |
| point_labels_all = np.concatenate(point_labels, axis=0) |
| point_labels_all_ri2 = np.concatenate(point_labels_ri2, axis=0) |
| point_labels_all = np.concatenate([point_labels_all, point_labels_all_ri2], axis=0) |
|
|
| labels = point_labels_all |
| return labels |
|
|
|
|
| def convert_range_image_to_cartesian( |
| frame, range_images, range_image_top_pose, ri_index=0, keep_polar_features=False |
| ): |
| """Convert range images from polar coordinates to Cartesian coordinates. |
| |
| Args: |
| frame: open dataset frame |
| range_images: A dict of {laser_name, [range_image_first_return, |
| range_image_second_return]}. |
| range_image_top_pose: range image pixel pose for top lidar. |
| ri_index: 0 for the first return, 1 for the second return. |
| keep_polar_features: If true, keep the features from the polar range image |
| (i.e. range, intensity, and elongation) as the first features in the |
| output range image. |
| |
| Returns: |
| dict of {laser_name, (H, W, D)} range images in Cartesian coordinates. D |
| will be 3 if keep_polar_features is False (x, y, z) and 6 if |
| keep_polar_features is True (range, intensity, elongation, x, y, z). |
| """ |
| cartesian_range_images = {} |
| frame_pose = tf.convert_to_tensor( |
| value=np.reshape(np.array(frame.pose.transform), [4, 4]) |
| ) |
|
|
| |
| range_image_top_pose_tensor = tf.reshape( |
| tf.convert_to_tensor(value=range_image_top_pose.data), |
| range_image_top_pose.shape.dims, |
| ) |
| |
| range_image_top_pose_tensor_rotation = transform_utils.get_rotation_matrix( |
| range_image_top_pose_tensor[..., 0], |
| range_image_top_pose_tensor[..., 1], |
| range_image_top_pose_tensor[..., 2], |
| ) |
| range_image_top_pose_tensor_translation = range_image_top_pose_tensor[..., 3:] |
| range_image_top_pose_tensor = transform_utils.get_transform( |
| range_image_top_pose_tensor_rotation, range_image_top_pose_tensor_translation |
| ) |
|
|
| for c in frame.context.laser_calibrations: |
| range_image = range_images[c.name][ri_index] |
| if len(c.beam_inclinations) == 0: |
| beam_inclinations = range_image_utils.compute_inclination( |
| tf.constant([c.beam_inclination_min, c.beam_inclination_max]), |
| height=range_image.shape.dims[0], |
| ) |
| else: |
| beam_inclinations = tf.constant(c.beam_inclinations) |
|
|
| beam_inclinations = tf.reverse(beam_inclinations, axis=[-1]) |
| extrinsic = np.reshape(np.array(c.extrinsic.transform), [4, 4]) |
|
|
| range_image_tensor = tf.reshape( |
| tf.convert_to_tensor(value=range_image.data), range_image.shape.dims |
| ) |
| pixel_pose_local = None |
| frame_pose_local = None |
| if c.name == open_dataset.LaserName.TOP: |
| pixel_pose_local = range_image_top_pose_tensor |
| pixel_pose_local = tf.expand_dims(pixel_pose_local, axis=0) |
| frame_pose_local = tf.expand_dims(frame_pose, axis=0) |
| range_image_cartesian = range_image_utils.extract_point_cloud_from_range_image( |
| tf.expand_dims(range_image_tensor[..., 0], axis=0), |
| tf.expand_dims(extrinsic, axis=0), |
| tf.expand_dims(tf.convert_to_tensor(value=beam_inclinations), axis=0), |
| pixel_pose=pixel_pose_local, |
| frame_pose=frame_pose_local, |
| ) |
|
|
| range_image_cartesian = tf.squeeze(range_image_cartesian, axis=0) |
|
|
| if keep_polar_features: |
| |
| |
| |
| range_image_cartesian = tf.concat( |
| [range_image_tensor[..., 0:3], range_image_cartesian], axis=-1 |
| ) |
|
|
| cartesian_range_images[c.name] = range_image_cartesian |
|
|
| return cartesian_range_images |
|
|
|
|
| def convert_range_image_to_point_cloud( |
| frame, |
| range_images, |
| camera_projections, |
| range_image_top_pose, |
| ri_index=0, |
| keep_polar_features=False, |
| ): |
| """Convert range images to point cloud. |
| |
| Args: |
| frame: open dataset frame |
| range_images: A dict of {laser_name, [range_image_first_return, |
| range_image_second_return]}. |
| camera_projections: A dict of {laser_name, |
| [camera_projection_from_first_return, |
| camera_projection_from_second_return]}. |
| range_image_top_pose: range image pixel pose for top lidar. |
| ri_index: 0 for the first return, 1 for the second return. |
| keep_polar_features: If true, keep the features from the polar range image |
| (i.e. range, intensity, and elongation) as the first features in the |
| output range image. |
| |
| Returns: |
| points: {[N, 3]} list of 3d lidar points of length 5 (number of lidars). |
| (NOTE: Will be {[N, 6]} if keep_polar_features is true. |
| cp_points: {[N, 6]} list of camera projections of length 5 |
| (number of lidars). |
| """ |
| calibrations = sorted(frame.context.laser_calibrations, key=lambda c: c.name) |
| points = [] |
| cp_points = [] |
| valid_masks = [] |
|
|
| cartesian_range_images = convert_range_image_to_cartesian( |
| frame, range_images, range_image_top_pose, ri_index, keep_polar_features |
| ) |
|
|
| for c in calibrations: |
| range_image = range_images[c.name][ri_index] |
| range_image_tensor = tf.reshape( |
| tf.convert_to_tensor(value=range_image.data), range_image.shape.dims |
| ) |
| range_image_mask = range_image_tensor[..., 0] > 0 |
|
|
| range_image_cartesian = cartesian_range_images[c.name] |
| points_tensor = tf.gather_nd( |
| range_image_cartesian, tf.compat.v1.where(range_image_mask) |
| ) |
|
|
| cp = camera_projections[c.name][ri_index] |
| cp_tensor = tf.reshape(tf.convert_to_tensor(value=cp.data), cp.shape.dims) |
| cp_points_tensor = tf.gather_nd(cp_tensor, tf.compat.v1.where(range_image_mask)) |
| points.append(points_tensor.numpy()) |
| cp_points.append(cp_points_tensor.numpy()) |
| valid_masks.append(range_image_mask.numpy()) |
|
|
| return points, cp_points, valid_masks |
|
|
|
|
| def convert_range_image_to_point_cloud_labels( |
| frame, range_images, segmentation_labels, ri_index=0 |
| ): |
| """Convert segmentation labels from range images to point clouds. |
| |
| Args: |
| frame: open dataset frame |
| range_images: A dict of {laser_name, [range_image_first_return, |
| range_image_second_return]}. |
| segmentation_labels: A dict of {laser_name, [range_image_first_return, |
| range_image_second_return]}. |
| ri_index: 0 for the first return, 1 for the second return. |
| |
| Returns: |
| point_labels: {[N, 2]} list of 3d lidar points's segmentation labels. 0 for |
| points that are not labeled. |
| """ |
| calibrations = sorted(frame.context.laser_calibrations, key=lambda c: c.name) |
| point_labels = [] |
| for c in calibrations: |
| range_image = range_images[c.name][ri_index] |
| range_image_tensor = tf.reshape( |
| tf.convert_to_tensor(range_image.data), range_image.shape.dims |
| ) |
| range_image_mask = range_image_tensor[..., 0] > 0 |
|
|
| if c.name in segmentation_labels: |
| sl = segmentation_labels[c.name][ri_index] |
| sl_tensor = tf.reshape(tf.convert_to_tensor(sl.data), sl.shape.dims) |
| sl_points_tensor = tf.gather_nd(sl_tensor, tf.where(range_image_mask)) |
| else: |
| num_valid_point = tf.math.reduce_sum(tf.cast(range_image_mask, tf.int32)) |
| sl_points_tensor = tf.zeros([num_valid_point, 2], dtype=tf.int32) |
|
|
| point_labels.append(sl_points_tensor.numpy()) |
| return point_labels |
|
|
|
|
| def handle_process(file_path, output_root, test_frame_list): |
| file = os.path.basename(file_path) |
| split = os.path.basename(os.path.dirname(file_path)) |
| print(f"Parsing {split}/{file}") |
| save_path = Path(output_root) / split / file.split(".")[0] |
|
|
| data_group = tf.data.TFRecordDataset(file_path, compression_type="") |
| for data in data_group: |
| frame = open_dataset.Frame() |
| frame.ParseFromString(bytearray(data.numpy())) |
| context_name = frame.context.name |
| timestamp = str(frame.timestamp_micros) |
|
|
| if split != "testing": |
| |
| if not frame.lasers[0].ri_return1.segmentation_label_compressed: |
| continue |
| else: |
| |
| if f"{context_name},{timestamp}" not in test_frame_list: |
| continue |
|
|
| os.makedirs(save_path / timestamp, exist_ok=True) |
|
|
| |
| point_cloud, valid_masks = create_lidar(frame) |
| point_cloud = point_cloud.reshape(-1, 4) |
| coord = point_cloud[:, :3] |
| strength = np.tanh(point_cloud[:, -1].reshape([-1, 1])) |
| pose = np.array(frame.pose.transform, np.float32).reshape(4, 4) |
| mask = np.array(valid_masks, dtype=object) |
|
|
| np.save(save_path / timestamp / "coord.npy", coord) |
| np.save(save_path / timestamp / "strength.npy", strength) |
| np.save(save_path / timestamp / "pose.npy", pose) |
|
|
| |
| if split != "training": |
| np.save(save_path / timestamp / "mask.npy", mask) |
|
|
| |
| if split != "testing": |
| |
| label = create_label(frame)[:, 1].reshape([-1]) - 1 |
| np.save(save_path / timestamp / "segment.npy", label) |
|
|
|
|
| if __name__ == "__main__": |
| parser = argparse.ArgumentParser() |
| parser.add_argument( |
| "--dataset_root", |
| required=True, |
| help="Path to the Waymo dataset", |
| ) |
| parser.add_argument( |
| "--output_root", |
| required=True, |
| help="Output path where train/val folders will be located", |
| ) |
| parser.add_argument( |
| "--splits", |
| required=True, |
| nargs="+", |
| choices=["training", "validation", "testing"], |
| help="Splits need to process ([training, validation, testing]).", |
| ) |
| parser.add_argument( |
| "--num_workers", |
| default=mp.cpu_count(), |
| type=int, |
| help="Num workers for preprocessing.", |
| ) |
| config = parser.parse_args() |
|
|
| |
| file_list = glob.glob( |
| os.path.join(os.path.abspath(config.dataset_root), "*", "*.tfrecord") |
| ) |
| assert len(file_list) == 1150 |
|
|
| |
| for split in config.splits: |
| os.makedirs(os.path.join(config.output_root, split), exist_ok=True) |
|
|
| file_list = [ |
| file |
| for file in file_list |
| if os.path.basename(os.path.dirname(file)) in config.splits |
| ] |
|
|
| |
| test_frame_file = os.path.join( |
| os.path.dirname(__file__), "3d_semseg_test_set_frames.txt" |
| ) |
| test_frame_list = [x.rstrip() for x in (open(test_frame_file, "r").readlines())] |
|
|
| |
| print("Processing scenes...") |
| pool = ProcessPoolExecutor(max_workers=config.num_workers) |
| _ = list( |
| pool.map( |
| handle_process, |
| file_list, |
| repeat(config.output_root), |
| repeat(test_frame_list), |
| ) |
| ) |
|
|