File size: 53,985 Bytes

663494c

import argparse
import glob
import os
import pickle as pkl
from typing import Dict, List

import diffstack_utils.box.sensor as box_sensor
import diffstack_utils.box.transform as box_transform
import diffstack_utils.box.vis as box_vis
import diffstack_utils.carla.category as carla_class
import diffstack_utils.carla.control as carla_control
import diffstack_utils.object.rotation as object_rotation
import diffstack_utils.sensor.calibration as calib_utils
import numpy as np
import pyquaternion
import scipy
import xinshuo_io as io_utils
import xinshuo_miscellaneous as miscell_utils
from diffstack_utils.box.data_structure import Box3D
from nuscenes.eval.common.utils import Quaternion, quaternion_yaw

FPS = 20
FRAME_INTERVAL = 10.0
FPS_KEYFRAME = FPS / FRAME_INTERVAL


def parse_token(anno: Dict, scene_root: str, frame: str) -> Dict:
    """Convert root of the sequence dir and frame of a file to nuScenes-like token and ts

    Returns:
        token: unique identifier for every frame of the data
        timestamp: unique identifier of the frame according to time
    """

    # example: seq -> routes_extensive_RouteScenario_0_Town02_2023_04_13_02_54_17
    # town_index -> 6, route_index -> 45
    seq: str = scene_root.split("/")[-1]
    town_string: str = seq.split("_")[4]
    try:
        town_index = int(scene_root.split("Town")[-1].split("_")[0])
    except ValueError:
        town_index = int(scene_root.split("Town")[-1].split("_")[0][:2])  # Town10HD
    route_index = int(scene_root.split("RouteScenario_")[-1].split("_")[0])

    # we construct the "unique" token by concatenating seq and frame
    token: str = f"{seq}_frame_{frame}"

    # we constrcut the "unique" timestamp by using frames and adding offset for each route
    frame_int: int = int(float(frame))
    max_frame_per_route = 100000
    max_route_per_town = 1000
    timestamp: int = (
        town_index * max_route_per_town + route_index
    ) * max_frame_per_route + frame_int

    # update dictionary
    anno.update(
        {
            "token": token,  # unique str for each frame
            "timestamp": timestamp,
            "scene_token": seq,  # unique str for each sequence
            "frame_idx": frame_int,  # frame ID within this sequence
            "town_string": town_string,  # e.g., Town06
        }
    )

    return anno


def parse_traffic_light_anno(anno: Dict, scene_root: str, frame: str) -> Dict:
    """Parse annotation of traffic light and stop sign and add to dictionary"""

    # load traffic light GT
    traffic_light_path = f"{scene_root}/metadata/traffic_light/{frame}.pkl"
    with open(traffic_light_path, "rb") as f:
        traffic_light_data: Dict = pkl.load(f)

    # update dictionary
    anno["traffic_light"] = {
        "hazard": traffic_light_data["hazard"],
        "in_range": traffic_light_data["in_range"],
        "light_close_in_dist": traffic_light_data["light_close_in_dist"],
        "red_light_in_front": traffic_light_data["red_light_in_front"],
    }

    # initialize data infos
    num_obj: int = traffic_light_data["location"].shape[1]
    anno["traffic_light"].update(
        {
            "gt_boxes": np.zeros((num_obj, 7), dtype="float32"),
            "gt_names": np.zeros((num_obj,), dtype=object),
            "attr_labels": np.asarray([[] for _ in range(num_obj)]),
            "valid_flag": np.ones((num_obj,), dtype="bool"),  # all valid
            "bbox_ego_matrix": np.zeros((num_obj, 4, 4), dtype="float32"),
        }
    )

    # go through all bbox to fill in database info
    for bbox_idx in range(num_obj):
        # retrieve basic property
        bbox_location: np.ndarray = traffic_light_data["location"][0, bbox_idx]  # (3, )
        bbox_rotation: np.ndarray = traffic_light_data["rotation"][0, bbox_idx]  # (3, )
        bbox_size: np.ndarray = traffic_light_data["size"][0, bbox_idx]  # (3, )
        bbox_state: str = traffic_light_data["states"][bbox_idx]  # (cls, )

        # get box in global coordinate
        bbox2global = calib_utils.create_transform(
            bbox_location, bbox_rotation
        )  # 4 x 4

        # compute bbox center, convert to LiDAR coordinate
        bbox_center = np.array([[0, 0, 0, 1]]).transpose()  # 4 x 1
        bbox_center_world: np.ndarray = bbox2global @ bbox_center  # 4 x 1
        bbox_center_ego = np.linalg.inv(anno["ego2global"]) @ bbox_center_world  # 4 x 1
        bbox_center_lidar = np.linalg.inv(anno["lidar2ego"]) @ bbox_center_ego  # 4 x 1

        # get orientation in radian
        bbox2lidar = np.linalg.inv(anno["lidar2global"]) @ bbox2global  # 4 x 4
        yaw = scipy.spatial.transform.Rotation.from_matrix(bbox2lidar[:3, :3]).as_euler(
            "xyz", degrees=False
        )[-1]

        # get 7dof representation: xyzlwh + yaw, lidar coordinate
        l, w, h = bbox_size.tolist()
        x, y, z = bbox_center_lidar[:3, 0]
        box_7dof = np.array([float(x), float(y), float(z)] + [l, w, h] + [yaw])  # (7, )

        # add data into list
        anno["traffic_light"]["gt_boxes"][bbox_idx] = box_7dof
        anno["traffic_light"]["bbox_ego_matrix"][bbox_idx] = bbox2global
        anno["traffic_light"]["gt_names"][bbox_idx] = bbox_state

    return anno


def parse_stop_sign_anno(anno: Dict, scene_root: str, frame: str) -> Dict:
    """Parse annotation of traffic light and stop sign and add to dictionary"""

    # load stop sign GT
    stop_sign_path = f"{scene_root}/metadata/stop_sign/{frame}.pkl"
    with open(stop_sign_path, "rb") as f:
        stop_sign_data: Dict = pkl.load(f)

    # update dictionary
    anno["stop_sign"] = {
        "hazard": stop_sign_data["hazard"],
        "in_range": stop_sign_data["in_range"],
    }

    # initialize data infos
    num_obj: int = stop_sign_data["location"].shape[1]
    anno["stop_sign"].update(
        {
            "gt_boxes": np.zeros((num_obj, 7), dtype="float32"),
            "gt_names": np.zeros((num_obj,), dtype=object),
            "attr_labels": np.asarray([[] for _ in range(num_obj)]),
            "valid_flag": np.ones((num_obj,), dtype="bool"),  # all valid
            "bbox_ego_matrix": np.zeros((num_obj, 4, 4), dtype="float32"),
        }
    )

    # go through all bbox to fill in database info
    for bbox_idx in range(num_obj):
        # retrieve basic property
        bbox_location: np.ndarray = stop_sign_data["location"][0, bbox_idx]  # (3, )
        bbox_rotation: np.ndarray = stop_sign_data["rotation"][0, bbox_idx]  # (3, )
        bbox_size: np.ndarray = stop_sign_data["size"][0, bbox_idx]  # (3, )
        bbox_state: str = ""

        # get box in global coordinate
        bbox2global = calib_utils.create_transform(
            bbox_location, bbox_rotation
        )  # 4 x 4

        # compute bbox center, convert to LiDAR coordinate
        bbox_center = np.array([[0, 0, 0, 1]]).transpose()  # 4 x 1
        bbox_center_world: np.ndarray = bbox2global @ bbox_center  # 4 x 1
        bbox_center_ego = np.linalg.inv(anno["ego2global"]) @ bbox_center_world  # 4 x 1
        bbox_center_lidar = np.linalg.inv(anno["lidar2ego"]) @ bbox_center_ego  # 4 x 1

        # get orientation in radian
        bbox2lidar = np.linalg.inv(anno["lidar2global"]) @ bbox2global  # 4 x 4
        yaw = scipy.spatial.transform.Rotation.from_matrix(bbox2lidar[:3, :3]).as_euler(
            "xyz", degrees=False
        )[-1]

        # get 7dof representation: xyzlwh + yaw, lidar coordinate
        l, w, h = bbox_size.tolist()
        x, y, z = bbox_center_lidar[:3, 0]
        box_7dof = np.array([float(x), float(y), float(z)] + [l, w, h] + [yaw])  # (7, )

        # add data into list
        anno["stop_sign"]["gt_boxes"][bbox_idx] = box_7dof
        anno["stop_sign"]["bbox_ego_matrix"][bbox_idx] = bbox2global
        anno["stop_sign"]["gt_names"][bbox_idx] = bbox_state

    return anno


def parse_ego_sensor_calib(
    anno: Dict, scene_root: str, frame: str, vis: bool = False
) -> Dict:
    """Parse calibration between world, ego, lidar and camera"""

    save_root = "data/carla/carla_data_0414"

    # load planning data
    planning_path = f"{scene_root}/metadata/planning/{frame}.pkl"
    with open(planning_path, "rb") as handle:
        planning_data: Dict = pkl.load(handle)

    # load calibration raw data
    pose_path = f"{scene_root}/metadata/ego/{frame}.pkl"
    ego2global = np.asarray(calib_utils.load_ego2world_transform(pose_path))  # 4 x 4
    lidar2ego = np.asarray(calib_utils.lidar_to_ego_transform())  # 4 x 4
    lidar2global: np.ndarray = ego2global @ lidar2ego  # 4 x 4
    e2g_r = ego2global[:3, :3]  # 3 x 3

    # print(lidar2ego)
    # print(ego2global)
    # print(lidar2global)
    # zxc

    # print('scene_token', anno['scene_token'])
    # print('frame_idx', anno['frame_idx'])

    # load raw ego data to build can_bus
    can_bus = np.zeros((18), dtype="float32")
    with open(pose_path, "rb") as handle:
        ego_data: Dict = pkl.load(handle)

        # location in different coordinates
        sdc_center_ego = np.array([[0, 0, 0, 1.0]]).transpose()  # 4 x 1
        sdc_center_lidar = np.linalg.inv(lidar2ego) @ sdc_center_ego  # 4 x 1
        sdc_center_global = ego2global[:3, 3]
        can_bus[:3] = sdc_center_global
        # print("sdc_center_ego\n", sdc_center_ego)
        # print("sdc_center_lidar\n", sdc_center_lidar)
        # print("sdc_center_global\n", sdc_center_global)

        # velocity in different coordinates
        sdc_vel_global: np.ndarray = ego_data["velocity"]
        sdc_vel_ego = sdc_vel_global @ np.linalg.inv(e2g_r).T  # ego coordinate
        sdc_vel_lidar: np.ndarray = np.linalg.inv(lidar2ego[:3, :3]) @ sdc_vel_ego  # 3
        can_bus[13:16] = sdc_vel_ego
        # print("sdc_vel_ego\n", sdc_vel_ego)
        # print("sdc_vel_global\n", sdc_vel_global)
        # print("sdc_vel_lidar\n", sdc_vel_lidar)
        # zxc

        # acceleration in ego coordinate
        sdc_acc_global: np.ndarray = ego_data["acceleration"]
        sdc_acc_global[-1] = 9.8  # add gravity
        sdc_acc_ego = sdc_acc_global @ np.linalg.inv(e2g_r).T  # ego coordinate
        sdc_acc_lidar: np.ndarray = np.linalg.inv(lidar2ego[:3, :3]) @ sdc_acc_ego  # 3
        can_bus[7:10] = sdc_acc_ego
        # print("sdc_acc_ego\n", sdc_acc_ego)
        # print("sdc_acc_global\n", sdc_acc_global)
        # print("sdc_acc_lidar\n", sdc_acc_lidar)

        # angular vel in ego coordinate
        sdc_ang_global: np.ndarray = ego_data["angular_velocity"]
        sdc_ang_ego = sdc_ang_global @ np.linalg.inv(e2g_r).T  # ego coordinate
        sdc_ang_lidar: np.ndarray = np.linalg.inv(lidar2ego[:3, :3]) @ sdc_ang_ego  # 3
        can_bus[10:13] = sdc_ang_ego
        # print("sdc_ang_ego\n", sdc_ang_ego)
        # print("sdc_ang_global\n", sdc_ang_global)
        # print("sdc_ang_lidar\n", sdc_ang_lidar)

        # rotation/yaw in different coordinates
        rot_global_quat = Quaternion(
            object_rotation.convert_mat2qua(ego2global[:3, :3])
        ).elements
        can_bus[3:7] = rot_global_quat
        sdc_yaw_global_deg = ego_data["rotation"]  # in degree
        sdc_yaw_global_rad = ego_data["rotation"] / 180 * np.pi  # in radian
        # print('\n')
        # print("sdc_yaw_global_rad", sdc_yaw_global_rad)
        # sdc_yaw_lidar = (
        #     sdc_yaw_global_rad @ np.linalg.inv(lidar2global[:3, :3]).T
        # )  # lidar coordinate
        # bbox2lidar = np.linalg.inv(lidar2global) @ ego2global  # 4 x 4
        yaw_lidar = scipy.spatial.transform.Rotation.from_matrix(
            np.linalg.inv(lidar2ego)[:3, :3]
        ).as_euler("xyz", degrees=False)[-1]
        # print('yaw', yaw)
        # print('yaw_lidar\n', yaw_lidar)
        # zxc

        can_bus[-2] = sdc_yaw_global_rad[1]
        can_bus[-1] = sdc_yaw_global_deg[1]
        # print('sdc_yaw_lidar', sdc_yaw_lidar)
        # print('sdc_yaw_global_rad', sdc_yaw_global_rad)
        # print('sdc_yaw_global_deg', sdc_yaw_global_deg)
        # zxc

        # get ego box in lidar
        sdc_size: np.ndarray = ego_data["size"]  # (3, )
        l, w, h = sdc_size.tolist()

        # sdc's box in lidar coordinate
        gt_sdc_bbox_lidar = np.array(
            [
                sdc_center_lidar[0, 0],
                sdc_center_lidar[1, 0],
                sdc_center_lidar[2, 0],
                l,
                w,
                h,
                yaw_lidar,
                sdc_vel_lidar[0],
                sdc_vel_lidar[1],
                # sdc_vel_lidar[2],
            ]
        )  # 9
        # print('gt_sdc_bbox_lidar\n', gt_sdc_bbox_lidar)

    # load camera calib
    cam_param_all: Dict[str, Dict] = calib_utils.get_camera_param()
    cams: Dict[str, Dict] = {}
    for cam_type in cam_param_all.keys():
        cam_intrinsic = calib_utils.get_camera_intrinsics(
            cam_param_all[cam_type]
        )  # 3 x 3
        sensor2ego = calib_utils.camera_to_ego_transform(
            cam_param_all[cam_type]
        )  # 4 x 4
        sensor2lidar = np.linalg.inv(lidar2ego) @ sensor2ego  # 4 x 4

        # convert absolute path to relative path
        cam_path = f"{scene_root}/image_{cam_type}/{frame}.png"
        if "carla_1.0" in cam_path:
            cam_path = cam_path.split("carla_data_0414/")[-1]
            cam_path = os.path.join(save_root, cam_path)

        # construct nuScenes-like camera record
        cams[cam_type] = {
            "data_path": cam_path,
            "type": cam_type,
            "sample_data_token": "fake_cam_token",
            "cam_intrinsic": cam_intrinsic,
            "sensor2ego_translation": sensor2ego[:3, 3],
            "sensor2ego_rotation": object_rotation.convert_mat2qua(sensor2ego[:3, :3]),
            "sensor2lidar_translation": sensor2lidar[:3, 3],
            # "sensor2lidar_rotation": object_rotation.convert_mat2qua(
            #     sensor2lidar[:3, :3]
            # ),
            "sensor2lidar_rotation": sensor2lidar[:3, :3],
            "timestamp": anno["timestamp"],
            "cam_param": cam_param_all[cam_type],
        }

    # load lidar data, each frame only has half of the lidar data
    # so we load both the current frame and one previous frame
    lidar_cur = f"{scene_root}/lidar/{frame}.npy"
    frame_int: int = int(float(frame))
    pre_frame: str = f"{frame_int - 1:06d}"
    lidar_pre = f"{scene_root}/lidar/{pre_frame}.npy"
    points_cur = np.fromfile(lidar_cur, dtype=np.float32)
    points_cur = points_cur.reshape(-1, 4)
    points_pre = np.fromfile(lidar_pre, dtype=np.float32)
    points_pre = points_pre.reshape(-1, 4)
    points: np.ndarray = np.concatenate((points_cur, points_pre), axis=0)  # N x 4

    # visualize lidar points only, default color ORANGE
    if vis:
        try:
            save_dir = scene_root.replace("/train/", "/vis_train_lidar/")
        except:
            save_dir = scene_root.replace("/val/", "/vis_val_lidar/")
        save_path: str = os.path.join(save_dir, frame + ".jpg")
        box_vis.vis_box_on_lidar(points_pre, save_path)

    # save full-surrounding lidar to disk
    lidar_path = f"{scene_root}/lidar_full/{frame}.npy"
    if "carla_1.0" in lidar_path:
        lidar_path = lidar_path.split("carla_data_0414/")[-1]
        lidar_path = os.path.join(save_root, lidar_path)
    if not io_utils.is_path_exists(lidar_path):
        io_utils.mkdir_if_missing(lidar_path)
        points.tofile(lidar_path)

    # get the map string for the whole sequence
    map_path: str = os.path.join(scene_root, "metadata/hd_map/000000.pkl")

    command = carla_control.roadoption2int(planning_data["local"][0][1])  # int

    # update dictionary
    anno.update(
        {
            "lidar_pts": points,
            "lidar_path": lidar_path,
            "map_path": map_path,
            "sweeps": [],
            "cams": cams,
            "lidar2ego_translation": lidar2ego[:3, 3],
            "lidar2ego_rotation": object_rotation.convert_mat2qua(
                lidar2ego[:3, :3]
            ),  # (4, )
            "lidar2ego": lidar2ego,
            "ego2global_translation": ego2global[:3, 3],
            "ego2global_rotation": object_rotation.convert_mat2qua(
                ego2global[:3, :3]
            ),  # (4, )
            "ego2global": ego2global,  # ego's location in global coordinate
            "lidar2global": lidar2global,
            "sdc_vel_global": sdc_vel_global,  # sdc's velocity in global coordinate
            # sdc's information in global/ego coordinate,
            # 0-3-> loc, 3-7-> rot_quat, 7-10-> acc, 10-13-> ang,
            # 13-16->vel, -2-> yaw_rad, -1-> yaw_deg
            "can_bus": can_bus,
            # sdc's information in lidar coordinate
            "gt_sdc_bbox_lidar": gt_sdc_bbox_lidar,  # 9
            "global_plan_short": planning_data["local"],  # deque
            "command": command - 1,  # int, convert 1-indexed -> 0-indexed
        }
    )

    return anno


def parse_bbox(anno: Dict, scene_root: str, frame: str, vis: bool = False) -> Dict:
    """Parse each one of the bounding boxes and compute properties"""

    # load GT
    bbox_path = f"{scene_root}/metadata/detection/{frame}.pkl"
    with open(bbox_path, "rb") as f:
        bboxes: Dict = pkl.load(f)
    tracking_path = f"{scene_root}/metadata/tracking/{frame}.pkl"
    with open(tracking_path, "rb") as f:
        tracklets: Dict = pkl.load(f)

    # initialize data infos
    num_obj: int = bboxes["location"].shape[1]
    anno.update(
        {
            "gt_velocity": np.zeros((num_obj, 2), dtype="float32"),  # lidar coordinate
            "gt_boxes": np.zeros((num_obj, 7), dtype="float32"),  # lidar coordinate
            "gt_bboxes_global": np.zeros((num_obj, 9), dtype="float32"),
            "gt_names": np.zeros((num_obj,), dtype=object),
            "gt_inds": np.zeros((num_obj), dtype="int64"),
            "num_lidar_pts": np.zeros((num_obj,), dtype="float32"),
            "num_radar_pts": np.zeros((num_obj,), dtype="float32"),
            "attr_labels": np.asarray([[] for _ in range(num_obj)]),
            "valid_flag": np.zeros((num_obj,), dtype="bool"),
            "bbox_ego_matrix": np.zeros((num_obj, 4, 4), dtype="float32"),
        }
    )

    # get rotation matrix
    ego2global_rotation_mat: np.ndarray = pyquaternion.Quaternion(
        anno["ego2global_rotation"]
    ).rotation_matrix
    lidar2ego_rotation_mat: np.ndarray = pyquaternion.Quaternion(
        anno["lidar2ego_rotation"]
    ).rotation_matrix

    # go through all bbox to fill in database info
    bbox_gt_in_ego_list: List[np.ndarray] = list()
    valid_id_list: List[np.ndarray] = list()
    for bbox_index in range(num_obj):
        # retrieve basic property
        bbox_location: np.ndarray = bboxes["location"][0, bbox_index]  # (3, )
        bbox_rotation: np.ndarray = bboxes["rotation"][0, bbox_index]  # (3, )
        bbox_size: np.ndarray = bboxes["size"][0, bbox_index]  # (3, )
        bbox_cls_vec = bboxes["cls"][0, bbox_index]  # (cls, )

        # get box in global coordinate
        bbox2global = calib_utils.create_transform(
            bbox_location, bbox_rotation
        )  # 4 x 4

        # compute bbox center, convert to LiDAR coordinate
        bbox_center = np.array([[0, 0, 0, 1.0]]).transpose()  # 4 x 1
        bbox_center_world: np.ndarray = bbox2global @ bbox_center  # 4 x 1
        bbox_center_ego = np.linalg.inv(anno["ego2global"]) @ bbox_center_world  # 4 x 1
        bbox_center_lidar = np.linalg.inv(anno["lidar2ego"]) @ bbox_center_ego  # 4 x 1

        # retrieve index of the tracklets
        global_instance_id: int = int(bboxes["id_debug"][0, bbox_index, 0])
        tracklet_index: int = (
            tracklets["id"][0, :, 0].tolist().index(global_instance_id)
        )

        # get velocity of the most recent frame in lidar coordinate
        velocity: np.ndarray = tracklets["velocity"][0, tracklet_index, -1, :]  # (3, )
        velocity_world = np.expand_dims(velocity, axis=1)  # 3 x 1
        velocity_ego: np.ndarray = (
            np.linalg.inv(ego2global_rotation_mat) @ velocity_world
        )  # 3 x 1
        velocity_lidar: np.ndarray = (
            np.linalg.inv(lidar2ego_rotation_mat) @ velocity_ego
        )  # 3 x 1

        # get orientation in radian
        bbox2lidar = np.linalg.inv(anno["lidar2global"]) @ bbox2global  # 4 x 4
        yaw = scipy.spatial.transform.Rotation.from_matrix(bbox2lidar[:3, :3]).as_euler(
            "xyz", degrees=False
        )[-1]

        # get 7dof representation: xyzlwh + yaw, lidar coordinate
        l, w, h = bbox_size.tolist()
        x, y, z = bbox_center_lidar[:3, 0]
        box_7dof_lidar = np.array(
            [float(x), float(y), float(z)] + [l, w, h] + [yaw]
        )  # (7, )

        # get 7dof representation: xyzlwh + yaw, global coordinate
        box_7dof_global = np.array(
            [
                float(bbox_center_world[0]),
                float(bbox_center_world[1]),
                float(bbox_center_world[2]),
            ]
            + [l, w, h]
            + [float(bbox_rotation[1]) / 180 * np.pi]
        )  # (7, )

        # extract number of points
        box3d: Box3D = Box3D.array2bbox_xyzlwhyaw(box_7dof_lidar)
        box_in_lidar: np.ndarray = Box3D.box2corners3d_lidar(box3d)  # 8 x 4
        pc_sub: np.ndarray = box_sensor.extract_pc_in_box3d(
            anno["lidar_pts"], box_in_lidar
        )[
            0
        ]  # N x 3

        # add data into list
        anno["num_lidar_pts"][bbox_index] = pc_sub.shape[0]
        anno["gt_boxes"][bbox_index, :] = box_7dof_lidar  # lidar coordinate
        anno["gt_velocity"][bbox_index, :] = velocity_lidar[
            :2, 0
        ]  # only take xy-plane velocity, in lidar coordinate
        anno["gt_bboxes_global"][bbox_index, :7] = box_7dof_global
        anno["gt_bboxes_global"][bbox_index, 7:] = velocity_world[:2, 0]
        anno["bbox_ego_matrix"][bbox_index] = bbox2global
        anno["gt_names"][bbox_index] = carla_class.convert_vec2str(bbox_cls_vec)
        anno["gt_inds"][bbox_index] = global_instance_id

        # set valid only if there are lidar points inside the box
        if pc_sub.shape[0] > 0:
            anno["valid_flag"][bbox_index] = True

            # convert to the ego coordinate for visualization
            if vis:
                box_in_ego: np.ndarray = box_transform.box_in_lidar_to_ego(
                    box_in_lidar
                )  # 8 x 4
                bbox_gt_in_ego_list.append(box_in_ego)
                valid_id_list.append(global_instance_id)

    if vis:
        # get save path for train/val split
        try:
            save_dir = scene_root.replace("/train/", "/vis_train_gt/")
        except:
            save_dir = scene_root.replace("/val/", "/vis_val_gt/")
        save_path: str = os.path.join(save_dir, frame + ".jpg")

        # add ego box
        box3d: Box3D = Box3D.array2bbox_xyzlwhyaw(anno["gt_sdc_bbox_lidar"][:7])
        sdc_box_in_lidar: np.ndarray = Box3D.box2corners3d_lidar(box3d)  # 8 x 4
        sdc_box_in_ego: np.ndarray = box_transform.box_in_lidar_to_ego(
            sdc_box_in_lidar
        )  # 8 x 4
        bbox_gt_in_ego_list.append(sdc_box_in_ego)
        valid_id_list.append(-1)

        # visualizating boxes
        box_vis.vis_box_on_lidar(
            anno["lidar_pts"],
            save_path,
            bbox_gt_in_ego_list=bbox_gt_in_ego_list,
            # bev=False,
            id_list=valid_id_list,
        )

        # visualizing map
        from pathlib import Path

        import matplotlib.pyplot as plt
        from mmdet3d_plugin.datasets.carla.trajdata_rasterizer import rasterize
        from trajdata import MapAPI, SceneBatch, VectorMap
        from trajdata.caching.df_cache import DataFrameCache
        from trajdata.data_structures import StateTensor
        from trajdata.dataset_specific.opendrive import parse_maps
        from trajdata.maps.vec_map_elements import (MapElement, MapElementType,
                                                    PedCrosswalk, PedWalkway,
                                                    Polyline, RoadArea,
                                                    RoadLane)

        # load map from the disk
        with open(anno["map_path"], "rb") as handle:
            opendrive_str = pkl.load(handle)

        # Parse the ODMap from the opendrive string
        trajdata_cache_dir = "/home/xweng/trajdata"
        map_api = MapAPI(trajdata_cache_dir)
        map_id = f"carla:{anno['town_string']}"
        parse_maps.init_global_vars()
        vec_map = parse_maps.parse_opendrive_map(
            map_filename=None, map_rawstring=opendrive_str, map_id=map_id
        )

        # Save the resulting map to load later
        DataFrameCache.finalize_and_cache_map(
            Path(trajdata_cache_dir), vec_map, {"px_per_m": 2}
        )

        # Load map with map api
        vec_map = map_api.get_map(
            map_id,
            incl_road_lanes=True,
            incl_road_areas=True,
            incl_ped_crosswalks=True,
            incl_ped_walkways=True,
        )

        # from bottom to look up, need to flip the world coordinate
        map_img, polylines, raster_from_world = rasterize(
            vec_map=vec_map,
            resolution=5,
            return_tf_mat=True,
            incl_centerlines=False,
            incl_lane_edges=False,  # land divider
            area_color=(255, 255, 255),
            edge_color=(255, 0, 0),
            # incl_lane_area=False,     # driveable area
            anno=anno,
            # debug=True,
        )
        # map_img: H x W x 3, from bottom to look up, right hand coordinate
        # need to flip in order to get the actual map
        # polylines: List[Dict]

        # def get_lane_id(road_id: str, section_idx: int, lane_id: str):
        #     return "_".join((road_id, f"s{section_idx}", lane_id))

        # vec_map.visualize_lane_graph(
        #     origin_lane=vec_map.get_road_lane(get_lane_id("13", 0, "1")),
        #     num_hops=5,
        #     raster_from_world=raster_from_world,
        #     ax=ax
        # # )

    return anno


def parse_data(scene_root: str, frame: str) -> Dict:
    """Convert carla data into the collection of path needed by mmdetection3D dataloader

    Args:
        scene_root: root of the sequence dir
        frame: str
    """

    anno = {}
    anno: Dict = parse_token(anno, scene_root, frame)
    anno: Dict = parse_ego_sensor_calib(anno, scene_root, frame)
    anno: Dict = parse_traffic_light_anno(anno, scene_root, frame)
    anno: Dict = parse_stop_sign_anno(anno, scene_root, frame)
    anno: Dict = parse_bbox(anno, scene_root, frame)

    return anno


def parse_sequence(
    scene_root: str,
    annos: List[Dict],
    seq_index: List[int],
    pre_sec: int = 2,
    fut_sec: int = 6,
    vis: bool = False,
) -> List[Dict]:
    """Add past/future trajectory into data at each frame to allow prediction"""

    # get the name of the sequence
    seq: str = scene_root.split("/")[-1]
    anno_seq: List[Dict] = [annos[index] for index in seq_index]
    print(f"processing: {seq}")

    # data_frame = [anno['frame_idx'] for anno in annos]
    # data_frame = [anno['gt_fut_traj'] for anno in annos]
    # print(data_frame)

    # sort data by frame ID, as the original data might not be sorted based
    # on frame index
    def sort_data(data_dict: Dict):
        return data_dict["frame_idx"]

    anno_seq.sort(key=sort_data)
    min_frame_ID: int = anno_seq[0]["frame_idx"]
    max_frame_ID: int = anno_seq[-1]["frame_idx"]

    # data_frame = [anno['frame_idx'] for anno in annos]
    # print(data_frame)
    # zxc

    # loop through each frame in this sequence
    for index in range(len(seq_index)):
        anno: Dict = anno_seq[index]  # data for the frame
        frame_ID: int = anno["frame_idx"]  # the frame ID
        # print("current frame is", frame_ID)

        # check if the data is from the same sequence
        scene_name: str = anno["scene_token"]  # the sequence name
        assert scene_name == seq, "error, not the same sequence"

        # retrieve info for other objects
        # gt_velocity: np.ndarray = anno["gt_velocity"]  # N x 2
        gt_bboxes_lidar: np.ndarray = anno["gt_boxes"]  # N x 7
        num_obj: int = gt_bboxes_lidar.shape[0]
        obj_ids: List[int] = anno["gt_inds"].tolist()

        # retrieve info for ego and calib
        l, w, h = anno["gt_sdc_bbox_lidar"][3:6]
        lidar2global: np.ndarray = anno["lidar2global"]  # 4 x 4

        ########### get the target data we need for GT
        pre_frames: int = int(pre_sec * FPS_KEYFRAME)
        fut_frames: int = int(fut_sec * FPS_KEYFRAME)

        # the past & future trajectories for objects existing in the current frame
        # so we know the exact number of objects
        gt_fut_bbox_lidar: np.ndarray = np.zeros((num_obj, fut_frames, 9))
        gt_pre_bbox_lidar: np.ndarray = np.zeros((num_obj, pre_frames, 9))

        # initially set all mask as invalid until we identify valid frames
        gt_fut_bbox_mask: np.ndarray = np.zeros((num_obj, fut_frames, 1))
        gt_pre_bbox_mask: np.ndarray = np.zeros((num_obj, pre_frames, 1))

        # ALL the past & future trajectories, including objects that appear
        # from frames to frames, and not existing in the current frame
        # as a result, the number of objects might not be consistent over frames
        # can be used to create an Union of all objects for better training
        gt_fut_bbox_lidar_all: List[np.ndarray] = [
            [np.empty([0])] for _ in range(fut_frames)
        ]
        gt_pre_bbox_lidar_all: List[np.ndarray] = [
            [np.empty([0])] for _ in range(pre_frames)
        ]

        # sdc's temporal info
        gt_pre_bbox_sdc_lidar: np.ndarray = np.zeros((1, pre_frames, 9))
        gt_fut_bbox_sdc_lidar: np.ndarray = np.zeros((1, fut_frames, 9))
        gt_pre_bbox_sdc_global: np.ndarray = np.zeros((1, pre_frames, 9))
        gt_fut_bbox_sdc_global: np.ndarray = np.zeros((1, fut_frames, 9))
        gt_pre_bbox_sdc_mask: np.ndarray = np.zeros((1, pre_frames, 1))
        gt_fut_bbox_sdc_mask: np.ndarray = np.zeros((1, fut_frames, 1))
        gt_pre_command_sdc: np.ndarray = np.zeros((1, pre_frames, 1))
        gt_fut_command_sdc: np.ndarray = np.zeros((1, fut_frames, 1))

        # get past trajectory, backward order, i.e., frame 4, 3, 2, 1
        # start with 1 to not include the current frame
        for pre_frame_index in range(1, pre_frames + 1):
            pre_frame_ID = int(frame_ID - pre_frame_index * FRAME_INTERVAL)
            pre_frame_index_in_seq: int = index - pre_frame_index

            # skip the initial frame with incomplete lidar and also negative frame
            if pre_frame_ID < min_frame_ID:
                continue

            # retrieve the data in the global coordinate
            anno_pre_tmp: Dict = anno_seq[pre_frame_index_in_seq]
            gt_pre_bbox_global_tmp: np.ndarray = anno_pre_tmp[
                "gt_bboxes_global"
            ]  # N x 9
            num_obj_pre: int = gt_pre_bbox_global_tmp.shape[0]
            gt_pre_bbox_lidar_tmp = np.zeros((num_obj_pre, 9))

            ########## convert the global coordinate to lidar coordinate in current frame
            # i.e., ego motion compensation

            # location
            loc_global = np.concatenate(
                (gt_pre_bbox_global_tmp[:, :3], np.ones((num_obj_pre, 1))), axis=1
            ).T  # 4 x N
            loc_lidar = np.linalg.inv(lidar2global) @ loc_global  # 4 x N
            gt_pre_bbox_lidar_tmp[:, :3] = loc_lidar[:3, :].transpose()  # N x 3

            # size
            gt_pre_bbox_lidar_tmp[:, 3:6] = gt_pre_bbox_global_tmp[:, 3:6]

            # rotation
            gt_pre_rot_global_rad = np.concatenate(
                (
                    np.zeros((num_obj_pre, 1)),
                    gt_pre_bbox_global_tmp[:, [6]],
                    np.zeros((num_obj_pre, 1)),
                ),
                axis=1,
            )  # N x 3
            gt_pre_rot_global_deg = gt_pre_rot_global_rad / np.pi * 180
            for obj_index in range(num_obj_pre):
                gt_pre_trans_global = calib_utils.create_transform(
                    gt_pre_bbox_global_tmp[obj_index, :3],
                    gt_pre_rot_global_deg[obj_index],
                )  # 4 x 4
                gt_pre_trans_lidar = (
                    np.linalg.inv(anno["lidar2global"]) @ gt_pre_trans_global
                )  # 4 x 4
                gt_pre_yaw_lidar = scipy.spatial.transform.Rotation.from_matrix(
                    gt_pre_trans_lidar[:3, :3]
                ).as_euler("xyz", degrees=False)[-1]
                gt_pre_bbox_lidar_tmp[obj_index, 6] = gt_pre_yaw_lidar  # N x 3

            # velocity
            gt_pre_vel_global = np.concatenate(
                (
                    gt_pre_bbox_global_tmp[:, 7:],  # N x 2
                    np.zeros((num_obj_pre, 1)),
                ),
                axis=1,
            )  # N x 3
            gt_pre_vel_lidar: np.ndarray = (
                np.linalg.inv(lidar2global[:3, :3]) @ gt_pre_vel_global.T
            ).T  # N x 3
            gt_pre_bbox_lidar_tmp[:, 7:] = gt_pre_vel_lidar[:, :2]  # N x 2

            # dump the data into the set of all objects in the future
            gt_pre_bbox_lidar_all[pre_frame_index - 1] = gt_pre_bbox_lidar_tmp  # N x 9

            # now check the IDs that exist in the current frame
            # in order to produce the mask for future frames
            obj_ids_pre: List[int] = anno_pre_tmp["gt_inds"].tolist()
            (
                obj_ids_common,
                index_cur,
                index_past,
            ) = miscell_utils.find_unique_common_from_lists(obj_ids, obj_ids_pre)

            # possible that we get 0 object in future frame matching with GT object currint
            if len(index_cur) > 0:
                gt_pre_bbox_mask[np.array(index_cur), pre_frame_index - 1] = 1

                # also assign the actual gt boxes into the array
                gt_pre_bbox_lidar[
                    np.array(index_cur), pre_frame_index - 1, :
                ] = gt_pre_bbox_lidar_tmp[np.array(index_past), :]

            ############### get past of the sdc box in lidar coordinate
            sdc_loc_global, _ = calib_utils.transform_matrix_to_vector(
                anno_pre_tmp["ego2global"]
            )
            sdc_loc_global = np.concatenate((sdc_loc_global, np.array([1])))  # 4
            sdc_loc_lidar = np.linalg.inv(lidar2global) @ sdc_loc_global  # 4
            sdc_vel_global: np.ndarray = anno_pre_tmp["sdc_vel_global"]  # 3
            sdc_vel_lidar: np.ndarray = (
                np.linalg.inv(lidar2global[:3, :3]) @ sdc_vel_global
            )  # 3
            sdc_trans_lidar = (
                np.linalg.inv(lidar2global) @ anno_pre_tmp["ego2global"]
            )  # 4 x 4
            yaw_lidar = scipy.spatial.transform.Rotation.from_matrix(
                sdc_trans_lidar[:3, :3]
            ).as_euler("xyz", degrees=False)[-1]
            yaw_global = scipy.spatial.transform.Rotation.from_matrix(
                anno_pre_tmp["ego2global"][:3, :3]
            ).as_euler("xyz", degrees=False)[-1]
            gt_pre_bbox_sdc_mask[0, pre_frame_index - 1] = 1

            # put things into bbox, 9 dof
            gt_sdc_bbox_lidar = np.array(
                [
                    sdc_loc_lidar[0],
                    sdc_loc_lidar[1],
                    sdc_loc_lidar[2],
                    l,
                    w,
                    h,
                    yaw_lidar,
                    sdc_vel_lidar[0],
                    sdc_vel_lidar[1],
                ]
            )
            gt_pre_bbox_sdc_lidar[0, pre_frame_index - 1] = gt_sdc_bbox_lidar
            gt_sdc_bbox_global = np.array(
                [
                    sdc_loc_global[0],
                    sdc_loc_global[1],
                    sdc_loc_global[2],
                    l,
                    w,
                    h,
                    yaw_global,
                    sdc_vel_global[0],
                    sdc_vel_global[1],
                ]
            )
            gt_pre_bbox_sdc_global[0, pre_frame_index - 1] = gt_sdc_bbox_global

            # get command
            gt_pre_command_sdc[0, pre_frame_index - 1]: int = anno_pre_tmp["command"]

        # get future trajectory
        # start with 1 to not include the current frame
        for fut_frame_index in range(1, fut_frames + 1):
            fut_frame_ID = int(frame_ID + fut_frame_index * FRAME_INTERVAL)
            fut_frame_index_in_seq: int = index + fut_frame_index

            # beyond the last timestamp
            if fut_frame_ID > max_frame_ID:
                continue

            # retrieve the data in the global coordinate
            anno_fut_tmp: Dict = anno_seq[fut_frame_index_in_seq]
            gt_fut_bbox_global_tmp: np.ndarray = anno_fut_tmp[
                "gt_bboxes_global"
            ]  # N x 9
            num_obj_fut: int = gt_fut_bbox_global_tmp.shape[0]
            gt_fut_bbox_lidar_tmp = np.zeros((num_obj_fut, 9))

            ########## convert the global coordinate to lidar coordinate in current frame
            # i.e., ego motion compensation

            # location
            loc_global = np.concatenate(
                (gt_fut_bbox_global_tmp[:, :3], np.ones((num_obj_fut, 1))),
                axis=1,
            ).T  # 4 x N
            loc_lidar = np.linalg.inv(lidar2global) @ loc_global  # 4 x N
            gt_fut_bbox_lidar_tmp[:, :3] = loc_lidar[:3, :].transpose()  # N x 3

            # size
            gt_fut_bbox_lidar_tmp[:, 3:6] = gt_fut_bbox_global_tmp[:, 3:6]

            # rotation
            gt_fut_rot_global_rad = np.concatenate(
                (
                    np.zeros((num_obj_fut, 1)),
                    gt_fut_bbox_global_tmp[:, [6]],
                    np.zeros((num_obj_fut, 1)),
                ),
                axis=1,
            )  # N x 3
            gt_fut_rot_global_deg = gt_fut_rot_global_rad / np.pi * 180
            for obj_index in range(num_obj_fut):
                gt_trans_global_future = calib_utils.create_transform(
                    gt_fut_bbox_global_tmp[obj_index, :3],
                    gt_fut_rot_global_deg[obj_index],
                )  # 4 x 4
                gt_fut_trans_lidar = (
                    np.linalg.inv(anno["lidar2global"]) @ gt_trans_global_future
                )  # 4 x 4
                gt_fut_yaw_lidar = scipy.spatial.transform.Rotation.from_matrix(
                    gt_fut_trans_lidar[:3, :3]
                ).as_euler("xyz", degrees=False)[-1]
                gt_fut_bbox_lidar_tmp[obj_index, 6] = gt_fut_yaw_lidar  # N x 3

            # velocity
            gt_fut_vel_global = np.concatenate(
                (
                    gt_fut_bbox_global_tmp[:, 7:],  # N x 2
                    np.zeros((num_obj_fut, 1)),
                ),
                axis=1,
            )  # N x 3
            gt_fut_vel_lidar: np.ndarray = (
                np.linalg.inv(lidar2global[:3, :3]) @ gt_fut_vel_global.T
            ).T  # N x 3
            gt_fut_bbox_lidar_tmp[:, 7:] = gt_fut_vel_lidar[:, :2]  # N x 2

            # gt_vel_future_tmp: np.ndarray = anno_future_tmp["gt_velocity"]  # N x 2
            # gt_box_data_all: np.ndarray = np.concatenate(
            #     (gt_boxes_future_tmp, gt_vel_future_tmp), axis=-1
            # )  # N x 9
            # print("\nfuture frame is", frame_idx)
            # print(gt_boxes_future_tmp)

            # dump the data into the set of all objects in the future
            gt_fut_bbox_lidar_all[fut_frame_index - 1] = gt_fut_bbox_lidar_tmp  # N x 9

            # now check the IDs that exist in the current frame
            # in order to produce the mask for future frames
            obj_ids_fut: List[int] = anno_fut_tmp["gt_inds"].tolist()
            (
                obj_ids_common,
                index_cur,
                index_fut,
            ) = miscell_utils.find_unique_common_from_lists(obj_ids, obj_ids_fut)

            # possible that we get 0 object in future frame matching with GT object currint
            if len(index_cur) > 0:
                gt_fut_bbox_mask[np.array(index_cur), fut_frame_index - 1] = 1
                # also assign the actual gt boxes into the array
                gt_fut_bbox_lidar[
                    np.array(index_cur), fut_frame_index - 1, :
                ] = gt_fut_bbox_lidar_tmp[np.array(index_fut), :]

            ############## get future of the sdc traj
            # gt_fut_sdc_global: np.ndarray = anno["can_bus"][:3]
            # gt_fut_sdc_global = np.concatenate(
            # (gt_fut_sdc_global, np.array([1]))
            # )
            # gt_fut_sdc_lidar = (
            # np.linalg.inv(anno["lidar2global"]) @ gt_fut_sdc_global
            # )  # 4 x 1
            # gt_fut_traj_sdc_lidar[0, future_frame_index - 1, :] = gt_fut_sdc_lidar[:3]

            ############### get future of the sdc box in lidar coordinate
            sdc_loc_global, _ = calib_utils.transform_matrix_to_vector(
                anno_fut_tmp["ego2global"]
            )
            sdc_loc_global = np.concatenate((sdc_loc_global, np.array([1])))  # 4
            sdc_loc_lidar = np.linalg.inv(lidar2global) @ sdc_loc_global  # 4
            sdc_vel_global: np.ndarray = anno_fut_tmp["sdc_vel_global"]  # 3
            sdc_vel_lidar: np.ndarray = (
                np.linalg.inv(lidar2global[:3, :3]) @ sdc_vel_global
            )  # 3
            sdc_trans_lidar = (
                np.linalg.inv(lidar2global) @ anno_fut_tmp["ego2global"]
            )  # 4 x 4
            yaw_lidar = scipy.spatial.transform.Rotation.from_matrix(
                sdc_trans_lidar[:3, :3]
            ).as_euler("xyz", degrees=False)[-1]
            yaw_global = scipy.spatial.transform.Rotation.from_matrix(
                anno_fut_tmp["ego2global"][:3, :3]
            ).as_euler("xyz", degrees=False)[-1]
            gt_fut_bbox_sdc_mask[0, fut_frame_index - 1] = 1

            # put things into bbox, 9 dof
            gt_sdc_bbox_lidar = np.array(
                [
                    sdc_loc_lidar[0],
                    sdc_loc_lidar[1],
                    sdc_loc_lidar[2],
                    l,
                    w,
                    h,
                    yaw_lidar,
                    sdc_vel_lidar[0],
                    sdc_vel_lidar[1],
                ]
            )
            gt_fut_bbox_sdc_lidar[0, fut_frame_index - 1] = gt_sdc_bbox_lidar
            gt_sdc_bbox_global = np.array(
                [
                    sdc_loc_global[0],
                    sdc_loc_global[1],
                    sdc_loc_global[2],
                    l,
                    w,
                    h,
                    yaw_global,
                    sdc_vel_global[0],
                    sdc_vel_global[1],
                ]
            )
            gt_fut_bbox_sdc_global[0, fut_frame_index - 1] = gt_sdc_bbox_global

            # get command
            gt_fut_command_sdc[0, fut_frame_index - 1]: int = anno_fut_tmp["command"]

        # flip array to allow time in forward order for the past trajectory
        gt_pre_bbox_lidar = np.flip(gt_pre_bbox_lidar, axis=1)
        gt_pre_bbox_mask = np.flip(gt_pre_bbox_mask, axis=1)
        gt_pre_bbox_sdc_lidar = np.flip(gt_pre_bbox_sdc_lidar, axis=1)
        gt_pre_bbox_sdc_global = np.flip(gt_pre_bbox_sdc_global, axis=1)
        gt_pre_bbox_sdc_mask = np.flip(gt_pre_bbox_sdc_mask, axis=1)
        gt_pre_command_sdc = np.flip(gt_pre_command_sdc, axis=1)

        # assign the value back into the dictionary
        # in-place assignment to the sorted anno
        anno["gt_pre_bbox_lidar"] = gt_pre_bbox_lidar  # N x 4 x 9
        anno["gt_fut_bbox_lidar"] = gt_fut_bbox_lidar  # N x 12 x 9
        anno["gt_pre_bbox_mask"] = gt_pre_bbox_mask  # N x 4 x 1
        anno["gt_fut_bbox_mask"] = gt_fut_bbox_mask  # N x 12 x 1
        anno["gt_pre_bbox_sdc_lidar"] = gt_pre_bbox_sdc_lidar  # 1 x 4 x 9
        anno["gt_fut_bbox_sdc_lidar"] = gt_fut_bbox_sdc_lidar  # 1 x 12 x 9
        anno["gt_pre_bbox_sdc_global"] = gt_pre_bbox_sdc_global  # 1 x 4 x 9
        anno["gt_fut_bbox_sdc_global"] = gt_fut_bbox_sdc_global  # 1 x 12 x 9
        anno["gt_pre_bbox_sdc_mask"] = gt_pre_bbox_sdc_mask  # 1 x 4 x 1
        anno["gt_fut_bbox_sdc_mask"] = gt_fut_bbox_sdc_mask  # 1 x 12 x 1
        anno["gt_pre_command_sdc"] = gt_pre_command_sdc  # 1 x 4 x 1
        anno["gt_fut_command_sdc"] = gt_fut_command_sdc  # 1 x 12 x 1

        # debug other object's trajectory
        # print('\nobjects')
        # # index = 6
        # # obj_id = obj_ids[index]
        # # print(obj_id)

        # obj_id = 373
        # index = obj_ids.index(obj_id)

        # print(index)
        # gt_bboxes_lidar = np.expand_dims(gt_bboxes_lidar, axis=1)
        # gt_box_curfut = np.concatenate(
        #     (gt_bboxes_lidar, gt_fut_traj), axis=1
        # )  # N x 13 x 9
        # print(gt_past_traj[index])
        # print(gt_box_curfut[index])
        # print(gt_past_traj_mask[index])
        # print(gt_fut_traj_mask[index])
        # zxc

        # debug ego trajectory
        gt_sdc_bbox_lidar_all = np.concatenate(
            (
                gt_pre_bbox_sdc_lidar[0],  # 4 x 9
                anno["gt_sdc_bbox_lidar"].reshape((1, -1)),  # 1 x 9
                gt_fut_bbox_sdc_lidar[0],  # 12 x 9
            ),
            axis=0,
        )
        # gt_sdc_mask_all = np.concatenate(
        #     (
        #         gt_pre_bbox_sdc_mask,  # 1 x 4 x 1
        #         np.array([1]).reshape(1, -1, 1),
        #         gt_fut_bbox_sdc_mask,
        #     ),
        #     axis=1,
        # )
        # gt_sdc_command_all = np.concatenate(
        #     (
        #         gt_pre_command_sdc,
        #         np.array([anno["command"]]).reshape(1, -1, 1),
        #         gt_fut_command_sdc),
        #     axis=1,
        # )
        # print(gt_fut_traj_sdc_lidar)
        # print("\nego")
        # print('gt_sdc_bbox_lidar_all\n', gt_sdc_bbox_lidar_all)
        # print(gt_sdc_mask_all)
        # print(gt_sdc_command_all)
        # zxc

        # data_frame = [anno['frame_idx'] for anno in annos]
        # print(data_frame)
        # data_frame = [anno['gt_fut_traj'].shape for anno in annos]
        # print(data_frame)

        #         print(obj_ids)
        #         print(obj_ids_fut)
        #         print(obj_ids_common)
        #         print(index_cur)
        #         print(index_fut)
        #         print(gt_fut_traj_mask[:, future_frame_index - 1])
        #         print(gt_fut_traj[:, future_frame_index - 1])
        #         # zxc
        #     zxc
        # zxc

        if vis:
            # get save path for train/val split
            try:
                save_dir = scene_root.replace("/train/", "/vis_train_gt/")
            except:
                save_dir = scene_root.replace("/val/", "/vis_val_gt/")
            save_path: str = os.path.join(save_dir, "%06d.jpg" % frame_ID)

            # go through all bboxes
            bbox_gt_in_ego_list: List[np.ndarray] = list()
            for bbox_index in range(num_obj):
                box_7dof_lidar: np.ndarray = anno["gt_boxes"][bbox_index, :]
                box3d: Box3D = Box3D.array2bbox_xyzlwhyaw(box_7dof_lidar)
                box_in_lidar: np.ndarray = Box3D.box2corners3d_lidar(box3d)  # 8 x 4
                box_in_ego: np.ndarray = box_transform.box_in_lidar_to_ego(
                    box_in_lidar
                )  # 8 x 4
                bbox_gt_in_ego_list.append(box_in_ego)

            # add ego box
            box3d: Box3D = Box3D.array2bbox_xyzlwhyaw(anno["gt_sdc_bbox_lidar"][:7])
            sdc_box_in_lidar: np.ndarray = Box3D.box2corners3d_lidar(box3d)  # 8 x 4
            sdc_box_in_ego: np.ndarray = box_transform.box_in_lidar_to_ego(
                sdc_box_in_lidar
            )  # 8 x 4
            bbox_gt_in_ego_list.append(sdc_box_in_ego)

            # aggregate traj
            bbox_fut_lidar_all = np.concatenate(
                (anno["gt_fut_bbox_lidar"], anno["gt_fut_bbox_sdc_lidar"]), axis=0
            )  # N x 12 x 9
            bbox_fut_mask_all = np.concatenate(
                (anno["gt_fut_bbox_mask"], anno["gt_fut_bbox_sdc_mask"]), axis=0
            )  # N x 12 x 1
            bbox_pre_lidar_all = np.concatenate(
                (anno["gt_pre_bbox_lidar"], anno["gt_pre_bbox_sdc_lidar"]), axis=0
            )  # N x 12 x 9
            bbox_pre_mask_all = np.concatenate(
                (anno["gt_pre_bbox_mask"], anno["gt_pre_bbox_sdc_mask"]), axis=0
            )  # N x 12 x 1

            # visualization
            box_vis.vis_box_on_lidar(
                anno["lidar_pts"],
                save_path,
                bbox_gt_in_ego_list=bbox_gt_in_ego_list,
                bev=False,
                id_list=obj_ids + [-1],
                fut_traj=bbox_fut_lidar_all,
                fut_traj_mask=bbox_fut_mask_all,
                pre_traj=bbox_pre_lidar_all,
                pre_traj_mask=bbox_pre_mask_all,
            )

        # delete lidar points in the dicionary to reduce file size prior to saving
        del anno["lidar_pts"]

    return annos


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--data_root",
        type=str,
        default="/mnt/hdd2/datasets/carla_1.0/carla_data_0414",
        # default="/home/xweng/ngc/workspace/diffstack/experiments/carla/2023-04-09-11-49-06_smallscale_datacollection",
        help="root directory of raw carla data",
    )
    parser.add_argument(
        "--output_dir",
        type=str,
        default="/mnt/hdd2/datasets/carla_1.0/carla_data_0414",
        # default="/home/xweng/ngc/workspace/diffstack/experiments/carla/2023-04-09-11-49-06_smallscale_datacollection",
        help="path of output file",
    )
    parser.add_argument(
        "--mini",
        action="store_true",
        help="generate mini pkl file",
    )
    # parser.add_argument(
    #     "--partial",
    #     action='store_true',
    #     help="partial set of data by sampling to make the dataset smaller",
    # )
    parser.add_argument(
        "--tiny",
        action="store_true",
        help="for debugging purpose",
    )
    parser.add_argument("--split", type=str, default="val", help="train/val")
    args = parser.parse_args()

    # nuScenes: train: 28136 frames
    # nuScenes: train_new (excluding OOD): 27334 frames

    # 2023/01/15 dataset statistics
    # train: 114464 （2 runs, 50 x 2 = 100 routes）
    # val: 37833

    # 2023/04/15 dataset statistics (1 run, 173 routes, dynamic weather)
    # train: 72236
    # val: 41305
    # trainval: 113541

    # 2023/04/14 dataset statistics (1 run, 173 routes, normal weather)
    # train: 78470
    # val: 42404
    # val_partial: 10601
    # mini: 888
    # trainval: 120874 keyframes -> 1208740 total frames (20FPS) = 16.8 hours of driving

    # go through all sequences
    annos: List[Dict] = []
    num_frames = 0
    scene_name: str
    for scene_name in glob.glob(f"{args.data_root}/{args.split}/*"):
        seq_index: List[int] = []

        for idx in glob.glob(f"{scene_name}/image_front/*.png"):
            frame: str = idx.split("/")[-1].split(".")[0]
            frame_int: int = int(float(frame))

            # skip the first frame
            if frame_int == 0:
                continue

            # # only process the first 100 frames for debugging
            # if frame_int > 1000:
            #     continue

            # only process the first frame ego moving for debugging
            # if frame_int < 680:
            #     continue
            # if frame_int > 700:
            #     continue
            # print(frame)

            # collect the index of frames that belong to the same sequence
            seq_index.append(num_frames)

            anno: Dict = parse_data(scene_name, frame)
            annos.append(anno)
            num_frames += 1

            if len(annos) % 10 == 0:
                print("finished: ", len(annos))

        # collect temporal data after pre-processing for the entire sequence
        # e.g., add future trajectory into every frame of data
        annos: List[Dict] = parse_sequence(scene_name, annos, seq_index)

        # only process the first sequence for mini
        if args.mini or args.tiny:
            break

    # # reduce frames by random sub-sampling, the order is random
    # not good doing this as it will change the data frequency at test time
    # e.g., from 2 Hz to 0.5 Hz, past trajectory data does not make sense
    # print(f"number of total frames is {len(annos)}")
    # if args.partial:

    #     def sort_seq(data_dict: Dict):
    #         return data_dict["token"]
    #     annos.sort(key=sort_seq)
    #     annos = annos[0::4]
    #     print(f"number of total frames after sampling is {len(annos)}")

    if args.tiny:
        annos = annos[:10]
        # for anno in annos:
        #     print(anno['token'])
        print(f"number of total frames after sampling is {len(annos)}")

    carla = {"infos": annos, "metadata": {"version": "v1.0"}}

    # save
    if args.mini:
        output_path = os.path.join(
            args.output_dir, f"data_{args.split}_nusc_format_mini.pkl"
        )
    else:
        # if args.partial:
        #     output_path = os.path.join(args.output_dir, f"data_{args.split}_nusc_format_partial.pkl")
        if args.tiny:
            output_path = os.path.join(
                args.output_dir, f"data_{args.split}_nusc_format_tiny.pkl"
            )
        else:
            output_path = os.path.join(
                args.output_dir, f"data_{args.split}_nusc_format.pkl"
            )
    with open(output_path, "wb") as f:
        pkl.dump(carla, f)