import argparse
import os
import pickle
from typing import List

import yaml
from tqdm import tqdm

parser = argparse.ArgumentParser()
parser.add_argument(
    "--data_root",
    type=str,
    default="./data/openscene-v1.1",
    help="root directory of the raw OpenScene data",
)
parser.add_argument(
    "--split",
    type=str,
    default="navsim",
    help="trainval/mini/navsim/test",
)
args = parser.parse_args()


def get_pkl_filelist(meta_data_dir: str) -> List[str]:
    """Return the full paths of all .pkl files directly under meta_data_dir."""
    return [
        os.path.join(meta_data_dir, each)
        for each in os.listdir(meta_data_dir)
        if each.endswith(".pkl")
    ]


if __name__ == "__main__":
    # OpenScene/nuPlan/NavSim split sizes:
    # mini_train: 43261  (43417 pre-cleaning)  -> 6h
    # mini_val:   8450                         -> 1.17h
    # val:        115564 (115733 pre-cleaning) -> 16h
    # train:      605263 (607286 pre-cleaning) -> 84h
    # trainval:   720827                       -> 100.11h
    # navtest:    12136                        -> 1.69h
    # navtrain:   102983                       -> 14.3h

    # split for the standard nuPlan splits: mini_train/mini_val, trainval_train/trainval_val
    if args.split in ['trainval', 'mini', 'test']:
        # source data
        meta_data_dir = os.path.join(args.data_root, "meta_datas_paradrive", args.split)
        meta_data_list = get_pkl_filelist(meta_data_dir)

        # 85/15 train/val split over the file list (os.listdir order, i.e. arbitrary)
        if args.split == 'test':
            test_paths = meta_data_list
            val_paths = None
            train_paths = None
        else:
            train_paths = meta_data_list[: int(len(meta_data_list) * 0.85)]
            val_paths = meta_data_list[int(len(meta_data_list) * 0.85):]
            print(f"total log for {args.split}: {len(meta_data_list)}")
    elif args.split == 'navsim':
        # all trainval data; filtered below with the navtrain scene filter
        meta_data_dir = os.path.join(args.data_root, "meta_datas_paradrive_v2/trainval")
        train_paths = get_pkl_filelist(meta_data_dir)
        navtrain_filter_path = "navsim/navsim/planning/script/config/common/train_test_split/scene_filter/navtrain.yaml"
        with open(navtrain_filter_path, 'r') as file:
            navtrain_filter = yaml.safe_load(file)
        log_filter_train = navtrain_filter['log_names']
        try:
            scene_filter_train = navtrain_filter['tokens']
        except KeyError:
            scene_filter_train = navtrain_filter['scenario_tokens']

        # all test data; filtered below with the navtest scene filter
        meta_data_dir = os.path.join(args.data_root, "meta_datas_paradrive_v2/test")
        val_paths = get_pkl_filelist(meta_data_dir)
        navtest_filter_path = "navsim/navsim/planning/script/config/common/train_test_split/scene_filter/navtest.yaml"
        with open(navtest_filter_path, 'r') as file:
            navtest_filter = yaml.safe_load(file)
        log_filter_test = navtest_filter['log_names']
        try:
            scene_filter_test = navtest_filter['tokens']
        except KeyError:
            scene_filter_test = navtest_filter['scenario_tokens']
    else:
        raise ValueError(f"unknown split: {args.split}")

    if val_paths is None:
        print(f"test log len: {len(test_paths)}")
    else:
        print(f"train log len: {len(train_paths)}")
        print(f"val log len: {len(val_paths)}")

    save_dir = os.path.join(args.data_root, "paradrive_infos_v2")
    os.makedirs(save_dir, exist_ok=True)
    save_val = os.path.join(save_dir, f"nuplan_{args.split}_test.pkl")

    # load and merge the per-log pkl files into a single val file;
    # only take the infos for now, leave the mapping to be used later if needed
    if not os.path.exists(save_val) and val_paths is not None:
        data_infos = []
        total_len = 0
        for file in val_paths:
            with open(file, "rb") as f:
                print(f'val: loading {file}')
                data_tmp = pickle.load(f)["infos"]
                total_len += len(data_tmp)
                ADD = False
                # keep only logs covered by the navtest filter
                if args.split == 'navsim':
                    log_name_tmp = data_tmp[0]['log_name']
                    if log_name_tmp not in log_filter_test:
                        continue
                    # expand the scene filter for this log
                    history_frame_num = 3  # keep offsets -3, -2, -1, 0 around a hit
                    future_frame_num = 0   # no future frames for the val/test filter
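                    # Each filter hit is expanded into a short clip: with
                    # history_frame_num = 3 and future_frame_num = 0, a hit at
                    # frame index 10 keeps the tokens of frames 7 through 10
                    # (indices outside the log are skipped).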
                    scene_filter_expanded = set()
                    for idx, data_frame in enumerate(data_tmp):
                        if data_frame['token'] in scene_filter_test:
                            start_frame_idx = idx - history_frame_num
                            end_frame_idx = idx + future_frame_num
                            for i in range(start_frame_idx, end_frame_idx + 1):
                                if i < 0 or i >= len(data_tmp):
                                    continue
                                scene_filter_expanded.add(data_tmp[i]['token'])
                    # keep only the frames whose token survived the expansion
                    data_save = []
                    for data_frame in data_tmp:
                        if data_frame['token'] in scene_filter_expanded:
                            ADD = True
                            data_save.append(data_frame)
                else:
                    # non-navsim splits keep every frame of the log
                    ADD = True
                    data_save = data_tmp
                if ADD:
                    data_infos.extend(data_save)
        print(f"val info len before: {total_len}")
        print(f"val info len after: {len(data_infos)}")
        # save
        with open(save_val, "wb") as f:
            pickle.dump(data_infos, f)
    else:
        print('skipped: val file already saved (or no val split)')

    # train split (or the test split, which is merged here as well)
    save_train = os.path.join(save_dir, f"nuplan_{args.split}_train.pkl")
    if not os.path.exists(save_train):
        # for the test split, merge the test logs instead
        if train_paths is None:
            train_paths = test_paths
        data_infos = []
        total_len = 0
        for file in tqdm(train_paths):
            with open(file, "rb") as f:
                tqdm.write(f'train: loading {file}')
                data_tmp = pickle.load(f)["infos"]
                total_len += len(data_tmp)
                ADD = False
                # keep only logs covered by the navtrain filter
                if args.split == 'navsim':
                    log_name_tmp = data_tmp[0]['log_name']
                    if log_name_tmp not in log_filter_train:
                        continue
                    # expand the scene filter for this log, as for val above,
                    # but keeping 8 future frames per hit
                    history_frame_num = 3  # offsets -3, -2, -1, 0
                    future_frame_num = 8   # offsets +1 .. +8
                    scene_filter_expanded = set()
                    for idx, data_frame in enumerate(data_tmp):
                        if data_frame['token'] in scene_filter_train:
                            start_frame_idx = idx - history_frame_num
                            end_frame_idx = idx + future_frame_num
                            for i in range(start_frame_idx, end_frame_idx + 1):
                                if i < 0 or i >= len(data_tmp):
                                    continue
                                scene_filter_expanded.add(data_tmp[i]['token'])
                    data_save = []
                    for data_frame in data_tmp:
                        if data_frame['token'] in scene_filter_expanded:
                            ADD = True
                            data_save.append(data_frame)
                else:
                    ADD = True
                    data_save = data_tmp
                if ADD:
                    data_infos.extend(data_save)
        print(f"train info len before: {total_len}")
        print(f"train info len after: {len(data_infos)}")
        # save
        with open(save_train, "wb") as f:
            pickle.dump(data_infos, f)
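# Example invocations (a sketch: the script filename is a placeholder, but the
# flags and defaults are the ones defined above):
#   python merge_infos.py --data_root ./data/openscene-v1.1 --split navsim
#   python merge_infos.py --data_root ./data/openscene-v1.1 --split mini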