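"""Merge per-log OpenScene/nuPlan meta-data pickles into single train/val info
files, optionally restricting the frames to the NavSim navtrain/navtest scene
filters (with a few frames of history/future context kept around each token)."""
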
import argparse
import os
import pickle
from typing import List

import yaml
from tqdm import tqdm

parser = argparse.ArgumentParser()
parser.add_argument(
    "--data_root",
    type=str,
    default="./data/openscene-v1.1",
    help="root directory of the raw OpenScene data",
)
parser.add_argument(
    "--split",
    type=str,
    default="navsim",
    help="trainval/mini/navsim/test",
)
args = parser.parse_args()
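
# Example invocation (the script filename here is illustrative, not from the source tree):
#   python create_paradrive_infos.py --data_root ./data/openscene-v1.1 --split navsim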


def get_pkl_filelist(meta_data_dir: str) -> List[str]:
    """Return the paths of all .pkl files directly under meta_data_dir."""
    meta_data_list = os.listdir(meta_data_dir)
    meta_data_list = [
        os.path.join(meta_data_dir, each)
        for each in meta_data_list
        if each.endswith(".pkl")
    ]
    return meta_data_list
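
# Expected on-disk layout, inferred from the paths joined below:
#   <data_root>/meta_datas_paradrive/<split>/*.pkl                 (trainval/mini/test)
#   <data_root>/meta_datas_paradrive_v2/{trainval,test}/*.pkl      (navsim)
#   <data_root>/paradrive_infos_v2/nuplan_<split>_{train,test}.pkl (outputs)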


if __name__ == "__main__":
    # OpenScene/nuPlan/NavSim split sizes and approximate processing times:
    #   mini_train: 43261  (43417 pre-cleaning)  -> 6h
    #   mini_val:   8450                         -> 1.17h
    #   val:        115564 (115733 pre-cleaning) -> 16h
    #   train:      605263 (607286 pre-cleaning) -> 84h
    #   trainval:   720827                       -> 100.11h
    #   navtest:    12136                        -> 1.69h
    #   navtrain:   102983                       -> 14.3h
    # Percentage split for the standard nuPlan splits mini_train/mini_val and
    # trainval_train/trainval_val.
    if args.split in ["trainval", "mini", "test"]:
        # source data
        meta_data_dir = os.path.join(args.data_root, "meta_datas_paradrive", args.split)
        meta_data_list = get_pkl_filelist(meta_data_dir)
        # 85/15 train/val split over the directory listing
        # (os.listdir order, so not actually shuffled)
        if args.split == "test":
            test_paths = meta_data_list
            val_paths = None
            train_paths = None
        else:
            train_paths = meta_data_list[: int(len(meta_data_list) * 0.85)]
            val_paths = meta_data_list[int(len(meta_data_list) * 0.85):]
        print(f"total log for {args.split}: {len(meta_data_list)}")
    elif args.split in ["navsim"]:
        # all trainval data; restricted below by the navtrain scene filter
        meta_data_dir = os.path.join(args.data_root, "meta_datas_paradrive_v2/trainval")
        train_paths = get_pkl_filelist(meta_data_dir)
        navtrain_filter_path = "navsim/navsim/planning/script/config/common/train_test_split/scene_filter/navtrain.yaml"
        with open(navtrain_filter_path, "r") as file:
            navtrain_filter = yaml.safe_load(file)
        log_filter_train = navtrain_filter["log_names"]
        try:
            scene_filter_train = navtrain_filter["tokens"]
        except KeyError:
            scene_filter_train = navtrain_filter["scenario_tokens"]
        # all test data; restricted below by the navtest scene filter
        meta_data_dir = os.path.join(args.data_root, "meta_datas_paradrive_v2/test")
        val_paths = get_pkl_filelist(meta_data_dir)
        navtest_filter_path = "navsim/navsim/planning/script/config/common/train_test_split/scene_filter/navtest.yaml"
        with open(navtest_filter_path, "r") as file:
            navtest_filter = yaml.safe_load(file)
        log_filter_test = navtest_filter["log_names"]
        try:
            scene_filter_test = navtest_filter["tokens"]
        except KeyError:
            scene_filter_test = navtest_filter["scenario_tokens"]
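        # Note (inferred from the reads above rather than from NavSim docs): each
        # scene-filter YAML provides a `log_names` list plus a token list named
        # either `tokens` or `scenario_tokens` depending on the config version,
        # hence the try/except fallbacks.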

    if val_paths is None:
        print(f"test log len: {len(test_paths)}")
    else:
        print(f"train log len: {len(train_paths)}")
        print(f"val log len: {len(val_paths)}")

    save_dir = os.path.join(args.data_root, "paradrive_infos_v2")
    # make sure the output directory exists before either file is written
    os.makedirs(save_dir, exist_ok=True)
    # note: the held-out (val) logs are saved under the *_test.pkl name
    save_val = os.path.join(save_dir, f"nuplan_{args.split}_test.pkl")

    # load and merge pkl files into train/val
    # only take the infos for now; leave the mapping to be used later if needed
    if not os.path.exists(save_val) and val_paths is not None:
        data_infos = []
        total_len = 0
        for file in val_paths:
            with open(file, "rb") as f:
                print(f"val: loading {file}")
                data_tmp = pickle.load(f)["infos"]
            total_len += len(data_tmp)
            ADD = False
            # check if this log falls into the navtest filter
            if args.split == "navsim":
                log_name_tmp = data_tmp[0]["log_name"]
                if log_name_tmp not in log_filter_test:
                    continue
                # expand the scene filter for this log: around each filtered
                # token, also keep the surrounding history/future frames
                history_frame_num = 3  # 3, 2, 1, 0
                future_frame_num = 0  # 1, 2, 3, 4, 5, 6, 7, 8
                scene_filter_expanded = set()
                for idx, data_frame in enumerate(data_tmp):
                    if data_frame["token"] in scene_filter_test:
                        start_frame_idx = idx - history_frame_num
                        end_frame_idx = idx + future_frame_num
                        for i in range(start_frame_idx, end_frame_idx + 1):
                            if i < 0 or i >= len(data_tmp):
                                continue
                            scene_filter_expanded.add(data_tmp[i]["token"])
                data_save = []
                for data_frame in data_tmp:
                    token = data_frame["token"]
                    if token in scene_filter_expanded:
                        ADD = True
                        data_save.append(data_frame)
            else:
                ADD = True
                data_save = data_tmp
            if ADD:
                data_infos.extend(data_save)
        print(f"val info len before: {total_len}")
        print(f"val info len after: {len(data_infos)}")
        # save
        with open(save_val, "wb") as f:
            pickle.dump(data_infos, f)
    else:
        print("skipped because val was saved before")

    # for the train (or, for the test split, test) file
    save_train = os.path.join(save_dir, f"nuplan_{args.split}_train.pkl")
    if not os.path.exists(save_train):
        # for the test split, the test logs are written to the *_train.pkl file
        if train_paths is None:
            train_paths = test_paths
        data_infos = []
        total_len = 0
        for file in tqdm(train_paths):
            with open(file, "rb") as f:
                tqdm.write(f"train: loading {file}")
                data_tmp = pickle.load(f)["infos"]
            total_len += len(data_tmp)
            ADD = False
            # check if this log falls into the navtrain filter
            if args.split == "navsim":
                log_name_tmp = data_tmp[0]["log_name"]
                if log_name_tmp not in log_filter_train:
                    continue
                # expand the scene filter for this log; unlike the val pass,
                # future frames are kept as well (future_frame_num = 8)
                history_frame_num = 3  # 3, 2, 1, 0
                future_frame_num = 8  # 1, 2, 3, 4, 5, 6, 7, 8
                scene_filter_expanded = set()
                for idx, data_frame in enumerate(data_tmp):
                    if data_frame["token"] in scene_filter_train:
                        start_frame_idx = idx - history_frame_num
                        end_frame_idx = idx + future_frame_num
                        for i in range(start_frame_idx, end_frame_idx + 1):
                            if i < 0 or i >= len(data_tmp):
                                continue
                            scene_filter_expanded.add(data_tmp[i]["token"])
                data_save = []
                for data_frame in data_tmp:
                    token = data_frame["token"]
                    if token in scene_filter_expanded:
                        ADD = True
                        data_save.append(data_frame)
            else:
                ADD = True
                data_save = data_tmp
            if ADD:
                data_infos.extend(data_save)
        print(f"train info len before: {total_len}")
        print(f"train info len after: {len(data_infos)}")
        # save
        with open(save_train, "wb") as f:
            pickle.dump(data_infos, f)
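

# A minimal downstream-loading sketch (illustrative, not part of this pipeline;
# the exact file name depends on the --split used above):
#
#   with open("./data/openscene-v1.1/paradrive_infos_v2/nuplan_navsim_train.pkl", "rb") as f:
#       infos = pickle.load(f)
#   # each entry is a per-frame info dict with at least "token" and "log_name" keys
#   print(len(infos), infos[0]["token"], infos[0]["log_name"])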