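"""Merge per-log OpenScene/nuPlan metadata pickles into train/val info files.

For the 'navsim' split, logs and frames are additionally filtered with the
navtrain/navtest scene-filter YAMLs from the NavSim repo; the trainval/mini
splits get a deterministic 85/15 train/val split, and the test split is
merged as-is.
"""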
import argparse
import os
import pickle
import yaml
from typing import List
from tqdm import tqdm
parser = argparse.ArgumentParser()
parser.add_argument(
"--data_root",
type=str,
default="./data/openscene-v1.1",
help="root directory of raw carla data",
)
parser.add_argument(
"--split",
type=str,
default="navsim",
help="trainval/mini/navsim/test",
)
args = parser.parse_args()
def get_pkl_filelist(meta_data_dir: str) -> List[str]:
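    """Return the paths of all .pkl files directly under meta_data_dir."""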
meta_data_list = os.listdir(meta_data_dir)
meta_data_list = [
os.path.join(meta_data_dir, each)
for each in meta_data_list
if each.endswith(".pkl")
]
return meta_data_list
if __name__ == "__main__":
# OpenScenes/nuPlan/NavSim:
# mini_train: 43261 (43417 pre-cleaning) -> 6h
# mini_val: 8450 -> 1.17h
# val: 115564 (115733 pre-cleaning) -> 16h
# train: 605263 (607286 pre-cleaning) -> 84h
# trainval: 720827 -> 100.11h
# navtest: 12136 -> 1.69h
# navtrain: 102983 -> 14.3h
    # 85/15 train/val split for the standard nuPlan splits: mini_train/mini_val, trainval_train/trainval_val
if args.split in ['trainval', 'mini', 'test']:
# source data
meta_data_dir = os.path.join(args.data_root, "meta_datas_paradrive", args.split)
meta_data_list = get_pkl_filelist(meta_data_dir)
        # deterministic 85/15 split by list order (os.listdir is not shuffled)
if args.split == 'test':
test_paths = meta_data_list
val_paths = None
train_paths = None
else:
train_paths = meta_data_list[: int(len(meta_data_list) * 0.85)]
val_paths = meta_data_list[int(len(meta_data_list) * 0.85) :]
print(f"total log for {args.split}: {len(meta_data_list)}")
    elif args.split == 'navsim':
# all trainval data, use filter later
meta_data_dir = os.path.join(args.data_root, "meta_datas_paradrive_v2/trainval")
train_paths = get_pkl_filelist(meta_data_dir)
navtrain_filter = "navsim/navsim/planning/script/config/common/train_test_split/scene_filter/navtrain.yaml"
with open(navtrain_filter, 'r') as file:
navtrain_filter = yaml.safe_load(file)
log_filter_train = navtrain_filter['log_names']
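        # filter configs name the token list either 'tokens' or 'scenario_tokens'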
try:
scene_filter_train = navtrain_filter['tokens']
        except KeyError:
scene_filter_train = navtrain_filter['scenario_tokens']
# all test data, use filter later
meta_data_dir = os.path.join(args.data_root, "meta_datas_paradrive_v2/test")
val_paths = get_pkl_filelist(meta_data_dir)
navtest_filter = "navsim/navsim/planning/script/config/common/train_test_split/scene_filter/navtest.yaml"
with open(navtest_filter, 'r') as file:
navtest_filter = yaml.safe_load(file)
log_filter_test = navtest_filter['log_names']
try:
scene_filter_test = navtest_filter['tokens']
except KeyError:
scene_filter_test = navtest_filter['scenario_tokens']
if val_paths is None:
print(f"test log len: {len(test_paths)}")
else:
print(f"train log len: {len(train_paths)}")
print(f"val log len: {len(val_paths)}")
    save_dir = os.path.join(args.data_root, "paradrive_infos_v2")
    # create the output dir up front; the train-only path below also writes here
    os.makedirs(save_dir, exist_ok=True)
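    # note: the val portion is written with a "_test" suffix (for navsim it holds navtest frames)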
save_val = os.path.join(save_dir, f"nuplan_{args.split}_test.pkl")
# load and merge pkl files into train/val
# only take the infos for now, leave the mapping to be used later if needed
if not os.path.exists(save_val) and val_paths is not None:
data_infos = []
total_len = 0
        for file in tqdm(val_paths):
            with open(file, "rb") as f:
                tqdm.write(f'val: loading {file}')
data_tmp = pickle.load(f)["infos"]
total_len += len(data_tmp)
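                # ADD marks whether any frames from this log survive filtering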
ADD = False
# check if this log falls into the navtrain/navtest filter
if args.split == 'navsim':
log_name_tmp = data_tmp[0]['log_name']
if log_name_tmp not in log_filter_test:
continue
                    # expand the scene filter for this log: each filtered token
                    # also pulls in its surrounding context frames
                    history_frame_num = 3  # past context frames kept per filtered token (candidates: 3, 2, 1, 0)
                    future_frame_num = 0   # future context frames; none needed on the val/test side (candidates: 1-8)
scene_filter_expanded = set()
for idx, data_frame in enumerate(data_tmp):
if data_frame['token'] in scene_filter_test:
start_frame_idx = idx - history_frame_num
end_frame_idx = idx + future_frame_num
for i in range(start_frame_idx, end_frame_idx + 1):
if i < 0 or i >= len(data_tmp):
continue
scene_filter_expanded.add(data_tmp[i]['token'])
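                    # keep only the frames whose tokens fall in the expanded filter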
data_save = []
for data_frame in data_tmp:
token = data_frame['token']
if token in scene_filter_expanded:
ADD = True
data_save.append(data_frame)
else:
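                    # non-navsim splits keep every frame of the log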
ADD = True
data_save = data_tmp
if ADD:
data_infos.extend(data_save)
print(f"val info len before: {total_len}")
print(f"val info len after: {len(data_infos)}")
# save
with open(save_val, "wb") as f:
pickle.dump(data_infos, f)
else:
        print('skipped val: pickle already exists or this split has no val portion')
# for train/test split
save_train = os.path.join(save_dir, f"nuplan_{args.split}_train.pkl")
if not os.path.exists(save_train):
        # the test split has no train set; run the test logs through this
        # branch instead (they are saved as nuplan_test_train.pkl)
        if train_paths is None:
            train_paths = test_paths
data_infos = []
total_len = 0
for file in tqdm(train_paths):
with open(file, "rb") as f:
tqdm.write(f'train: loading {file}')
data_tmp = pickle.load(f)["infos"]
total_len += len(data_tmp)
ADD = False
# check if this log falls into the navtrain/navtest filter
if args.split == 'navsim':
log_name_tmp = data_tmp[0]['log_name']
if log_name_tmp not in log_filter_train:
continue
                    # expand the scene filter for this log, as on the val side
                    history_frame_num = 3  # past context frames kept per filtered token
                    future_frame_num = 8   # future context frames needed for training
scene_filter_expanded = set()
for idx, data_frame in enumerate(data_tmp):
if data_frame['token'] in scene_filter_train:
start_frame_idx = idx - history_frame_num
end_frame_idx = idx + future_frame_num
for i in range(start_frame_idx, end_frame_idx + 1):
if i < 0 or i >= len(data_tmp):
continue
scene_filter_expanded.add(data_tmp[i]['token'])
data_save = []
for data_frame in data_tmp:
token = data_frame['token']
if token in scene_filter_expanded:
ADD = True
data_save.append(data_frame)
else:
ADD = True
data_save = data_tmp
if ADD:
data_infos.extend(data_save)
print(f"train info len before: {total_len}")
print(f"train info len after: {len(data_infos)}")
# save
with open(save_train, "wb") as f:
pickle.dump(data_infos, f)