File size: 8,116 Bytes
663494c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
import argparse
import os
import pickle
import yaml
from typing import Dict, List
from tqdm import tqdm

# Command-line interface for this stand-alone preprocessing script:
# where the raw OpenScene/nuPlan data lives and which split to merge.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--data_root",
    type=str,
    default="./data/openscene-v1.1",
    help="root directory of raw carla data",
)
parser.add_argument(
    "--split",
    type=str,
    default="navsim",
    help="trainval/mini/navsim/test",
)
# NOTE(review): parse_args() runs at module import time, not under the
# __main__ guard — importing this module with unrelated sys.argv will fail.
args = parser.parse_args()

def get_pkl_filelist(meta_data_dir: str) -> List[str]:
    """Return the full paths of all ``.pkl`` files directly under *meta_data_dir*.

    The result is sorted: ``os.listdir`` yields entries in arbitrary,
    filesystem-dependent order, and callers slice this list by percentage
    to build train/val splits — without sorting, the split membership
    would differ between runs and machines.

    Args:
        meta_data_dir: directory containing per-log metadata pickles.

    Returns:
        Sorted list of absolute/relative joined paths ending in ``.pkl``.
    """
    return sorted(
        os.path.join(meta_data_dir, name)
        for name in os.listdir(meta_data_dir)
        if name.endswith(".pkl")
    )

def load_scene_filter(filter_path: str):
    """Parse a navsim scene-filter yaml file.

    Returns ``(log_names, tokens)``.  Newer filter files store the token
    list under ``tokens``; older ones use ``scenario_tokens``, so fall
    back on a KeyError (the original code used a bare ``except`` here,
    which would also swallow unrelated errors such as a TypeError on a
    malformed file).
    """
    with open(filter_path, "r") as f:
        cfg = yaml.safe_load(f)
    try:
        tokens = cfg["tokens"]
    except KeyError:
        tokens = cfg["scenario_tokens"]
    return cfg["log_names"], tokens


def expand_scene_tokens(
    frames: List[Dict],
    tokens,
    history_frame_num: int,
    future_frame_num: int,
) -> set:
    """Return the set of frame tokens to keep for one log.

    Every frame whose ``token`` appears in *tokens* contributes itself plus
    up to *history_frame_num* preceding and *future_frame_num* following
    frames, clipped to the log boundaries.
    """
    keep = set()
    n = len(frames)
    for idx, frame in enumerate(frames):
        if frame["token"] in tokens:
            lo = max(idx - history_frame_num, 0)
            hi = min(idx + future_frame_num, n - 1)
            for i in range(lo, hi + 1):
                keep.add(frames[i]["token"])
    return keep


def merge_pkl_infos(paths, split, log_filter, token_filter,
                    history_frame_num, future_frame_num, log_fn):
    """Load and merge the ``infos`` entries of every pickle in *paths*.

    When *split* == 'navsim', logs whose name is not in *log_filter* are
    dropped and the remaining frames are restricted to the token set
    expanded by :func:`expand_scene_tokens`.

    Args:
        paths: iterable of pickle file paths.
        split: the --split value; only 'navsim' triggers filtering.
        log_filter: collection of log names to keep (navsim only).
        token_filter: collection of scene tokens to expand (navsim only).
        history_frame_num: frames of history kept around each token.
        future_frame_num: frames of future kept around each token.
        log_fn: callable used to report progress for each file.

    Returns:
        ``(merged_infos, total_len_before_filtering)``.
    """
    data_infos = []
    total_len = 0
    for pkl_path in paths:
        log_fn(pkl_path)
        # NOTE: pickle.load on trusted, locally generated metadata only.
        with open(pkl_path, "rb") as f:
            frames = pickle.load(f)["infos"]
        total_len += len(frames)
        if split == 'navsim':
            # Guard against an empty log (original code indexed frames[0]
            # unconditionally) and drop logs outside the navsim filter.
            if not frames or frames[0]['log_name'] not in log_filter:
                continue
            keep = expand_scene_tokens(
                frames, token_filter, history_frame_num, future_frame_num
            )
            data_infos.extend(fr for fr in frames if fr['token'] in keep)
        else:
            data_infos.extend(frames)
    return data_infos, total_len


if __name__ == "__main__":

    # OpenScenes/nuPlan/NavSim frame counts (post- and pre-cleaning):
    # mini_train: 43261 (43417 pre-cleaning) -> 6h
    # mini_val: 8450 -> 1.17h
    # val: 115564 (115733 pre-cleaning) -> 16h
    # train: 605263 (607286 pre-cleaning) -> 84h
    # trainval: 720827 -> 100.11h
    # navtest: 12136 -> 1.69h
    # navtrain: 102983 -> 14.3h

    # Initialize all path sets so an unrecognized --split fails loudly
    # below instead of raising NameError on first use.
    test_paths = val_paths = train_paths = None
    log_filter_train = token_filter_train = None
    log_filter_test = token_filter_test = None

    # random split for the standard nuPlan split mini_train/mini_val,
    # trainval_train/trainval_val
    if args.split in ['trainval', 'mini', 'test']:
        # source data
        meta_data_dir = os.path.join(args.data_root, "meta_datas_paradrive", args.split)
        meta_data_list = get_pkl_filelist(meta_data_dir)

        # random split based on percentage (85/15); 'test' is kept whole
        if args.split == 'test':
            test_paths = meta_data_list
        else:
            split_idx = int(len(meta_data_list) * 0.85)
            train_paths = meta_data_list[:split_idx]
            val_paths = meta_data_list[split_idx:]
        print(f"total log for {args.split}: {len(meta_data_list)}")

    elif args.split == 'navsim':
        # all trainval data, filtered later by the navtrain scene filter
        meta_data_dir = os.path.join(args.data_root, "meta_datas_paradrive_v2/trainval")
        train_paths = get_pkl_filelist(meta_data_dir)
        log_filter_train, token_filter_train = load_scene_filter(
            "navsim/navsim/planning/script/config/common/train_test_split/scene_filter/navtrain.yaml"
        )

        # all test data, filtered later by the navtest scene filter
        meta_data_dir = os.path.join(args.data_root, "meta_datas_paradrive_v2/test")
        val_paths = get_pkl_filelist(meta_data_dir)
        log_filter_test, token_filter_test = load_scene_filter(
            "navsim/navsim/planning/script/config/common/train_test_split/scene_filter/navtest.yaml"
        )

    else:
        raise ValueError(f"unknown split: {args.split}")

    if val_paths is None:
        print(f"test log len: {len(test_paths)}")
    else:
        print(f"train log len: {len(train_paths)}")
        print(f"val log len: {len(val_paths)}")

    save_dir = os.path.join(args.data_root, "paradrive_infos_v2")
    # Create the output directory up front: the original code only created
    # it inside the val branch, so the 'test' split (val_paths is None)
    # crashed with FileNotFoundError when saving the train pickle.
    os.makedirs(save_dir, exist_ok=True)

    # load and merge pkl files into train/val
    # only take the infos for now, leave the mapping to be used later if needed
    save_val = os.path.join(save_dir, f"nuplan_{args.split}_test.pkl")
    if not os.path.exists(save_val) and val_paths is not None:
        # val/test side: navsim keeps 3 history frames and no future frames
        data_infos, total_len = merge_pkl_infos(
            val_paths, args.split, log_filter_test, token_filter_test,
            history_frame_num=3, future_frame_num=0,
            log_fn=lambda p: print(f'val: loading {p}'),
        )
        print(f"val info len before: {total_len}")
        print(f"val info len after: {len(data_infos)}")
        with open(save_val, "wb") as f:
            pickle.dump(data_infos, f)
    else:
        print('skipped because val is saved before')

    # for train/test split
    save_train = os.path.join(save_dir, f"nuplan_{args.split}_train.pkl")
    if not os.path.exists(save_train):
        # for the 'test' split, everything goes into the "train" pickle
        if train_paths is None:
            train_paths = test_paths

        # train side: navsim keeps 3 history frames and 8 future frames
        data_infos, total_len = merge_pkl_infos(
            tqdm(train_paths), args.split, log_filter_train, token_filter_train,
            history_frame_num=3, future_frame_num=8,
            log_fn=lambda p: tqdm.write(f'train: loading {p}'),
        )
        print(f"train info len before: {total_len}")
        print(f"train info len after: {len(data_infos)}")

        # save
        with open(save_train, "wb") as f:
            pickle.dump(data_infos, f)