| | import glob |
| | import os, sys |
| | import json |
| | import numpy as np |
| | import torch |
| | import h5py |
| |
|
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--data-name", help="dataset name; used to derive --data-dir when that is omitted")
parser.add_argument("--data-dir", help="dataset root directory; its basename overrides --data-name")
parser.add_argument("--tgt-dir", help="output directory; defaults to --data-dir")

args = parser.parse_args()

# Derive whichever of data_name / data_dir was not given from the other.
if args.data_dir is None:
    # Bug fix: previously, omitting both flags silently produced the
    # nonsense path "/home/zgtang/None"; fail fast with a clear message.
    if args.data_name is None:
        parser.error("at least one of --data-name or --data-dir is required")
    args.data_dir = f"/home/zgtang/{args.data_name}"
else:
    args.data_name = os.path.basename(args.data_dir)

if args.tgt_dir is None:
    args.tgt_dir = args.data_dir
| |
|
def extract_scene_name(x, data_name_):
    """Return a scene identifier parsed from the file path *x*.

    Recognized layouts:
      - scannetpp paths      -> the constant 'scannetpp'
      - scannet paths        -> the path component containing 'scene'
      - habitat_sim paths    -> 'gibson' / 'mp3d', or the path component
                               whose 6th character is '-' (e.g. '00001-abcde')

    Raises NotImplementedError when no rule matches.

    NOTE(review): the original body computed a `_meta`-stripped copy of
    `data_name_` but never used it; the parameter is kept only for
    interface compatibility.
    """
    if "scannetpp" in x:
        return 'scannetpp'
    elif "scannet" in x:
        for part in x.split('/'):
            if 'scene' in part:
                return part
    elif "habitat_sim" in x:
        if "gibson" in x:
            return 'gibson'
        if "mp3d" in x:
            return "mp3d"
        for part in x.split('/'):
            # Bug fix: guard the index so short components (including the
            # empty string produced by a leading '/') no longer raise
            # IndexError before a matching component is reached.
            if len(part) > 5 and part[5] == "-":
                return part
    raise NotImplementedError
| | |
| | |
| | train_name_list = json.load(open("./data/train_name_list.json", 'r')) |
| |
|
def split_dps(x, dir_name):
    """Split datapoints *x* into train/test by scene name and dump both.

    Writes four files under *dir_name*:
      - dps_train_sample.json / dps_test_sample.json: a human-readable
        subsample (everything when the train split has <= 2000 entries,
        otherwise every 1000th entry),
      - dps_train.h5 / dps_test.h5: the full splits stored as gzip'd
        fixed-width JSON byte strings under the 'json_strs' dataset.

    Relies on the module-level `args.data_name` and `train_name_list`.
    A datapoint goes to train iff the scene name extracted from its first
    RGB path appears in `train_name_list`.
    """
    # Fix: exist_ok avoids the exists()-then-makedirs race.
    os.makedirs(dir_name, exist_ok=True)

    data_train, data_test = [], []
    for d in x:
        scene = extract_scene_name(d['rgb_list'][0], args.data_name)
        (data_train if scene in train_name_list else data_test).append(d)

    print('train', len(data_train), 'test', len(data_test))

    # Small datasets are sampled in full; large ones every 1000th entry.
    step = 1 if len(data_train) <= 2000 else 1000
    _dump_json_sample(data_train[::step], f"{dir_name}/dps_train_sample.json")
    _dump_json_sample(data_test[::step], f"{dir_name}/dps_test_sample.json")

    if data_train:
        print('dumping train')
        _dump_h5(data_train, f"{dir_name}/dps_train.h5")
    if data_test:
        print('dumping test')
        _dump_h5(data_test, f"{dir_name}/dps_test.h5")


def _dump_json_sample(data, path):
    """Write *data* as indented JSON; fixes the leaked handle from
    json.dump(..., open(path, 'w')) in the original."""
    with open(path, 'w') as f:
        json.dump(data, f, indent=4)


def _dump_h5(data, path):
    """Write *data* into an HDF5 file as fixed-width JSON byte strings
    (dataset 'json_strs', gzip compression)."""
    json_strs = [json.dumps(d) for d in data]
    max_len = max(map(len, json_strs))
    arr = np.array(json_strs, dtype='S' + str(max_len))
    with h5py.File(path, 'w') as f:
        f.create_dataset('json_strs', data=arr, compression='gzip')
| |
|
| |
|
def tuple_n_general_new(dir_name, tgt_name):
    """Collect datapoint metadata under *dir_name*, then split and dump it.

    Source preference order:
      1. {dir_name}/dps.h5   — pre-collected JSON strings,
      2. {dir_name}/dps.json — pre-collected JSON list,
      3. scan for '*extra.pt' checkpoints 2..7 directory levels deep and
         rebuild one datapoint dict per file via torch.load.

    NOTE(review): the h5/json branches dump into *dir_name* while the scan
    branch dumps into *tgt_name*; preserved as-is — confirm intentional.
    """
    h5_path = f"{dir_name}/dps.h5"
    if os.path.exists(h5_path):
        # Fixes: the file handle was leaked (and its variable shadowed by
        # the loop), and a leftover interactive print/input() debug loop
        # blocked non-interactive runs — both removed.
        with h5py.File(h5_path, 'r') as f:
            dps = [json.loads(s) for s in f['json_strs']]
        split_dps(dps, dir_name)
        return

    json_path = f"{dir_name}/dps.json"
    if os.path.exists(json_path):
        with open(json_path, 'r') as f:
            dps = json.load(f)
        split_dps(dps, dir_name)
        return

    # Glob '*extra.pt' files at increasing depth below dir_name.
    pattern = f"{dir_name}/*/*extra.pt"
    metadata_list = []
    for _ in range(6):
        metadata_list += glob.glob(pattern)
        pattern = pattern.replace("/*extra.pt", "/*/*extra.pt")
    # Bug fix: the original called sorted(metadata_list) and discarded the
    # result, so the list was never actually sorted.
    metadata_list.sort()
    print('tuple in sum', len(metadata_list))

    # Quick scene-count estimate from a 1% subsample (scene name is the
    # parent directory of each metadata file).
    scene_cnt = {}
    for path in metadata_list[::100]:
        scene = path.split('/')[-2]
        scene_cnt[scene] = scene_cnt.get(scene, 0) + 1
    print('scene in sum', len(scene_cnt))

    dps = []
    scene_cnt = {}
    cnt_failed = 0
    for idx, path in enumerate(metadata_list):
        try:
            meta = torch.load(path)

            dp = {'scene_name': path.split('/')[-2]}
            scene_cnt[dp['scene_name']] = scene_cnt.get(dp['scene_name'], 0) + 1

            dp['rgb_list'] = meta['rgb_list']
            dp['depth_list'] = meta['depth_list']
            if 'pose_raw_list' in meta:
                dp['pose_raw_list'] = meta['pose_raw_list']
            else:
                dp['pose_list'] = meta['pose_list']
            if "nv" in meta:
                dp['nv'] = meta['nv']
            if "intrinsic_raw" in meta:
                dp['intrinsic_raw'] = meta['intrinsic_raw']
            else:
                dp['intrinsic_list'] = meta['intrinsic_list']
            dp['C'] = np.round(np.array(meta['C']), 2).tolist()
            dps.append(dp)
            if idx % 20000 == 0:
                print('tuple collecting', len(scene_cnt), idx, cnt_failed)
        # Bug fix: narrowed the bare `except:` so KeyboardInterrupt and
        # SystemExit are no longer swallowed; unreadable/incomplete
        # checkpoints are still counted and skipped (best-effort kept).
        except Exception:
            cnt_failed += 1

    split_dps(dps, tgt_name)
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | tuple_n_general_new(args.data_dir, args.tgt_dir) |