"""Drive the per-sequence processing pipeline by launching shell commands.

The script lists the sequences under ``data/<config>/processed`` and, for each
enabled step, fans the corresponding command line out over a pool of worker
processes (one worker per entry in ``args.devices``).
"""
import os
import pickle  # noqa: F401  (only referenced by the disabled snippet in main)
import shlex
import time
from multiprocessing import Pool
from pathlib import Path  # noqa: F401  (only referenced by the disabled snippet in main)

from tqdm import tqdm

from utils.config import get_args


# Dataset name -> split file listing one sequence name per line.
_SPLIT_FILES = {
    'scannet': 'splits/scannet.txt',
    'scannetpp': 'splits/scannetpp.txt',
    'scannetpp_dust3r_posed': 'splits/scannetpp.txt',
    'scannetpp_dust3r_filtered_depth': 'splits/scannetpp.txt',
    'scannetpp_mapanything_posed': 'splits/scannetpp.txt',
    'scannetpp_dust3r_unposed': 'splits/scannetpp.txt',
    'matterport3d': 'splits/matterport3d.txt',
}


def _run_command(command):
    """Run one shell command and return ``(command, exit_status)``.

    Defined at module level so that it can be pickled and sent to pool workers.
    """
    return command, os.system(command)


def execute_commands(commands_list, command_type, process_num):
    """Run shell commands in parallel and report the ones that failed.

    Args:
        commands_list: Shell command strings to execute.
        command_type: Human-readable step name used in the progress messages.
        process_num: Number of worker processes in the pool.

    Raises:
        ValueError: Propagated from ``multiprocessing.Pool`` when there are
            commands to run but ``process_num`` is smaller than 1.
    """
    print('====> Start', command_type)
    failures = []
    # Nothing to run: do not pay for spawning a pool.
    if commands_list:
        pool = Pool(process_num)
        try:
            finished = pool.imap_unordered(_run_command, commands_list)
            for command, status in tqdm(finished, total=len(commands_list)):
                if status != 0:
                    failures.append((command, status))
            pool.close()
            pool.join()
        finally:
            # Make sure no worker outlives this call, even after an exception.
            pool.terminate()
    for command, status in failures:
        print(f'[warning] command exited with status {status}: {command}')
    print('====> Finish', command_type)


def get_seq_name_list(dataset):
    """Return the sequence names listed in the split file of ``dataset``.

    Args:
        dataset: One of the keys of ``_SPLIT_FILES``.

    Returns:
        List of sequence names with surrounding whitespace removed; blank
        lines in the split file are skipped.

    Raises:
        ValueError: If ``dataset`` has no associated split file.
    """
    try:
        file_path = _SPLIT_FILES[dataset]
    except KeyError:
        known = ', '.join(sorted(_SPLIT_FILES))
        raise ValueError(
            f'Unknown dataset {dataset!r}; expected one of: {known}'
        ) from None
    with open(file_path, 'r') as f:
        stripped_lines = [line.strip() for line in f]
    return [seq_name for seq_name in stripped_lines if seq_name]


def parallel_compute(general_command, command_name, resource_type, cuda_list, seq_name_list):
    """Build one command per device (or per sequence) and run them in parallel.

    Args:
        general_command: Command template. In ``'cuda'`` mode it must contain
            a single ``%s`` placeholder that receives the ``+``-joined
            sequence names of one shard; in ``'cpu'`` mode
            ``--seq_name <name>`` is appended to it.
        command_name: Step name forwarded to ``execute_commands``.
        resource_type: ``'cuda'`` to shard the sequences round-robin over the
            devices, ``'cpu'`` to launch one command per sequence.
        cuda_list: Device ids. Its length is also the worker pool size, in
            both modes.
        seq_name_list: Sequence names to process.

    Raises:
        ValueError: If ``cuda_list`` is empty or ``resource_type`` is unknown.
    """
    cuda_num = len(cuda_list)
    if cuda_num == 0:
        raise ValueError('cuda_list must contain at least one device id')

    if resource_type == 'cuda':
        commands = []
        for shard_index, cuda_id in enumerate(cuda_list):
            # Round-robin sharding: device i handles sequences i, i + n, ...
            shard = seq_name_list[shard_index::cuda_num]
            if len(shard) == 0:
                continue
            # Quote so that unusual directory names cannot break the shell line.
            joined_names = shlex.quote('+'.join(shard))
            commands.append(
                f'CUDA_VISIBLE_DEVICES={cuda_id} {general_command % joined_names}'
            )
    elif resource_type == 'cpu':
        commands = [
            f'{general_command} --seq_name {shlex.quote(str(seq_name))}'
            for seq_name in seq_name_list
        ]
    else:
        raise ValueError(
            f"Unknown resource_type {resource_type!r}; expected 'cuda' or 'cpu'"
        )
    execute_commands(commands, command_name, cuda_num)


def get_label_text_feature(cuda_id):
    """Extract the label text features unless the feature file already exists.

    Args:
        cuda_id: Value exported as ``CUDA_VISIBLE_DEVICES`` for the command.
    """
    label_text_feature_path = 'data/text_features/matterport3d.npy'
    if os.path.exists(label_text_feature_path):
        return
    # NOTE(review): "featrues" is spelled this way in the module path; it is
    # presumably the real file name, so confirm before "correcting" it.
    command = f'CUDA_VISIBLE_DEVICES={cuda_id} python -m semantics.extract_label_featrues'
    os.system(command)


def main(args):
    """Run the enabled pipeline steps for every sequence under the data root.

    Args:
        args: Parsed arguments; ``devices``, ``config`` and
            ``cropformer_path`` are read here.

    Raises:
        RuntimeError: If the processed-data directory contains no entries.
    """
    cuda_list = args.devices
    config = args.config
    root = f'data/{config}/processed'

    # The next three values are only referenced by the disabled steps below;
    # they are kept so those steps can be re-enabled without further edits.
    dataset = "scannetpp"
    cropformer_path = args.cropformer_path
    image_path_pattern = 'color/*.jpg'
    # stride = 10
    # gt = 'data/scannet_dust3r_posed/gt'

    t0 = time.time()
    # seq_name_list = get_seq_name_list(config)
    # Sorted so that the sequence-to-device assignment is reproducible;
    # os.listdir returns entries in arbitrary order.
    seq_name_list = sorted(os.listdir(root))
    if not seq_name_list:
        # Fail early with a clear message instead of a ZeroDivisionError in
        # the per-scene averages below.
        raise RuntimeError(f'No sequences found under {root}')

    # val_path = Path("../") / "OKNO/data/arkitscenes/arkitscenes_offline_infos_val.pkl"
    # out_dir = Path("data/arkit_vggt/processed")
    # with open(val_path, "rb") as f:
    #     data_list = pickle.load(f)['data_list']
    # val_scenes = [scene["lidar_points"]["lidar_path"] for scene in data_list][:2500]
    # def extract_name(item):
    #     return item.split("_")[0]
    # val_scenes = set([extract_name(scene) for scene in val_scenes])
    # seq_name_list = list(seq_name_list.intersection(val_scenes))

    # seq_name_list = [
    #     "scene0011_00", "scene0011_01", "scene0015_00", "scene0019_00", "scene0019_01",
    #     "scene0025_00", "scene0025_01", "scene0025_02", "scene0030_00", "scene0030_01",
    #     "scene0030_02", "scene0046_00", "scene0046_01", "scene0046_02", "scene0050_00",
    #     "scene0050_01", "scene0050_02", "scene0063_00", "scene0064_00", "scene0064_01",
    #     "scene0077_00", "scene0077_01", "scene0081_00", "scene0081_01", "scene0081_02",
    #     "scene0084_00", "scene0084_01", "scene0084_02", "scene0086_00", "scene0086_01",
    #     "scene0086_02", "scene0088_00", "scene0088_01", "scene0088_02", "scene0088_03",
    #     "scene0095_00", "scene0095_01", "scene0100_00", "scene0100_01", "scene0100_02",
    #     "scene0131_00", "scene0131_01", "scene0131_02", "scene0139_00", "scene0144_00",
    #     "scene0144_01", "scene0146_00", "scene0146_01", "scene0146_02", "scene0149_00",
    #     "scene0153_00", "scene0153_01", "scene0164_00", "scene0164_01", "scene0164_02",
    #     "scene0164_03", "scene0169_00", "scene0169_01", "scene0187_00", "scene0187_01",
    #     "scene0193_00", "scene0193_01", "scene0196_00", "scene0203_00", "scene0203_01",
    #     "scene0203_02", "scene0207_00", "scene0207_01", "scene0207_02", "scene0208_00",
    #     "scene0217_00", "scene0221_00", "scene0221_01", "scene0222_00", "scene0222_01",
    #     "scene0231_00", "scene0231_01", "scene0231_02", "scene0246_00", "scene0249_00",
    #     "scene0251_00", "scene0256_00", "scene0256_01", "scene0256_02", "scene0257_00",
    #     "scene0277_00", "scene0277_01", "scene0277_02", "scene0278_00", "scene0278_01",
    #     "scene0300_00", "scene0300_01", "scene0304_00", "scene0307_00", "scene0307_01",
    #     "scene0307_02", "scene0314_00", "scene0316_00", "scene0328_00", "scene0329_00",
    #     "scene0329_01", "scene0329_02", "scene0334_00", "scene0334_01", "scene0334_02",
    #     "scene0338_00", "scene0338_01", "scene0338_02", "scene0342_00", "scene0343_00",
    #     "scene0351_00", "scene0351_01", "scene0353_00", "scene0353_01", "scene0353_02",
    #     "scene0354_00", "scene0355_00", "scene0355_01", "scene0356_00", "scene0356_01",
    #     "scene0356_02", "scene0357_00", "scene0357_01", "scene0377_00", "scene0377_01",
    #     "scene0377_02", "scene0378_00", "scene0378_01", "scene0378_02", "scene0382_00",
    #     "scene0382_01", "scene0389_00", "scene0406_00", "scene0406_01", "scene0406_02",
    #     "scene0412_00", "scene0412_01", "scene0414_00", "scene0423_00", "scene0423_01",
    #     "scene0423_02", "scene0426_00", "scene0426_01", "scene0426_02", "scene0426_03",
    #     "scene0427_00", "scene0430_00", "scene0430_01", "scene0432_00", "scene0432_01",
    #     "scene0435_00", "scene0435_01", "scene0435_02", "scene0435_03", "scene0441_00",
    #     "scene0458_00", "scene0458_01", "scene0461_00", "scene0462_00", "scene0474_00",
    #     "scene0474_01", "scene0474_02", "scene0474_03", "scene0474_04", "scene0474_05",
    #     "scene0488_00", "scene0488_01", "scene0490_00", "scene0494_00", "scene0496_00",
    #     "scene0500_00", "scene0500_01", "scene0518_00", "scene0527_00", "scene0535_00",
    #     "scene0549_00", "scene0549_01", "scene0550_00", "scene0552_00", "scene0552_01",
    #     "scene0553_00", "scene0553_01", "scene0553_02", "scene0558_00", "scene0558_01",
    #     "scene0558_02", "scene0559_00", "scene0559_01", "scene0559_02", "scene0565_00",
    #     "scene0568_00", "scene0568_01", "scene0568_02", "scene0574_00", "scene0574_01",
    #     "scene0574_02", "scene0575_00", "scene0575_01", "scene0575_02", "scene0578_00",
    #     "scene0578_01", "scene0578_02", "scene0580_00", "scene0580_01", "scene0583_00",
    #     "scene0583_01", "scene0583_02", "scene0591_00", "scene0591_01", "scene0591_02",
    #     "scene0593_00", "scene0593_01", "scene0595_00", "scene0598_00", "scene0598_01",
    #     "scene0598_02", "scene0599_00", "scene0599_01", "scene0599_02", "scene0606_00",
    #     "scene0606_01", "scene0606_02", "scene0607_00", "scene0607_01", "scene0608_00",
    #     "scene0608_01", "scene0608_02", "scene0609_00", "scene0609_01", "scene0609_02",
    #     "scene0609_03", "scene0616_00", "scene0616_01", "scene0618_00", "scene0621_00",
    #     "scene0629_00", "scene0629_01", "scene0629_02", "scene0633_00", "scene0633_01",
    #     "scene0643_00", "scene0644_00", "scene0645_00", "scene0645_01", "scene0645_02",
    #     "scene0647_00", "scene0647_01", "scene0648_00", "scene0648_01", "scene0651_00",
    #     "scene0651_01", "scene0651_02", "scene0652_00", "scene0653_00", "scene0653_01",
    #     "scene0655_00", "scene0655_01", "scene0655_02", "scene0658_00", "scene0660_00",
    #     "scene0663_00", "scene0663_01", "scene0663_02", "scene0664_00", "scene0664_01",
    #     "scene0664_02", "scene0665_00", "scene0665_01", "scene0670_00", "scene0670_01",
    #     "scene0671_00", "scene0671_01", "scene0678_00", "scene0678_01", "scene0678_02",
    #     "scene0684_00", "scene0684_01", "scene0685_00", "scene0685_01", "scene0685_02",
    #     "scene0686_00", "scene0686_01", "scene0686_02", "scene0689_00", "scene0690_00",
    #     "scene0690_01", "scene0693_00", "scene0693_01", "scene0693_02", "scene0695_00",
    #     "scene0695_01", "scene0695_02", "scene0695_03", "scene0696_00", "scene0696_01",
    #     "scene0696_02", "scene0697_00", "scene0697_01", "scene0697_02", "scene0697_03",
    #     "scene0699_00", "scene0700_00", "scene0700_01", "scene0700_02", "scene0701_00",
    #     "scene0701_01", "scene0701_02", "scene0702_00", "scene0702_01", "scene0702_02",
    #     "scene0704_00", "scene0704_01",
    # ]
    # print('There are %d scenes' % len(seq_name_list))

    scene_num = len(seq_name_list)

    # Step 1: use Cropformer to get 2D instance masks for all sequences.
    # parallel_compute(f'python third_party/detectron2/projects/CropFormer/demo_cropformer/mask_predict.py --config-file third_party/detectron2/projects/CropFormer/configs/entityv2/entity_segmentation/mask2former_hornet_3x.yaml --root {root} --image_path_pattern {image_path_pattern} --dataset {args.dataset} --seq_name_list %s --opts MODEL.WEIGHTS {cropformer_path}', 'predict mask', 'cuda', cuda_list, seq_name_list)
    t1 = time.time()
    # While Step 1 is disabled, this interval only covers listing the sequences.
    print('finish predict mask, cropformer time: ', t1 - t0, 'sec')
    print('avg per scene time: ', (t1 - t0) / scene_num, 'sec')

    # Step 2: Mask clustering using our proposed method.
    parallel_compute(f'python main.py --config {config} --seq_name_list %s', 'mask clustering', 'cuda', cuda_list, seq_name_list)
    t2 = time.time()
    print('finish mask clustering, mask clustering time: ', t2 - t1)
    print('avg per scene time: ', (t2 - t1) / scene_num)

    # Step 3: Evaluate the class-agnostic results.
    # os.system(f'python -m evaluation.evaluate --pred_path data/prediction/{config}_class_agnostic --gt_path {gt} --dataset {dataset} --no_class')

    # Step 4: Get the open-vocabulary semantic features for each 2D masks.
    parallel_compute(f'python -m semantics.get_open-voc_features --config {config} --seq_name_list %s', 'get open-vocabulary semantic features using CLIP', 'cuda', cuda_list, seq_name_list)

    # Step 5: Get the text CLIP features for each label.
    # get_label_text_feature(cuda_list[0])

    # Step 6: Get labels for each 3D instances.
    parallel_compute(f'python -m semantics.open-voc_query --config {config}', 'get text labels', 'cpu', cuda_list, seq_name_list)
    t3 = time.time()
    # This interval spans Steps 4-6, not only the label query.
    print('finish get text labels, get text labels time: ', t3 - t2)
    print('avg per scene time: ', (t3 - t2) / scene_num)

    # Step 7: Evaluate the class-aware results.
    # os.system(f'python -m evaluation.evaluate --pred_path data/prediction/{config} --gt_path {gt} --dataset {dataset}')

    total_time = time.time() - t0
    print('total time', total_time//60, 'min')
    print('Average time', total_time / scene_num, 'sec')


if __name__ == '__main__':
    args = get_args()
    main(args)