"""Export per-point ground-truth labels for ScanNet scenes.

For every scene in the split file, the over-segmentation (.segs.json) and the
instance aggregation (.aggregation.json) files are combined into a single
<scene_id>.txt file in which each point is encoded as
label_id * 1000 + instance_id + 1.
"""
from tqdm import tqdm
import os
import argparse
import json
from concurrent.futures import ProcessPoolExecutor
from itertools import repeat

import numpy as np
import pandas as pd

import sys
sys.path.append('../..')
from evaluation.constants import SCANNET_IDS

raw_data_dir = '../../data/scannet/raw/scans'
gt_dir = '../../data/scannet/gt'
label_map_file = '../../data/scannet/raw/raw/scannetv2-labels.combined.tsv'
split_file_path = '../../splits/scannet.txt'

CLOUD_FILE_PFIX = '_vh_clean_2'
SEGMENTS_FILE_PFIX = '.0.010000.segs.json'
AGGREGATIONS_FILE_PFIX = '.aggregation.json'


def export_gt(filename, label_ids, instance_ids):
    # Encode semantic and instance labels into a single integer per point
    gt_data = label_ids * 1000 + instance_ids + 1
    np.savetxt(filename, gt_data, fmt='%d')


# Map the raw category id to the point cloud
def point_indices_from_group(seg_indices, group, labels_pd):
    group_segments = np.array(group['segments'])
    label = group['label']

    # Map the category name to its id
    label_ids = labels_pd[labels_pd['raw_category'] == label]['id']
    label_id = int(label_ids.iloc[0]) if len(label_ids) > 0 else 0

    # Only keep valid categories; everything else is mapped to 0 (unlabelled)
    if label_id not in SCANNET_IDS:
        label_id = 0

    # Get the points whose segment indices are in the group's segment list
    point_IDs = np.where(np.isin(seg_indices, group_segments))

    return point_IDs[0], label_id


def handle_process(scene_path, output_path, labels_pd):
    scene_id = scene_path.split('/')[-1]
    segments_file = os.path.join(scene_path, f'{scene_id}{CLOUD_FILE_PFIX}{SEGMENTS_FILE_PFIX}')
    aggregations_file = os.path.join(scene_path, f'{scene_id}{AGGREGATIONS_FILE_PFIX}')
    output_gt_file = os.path.join(output_path, f'{scene_id}.txt')

    # Load segments file
    with open(segments_file) as f:
        segments = json.load(f)
        seg_indices = np.array(segments['segIndices'])

    # Load aggregations file
    with open(aggregations_file) as f:
        aggregation = json.load(f)
        seg_groups = np.array(aggregation['segGroups'])

    # Generate new labels
    labelled_pc = np.zeros((len(seg_indices), 1))
    instance_ids = np.zeros((len(seg_indices), 1))
    for group in seg_groups:
        p_inds, label_id = point_indices_from_group(seg_indices, group, labels_pd)
        labelled_pc[p_inds] = label_id
        instance_ids[p_inds] = group['id'] + 1

    labelled_pc = labelled_pc.astype(int)
    instance_ids = instance_ids.astype(int)

    export_gt(output_gt_file, labelled_pc, instance_ids)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_workers', default=16, type=int, help='The number of parallel workers')
    config = parser.parse_args()

    # Load label map
    labels_pd = pd.read_csv(label_map_file, sep='\t', header=0)

    # Load the scene ids for the split
    with open(split_file_path) as val_file:
        val_scenes = val_file.read().splitlines()

    # Build scene paths
    scene_paths = [os.path.join(raw_data_dir, scene) for scene in val_scenes]

    os.makedirs(gt_dir, exist_ok=True)

    # Preprocess data in parallel, showing progress with tqdm
    print('Processing scenes...')
    with ProcessPoolExecutor(max_workers=config.num_workers) as pool:
        _ = list(tqdm(pool.map(handle_process, scene_paths, repeat(gt_dir), repeat(labels_pd)),
                      total=len(scene_paths)))
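
# Example invocation (the script filename below is an assumption, not given in the source;
# adjust it to whatever this file is actually named in the repository):
#   python prepare_scannet_gt.py --num_workers 8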