import argparse
import sys
import random
import time
from omegaconf import open_dict
import matplotlib.pyplot as plt

sys.path.extend([".", ".."])

from generate_ply_sequence import get_cam_k
from point_utils import read_calib, generate_point_grid, get_fov_mask
from gen_voxelgrid_npy import save_as_voxel_ply, remove_invisible

import logging
from pathlib import Path
import subprocess
import yaml
import cv2
import os
import numpy as np
from tqdm import tqdm
import pickle
import torch
from torch import nn
import torch.nn.functional as F
from hydra import compose, initialize
from sscbench_dataset import SSCBenchDataset
from scipy.optimize import linear_sum_assignment
import torchvision
RELOAD_DATASET = True
DATASET_LENGTH = 10
FULL_EVAL = True
SAMPLE_EVERY = None
SAMPLE_OFFSET = 2
SAMPLE_RANGE = None

SIZE = 51.2  # Can be: 51.2, 25.6, 12.8
SIZES = (12.8, 25.6, 51.2)

VOXEL_SIZE = 0.2  # Needs: 0.2 % VOXEL_SIZE == 0

USE_ADDITIONAL_INVALIDS = True

TEST_ALPHA_CUTOFFS = False
SEARCH_VALUES = [10e-1, 10e-2, 10e-3, 10e-4, 10e-5, 10e-6, 10e-7]

SIGMA_CUTOFF = 0.2
USE_ALPHA_WEIGHTING = True
USE_GROW = True

CREATE_SIGMA_TRADEOFF_PLOT = True
SIGMA_VALUES = [1, 0.5, 0.25, 0.1, 0.05, 0.025, 0.01, 0.005, 0.0025, 0.001]

PLOT_ALL_IMAGES = False

GENERATE_PLY_FILES = False
PLY_ONLY_FOV = True
PLY_IDS = [300, 400, 470]
OUTPUT_PATH = Path("<PATH-OUTPUT>")
PLY_SIZES = [25.6, 51.2]

GENERATE_STATISTICS = False

# For ply generation:
# USE_ADDITIONAL_INVALIDS = False
# USE_GROW = False
# GENERATE_PLY_FILES = True
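
# Rough meaning of the toggles above, as they are used further down in this script:
#   RELOAD_DATASET          -> rebuild the dataset instead of loading the cached dataset.pkl
#   SIGMA_CUTOFF            -> density threshold above which a voxel counts as occupied
#   USE_ALPHA_WEIGHTING     -> weight class scores by alpha = 1 - exp(-VOXEL_SIZE * sigma) before pooling
#   USE_GROW                -> dilate the predicted densities with a 3x3x3 max pool
#   USE_ADDITIONAL_INVALIDS -> mark extra ground-truth voxels as invalid via identify_additional_invalids()
#   GENERATE_PLY_FILES      -> export .ply voxel grids instead of accumulating the benchmark metrics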

os.system("nvidia-smi")

device = "cuda:0"

# DO NOT TOUCH OR YOU WILL BREAK RUNS (should be None)
gpu_id = None
if gpu_id is not None:
    print("GPU ID: " + str(gpu_id))
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)

if torch.cuda.is_available():
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True

logging.basicConfig(level=logging.INFO)

def main():
    parser = argparse.ArgumentParser("SSCBenchmark Output generation")
    parser.add_argument("--sscbench_data_root", "-ssc", type=str)
    parser.add_argument("--voxel_gt_path", "-vgt", type=str)
    parser.add_argument("--resolution", "-r", default=(192, 640))
    parser.add_argument("--checkpoint", "-cp", type=str, required=True)
    parser.add_argument("--full", "-f", action="store_true")
    parser.add_argument("--mode", "-m", default="s4c")
    parser.add_argument("--ply_checkname", "-p", default="none")
    args = parser.parse_args()

    sscbench_data_root = args.sscbench_data_root
    voxel_gt_path = args.voxel_gt_path
    resolution = args.resolution
    cp_path = args.checkpoint
    full_evaluation = args.full
    mode = args.mode
    ply_checkname = args.ply_checkname

    if FULL_EVAL:
        full_evaluation = True

    if GENERATE_PLY_FILES:
        assert (not USE_GROW) and (not USE_ADDITIONAL_INVALIDS)  # and VOXEL_SIZE == 0.1
        # make the necessary dirs
        for size in PLY_SIZES:
            if not os.path.exists(OUTPUT_PATH / ply_checkname / str(int(size))):
                os.makedirs(OUTPUT_PATH / ply_checkname / str(int(size)))

    if not os.path.exists(OUTPUT_PATH / ply_checkname):
        os.makedirs(OUTPUT_PATH / ply_checkname)

    logging.info(f"Using a sigma cutoff of {SIGMA_CUTOFF}")
    logging.info("Setting up dataset")

    with open("label_maps.yaml", "r") as f:
        label_maps = yaml.safe_load(f)
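    # label_maps.yaml provides the lookup tables used below: "cityscapes_to_label" and "sscbench_to_label"
    # map prediction / ground-truth ids to a common label set, "labels" lists the evaluated class ids,
    # and "weights" holds the per-class weights for the weighted mean IoU.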
    # pickle the dataset so we don't have to wait all the time
    if os.path.isfile("dataset.pkl") and not RELOAD_DATASET:
        logging.info("Loading dataset from dataset.pkl file.")
        with open("dataset.pkl", "rb") as f:
            dataset = pickle.load(f)
    else:
        logging.info("Generating the dataset and dumping it to dataset.pkl")
        dataset = SSCBenchDataset(
            data_path=sscbench_data_root,
            voxel_gt_path=voxel_gt_path,
            sequences=(9,),
            target_image_size=resolution,
            return_stereo=False,
            frame_count=1,
            color_aug=False,
            load_fisheye=True,
            fisheye_offset=10,
        )
        if DATASET_LENGTH and not full_evaluation:
            dataset.length = DATASET_LENGTH
        with open("dataset.pkl", "wb") as f:
            pickle.dump(dataset, f)

    logging.info("Setting up the model...")
    config_path = "exp_kitti_360"

    cp_path = Path(cp_path)
    if cp_path.suffix == ".pt":
        cp_root_path = cp_path.parent
    else:
        cp_root_path = cp_path
        cp_path = next(cp_root_path.glob("training*.pt"))
    bts_dino_config_path = "training_config.yaml"

    PRODUCE_FEAT_VIS = GENERATE_PLY_FILES and mode.startswith("scenedino")
    prediction_mode = None

    if mode == "s4c":
        from models.bts.model import BTSNet
        from models.common.render import NeRFRenderer

        initialize(version_base=None, config_path="../../../configs", job_name="gen_sscbench_outputs")
        config = compose(config_name=config_path, overrides=[])

        logging.info("Loading checkpoint")
        cp = torch.load(cp_path, map_location=device)

        with open_dict(config):
            config["renderer"]["hard_alpha_cap"] = True
            config["model_conf"]["code_mode"] = "z"
            # config["model_conf"]["z_near"] = 8
            config["model_conf"]["mlp_coarse"]["n_blocks"] = 0
            config["model_conf"]["mlp_coarse"]["d_hidden"] = 64
            config["model_conf"]["encoder"]["d_out"] = 64
            config["model_conf"]["encoder"]["type"] = "monodepth2"
            config["model_conf"]["grid_learn_empty"] = False
            config["model_conf"]["sample_color"] = True

            # stuff for segmentation
            config["model_conf"]["segmentation_mode"] = "panoptic_deeplab"

        net = BTSNet(config["model_conf"])
        net.sample_color = False
        renderer = NeRFRenderer.from_conf(config["renderer"])
        renderer = renderer.bind_parallel(net, gpus=None).eval()
        renderer.renderer.n_coarse = 64
        renderer.renderer.lindisp = True

        class _Wrapper(nn.Module):
            def __init__(self):
                super().__init__()
                self.renderer = renderer

        _wrapper = _Wrapper()
        _wrapper.load_state_dict(cp["model"], strict=False)
        renderer.to(device)
        renderer.eval()
    elif mode.startswith("scenedino"):
        from scenedino.models import make_model as dino_bts_make_model
        from scenedino.renderer.nerf import NeRFRenderer as dino_bts_NeRFRenderer
        from scenedino.common.ray_sampler import ImageRaySampler as dino_bts_ImageRaySampler

        bts_dino_parent_relative = Path("../../../../")
        bts_dino_parent_absolute = str(bts_dino_parent_relative.resolve())
        initialize(version_base=None,
                   config_path=str(bts_dino_parent_relative / cp_root_path.relative_to(bts_dino_parent_absolute)),
                   job_name="gen_sscbench_outputs")
        config = compose(config_name=bts_dino_config_path, overrides=[])

        logging.info("Loading checkpoint")
        cp = torch.load(cp_path, map_location=device)

        net = dino_bts_make_model(config["model"], config["downstream"])
        renderer = dino_bts_NeRFRenderer.from_conf(config["renderer"])
        renderer.hard_alpha_cap = False
        renderer = renderer.bind_parallel(net, gpus=None).eval()

        class _Wrapper(nn.Module):
            def __init__(self):
                super().__init__()
                self.renderer = renderer

        _wrapper = _Wrapper()
        _wrapper.load_state_dict(cp, strict=False)  # _wrapper.load_state_dict(cp["model"], strict=False)
        renderer.to(device)
        renderer.eval()

        height, width = config["dataset"]["image_size"]
        ray_sampler = dino_bts_ImageRaySampler(z_near=3, z_far=80, width=width, height=height)

        if mode == "scenedino_linear":
            prediction_mode = "direct_linear"
        elif mode == "scenedino_direct_cluster":
            prediction_mode = "direct_kmeans"
        else:
            prediction_mode = "stego_kmeans"
    else:
        raise NotImplementedError()

    logging.info("Loading the Lidar to Camera matrices...")
    calib = read_calib()
    T_velo_2_cam = calib["Tr"]

    logging.info("Generating the point cloud...")
    pts, _ = generate_point_grid(vox_origin=np.array([0, -25.6, -2]),
                                 scene_size=(51.2, 51.2, 6.4),
                                 voxel_size=VOXEL_SIZE,
                                 cam_E=T_velo_2_cam,
                                 cam_k=get_cam_k())

    fov_mask = get_fov_mask()

    pts = torch.tensor(pts).to(device).reshape(1, -1, 3).float()
    fov_mask = fov_mask.reshape(256, 256, 32)
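    # The query grid covers x in [0, 51.2] m (forward), y in [-25.6, 25.6] m and z in [-2, 4.4] m, i.e.
    # 256 x 256 x 32 voxels at 0.2 m resolution. The ego vehicle sits at y-index 128, which is why the
    # evaluation below crops [:num_voxels, 128 - num_voxels // 2 : 128 + num_voxels // 2, :] per range.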
| logging.info("Setting up folders...") | |
| downsample_factor = int(0.2 // VOXEL_SIZE) | |
| results = {} | |
| for size in SIZES: | |
| results[size] = { | |
| "tp": 0, | |
| "fp": 0, | |
| "tn": 0, | |
| "fn": 0, | |
| "tp_seg": np.zeros(15), | |
| "fp_seg": np.zeros(15), | |
| "tn_seg": np.zeros(15), | |
| "fn_seg": np.zeros(15), | |
| "confusion_seg": np.zeros((16, 16)), | |
| "tp_recall_seg": np.zeros(15), | |
| "sum_recall_seg": np.zeros(15), | |
| } | |
| # for the sigma tradeoff plots | |
| trade_off_values = np.zeros([len(SIGMA_VALUES), 4]) | |
| cutoff_results = {i: {sv: {"tp":0, "fp": 0, "tn": 0, "fn": 0} for sv in SEARCH_VALUES} for i in range(1, 16)} | |
| pbar = tqdm(range(len(dataset))) | |
| # Randomly select indices without replacement | |
| # dataset_size = len(dataset) | |
| # subset_size = dataset_size // 10 | |
| # subset_indices = random.sample(range(dataset_size), subset_size) | |
| # pbar = tqdm(subset_indices) | |
| images = {"ids": [], "images": []} | |
| ids = [125, 280, 960, 1000, 1150, 1325, 2300, 3175, 3750, 4300, 5155, 5475, 5750, 6475, 6525, 6670, 6775, 7500, 7860, 8000, 8350, 9000, 9350, 10975] | |
| ids = [60, 250, 455, 690, 835, 2235, 2385, 2495, 3385, 4235, 4360, 4550, 4875, 5550, 6035, 7010, 7110, 8575, 9010, 9410, 11260, 11460, 11885] | |
| # for our statistics | |
| tframeIds = [] | |
| tinval = [] | |
| ttp = [] | |
| tfp = [] | |
| ttn = [] | |
| tfn = [] | |
| # plot_image_at_frame_id(dataset, 952) | |
| for i in pbar: | |
| if SAMPLE_EVERY: | |
| if (i - SAMPLE_OFFSET) % SAMPLE_EVERY != 0: | |
| continue | |
| sequence, id, is_right = dataset._datapoints[i] | |
| if SAMPLE_RANGE: | |
| if id not in SAMPLE_RANGE: | |
| continue | |
| if GENERATE_PLY_FILES and id not in PLY_IDS: | |
| continue | |
| if GENERATE_STATISTICS: | |
| tframeIds.append(id) | |
| data = dataset[i] | |
| torch.cuda.empty_cache() | |
| torch.cuda.reset_peak_memory_stats() | |
| torch.cuda.synchronize() | |
| start_time = time.time() | |
| # downsample the sigmas | |
| sigmas, segs, dino = downsample_and_predict(data, net, pts, downsample_factor, prediction_mode, vis=GENERATE_PLY_FILES, feat_vis=PRODUCE_FEAT_VIS) | |
| torch.cuda.synchronize() | |
| inference_time = time.time() - start_time | |
| memory_used = torch.cuda.max_memory_allocated(device) / 1024**2 # in MB | |
| num_params = sum(p.numel() for key, p in net.named_parameters() if not key.startswith("encoder.gt_encoder")) | |
| #print(f"Inference time: {inference_time:.6f} seconds") | |
| #print(f"Memory used: {memory_used:.2f} MB") | |
| #print(f"Number of parameters: {num_params:,}") | |
| # convert both to the right format | |
| segs = convert_voxels(segs, label_maps["cityscapes_to_label"]) | |
| target = convert_voxels(data["voxel_gt"][0].astype(int), label_maps["sscbench_to_label"]) | |
| is_occupied_seg = torch.Tensor(sigmas > SIGMA_CUTOFF).to(torch.bool) | |
| is_occupied_seg = remove_invisible(is_occupied_seg) | |
| #raise ValueError(is_occupied_seg, segs) | |
| is_occupied_seg[segs==0] = False | |
| images = torch.stack([torch.Tensor(_img) for _img in data["imgs"]], dim=0).cuda() | |
| if PRODUCE_FEAT_VIS: | |
| dino = calculate_pca(dino, is_occupied_seg, net) | |
| dino = (255*dino).astype(int) | |
| poses = torch.stack([torch.Tensor(_pose) for _pose in data["poses"]], dim=0).unsqueeze(0).cuda() | |
| projs = torch.stack([torch.Tensor(_proj) for _proj in data["projs"]], dim=0).unsqueeze(0).cuda() | |
| poses = torch.inverse(poses[:, :1]) @ poses | |
| all_rays, _ = ray_sampler.sample(None, poses, projs) | |
| render_dict = renderer(all_rays[:, :], want_weights=True, want_alphas=True) | |
| render_dict = ray_sampler.reconstruct(render_dict) | |
| dino_features = net.encoder.expand_dim(render_dict["coarse"]["dino_features"]).squeeze() | |
| dino_gt = net.encoder.gt_encoder(images / 2 + 0.5)[-1].permute(0, 2, 3, 1) | |
| dino_gt = F.normalize(dino_gt, dim=-1) | |
| dino_rgb_vis = torch.clamp(net.encoder.transform_visualization(dino_features.cpu()), min=-0.5, max=0.5) + 0.5 | |
| dino_rgb_vis_gt = torch.clamp(net.encoder.transform_visualization(dino_gt.cpu()), min=-0.5, max=0.5) + 0.5 | |
| dino_rgb_vis_gt = dino_rgb_vis_gt.repeat_interleave(8, 1).repeat_interleave(8, 2) | |
| if PLOT_ALL_IMAGES: | |
| images["ids"].append(id) | |
| images["images"].append(((data["imgs"][0] + 1) / 2).permute(1, 2, 0)) | |
| if len(images["ids"]) == 6: | |
| plot_images(images) | |
| images = {"images": [], "ids": []} | |
| # print(f"Image_Id: {id}") | |
| # | |
| # plt.imshow(((data["imgs"][0] + 1) / 2).permute(1, 2, 0)) | |
| # plt.show() | |
| # | |
| # out_dict = {"sigmas": sigmas, "segs": segs.copy(), "gt": target, "fov_mask": fov_mask} | |
| # | |
| # with open(f'plots10_40/{id:06d}.pkl', 'wb') as f: | |
| # pickle.dump(out_dict, f) | |
| if GENERATE_PLY_FILES: | |
| _segs = segs.copy() | |
| _target = target.copy() | |
| if PRODUCE_FEAT_VIS: | |
| _dino = dino.copy() | |
| mask = target != 255 | |
| if PLY_ONLY_FOV: | |
| mask = mask & fov_mask | |
| seg_mask = mask.copy() | |
| for dim in range(seg_mask.ndim): | |
| seg_mask = np.repeat(seg_mask, downsample_factor, axis=dim) | |
| # _segs[~seg_mask] = 0 | |
| # _dino[~seg_mask] = 0 | |
| _target[~mask] = 0 | |
| is_occupied_seg = is_occupied_seg.logical_and(torch.Tensor(fov_mask)) | |
| # is_occupied_seg = torch.tensor(_segs > 0) | |
| is_occupied_gt = torch.tensor(_target > 0) | |
| full_num_voxels = int(SIZE // VOXEL_SIZE) | |
| for idx in range(images.size(0)): | |
| torchvision.utils.save_image(((images[idx] + 1) / 2), OUTPUT_PATH / ply_checkname / str(int(size)) / f"{id:06d}_image_{idx}.png") | |
| if PRODUCE_FEAT_VIS: | |
| torchvision.utils.save_image(dino_rgb_vis[idx].permute(2, 0, 1), OUTPUT_PATH / ply_checkname / str(int(size)) / f"{id:06d}_features_{idx}.png") | |
| torchvision.utils.save_image(dino_rgb_vis_gt[idx].permute(2, 0, 1), OUTPUT_PATH / ply_checkname / str(int(size)) / f"{id:06d}_features_gt_{idx}.png") | |
| images = None | |
| for size in PLY_SIZES: | |
| num_voxels = int(size // 0.2) | |
| save_as_voxel_ply(OUTPUT_PATH / ply_checkname / str(int(size)) / f"{id:06d}_gt.ply", | |
| is_occupied_gt[: num_voxels, (128 - num_voxels // 2): (128 + num_voxels // 2),:], | |
| voxel_size=0.2, | |
| classes=torch.tensor(_target[: num_voxels, (128 - num_voxels // 2): (128 + num_voxels // 2),:])) | |
| num_voxels = int(size // VOXEL_SIZE) | |
| save_as_voxel_ply(OUTPUT_PATH / ply_checkname / str(int(size)) / f"{id:06d}.ply", | |
| is_occupied_seg[: num_voxels, (full_num_voxels // 2 - num_voxels // 2): (full_num_voxels // 2 + num_voxels // 2),:], | |
| size=(num_voxels, num_voxels, num_voxels//8), | |
| voxel_size=VOXEL_SIZE, | |
| classes=torch.tensor(_segs[: num_voxels, (full_num_voxels // 2 - num_voxels // 2): (full_num_voxels // 2 + num_voxels // 2),:])) | |
| if PRODUCE_FEAT_VIS: | |
| save_as_voxel_ply(OUTPUT_PATH / ply_checkname / str(int(size)) / f"{id:06d}_feat.ply", | |
| is_occupied_seg[: num_voxels, (full_num_voxels // 2 - num_voxels // 2): (full_num_voxels // 2 + num_voxels // 2),:], | |
| size=(num_voxels, num_voxels, num_voxels//8), | |
| voxel_size=VOXEL_SIZE, | |
| colors=torch.tensor(_dino[: num_voxels, (full_num_voxels // 2 - num_voxels // 2): (full_num_voxels // 2 + num_voxels // 2),:])) | |
| continue | |
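        # Beyond the voxels already labelled invalid (255) in the ground truth, identify_additional_invalids()
        # flags empty voxels that were most likely never observed (see the heuristic in that function), so that
        # they are ignored by the metrics as well.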
        if USE_ADDITIONAL_INVALIDS:
            invalids = identify_additional_invalids(target)
            # logging.info(np.mean(invalids))
            target[invalids == 1] = 255
            if GENERATE_STATISTICS:
                tinval.append(np.mean(invalids))

        # test and summarize different alpha cutoffs
        if TEST_ALPHA_CUTOFFS:
            for i in range(1, 16):
                for search_value in SEARCH_VALUES:
                    _tmp = segs.copy()
                    _tmp[np.logical_and(segs == i, sigmas < search_value)] = 0
                    _tp_seg, _fp_seg, _tn_seg, _fn_seg, _ = compute_occupancy_numbers_segmentation(
                        y_pred=_tmp, y_true=target, fov_mask=fov_mask, labels=label_maps["labels"])
                    cutoff_results[i][search_value]["tp"] += _tp_seg[i - 1]
                    cutoff_results[i][search_value]["fp"] += _fp_seg[i - 1]
                    cutoff_results[i][search_value]["tn"] += _tn_seg[i - 1]
                    cutoff_results[i][search_value]["fn"] += _fn_seg[i - 1]

        if CREATE_SIGMA_TRADEOFF_PLOT:
            for i, val in enumerate(SIGMA_VALUES):
                _tmp = segs.copy()
                _tmp[sigmas < val] = 0
                _tp, _fp, _tn, _fn = compute_occupancy_numbers(y_pred=_tmp, y_true=target, fov_mask=fov_mask)
                trade_off_values[i] += np.array([_tp, _fp, _tn, _fn])

        segs[sigmas < SIGMA_CUTOFF] = 0
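        # Evaluate at three ranges: for each size only the front num_voxels x num_voxels x 32 crop of the grid,
        # centred laterally on the ego vehicle, enters the occupancy and segmentation counts.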
        for size in SIZES:
            num_voxels = int(size // 0.2)

            # resize to right scene size
            _segs = segs[:num_voxels, (128 - num_voxels // 2):(128 + num_voxels // 2), :]
            _target = target[:num_voxels, (128 - num_voxels // 2):(128 + num_voxels // 2), :]
            _fov_mask = fov_mask[:num_voxels, (128 - num_voxels // 2):(128 + num_voxels // 2), :]

            _tp, _fp, _tn, _fn = compute_occupancy_numbers(y_pred=_segs, y_true=_target, fov_mask=_fov_mask)
            _tp_seg, _fp_seg, _tn_seg, _fn_seg, _confusion_seg = compute_occupancy_numbers_segmentation(
                y_pred=_segs, y_true=_target, fov_mask=_fov_mask, labels=label_maps["labels"])
            _tp_rec_seg, _sum_rec_seg = compute_occupancy_recall_segmentation(
                y_pred=_segs, y_true=_target, fov_mask=_fov_mask, labels=label_maps["labels"])

            if size == 51.2 and GENERATE_STATISTICS:
                ttp += [_tp]
                tfp += [_fp]
                ttn += [_tn]
                tfn += [_fn]

            results[size]["tp"] += _tp
            results[size]["fp"] += _fp
            results[size]["tn"] += _tn
            results[size]["fn"] += _fn
            results[size]["tp_seg"] += _tp_seg
            results[size]["fp_seg"] += _fp_seg
            results[size]["tn_seg"] += _tn_seg
            results[size]["fn_seg"] += _fn_seg
            results[size]["confusion_seg"] += _confusion_seg
            results[size]["tp_recall_seg"] += _tp_rec_seg
            results[size]["sum_recall_seg"] += _sum_rec_seg

        recall = results[size]["tp"] / (results[size]["tp"] + results[size]["fn"])
        precision = results[size]["tp"] / (results[size]["tp"] + results[size]["fp"])
        iou = results[size]["tp"] / (results[size]["tp"] + results[size]["fp"] + results[size]["fn"])

        pbar.set_postfix_str(f"IoU: {iou * 100:.2f} Prec: {precision * 100:.2f} Rec: {recall * 100:.2f}")

    result_str = ""
    for mode in ["direct", "hungarian"]:
        results_table = np.zeros((19, 3), dtype=np.float32)

        if mode == "hungarian":
            assignments = linear_sum_assignment(results[51.2]["confusion_seg"], maximize=True)  # Hungarian matching on full range
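        # linear_sum_assignment on the accumulated full-range confusion matrix finds the one-to-one mapping of
        # predicted ids to ground-truth classes that maximises the matched voxel count; the "direct" mode scores
        # the predicted ids as-is. The matched variant matters when the predictions come from unsupervised clustering.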
        # Here we compute all the metrics
        for size_i, size in enumerate(SIZES):
            recall = results[size]["tp"] / (results[size]["tp"] + results[size]["fn"])
            precision = results[size]["tp"] / (results[size]["tp"] + results[size]["fp"])
            iou = results[size]["tp"] / (results[size]["tp"] + results[size]["fp"] + results[size]["fn"])

            results_table[0, size_i] = iou
            results_table[1, size_i] = precision
            results_table[2, size_i] = recall

            # logging.info("#" * 50)
            # logging.info(f"Results for size {size}.")
            # logging.info("#" * 50)
            # logging.info("Occupancy metrics")
            # logging.info(f"Recall: {recall * 100:.2f}%")
            # logging.info(f"Precision: {precision * 100:.2f}%")
            # logging.info(f"IoU: {iou * 100:.2f}")

            # recall_seg = results[size]["tp_seg"] / (results[size]["tp_seg"] + results[size]["fn_seg"])
            # precision_seg = results[size]["tp_seg"] / (results[size]["tp_seg"] + results[size]["fp_seg"])
            # iou_seg = results[size]["tp_seg"] / (results[size]["tp_seg"] + results[size]["fp_seg"] + results[size]["fn_seg"])
            # mean_iou = np.mean(np.nan_to_num(iou_seg))

            # Calculate hungarian matching
            confusion_matrix = results[size]["confusion_seg"]
            if mode == "hungarian":
                confusion_matrix = confusion_matrix[np.argsort(assignments[1]), :]
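            # Per-class IoU from the confusion matrix M: IoU_c = M[c, c] / (row_sum_c + col_sum_c - M[c, c]).
            # Class 0 (empty) is excluded; the mIoU is the mean over the remaining classes with NaNs treated as 0.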
            confusion_matrix_tp = np.diag(confusion_matrix)
            confusion_matrix_denom = confusion_matrix.sum(0) + confusion_matrix.sum(1) - confusion_matrix_tp
            confusion_matrix_per_class_iou = confusion_matrix_tp[1:] / confusion_matrix_denom[1:]
            confusion_matrix_miou = np.mean(np.nan_to_num(confusion_matrix_per_class_iou))

            # occupancy_recall_seg = results[size]["tp_recall_seg"] / results[size]["sum_recall_seg"]

            weights = label_maps["weights"]
            weights_val = np.array(list(weights.values()))
            weighted_mean_iou = np.sum(weights_val * np.nan_to_num(confusion_matrix_per_class_iou)) / np.sum(weights_val)

            results_table[3, size_i] = confusion_matrix_miou
            results_table[4:, size_i] = confusion_matrix_per_class_iou

        row_labels = [
            "IoU", "Precision", "Recall",
            "mIoU", "car", "bicycle", "motorcycle", "truck", "other-vehicle", "person",
            "road", "sidewalk", "building", "fence", "vegetation", "terrain", "pole",
            "traffic-sign", "other-object"
        ]
        column_headers = ["12.8m", "25.6m", "51.2m"]

        result_str += f"\n# Benchmark Results for '{ply_checkname}' / Mode: {mode}\n"
        result_str += "\n| | " + " | ".join(column_headers) + " |\n"
        result_str += "|---------------|-------|-------|-------|\n"
        for i in range(len(row_labels)):
            row_values = results_table[i]
            row_str = f"| {row_labels[i]:<13} | " + " | ".join(f"{v * 100:5.2f}" for v in row_values) + " |\n"
            result_str += row_str
            if i == 2:
                result_str += "|---------------|-------|-------|-------|\n"
        result_str += "\n"

        if mode == "hungarian":
            result_str += f"Reassignment: {np.argsort(assignments[1])}\n"
        result_str += f"Mean IoU: {confusion_matrix_miou * 100:.2f}\n"
        result_str += f"Weighted Mean IoU: {weighted_mean_iou * 100:.2f}\n\n"

    print(result_str)

    if not GENERATE_PLY_FILES:
        with open(OUTPUT_PATH / ply_checkname / "results.md", "w") as file:
            file.write(result_str)

    if TEST_ALPHA_CUTOFFS:
        cutoff_metrics = {
            i: {sv: {"precision": np.nan_to_num(100 * cutoff_results[i][sv]["tp"] / (cutoff_results[i][sv]["tp"] + cutoff_results[i][sv]["fp"])),
                     "recall": np.nan_to_num(100 * cutoff_results[i][sv]["tp"] / (cutoff_results[i][sv]["tp"] + cutoff_results[i][sv]["fn"])),
                     "IoU": np.nan_to_num(100 * cutoff_results[i][sv]["tp"] / (cutoff_results[i][sv]["tp"] + cutoff_results[i][sv]["fn"] + cutoff_results[i][sv]["fp"]))}
                for sv in SEARCH_VALUES}
            for i in range(1, 16)}
        best_values = {i: SEARCH_VALUES[torch.argmax(torch.tensor([cutoff_metrics[i][sv]["IoU"] for sv in SEARCH_VALUES]))] for i in range(1, 16)}
        print(best_values)

    if CREATE_SIGMA_TRADEOFF_PLOT:
        plt.figure(figsize=(10, 8))
        plt.xlabel("Precision")
        plt.ylabel("Recall")
        plt.xlim([10, 70])
        # plt.ylim([0, 100])
        for i, val in enumerate(SIGMA_VALUES):
            tp, fp, tn, fn = trade_off_values[i]
            pres = 100 * tp / (tp + fp)
            recall = 100 * tp / (tp + fn)
            plt.scatter(pres, recall)
            plt.annotate(f"Sigma: {val}; IoU: {100 * tp / (tp + fp + fn):.2f}", (pres, recall))

        identifier = os.path.basename(cp_path)
        if FULL_EVAL:
            path = f"figures/inv{str(USE_ADDITIONAL_INVALIDS)}_{VOXEL_SIZE:.1f}_mp{str(USE_GROW)}_{identifier}.png"
        else:
            path = f"figures/inv{str(USE_ADDITIONAL_INVALIDS)}_{DATASET_LENGTH}_{VOXEL_SIZE:.1f}_mp{str(USE_GROW)}_{identifier}.png"
        if os.path.isfile(path):
            os.remove(path)
        plt.savefig(path)
        plt.show()

    if GENERATE_STATISTICS:
        statistics_raw = {"frameId": tframeIds, "TP": ttp, "FP": tfp, "TN": ttn, "FN": tfn, "invalids": tinval}
        with open("stats.pkl", "wb") as f:
            pickle.dump(statistics_raw, f)
        logging.info("Saved the statistics for further analysis.")
def downsample_and_predict(data, net, pts, factor, prediction_mode, vis=False, feat_vis=False):
    pts = pts.reshape(256 * factor, 256 * factor, 32 * factor, 3)

    if vis:
        sigmas = torch.zeros(256 * factor, 256 * factor, 32 * factor).numpy()
        segs = torch.zeros(256 * factor, 256 * factor, 32 * factor).numpy()
        if feat_vis:
            dino = torch.zeros(256 * factor, 256 * factor, 32 * factor, 768).numpy()
        else:
            dino = None
    else:
        sigmas = torch.zeros(256, 256, 32).numpy()
        segs = torch.zeros(256, 256, 32).numpy()
        dino = None

    chunk_size_x = chunk_size_y = 128
    chunk_size_z = 32

    n_chunks_x = int(256 * factor / chunk_size_x)
    n_chunks_y = int(256 * factor / chunk_size_y)
    n_chunks_z = int(32 * factor / chunk_size_z)

    if vis:
        factor = 1

    b_x = chunk_size_x // factor  # size of the mini blocks
    b_y = chunk_size_y // factor
    b_z = chunk_size_z // factor
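    # Note: with vis=True the factor is reset to 1 above, so the chunks are written back at the fine
    # (256 * factor per side) resolution without pooling; in the metric path they are pooled down to 256 x 256 x 32.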
    # Changed for efficiency
    images = torch.stack(data["imgs"], dim=0).unsqueeze(0).to(device).float()
    poses = torch.tensor(np.stack(data["poses"], 0)).unsqueeze(0).to(device).float()
    projs = torch.tensor(np.stack(data["projs"], 0)).unsqueeze(0).to(device).float()
    poses = torch.inverse(poses[:, :1]) @ poses
    extra_args = {"images_alt": images * 0.5 + 0.5}
    net.compute_grid_transforms(projs, poses)

    torch.cuda.synchronize()
    encoding_start_time = time.time()
    net.encode(images, projs, poses, ids_encoder=[0], ids_render=[0], **extra_args)
    torch.cuda.synchronize()
    encoding_time = time.time() - encoding_start_time
    # print(f" - Encoding time: {encoding_time:.6f} seconds")

    net.set_scale(0)

    for i in range(n_chunks_x):
        for j in range(n_chunks_y):
            for k in range(n_chunks_z):
                pts_block = pts[i * chunk_size_x:(i + 1) * chunk_size_x, j * chunk_size_y:(j + 1) * chunk_size_y, k * chunk_size_z:(k + 1) * chunk_size_z]

                # with torch.autograd.profiler.profile([torch.profiler.ProfilerActivity.CPU, torch.profiler.ProfilerActivity.CUDA], use_cuda=True) as prof:
                sigmas_block, segs_block, dino_feat_block = predict_grid(data, net, pts_block, prediction_mode)
                # print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=-1))
                # raise ValueError("Profiling done.")

                sigmas_block = sigmas_block.reshape(chunk_size_x, chunk_size_y, chunk_size_z)
                segs_block = segs_block.reshape(chunk_size_x, chunk_size_y, chunk_size_z, 19)
                if feat_vis:
                    dino_feat_block = dino_feat_block.reshape(chunk_size_x, chunk_size_y, chunk_size_z, dino_feat_block.size(-1))
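                # Class scores are weighted by how much a voxel of edge length VOXEL_SIZE would absorb along a ray:
                # alpha = 1 - exp(-sigma * VOXEL_SIZE), the standard volume-rendering opacity for a single step.
                # Without alpha weighting, the raw density is used as the weight instead.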
                if USE_ALPHA_WEIGHTING:
                    alphas = 1 - torch.exp(-VOXEL_SIZE * sigmas_block)
                    segs_block = (alphas.unsqueeze(-1) * segs_block).unsqueeze(0)
                else:
                    segs_block = (sigmas_block.unsqueeze(-1) * segs_block).unsqueeze(0)

                if vis:
                    sigmas_block = sigmas_block.detach().cpu().numpy()
                    segs_pool = torch.argmax(segs_block, dim=-1).detach().cpu().numpy()
                    if feat_vis:
                        dino_feat_block = dino_feat_block.detach().cpu().numpy()
                else:
                    segs_pool_list = [F.avg_pool3d(segs_block[..., i], kernel_size=factor, stride=factor, padding=0)
                                      for i in range(segs_block.shape[-1])]
                    segs_pool = torch.stack(segs_pool_list, dim=-1).unsqueeze(0)
                    segs_pool = torch.argmax(segs_pool, dim=-1).detach().cpu().numpy()

                    # pool the observations
                    sigmas_block = F.max_pool3d(sigmas_block.unsqueeze(0), kernel_size=factor, stride=factor, padding=0).squeeze(0).detach().cpu().numpy()

                sigmas[i * b_x:(i + 1) * b_x, j * b_y:(j + 1) * b_y, b_z * k:b_z * (k + 1)] = sigmas_block
                segs[i * b_x:(i + 1) * b_x, j * b_y:(j + 1) * b_y, b_z * k:b_z * (k + 1)] = segs_pool
                if feat_vis:
                    dino[i * b_x:(i + 1) * b_x, j * b_y:(j + 1) * b_y, b_z * k:b_z * (k + 1), :] = dino_feat_block

                torch.cuda.empty_cache()

    if USE_GROW:
        sigmas = F.max_pool3d(torch.tensor(sigmas).unsqueeze(0), kernel_size=3, stride=1, padding=1).squeeze(0).numpy()

    return sigmas, segs, dino

def calculate_pca(dino, is_occupied_seg, net):
    dino = torch.Tensor(dino)
    visible_dino = dino[is_occupied_seg]
    # print(net.encoder.visualization.batch_rgb_mean, net.encoder.visualization.batch_rgb_comp)
    net.encoder.fit_visualization(visible_dino.flatten(0, -2), refit=True)
    return torch.clamp(net.encoder.transform_visualization(dino), min=-0.5, max=0.5).cpu().numpy() + 0.5

def use_custom_maxpool(_sigmas):
    sigmas = torch.zeros(258, 258, 34)
    sigmas[1:257, 1:257, 1:33] = torch.tensor(_sigmas)
    sigmas_pooled = torch.zeros(256, 256, 32)
    for i in range(256):
        for j in range(256):
            for k in range(32):
                sigmas_pooled[i, j, k] = max(sigmas[i + 1, j + 1, k + 1],
                                             sigmas[i, j + 1, k + 1], sigmas[i + 1, j, k + 1], sigmas[i + 1, j + 1, k],
                                             sigmas[i + 2, j + 1, k + 1], sigmas[i + 1, j + 2, k + 1], sigmas[i + 1, j + 1, k + 2])
    return sigmas_pooled

def plot_images(images_dict):
    """The images dict should include six images and six corresponding ids."""
    images = images_dict["images"]
    ids = images_dict["ids"]
    fig, axes = plt.subplots(3, 2, figsize=(10, 6))
    axes = axes.flatten()
    for i, img in enumerate(images):
        axes[i].imshow(img)
        axes[i].axis("off")
        axes[i].set_title(f"FrameId: {ids[i]}")
    plt.subplots_adjust(wspace=0.01, hspace=0.01)
    plt.show()


def plot_image_at_frame_id(dataset, frame_id):
    for i in range(len(dataset)):
        sequence, id, is_right = dataset._datapoints[i]
        if id == frame_id:
            data = dataset[i]
            plt.figure(figsize=(10, 4))
            plt.imshow(((data["imgs"][0] + 1) / 2).permute(1, 2, 0))
            plt.gca().set_axis_off()
            plt.show()
            return


def identify_additional_invalids(target):
    # Note: the NumPy implementation is a bit faster (about 0.1 seconds per iteration)
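    # Heuristic: an empty ground-truth voxel is treated as unobserved (invalid) if it lies in the lowest height
    # slices (z index < 7) and has no labelled occupied voxel anywhere below it in its height column. This
    # typically catches voxels underneath the ground surface that the sensor could never have seen.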
    _t = np.concatenate([np.zeros([256, 256, 1]), target], axis=2)
    invalids = np.cumsum(np.logical_and(_t != 255, _t != 0), axis=2)[:, :, :32] == 0
    # _t = torch.cat([torch.zeros([256, 256, 1], device=device, dtype=torch.int32), torch.tensor(target, dtype=torch.int32).to(device)], dim=2)
    # invalids = torch.cumsum((_t != 255) & (_t != 0), axis=2)[:, :, :32] == 0

    # height cut-off (z > 6 ==> no invalid)
    invalids[:, :, 7:] = 0
    # only empty voxels matter
    invalids[target != 0] = 0
    # return invalids.cpu().numpy()
    return invalids

def predict_grid(data_batch, net, points, prediction_mode):
    # Removed for efficiency
    # images = torch.stack(data_batch["imgs"], dim=0).unsqueeze(0).to(device).float()
    # poses = torch.tensor(np.stack(data_batch["poses"], 0)).unsqueeze(0).to(device).float()
    # projs = torch.tensor(np.stack(data_batch["projs"], 0)).unsqueeze(0).to(device).float()
    # poses = torch.inverse(poses[:, :1]) @ poses
    # extra_args = {"images_alt": images * 0.5 + 0.5}
    # net.compute_grid_transforms(projs, poses)
    # net.encode(images, projs, poses, ids_encoder=[0], ids_render=[0], **extra_args)
    # net.set_scale(0)
    # q_pts = get_pts(X_RANGE, Y_RANGE, Z_RANGE, p_res[1], p_res_y, p_res[0])
    # q_pts = q_pts.to(device).reshape(1, -1, 3)
    # _, invalid, sigmas = net.forward(q_pts)

    points = points.reshape(1, -1, 3)
    if prediction_mode is not None:
        dino_feat, invalid, sigmas, segs = net.forward(points, predict_segmentation=True, prediction_mode=prediction_mode)
    else:
        dino_feat, invalid, sigmas, segs = net.forward(points, predict_segmentation=True)
    return sigmas, segs, dino_feat


def convert_voxels(arr, map_dict):
    f = np.vectorize(map_dict.__getitem__)
    return f(arr)
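
# Example: np.vectorize(map_dict.__getitem__) applies the dictionary lookup element-wise, so with an
# illustrative mapping {0: 0, 7: 10, 255: 255}, convert_voxels(np.array([[0, 7], [255, 255]]), ...) would
# return array([[0, 10], [255, 255]]).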

def compute_occupancy_numbers_segmentation(y_pred, y_true, fov_mask, labels):
    label_ids = list(labels.keys())[1:]
    mask = y_true != 255
    mask = np.logical_and(mask, fov_mask)
    mask = mask.flatten()
    y_pred = y_pred.flatten()[mask]
    y_true = y_true.flatten()[mask]

    tp = np.zeros(len(label_ids))
    fp = np.zeros(len(label_ids))
    fn = np.zeros(len(label_ids))
    tn = np.zeros(len(label_ids))

    for label_id in label_ids:
        tp[label_id - 1] = np.sum(np.logical_and(y_true == label_id, y_pred == label_id))
        fp[label_id - 1] = np.sum(np.logical_and(y_true != label_id, y_pred == label_id))
        fn[label_id - 1] = np.sum(np.logical_and(y_true == label_id, y_pred != label_id))
        tn[label_id - 1] = np.sum(np.logical_and(y_true != label_id, y_pred != label_id))
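    # Build the (num_classes + 1) x (num_classes + 1) confusion matrix in one pass: each (true, pred) pair is
    # encoded as a single index true * dim_conf + pred, counted with np.bincount, and reshaped so that rows
    # correspond to ground-truth labels and columns to predictions (label 0, empty, is the first row/column).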
    dim_conf = len(label_ids) + 1
    bincount_values = dim_conf * y_true + y_pred
    confusion_matrix = np.bincount(bincount_values, minlength=dim_conf * dim_conf).reshape(dim_conf, dim_conf)
    return tp, fp, tn, fn, confusion_matrix


def compute_occupancy_recall_segmentation(y_pred, y_true, fov_mask, labels):
    label_ids = list(labels.keys())[1:]
    mask = y_true != 255
    mask = np.logical_and(mask, fov_mask)
    mask = mask.flatten()
    y_pred = y_pred.flatten()[mask]
    y_true = y_true.flatten()[mask]

    tp = np.zeros(len(label_ids))
    sum = np.zeros(len(label_ids))
    for label_id in label_ids:
        tp[label_id - 1] = np.sum(np.logical_and(y_true == label_id, y_pred > 0))
        sum[label_id - 1] = np.sum(y_true == label_id)
    return tp, sum
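
# The raw occupancy counts feed the scene-completion metrics reported in main():
#   precision = tp / (tp + fp), recall = tp / (tp + fn), IoU = tp / (tp + fp + fn),
# computed only over voxels that are valid in the ground truth (label != 255) and inside the camera field of view.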
def compute_occupancy_numbers(y_pred, y_true, fov_mask):
    mask = y_true != 255
    mask = np.logical_and(mask, fov_mask)
    mask = mask.flatten()
    y_pred = y_pred.flatten()
    y_true = y_true.flatten()
    occ_true = y_true[mask] > 0
    occ_pred = y_pred[mask] > 0
    tp = np.sum(np.logical_and(occ_true == 1, occ_pred == 1))
    fp = np.sum(np.logical_and(occ_true == 0, occ_pred == 1))
    fn = np.sum(np.logical_and(occ_true == 1, occ_pred == 0))
    tn = np.sum(np.logical_and(occ_true == 0, occ_pred == 0))
    return tp, fp, tn, fn
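
# Example invocation (the script name and paths are placeholders; the flags are defined in main() above):
#   python gen_sscbench_outputs.py -ssc /path/to/sscbench_kitti360 -vgt /path/to/voxel_gt \
#       -cp /path/to/checkpoint_dir -m scenedino -p my_run --full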

if __name__ == "__main__":
    # with torch.cuda.amp.autocast(dtype=torch.float16):
    with torch.no_grad():
        main()