| |
|
|
| |
|
|
| |
| |
| |
| |
| |
| |
|
|
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
|
|
| |
|
|
| """Miscellaneous utility functions.""" |
|
|
| from scipy import ndimage |
|
|
| import base64 |
| import math |
| import re |
| from io import BytesIO |
|
|
| import matplotlib |
| import matplotlib.cm |
| import numpy as np |
| import requests |
| import torch |
| import torch.distributed as dist |
| import torch.nn |
| import torch.nn as nn |
| import torch.utils.data.distributed |
| from PIL import Image |
| from torchvision.transforms import ToTensor |
|
|
|
|
class RunningAverage:
    """Arithmetic mean that is updated one sample at a time.

    Attributes:
        avg: Mean of all values appended so far (0 before the first append).
        count: Number of values appended so far.
    """

    def __init__(self):
        self.count = 0
        self.avg = 0

    def append(self, value):
        """Fold ``value`` into the running mean and increase the sample count."""
        seen = self.count
        # Re-weight the previous mean by the number of samples it represents.
        self.avg = (value + seen * self.avg) / (seen + 1)
        self.count = seen + 1

    def get_value(self):
        """Return the current mean."""
        return self.avg
|
|
|
|
def denormalize(x):
    """Undo the ImageNet mean/std normalization of an image batch.

    Args:
        x (torch.Tensor - shape(N,3,H,W)): normalized input tensor

    Returns:
        torch.Tensor - shape(N,3,H,W): ``x * std + mean`` with the per-channel
        ImageNet statistics broadcast over batch and spatial dimensions
    """
    def _per_channel(values):
        # Shape (1, 3, 1, 1) so the constants broadcast against (N, 3, H, W).
        return torch.Tensor(values).view(1, 3, 1, 1).to(x.device)

    channel_mean = _per_channel([0.485, 0.456, 0.406])
    channel_std = _per_channel([0.229, 0.224, 0.225])
    return x * channel_std + channel_mean
|
|
|
|
class RunningAverageDict:
    """A dictionary of running averages.

    Each key maps to its own ``RunningAverage``; ``update`` feeds one new
    sample per key and ``get_value`` returns the current means.
    """

    def __init__(self):
        # Stays None until the first non-None update so that get_value() can
        # distinguish "never updated" (None) from "updated with no keys" ({}).
        self._dict = None

    def update(self, new_dict):
        """Append every value of ``new_dict`` to the average stored under its key.

        Args:
            new_dict (dict or None): Mapping of metric name to a new sample.
                ``None`` is ignored.

        Keys that were not present in earlier updates get a fresh running
        average instead of raising ``KeyError``.
        """
        if new_dict is None:
            return

        if self._dict is None:
            self._dict = {}

        for key, value in new_dict.items():
            if key not in self._dict:
                self._dict[key] = RunningAverage()
            self._dict[key].append(value)

    def get_value(self):
        """Return ``{key: mean}`` for all tracked keys, or None if never updated."""
        if self._dict is None:
            return None
        return {key: tracker.get_value() for key, tracker in self._dict.items()}
|
|
|
|
def colorize(value, vmin=None, vmax=None, cmap='gray_r', invalid_val=-99, invalid_mask=None, background_color=(128, 128, 128, 255), gamma_corrected=False, value_transform=None):
    """Converts a depth map to a color image.

    Args:
        value (torch.Tensor, numpy.ndarray): Input depth map. Shape: (H, W) or (1, H, W) or (1, 1, H, W). All singular dimensions are squeezed
        vmin (float, optional): vmin-valued entries are mapped to start color of cmap. If None, the 2nd percentile of the valid pixels is used. Defaults to None.
        vmax (float, optional): vmax-valued entries are mapped to end color of cmap. If None, the 85th percentile of the valid pixels is used. Defaults to None.
        cmap (str, optional): matplotlib colormap to use. Defaults to 'gray_r'.
        invalid_val (int, optional): Specifies value of invalid pixels that should be colored as 'background_color'. Only used when invalid_mask is None. Defaults to -99.
        invalid_mask (numpy.ndarray, optional): Boolean mask for invalid regions, indexed against the squeezed depth map. Defaults to None.
        background_color (tuple[int], optional): 4-tuple RGBA color to give to invalid pixels. Defaults to (128, 128, 128, 255).
        gamma_corrected (bool, optional): Apply gamma correction (exponent 2.2) to colored image. Defaults to False.
        value_transform (Callable, optional): Function applied to the normalized map before coloring; invalid pixels are NaN at that point. Defaults to None.

    Returns:
        numpy.ndarray, dtype - uint8: Colored depth map. Shape: (H, W, 4)
    """
    if isinstance(value, torch.Tensor):
        value = value.detach().cpu().numpy()

    value = value.squeeze()
    if invalid_mask is None:
        invalid_mask = value == invalid_val
    mask = np.logical_not(invalid_mask)

    # Normalize to the requested range; the percentile defaults keep a few
    # outliers from dominating the color scale.
    if vmin is None or vmax is None:
        valid_values = value[mask]
        if valid_values.size:
            vmin = np.percentile(valid_values, 2) if vmin is None else vmin
            vmax = np.percentile(valid_values, 85) if vmax is None else vmax
        else:
            # Every pixel is invalid: there is nothing to take percentiles of
            # and the whole image becomes background_color below anyway.
            vmin = 0.0 if vmin is None else vmin
            vmax = vmin if vmax is None else vmax

    if vmin != vmax:
        value = (value - vmin) / (vmax - vmin)
    else:
        # Degenerate range: avoid dividing by zero, map everything to 0.
        value = value * 0.

    value[invalid_mask] = np.nan

    # matplotlib.cm.get_cmap was removed in matplotlib 3.9; prefer the
    # colormap registry and fall back to the legacy call on old releases.
    registry = getattr(matplotlib, "colormaps", None)
    if hasattr(registry, "get_cmap"):
        cmapper = registry.get_cmap(cmap)
    else:
        cmapper = matplotlib.cm.get_cmap(cmap)

    if value_transform:
        value = value_transform(value)

    value = cmapper(value, bytes=True)  # (H, W, 4) uint8 RGBA

    img = value[...]
    img[invalid_mask] = background_color

    if gamma_corrected:
        # Applied to all four channels; a fully opaque alpha (255) is unchanged.
        img = img / 255
        img = np.power(img, 2.2)
        img = img * 255
        img = img.astype(np.uint8)
    return img
|
|
|
|
def count_parameters(model, include_all=False):
    """Return the total number of scalar parameters in ``model``.

    Only parameters with ``requires_grad`` set are counted unless
    ``include_all`` is true, in which case every parameter is counted.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad or include_all:
            total += param.numel()
    return total
|
|
|
|
def compute_errors(gt, pred):
    """Compute metrics for 'pred' compared to 'gt'

    Args:
        gt (numpy.ndarray): Ground truth values
        pred (numpy.ndarray): Predicted values

        gt.shape should be equal to pred.shape

    Returns:
        dict: Dictionary containing the following metrics:
            'a1': Delta1 accuracy: Fraction of pixels that are within a scale factor of 1.25
            'a2': Delta2 accuracy: Fraction of pixels that are within a scale factor of 1.25^2
            'a3': Delta3 accuracy: Fraction of pixels that are within a scale factor of 1.25^3
            'abs_rel': Absolute relative error
            'rmse': Root mean squared error
            'log_10': Absolute log10 error
            'sq_rel': Squared relative error
            'rmse_log': Root mean squared error on the log scale
            'silog': Scale invariant log error
    """
    # Threshold accuracies: the larger of the two ratios must stay below 1.25^k.
    ratio = np.maximum((gt / pred), (pred / gt))
    a1 = (ratio < 1.25).mean()
    a2 = (ratio < 1.25 ** 2).mean()
    a3 = (ratio < 1.25 ** 3).mean()

    # Linear-space errors.
    diff = gt - pred
    squared_diff = diff ** 2
    abs_rel = np.mean(np.abs(diff) / gt)
    sq_rel = np.mean(squared_diff / gt)
    rmse = np.sqrt(squared_diff.mean())

    # Log-space errors.
    log_gt = np.log(gt)
    log_pred = np.log(pred)
    rmse_log = np.sqrt(((log_gt - log_pred) ** 2).mean())

    err = log_pred - log_gt
    silog = np.sqrt(np.mean(err ** 2) - np.mean(err) ** 2) * 100

    log_10 = (np.abs(np.log10(gt) - np.log10(pred))).mean()

    return {
        'a1': a1,
        'a2': a2,
        'a3': a3,
        'abs_rel': abs_rel,
        'rmse': rmse,
        'log_10': log_10,
        'rmse_log': rmse_log,
        'silog': silog,
        'sq_rel': sq_rel,
    }
|
|
|
|
def compute_metrics(gt, pred, interpolate=True, garg_crop=False, eigen_crop=True, dataset='nyu', min_depth_eval=0.1, max_depth_eval=10, **kwargs):
    """Compute metrics of predicted depth maps. Applies cropping and masking as necessary or specified via arguments. Refer to compute_errors for more details on metrics.

    Args:
        gt (torch.Tensor): Ground truth depth. Must squeeze to a 2D (H, W) map when a crop is enabled.
        pred (torch.Tensor): Predicted depth. Resized to gt's spatial size if the sizes differ and `interpolate` is True. The caller's tensor is not modified.
        interpolate (bool, optional): Bilinearly resize pred to gt's size on mismatch. Defaults to True.
        garg_crop (bool, optional): Evaluate only inside the Garg crop. Takes precedence over eigen_crop. Defaults to False.
        eigen_crop (bool, optional): Evaluate only inside the Eigen crop. Defaults to True.
        dataset (str, optional): 'kitti' selects the relative Eigen crop; any other value uses the fixed [45:471, 41:601] window. Defaults to 'nyu'.
        min_depth_eval (float, optional): Lower clamp for pred and exclusive lower bound for valid gt. Defaults to 0.1.
        max_depth_eval (float, optional): Upper clamp for pred and exclusive upper bound for valid gt. Defaults to 10.
        **kwargs: If 'config' is given, its garg_crop, eigen_crop, min_depth_eval and max_depth_eval attributes override the arguments above.

    Returns:
        dict: Output of compute_errors on the valid, cropped pixels.
    """
    if 'config' in kwargs:
        config = kwargs['config']
        garg_crop = config.garg_crop
        eigen_crop = config.eigen_crop
        min_depth_eval = config.min_depth_eval
        max_depth_eval = config.max_depth_eval

    if gt.shape[-2:] != pred.shape[-2:] and interpolate:
        pred = nn.functional.interpolate(
            pred, gt.shape[-2:], mode='bilinear', align_corners=True)

    # .numpy() on a CPU tensor shares memory with it, so work on a copy;
    # otherwise the clamping below would silently rewrite the caller's tensor.
    # detach() allows tensors that still require grad.
    pred = pred.detach().squeeze().cpu().numpy().copy()
    pred[pred < min_depth_eval] = min_depth_eval
    pred[pred > max_depth_eval] = max_depth_eval
    pred[np.isinf(pred)] = max_depth_eval
    pred[np.isnan(pred)] = min_depth_eval

    gt_depth = gt.detach().squeeze().cpu().numpy()
    valid_mask = np.logical_and(
        gt_depth > min_depth_eval, gt_depth < max_depth_eval)

    if garg_crop or eigen_crop:
        gt_height, gt_width = gt_depth.shape
        eval_mask = np.zeros(valid_mask.shape, dtype=bool)

        if garg_crop:
            eval_mask[int(0.40810811 * gt_height):int(0.99189189 * gt_height),
                      int(0.03594771 * gt_width):int(0.96405229 * gt_width)] = True
        elif dataset == 'kitti':
            eval_mask[int(0.3324324 * gt_height):int(0.91351351 * gt_height),
                      int(0.0359477 * gt_width):int(0.96405229 * gt_width)] = True
        else:
            # Fixed pixel window; presumably sized for 480x640 NYU frames - TODO confirm.
            eval_mask[45:471, 41:601] = True
    else:
        eval_mask = np.ones(valid_mask.shape, dtype=bool)

    valid_mask = np.logical_and(valid_mask, eval_mask)
    return compute_errors(gt_depth[valid_mask], pred[valid_mask])
|
|
|
|
| |
|
|
|
|
def parallelize(config, model, find_unused_parameters=True):
    """Move ``model`` to GPU and wrap it for multi-GPU execution as configured.

    Three outcomes, selected by ``config``:
      * ``config.distributed`` true: initialise the process group, convert
        batch norms to SyncBatchNorm and wrap in DistributedDataParallel.
      * not distributed and ``config.gpu`` is None: wrap in DataParallel.
      * not distributed and ``config.gpu`` set: model is just moved to that GPU.

    ``config`` is updated in place: ``multigpu`` is always set, and in the
    distributed case ``rank``, ``batch_size`` and ``workers`` are rewritten
    to per-process values.

    Returns:
        The (possibly wrapped) model.
    """
    gpu = config.gpu

    if gpu is not None:
        torch.cuda.set_device(gpu)
        model = model.cuda(gpu)

    config.multigpu = False

    if config.distributed:
        config.multigpu = True

        # Turn the node rank into a global process rank.
        config.rank = config.rank * config.ngpus_per_node + gpu
        dist.init_process_group(
            backend=config.dist_backend,
            init_method=config.dist_url,
            world_size=config.world_size,
            rank=config.rank,
        )

        # Split the batch and the data-loading workers between the node's GPUs
        # (workers are rounded up).
        config.batch_size = int(config.batch_size / config.ngpus_per_node)
        padded_workers = config.num_workers + config.ngpus_per_node - 1
        config.workers = int(padded_workers / config.ngpus_per_node)
        print("Device", config.gpu, "Rank", config.rank, "batch size",
              config.batch_size, "Workers", config.workers)

        torch.cuda.set_device(gpu)
        model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        model = model.cuda(gpu)
        return torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[gpu],
            output_device=gpu,
            find_unused_parameters=find_unused_parameters,
        )

    if gpu is None:
        config.multigpu = True
        model = model.cuda()
        return torch.nn.DataParallel(model)

    return model
|
|
|
|
| |
|
|
|
|
| |
|
|
|
|
class colors:
    """ANSI escape sequences for styling terminal output.

    Text attributes (reset, bold, disable, underline, reverse,
    strikethrough, invisible) live directly on this class, e.g.
    ``colors.bold``. Foreground and background colors live on the nested
    ``fg`` and ``bg`` classes, e.g. ``colors.fg.red`` or ``colors.bg.green``.
    Emit ``colors.reset`` to clear all styling.
    """

    # Text attributes
    reset = "\033[0m"
    bold = "\033[01m"
    disable = "\033[02m"
    underline = "\033[04m"
    reverse = "\033[07m"
    strikethrough = "\033[09m"
    invisible = "\033[08m"

    class fg:
        # Standard foreground colors (30-37)
        black = "\033[30m"
        red = "\033[31m"
        green = "\033[32m"
        orange = "\033[33m"
        blue = "\033[34m"
        purple = "\033[35m"
        cyan = "\033[36m"
        lightgrey = "\033[37m"
        # Bright foreground colors (90-96)
        darkgrey = "\033[90m"
        lightred = "\033[91m"
        lightgreen = "\033[92m"
        yellow = "\033[93m"
        lightblue = "\033[94m"
        pink = "\033[95m"
        lightcyan = "\033[96m"

    class bg:
        # Standard background colors (40-47)
        black = "\033[40m"
        red = "\033[41m"
        green = "\033[42m"
        orange = "\033[43m"
        blue = "\033[44m"
        purple = "\033[45m"
        cyan = "\033[46m"
        lightgrey = "\033[47m"
|
|
|
|
def printc(text, color):
    """Print ``text`` prefixed with the escape sequence ``color`` and reset styling afterwards."""
    styled = f"{color}{text}{colors.reset}"
    print(styled)
|
|
| |
|
|
def get_image_from_url(url, timeout=30):
    """Download an image and return it as an RGB PIL image.

    Args:
        url (str): Address of the image.
        timeout (float or tuple, optional): Passed to ``requests.get``; without
            it a stalled server would block the call indefinitely. Defaults to 30.

    Returns:
        PIL.Image.Image: The decoded image converted to mode "RGB".

    Raises:
        requests.HTTPError: If the server answers with an error status, so an
            HTML error page is not handed to the image decoder.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return Image.open(BytesIO(response.content)).convert("RGB")
|
|
def url_to_torch(url, size=(384, 384)):
    """Download an image and return it as a float tensor scaled to [0, 1].

    Args:
        url (str): Address of the image.
        size (tuple[int, int], optional): Target size passed to PIL's
            ``resize``. Defaults to (384, 384).

    Returns:
        torch.Tensor: Channel-first RGB image of shape (3, H, W).
    """
    img = get_image_from_url(url)
    # Image.ANTIALIAS was an alias of LANCZOS and was removed in Pillow 10.
    img = img.resize(size, Image.LANCZOS)
    # np.array (not asarray) yields a writable copy, which avoids torch's
    # warning about wrapping a read-only buffer.
    img = torch.from_numpy(np.array(img)).float()
    img = img.permute(2, 0, 1)  # HWC -> CHW
    img.div_(255)
    return img
|
|
def pil_to_batched_tensor(img):
    """Convert ``img`` with torchvision's ``ToTensor`` and add a leading batch dimension."""
    to_tensor = ToTensor()
    tensor = to_tensor(img)
    return tensor.unsqueeze(0)
|
|
def save_raw_16bit(depth, fpath="raw.png"):
    """Save a depth map as a 16-bit image with values scaled by 256.

    Args:
        depth (torch.Tensor or numpy.ndarray): Depth map. Tensors are
            squeezed; the result must be 2D.
        fpath (str, optional): Output path. Defaults to "raw.png".

    Scaled values are truncated to integers. Values outside the uint16 range
    are clamped to [0, 65535] and NaNs are stored as 0, instead of wrapping
    around or producing undefined values in the integer cast.
    """
    if isinstance(depth, torch.Tensor):
        # detach() so tensors that still require grad can be exported.
        depth = depth.detach().squeeze().cpu().numpy()

    assert isinstance(depth, np.ndarray), "Depth must be a torch tensor or numpy array"
    assert depth.ndim == 2, "Depth must be 2D"

    scaled = np.nan_to_num(depth * 256, nan=0.0)
    scaled = np.clip(scaled, 0, np.iinfo(np.uint16).max)
    image = Image.fromarray(scaled.astype(np.uint16))
    image.save(fpath)
    print("Saved raw depth to", fpath)