Spaces:
Runtime error
Runtime error
| # -------------------------------------------------------- | |
| # X-Decoder -- Generalized Decoding for Pixel, Image, and Language | |
| # Copyright (c) 2022 Microsoft | |
| # Licensed under The MIT License [see LICENSE for details] | |
| # Modified by Xueyan Zou (xueyan@cs.wisc.edu) | |
| # -------------------------------------------------------- | |
| # Copyright (c) Facebook, Inc. and its affiliates. | |
| import copy | |
| import scipy.io | |
| import numpy as np | |
| import torch | |
| from PIL import Image | |
| from torchvision import transforms | |
| from detectron2.utils import configurable | |
| __all__ = ["BDDSemDatasetMapper"] | |
# Dataset mapper for the BDD semantic segmentation dataset.
class BDDSemDatasetMapper:
    """
    Callable that turns one dataset dict into model-ready tensors.

    For each dataset dict the mapper:
    1. Reads the RGB image from "file_name" and records its original
       width and height in the returned dict.
    2. At inference time (``is_train`` is falsy), resizes the image with
       bicubic interpolation to ``min_size_test``.
    3. Converts the image to a (C, H, W) tensor under the "image" key.
    4. Reads the label map from "sem_seg_file_name" and stores it as an
       int32 tensor under the "semseg" key (the label map is not resized).

    NOTE(review): ``configurable`` is imported by this module but is not
    applied to ``__init__`` here, so the class must be constructed with
    explicit keyword arguments, e.g.
    ``BDDSemDatasetMapper(**BDDSemDatasetMapper.from_config(cfg, is_train))``.
    Confirm whether a ``@configurable`` decorator was intended.
    """

    def __init__(
        self,
        is_train=True,
        min_size_test=None,
        max_size_test=None,
        mean=None,
        std=None,
    ):
        """
        NOTE: this interface is experimental.

        Args:
            is_train: whether the mapper is used for training; the resize
                transform is only applied when this is falsy.
            min_size_test: size passed to ``transforms.Resize`` for
                inference-time resizing.
            max_size_test: stored on the instance; not used by the resize
                transform built here.
            mean: per-channel pixel mean (assumed to be a 1-D sequence);
                stored as a tensor with two trailing singleton dimensions.
            std: per-channel pixel std, stored the same way as ``mean``.
        """
        self.is_train = is_train
        self.min_size_test = min_size_test
        self.max_size_test = max_size_test

        # Trailing singleton dims let the statistics broadcast against a
        # (C, H, W) image. They are only stored here; this class does not
        # apply normalization itself.
        self.pixel_mean = torch.tensor(mean)[:, None, None]
        self.pixel_std = torch.tensor(std)[:, None, None]

        self.transform = transforms.Compose(
            [transforms.Resize(self.min_size_test, interpolation=Image.BICUBIC)]
        )

    @classmethod
    def from_config(cls, cfg, is_train=True):
        """
        Build the ``__init__`` keyword arguments from a config mapping.

        Args:
            cfg: mapping with an 'INPUT' section providing 'MIN_SIZE_TEST',
                'MAX_SIZE_TEST', 'PIXEL_MEAN' and 'PIXEL_STD'.
            is_train: forwarded unchanged as the ``is_train`` argument.

        Returns:
            dict: keyword arguments accepted by ``__init__``.

        Raises:
            KeyError: if a required config entry is missing.
        """
        input_cfg = cfg['INPUT']
        return {
            "is_train": is_train,
            "min_size_test": input_cfg['MIN_SIZE_TEST'],
            "max_size_test": input_cfg['MAX_SIZE_TEST'],
            "mean": input_cfg['PIXEL_MEAN'],
            "std": input_cfg['PIXEL_STD'],
        }

    def read_semseg(self, file_name):
        """
        Load a semantic segmentation label map from disk.

        Args:
            file_name (str): path to a ``.png`` label image or to a ``.mat``
                file that stores the labels under the 'LabelMap' key.

        Returns:
            numpy.ndarray: the label map as read from the file.

        Raises:
            ValueError: if the file extension is neither ``.png`` nor ``.mat``.
        """
        lowered = file_name.lower()
        if lowered.endswith('.png'):
            # Materialize the pixels before the file handle is closed.
            with Image.open(file_name) as label_image:
                return np.asarray(label_image)
        if lowered.endswith('.mat'):
            return scipy.io.loadmat(file_name)['LabelMap']
        # Fail with a clear message instead of an UnboundLocalError.
        raise ValueError(
            "Unsupported semantic segmentation file (expected .png or .mat): "
            "{}".format(file_name)
        )

    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset
                format. Must contain "file_name" and "sem_seg_file_name".

        Returns:
            dict: a deep copy of ``dataset_dict`` extended with "width" and
            "height" (size of the image as read from disk, before any
            resize), "image" (a (C, H, W) tensor) and "semseg" (an int32
            tensor).
        """
        # Deep copy so the caller's dict is never mutated.
        record = copy.deepcopy(dataset_dict)

        # convert() returns an independent, fully loaded image, so the file
        # handle can be released right away.
        with Image.open(record['file_name']) as raw_image:
            image = raw_image.convert('RGB')

        # Record the original size before the optional resize below.
        record['width'], record['height'] = image.size

        if not self.is_train:
            image = self.transform(image)

        # PIL gives (H, W, C); copy so the array is writable for torch, then
        # reorder to (C, H, W).
        image = torch.from_numpy(np.asarray(image).copy()).permute(2, 0, 1)

        semseg = self.read_semseg(record['sem_seg_file_name'])

        record['image'] = image
        record['semseg'] = torch.from_numpy(semseg.astype(np.int32))
        return record