tuandunghcmut committed
Commit 141c79f · verified · 1 Parent(s): f15b370

Add files using upload-large-folder tool

This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. .gitattributes +7 -0
  2. Groma/mmdet/models/backbones/__pycache__/csp_darknet.cpython-39.pyc +0 -0
  3. Groma/mmdet/models/backbones/__pycache__/darknet.cpython-39.pyc +0 -0
  4. Groma/mmdet/models/backbones/__pycache__/detectors_resnet.cpython-39.pyc +0 -0
  5. Groma/mmdet/models/backbones/__pycache__/detectors_resnext.cpython-39.pyc +0 -0
  6. Groma/mmdet/models/backbones/__pycache__/hourglass.cpython-39.pyc +0 -0
  7. Groma/mmdet/models/backbones/__pycache__/hrnet.cpython-39.pyc +0 -0
  8. Groma/mmdet/models/backbones/__pycache__/mobilenet_v2.cpython-39.pyc +0 -0
  9. Groma/mmdet/models/backbones/__pycache__/regnet.cpython-39.pyc +0 -0
  10. Groma/mmdet/models/backbones/__pycache__/res2net.cpython-39.pyc +0 -0
  11. Groma/mmdet/models/backbones/__pycache__/resnest.cpython-39.pyc +0 -0
  12. Groma/mmdet/models/backbones/__pycache__/resnet.cpython-39.pyc +0 -0
  13. Groma/mmdet/models/backbones/__pycache__/resnext.cpython-39.pyc +0 -0
  14. Groma/mmdet/models/backbones/__pycache__/ssd_vgg.cpython-39.pyc +0 -0
  15. Groma/mmdet/models/backbones/__pycache__/swin.cpython-39.pyc +0 -0
  16. Groma/mmdet/models/backbones/__pycache__/trident_resnet.cpython-39.pyc +0 -0
  17. Groma/mmdet/models/losses/__pycache__/__init__.cpython-39.pyc +0 -0
  18. Groma/mmdet/models/losses/__pycache__/accuracy.cpython-39.pyc +0 -0
  19. Groma/mmdet/models/losses/__pycache__/ae_loss.cpython-39.pyc +0 -0
  20. Groma/mmdet/models/losses/__pycache__/balanced_l1_loss.cpython-39.pyc +0 -0
  21. Groma/mmdet/models/losses/__pycache__/cross_entropy_loss.cpython-39.pyc +0 -0
  22. Groma/mmdet/models/losses/__pycache__/dice_loss.cpython-39.pyc +0 -0
  23. Groma/mmdet/models/losses/__pycache__/focal_loss.cpython-39.pyc +0 -0
  24. Groma/mmdet/models/losses/__pycache__/gaussian_focal_loss.cpython-39.pyc +0 -0
  25. Groma/mmdet/models/losses/__pycache__/gfocal_loss.cpython-39.pyc +0 -0
  26. Groma/mmdet/models/losses/__pycache__/ghm_loss.cpython-39.pyc +0 -0
  27. Groma/mmdet/models/losses/__pycache__/iou_loss.cpython-39.pyc +0 -0
  28. Groma/mmdet/models/losses/__pycache__/kd_loss.cpython-39.pyc +0 -0
  29. Groma/mmdet/models/losses/__pycache__/mse_loss.cpython-39.pyc +0 -0
  30. Groma/mmdet/models/losses/__pycache__/pisa_loss.cpython-39.pyc +0 -0
  31. Groma/mmdet/models/losses/__pycache__/seesaw_loss.cpython-39.pyc +0 -0
  32. Groma/mmdet/models/losses/__pycache__/smooth_l1_loss.cpython-39.pyc +0 -0
  33. Groma/mmdet/models/losses/__pycache__/utils.cpython-39.pyc +0 -0
  34. Groma/mmdet/models/losses/__pycache__/varifocal_loss.cpython-39.pyc +0 -0
  35. Groma/mmdet/utils/__pycache__/__init__.cpython-39.pyc +0 -0
  36. Groma/mmdet/utils/__pycache__/collect_env.cpython-39.pyc +0 -0
  37. Groma/mmdet/utils/__pycache__/contextmanagers.cpython-39.pyc +0 -0
  38. Groma/mmdet/utils/__pycache__/logger.cpython-39.pyc +0 -0
  39. Groma/mmdet/utils/__pycache__/misc.cpython-39.pyc +0 -0
  40. Groma/mmdet/utils/__pycache__/setup_env.cpython-39.pyc +0 -0
  41. Groma/mmdet/utils/__pycache__/util_mixins.cpython-39.pyc +0 -0
  42. OpenSeeD/datasets/__init__.py +2 -0
  43. OpenSeeD/datasets/build.py +638 -0
  44. OpenSeeD/datasets/dataset_mappers/__init__.py +14 -0
  45. OpenSeeD/datasets/dataset_mappers/coco_instance_new_baseline_dataset_mapper.py +191 -0
  46. OpenSeeD/datasets/dataset_mappers/coco_panoptic_new_baseline_dataset_mapper.py +166 -0
  47. OpenSeeD/datasets/dataset_mappers/imagenet_dataset_mapper.py +95 -0
  48. OpenSeeD/datasets/dataset_mappers/lvis_dataset_mapper.py +170 -0
  49. OpenSeeD/datasets/dataset_mappers/mask_former_instance_dataset_mapper.py +184 -0
  50. OpenSeeD/datasets/dataset_mappers/mask_former_panoptic_dataset_mapper.py +168 -0
.gitattributes CHANGED
@@ -584,3 +584,10 @@ Groma/mmcv/docs/en/_static/flow_warp.png filter=lfs diff=lfs merge=lfs -text
  Groma/mmcv/docs/en/_static/flow_raw_images.png filter=lfs diff=lfs merge=lfs -text
  Groma/mmcv/docs/en/_static/zhihu_qrcode.jpg filter=lfs diff=lfs merge=lfs -text
  Groma/mmcv/docs/en/_static/community/3.png filter=lfs diff=lfs merge=lfs -text
+ OpenSeeD/figs/results1.jpg filter=lfs diff=lfs merge=lfs -text
+ OpenSeeD/figs/framework.jpg filter=lfs diff=lfs merge=lfs -text
+ OpenSeeD/figs/cover.jpg filter=lfs diff=lfs merge=lfs -text
+ OpenSeeD/figs/results2.jpg filter=lfs diff=lfs merge=lfs -text
+ OpenSeeD/figs/intro.jpg filter=lfs diff=lfs merge=lfs -text
+ OpenSeeD/images/animals.png filter=lfs diff=lfs merge=lfs -text
+ OpenSeeD/images/street.jpg filter=lfs diff=lfs merge=lfs -text
Groma/mmdet/models/backbones/__pycache__/csp_darknet.cpython-39.pyc ADDED
Binary file (9.09 kB)
Groma/mmdet/models/backbones/__pycache__/darknet.cpython-39.pyc ADDED
Binary file (7.27 kB)
Groma/mmdet/models/backbones/__pycache__/detectors_resnet.cpython-39.pyc ADDED
Binary file (9.53 kB)
Groma/mmdet/models/backbones/__pycache__/detectors_resnext.cpython-39.pyc ADDED
Binary file (2.94 kB)
Groma/mmdet/models/backbones/__pycache__/hourglass.cpython-39.pyc ADDED
Binary file (6.32 kB)
Groma/mmdet/models/backbones/__pycache__/hrnet.cpython-39.pyc ADDED
Binary file (13.5 kB)
Groma/mmdet/models/backbones/__pycache__/mobilenet_v2.cpython-39.pyc ADDED
Binary file (5.84 kB)
Groma/mmdet/models/backbones/__pycache__/regnet.cpython-39.pyc ADDED
Binary file (11.1 kB)
Groma/mmdet/models/backbones/__pycache__/res2net.cpython-39.pyc ADDED
Binary file (8.79 kB)
Groma/mmdet/models/backbones/__pycache__/resnest.cpython-39.pyc ADDED
Binary file (8.9 kB)
Groma/mmdet/models/backbones/__pycache__/resnet.cpython-39.pyc ADDED
Binary file (17.4 kB)
Groma/mmdet/models/backbones/__pycache__/resnext.cpython-39.pyc ADDED
Binary file (4.72 kB)
Groma/mmdet/models/backbones/__pycache__/ssd_vgg.cpython-39.pyc ADDED
Binary file (4.36 kB)
Groma/mmdet/models/backbones/__pycache__/swin.cpython-39.pyc ADDED
Binary file (22.4 kB)
Groma/mmdet/models/backbones/__pycache__/trident_resnet.cpython-39.pyc ADDED
Binary file (9.44 kB)
Groma/mmdet/models/losses/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (1.59 kB)
Groma/mmdet/models/losses/__pycache__/accuracy.cpython-39.pyc ADDED
Binary file (3.22 kB)
Groma/mmdet/models/losses/__pycache__/ae_loss.cpython-39.pyc ADDED
Binary file (3.6 kB)
Groma/mmdet/models/losses/__pycache__/balanced_l1_loss.cpython-39.pyc ADDED
Binary file (4.1 kB)
Groma/mmdet/models/losses/__pycache__/cross_entropy_loss.cpython-39.pyc ADDED
Binary file (7.61 kB)
Groma/mmdet/models/losses/__pycache__/dice_loss.cpython-39.pyc ADDED
Binary file (4.86 kB)
Groma/mmdet/models/losses/__pycache__/focal_loss.cpython-39.pyc ADDED
Binary file (7.34 kB)
Groma/mmdet/models/losses/__pycache__/gaussian_focal_loss.cpython-39.pyc ADDED
Binary file (3.33 kB)
Groma/mmdet/models/losses/__pycache__/gfocal_loss.cpython-39.pyc ADDED
Binary file (8.41 kB)
Groma/mmdet/models/losses/__pycache__/ghm_loss.cpython-39.pyc ADDED
Binary file (6.33 kB)
Groma/mmdet/models/losses/__pycache__/iou_loss.cpython-39.pyc ADDED
Binary file (12.5 kB)
Groma/mmdet/models/losses/__pycache__/kd_loss.cpython-39.pyc ADDED
Binary file (2.91 kB)
Groma/mmdet/models/losses/__pycache__/mse_loss.cpython-39.pyc ADDED
Binary file (2.13 kB)
Groma/mmdet/models/losses/__pycache__/pisa_loss.cpython-39.pyc ADDED
Binary file (4.42 kB)
Groma/mmdet/models/losses/__pycache__/seesaw_loss.cpython-39.pyc ADDED
Binary file (7.76 kB)
Groma/mmdet/models/losses/__pycache__/smooth_l1_loss.cpython-39.pyc ADDED
Binary file (3.95 kB)
Groma/mmdet/models/losses/__pycache__/utils.cpython-39.pyc ADDED
Binary file (2.76 kB)
Groma/mmdet/models/losses/__pycache__/varifocal_loss.cpython-39.pyc ADDED
Binary file (4.77 kB)
Groma/mmdet/utils/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (358 Bytes)
Groma/mmdet/utils/__pycache__/collect_env.cpython-39.pyc ADDED
Binary file (589 Bytes)
Groma/mmdet/utils/__pycache__/contextmanagers.cpython-39.pyc ADDED
Binary file (3.55 kB)
Groma/mmdet/utils/__pycache__/logger.cpython-39.pyc ADDED
Binary file (649 Bytes)
Groma/mmdet/utils/__pycache__/misc.cpython-39.pyc ADDED
Binary file (1.17 kB)
Groma/mmdet/utils/__pycache__/setup_env.cpython-39.pyc ADDED
Binary file (1.49 kB)
Groma/mmdet/utils/__pycache__/util_mixins.cpython-39.pyc ADDED
Binary file (3.75 kB)
OpenSeeD/datasets/__init__.py ADDED
@@ -0,0 +1,2 @@
+ from . import registration
+ from .build import *
OpenSeeD/datasets/build.py ADDED
@@ -0,0 +1,638 @@
+ # Copyright (c) Facebook, Inc. and its affiliates.
+ import os
+ import itertools
+ import logging
+ import copy
+ from typing import Any, Callable, Dict, List, Optional, Union
+
+ import torch
+ import torch.utils.data
+ import torch.utils.data as torchdata
+
+ import detectron2.utils.comm as comm
+ from detectron2.data.build import (
+     build_batch_data_loader,
+     load_proposals_into_dataset,
+     trivial_batch_collator,
+ )
+ from detectron2.data import MetadataCatalog
+ from detectron2.data.catalog import DatasetCatalog
+ from detectron2.data.common import DatasetFromList, MapDataset
+ from detectron2.data.dataset_mapper import DatasetMapper
+ from detectron2.data.samplers import InferenceSampler, TrainingSampler
+ from detectron2.evaluation import (
+     CityscapesInstanceEvaluator,
+     CityscapesSemSegEvaluator,
+     COCOEvaluator,
+     DatasetEvaluators,
+     LVISEvaluator,
+     verify_results,
+ )
+ from fvcore.common.config import CfgNode
+ from omegaconf import DictConfig, OmegaConf
+
+ from .dataset_mappers import (
+     COCOInstanceNewBaselineDatasetMapper,
+     COCOPanopticNewBaselineDatasetMapper,
+     MaskFormerInstanceDatasetMapper,
+     MaskFormerPanopticDatasetMapper,
+     MaskFormerSemanticDatasetMapper,
+     ImageNetDatasetMapper,
+     VLPreDatasetMapper,
+     SunRGBDSegDatasetMapper,
+     ScanNetSegDatasetMapper,
+     BDDSemDatasetMapper,
+     ScanNetPanoDatasetMapper,
+     RefCOCODatasetMapper,
+     O365InstanceNewBaselineDatasetMapper,
+ )
+ from .evaluation import (InstanceSegEvaluator,
+                          SemSegEvaluator,
+                          COCOPanopticEvaluator,
+                          )
+ from openseed.utils import configurable
+ from detectron2.utils.comm import get_world_size
+ from typing import Any, Dict, List, Set
+
+ class JointLoader(torchdata.IterableDataset):
+     def __init__(self, loaders, key_dataset):
+         dataset_names = []
+         for key, loader in loaders.items():
+             name = "{}".format(key.split('_')[0])
+             setattr(self, name, loader)
+             dataset_names += [name]
+         self.dataset_names = dataset_names
+         self.key_dataset = key_dataset
+
+     def __iter__(self):
+         for batch in zip(*[getattr(self, name) for name in self.dataset_names]):
+             yield {key: batch[i] for i, key in enumerate(self.dataset_names)}
+
+     def __len__(self):
+         return len(getattr(self, self.key_dataset))
+
+ def filter_images_with_only_crowd_annotations(dataset_dicts, dataset_names):
+     """
+     Filter out images with none annotations or only crowd annotations
+     (i.e., images without non-crowd annotations).
+     A common training-time preprocessing on COCO dataset.
+
+     Args:
+         dataset_dicts (list[dict]): annotations in Detectron2 Dataset format.
+
+     Returns:
+         list[dict]: the same format, but filtered.
+     """
+     num_before = len(dataset_dicts)
+
+     def valid(anns):
+         for ann in anns:
+             if isinstance(ann, list):
+                 for instance in ann:
+                     if instance.get("iscrowd", 0) == 0:
+                         return True
+             else:
+                 if ann.get("iscrowd", 0) == 0:
+                     return True
+         return False
+
+     dataset_dicts = [x for x in dataset_dicts if valid(x["annotations"])]
+     num_after = len(dataset_dicts)
+     logger = logging.getLogger(__name__)
+     logger.info(
+         "Removed {} images with no usable annotations. {} images left.".format(
+             num_before - num_after, num_after
+         )
+     )
+     return dataset_dicts
+
+
+ def get_detection_dataset_dicts(
+     dataset_names, filter_empty=True, proposal_files=None
+ ):
+     """
+     Load and prepare dataset dicts for instance detection/segmentation and semantic segmentation.
+
+     Args:
+         dataset_names (str or list[str]): a dataset name or a list of dataset names
+         filter_empty (bool): whether to filter out images without instance annotations
+         proposal_files (list[str]): if given, a list of object proposal files
+             that match each dataset in `dataset_names`.
+
+     Returns:
+         list[dict]: a list of dicts following the standard dataset dict format.
+     """
+     if isinstance(dataset_names, str):
+         dataset_names = [dataset_names]
+     assert len(dataset_names)
+
+     dataset_dicts = [DatasetCatalog.get(dataset_name) for dataset_name in dataset_names]
+     for dataset_name, dicts in zip(dataset_names, dataset_dicts):
+         assert len(dicts), "Dataset '{}' is empty!".format(dataset_name)
+
+     if proposal_files is not None:
+         assert len(dataset_names) == len(proposal_files)
+         # load precomputed proposals from proposal files
+         dataset_dicts = [
+             load_proposals_into_dataset(dataset_i_dicts, proposal_file)
+             for dataset_i_dicts, proposal_file in zip(dataset_dicts, proposal_files)
+         ]
+
+     dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts))
+
+     has_instances = "annotations" in dataset_dicts[0]
+     if filter_empty and has_instances:
+         dataset_dicts = filter_images_with_only_crowd_annotations(dataset_dicts, dataset_names)
+
+     assert len(dataset_dicts), "No valid data found in {}.".format(",".join(dataset_names))
+     return dataset_dicts
+
+
+ def _test_loader_from_config(cfg, dataset_name, mapper=None):
+     """
+     Uses the given `dataset_name` argument (instead of the names in cfg), because the
+     standard practice is to evaluate each test set individually (not combining them).
+     """
+     if isinstance(dataset_name, str):
+         dataset_name = [dataset_name]
+
+     dataset = get_detection_dataset_dicts(
+         dataset_name,
+         filter_empty=False,
+         proposal_files=None,
+     )
+     # import ipdb;ipdb.set_trace()
+     if mapper is None:
+         if isinstance(cfg, (DictConfig)):
+             cfg = OmegaConf.to_container(copy.deepcopy(cfg))
+         mapper_cfg = CfgNode({'INPUT': cfg['INPUT'], 'MODEL': cfg['MODEL'], 'DATASETS': cfg['DATASETS']})
+         mapper = DatasetMapper(mapper_cfg, False)
+     assert cfg['TEST']['BATCH_SIZE_TOTAL'] % get_world_size() == 0, "Evaluation total batchsize is not divisible by gpu number"
+     batch_size = cfg['TEST']['BATCH_SIZE_TOTAL'] // get_world_size()
+
+     return {
+         "dataset": dataset,
+         "mapper": mapper,
+         "num_workers": cfg['DATALOADER']['NUM_WORKERS'],
+         "sampler": InferenceSampler(len(dataset)),
+         "batch_size": batch_size,
+     }
+
+
+ @configurable(from_config=_test_loader_from_config)
+ def build_detection_test_loader(
+     dataset: Union[List[Any], torchdata.Dataset],
+     *,
+     mapper: Callable[[Dict[str, Any]], Any],
+     sampler: Optional[torchdata.Sampler] = None,
+     batch_size: int = 1,
+     num_workers: int = 0,
+     collate_fn: Optional[Callable[[List[Any]], Any]] = None,
+ ) -> torchdata.DataLoader:
+     """
+     Similar to `build_detection_train_loader`, with default batch size = 1,
+     and sampler = :class:`InferenceSampler`. This sampler coordinates all workers
+     to produce the exact set of all samples.
+
+     Args:
+         dataset: a list of dataset dicts,
+             or a pytorch dataset (either map-style or iterable). They can be obtained
+             by using :func:`DatasetCatalog.get` or :func:`get_detection_dataset_dicts`.
+         mapper: a callable which takes a sample (dict) from dataset
+             and returns the format to be consumed by the model.
+             When using cfg, the default choice is ``DatasetMapper(cfg, is_train=False)``.
+         sampler: a sampler that produces
+             indices to be applied on ``dataset``. Default to :class:`InferenceSampler`,
+             which splits the dataset across all workers. Sampler must be None
+             if `dataset` is iterable.
+         batch_size: the batch size of the data loader to be created.
+             Default to 1 image per worker since this is the standard when reporting
+             inference time in papers.
+         num_workers: number of parallel data loading workers
+         collate_fn: same as the argument of `torch.utils.data.DataLoader`.
+             Defaults to do no collation and return a list of data.
+
+     Returns:
+         DataLoader: a torch DataLoader, that loads the given detection
+         dataset, with test-time transformation and batching.
+
+     Examples:
+     ::
+         data_loader = build_detection_test_loader(
+             DatasetRegistry.get("my_test"),
+             mapper=DatasetMapper(...))
+
+         # or, instantiate with a CfgNode:
+         data_loader = build_detection_test_loader(cfg, "my_test")
+     """
+
+     if isinstance(dataset, list):
+         dataset = DatasetFromList(dataset, copy=False)
+     if mapper is not None:
+         dataset = MapDataset(dataset, mapper)
+     if isinstance(dataset, torchdata.IterableDataset):
+         assert sampler is None, "sampler must be None if dataset is IterableDataset"
+     else:
+         if sampler is None:
+             sampler = InferenceSampler(len(dataset))
+     return torchdata.DataLoader(
+         dataset,
+         batch_size=batch_size,
+         sampler=sampler,
+         drop_last=False,
+         num_workers=num_workers,
+         collate_fn=trivial_batch_collator if collate_fn is None else collate_fn,
+     )
+
+
+ def _train_loader_from_config(cfg, dataset_name, mapper, *, dataset=None, sampler=None):
+     cfg_datasets = cfg['DATASETS']
+     cfg_dataloader = cfg['DATALOADER']
+
+     if dataset is None:
+         dataset = get_detection_dataset_dicts(
+             dataset_name,
+             filter_empty=cfg_dataloader['FILTER_EMPTY_ANNOTATIONS'],
+             proposal_files=cfg_datasets['PROPOSAL_FILES_TRAIN'] if cfg_dataloader['LOAD_PROPOSALS'] else None,
+         )
+
+     if mapper is None:
+         mapper = DatasetMapper(cfg, True)
+
+     if sampler is None:
+         sampler_name = cfg_dataloader['SAMPLER_TRAIN']
+         logger = logging.getLogger(__name__)
+         logger.info("Using training sampler {}".format(sampler_name))
+         sampler = TrainingSampler(len(dataset))
+
+     return {
+         "dataset": dataset,
+         "sampler": sampler,
+         "mapper": mapper,
+         "total_batch_size": cfg['TRAIN']['BATCH_SIZE_TOTAL'],
+         "aspect_ratio_grouping": cfg_dataloader['ASPECT_RATIO_GROUPING'],
+         "num_workers": cfg_dataloader['NUM_WORKERS'],
+     }
+
+
+ @configurable(from_config=_train_loader_from_config)
+ def build_detection_train_loader(
+     dataset, *, mapper, sampler=None, total_batch_size, aspect_ratio_grouping=True, num_workers=0
+ ):
+     """
+     Build a dataloader for object detection with some default features.
+     This interface is experimental.
+
+     Args:
+         dataset (list or torch.utils.data.Dataset): a list of dataset dicts,
+             or a map-style pytorch dataset. They can be obtained by using
+             :func:`DatasetCatalog.get` or :func:`get_detection_dataset_dicts`.
+         mapper (callable): a callable which takes a sample (dict) from dataset and
+             returns the format to be consumed by the model.
+             When using cfg, the default choice is ``DatasetMapper(cfg, is_train=True)``.
+         sampler (torch.utils.data.sampler.Sampler or None): a sampler that
+             produces indices to be applied on ``dataset``.
+             Default to :class:`TrainingSampler`, which coordinates a random shuffle
+             sequence across all workers.
+         total_batch_size (int): total batch size across all workers. Batching
+             simply puts data into a list.
+         aspect_ratio_grouping (bool): whether to group images with similar
+             aspect ratio for efficiency. When enabled, it requires each
+             element in dataset be a dict with keys "width" and "height".
+         num_workers (int): number of parallel data loading workers
+
+     Returns:
+         torch.utils.data.DataLoader: a dataloader. Each output from it is a
+         ``list[mapped_element]`` of length ``total_batch_size / num_workers``,
+         where ``mapped_element`` is produced by the ``mapper``.
+     """
+     if isinstance(dataset, list):
+         dataset = DatasetFromList(dataset, copy=False)
+     if mapper is not None:
+         dataset = MapDataset(dataset, mapper)
+     if sampler is None:
+         sampler = TrainingSampler(len(dataset))
+     assert isinstance(sampler, torch.utils.data.sampler.Sampler)
+     return build_batch_data_loader(
+         dataset,
+         sampler,
+         total_batch_size,
+         aspect_ratio_grouping=aspect_ratio_grouping,
+         num_workers=num_workers,
+     )
+
+
+ def get_config_from_name(cfg, dataset_name):
+     # adjust config according to dataset
+     if 'refcoco' in dataset_name:
+         cfg.update(cfg['REF'])
+         return cfg
+     elif 'coco' in dataset_name:
+         if 'COCO' in cfg.keys():
+             cfg.update(cfg['COCO'])
+         return cfg
+     elif 'ade' in dataset_name:
+         if 'ADE20K' in cfg.keys():
+             cfg.update(cfg['ADE20K'])
+         return cfg
+     elif 'imagenet' in dataset_name:
+         if 'IMAGENET' in cfg.keys():
+             cfg.update(cfg['IMAGENET'])
+         return cfg
+     elif 'vlp' in dataset_name:
+         cfg.update(cfg['VLP'])
+         return cfg
+     elif 'sun' in dataset_name:
+         cfg.update(cfg['SUN'])
+         return cfg
+     elif 'object365' in dataset_name:
+         cfg.update(cfg['OBJECT365'])
+         return cfg
+     elif 'scan' in dataset_name:
+         cfg.update(cfg['SCAN'])
+         return cfg
+     elif 'cityscape' in dataset_name:
+         cfg.update(cfg['CITY'])
+         return cfg
+     elif 'bdd' in dataset_name:
+         cfg.update(cfg['BDD'])
+         return cfg
+     else:
+         assert False, "dataset not support."
+
+
+ def build_eval_dataloader(cfg, ):
+     dataloaders = []
+     cfg = copy.deepcopy(cfg)
+     for dataset_name in cfg['DATASETS']['TEST']:
+         cfg = get_config_from_name(cfg, dataset_name)
+         # adjust mapper according to dataset
+         if dataset_name == 'imagenet_val':
+             mapper = ImageNetDatasetMapper(cfg, False)
+         elif dataset_name == 'bdd10k_val_sem_seg':
+             mapper = BDDSemDatasetMapper(cfg, False)
+         elif dataset_name in ["vlp_val", "vlp_captioning_val", "vlp_val2017", "vlp_captioning_val2017"]:
+             mapper = VLPreDatasetMapper(cfg, False, dataset_name)
+         elif dataset_name in ["scannet_21_val_seg", "scannet_38_val_seg", "scannet_41_val_seg"]:
+             mapper = ScanNetSegDatasetMapper(cfg, False)
+         elif dataset_name in ["scannet_21_panoptic_val", 'bdd10k_40_panoptic_val']:
+             mapper = ScanNetPanoDatasetMapper(cfg, False)
+         elif 'sun' in dataset_name:
+             mapper = SunRGBDSegDatasetMapper(cfg, False)
+         elif 'refcoco' in dataset_name:
+             mapper = RefCOCODatasetMapper(cfg, False)
+         else:
+             mapper = None
+         dataloaders += [build_detection_test_loader(cfg, dataset_name, mapper=mapper)]
+         # dataloaders = build_detection_test_loader(cfg, dataset_name, mapper=mapper)
+     return dataloaders
+
+
+ def build_train_dataloader(cfg, ):
+     dataset_names = cfg['DATASETS']['TRAIN']
+
+     loaders = {}
+     cfg = copy.deepcopy(cfg)
+     for dataset_name in dataset_names:
+         cfg = get_config_from_name(cfg, dataset_name)
+         mapper_name = cfg['INPUT']['DATASET_MAPPER_NAME']
+         # Semantic segmentation dataset mapper
+         if mapper_name == "mask_former_semantic":
+             mapper = MaskFormerSemanticDatasetMapper(cfg, True)
+             loaders['coco'] = build_detection_train_loader(cfg, dataset_name=dataset_name, mapper=mapper)
+         # Panoptic segmentation dataset mapper
+         elif mapper_name == "mask_former_panoptic":  # TODO: Hack for ade training; should add ade name
+             mapper = MaskFormerPanopticDatasetMapper(cfg, True)
+             loaders['ade'] = build_detection_train_loader(cfg, dataset_name=dataset_name, mapper=mapper)
+         # Instance segmentation dataset mapper
+         elif mapper_name == "mask_former_instance":
+             mapper = MaskFormerInstanceDatasetMapper(cfg, True)
+             loaders['coco'] = build_detection_train_loader(cfg, dataset_name=dataset_name, mapper=mapper)
+         # coco instance segmentation lsj new baseline
+         elif mapper_name == "coco_instance_lsj":
+             mapper = COCOInstanceNewBaselineDatasetMapper(cfg, True)
+             loaders['coco'] = build_detection_train_loader(cfg, dataset_name=dataset_name, mapper=mapper)
+         # coco panoptic segmentation lsj new baseline
+         elif mapper_name == "coco_panoptic_lsj":
+             mapper = COCOPanopticNewBaselineDatasetMapper(cfg, True)
+             loaders['coco'] = build_detection_train_loader(cfg, dataset_name=dataset_name, mapper=mapper)
+
+         elif mapper_name == "object365":
+             mapper = O365InstanceNewBaselineDatasetMapper(cfg, True)  # Use lsj instance mapper for o365
+             loaders['o365'] = build_detection_train_loader(cfg, dataset_name=dataset_name, mapper=mapper)
+         elif mapper_name == "vlpretrain":
+             mapper = VLPreDatasetMapper(cfg, True, dataset_name)
+             loaders['vlp'] = build_detection_train_loader(cfg, dataset_name=dataset_name, mapper=mapper)
+         elif mapper_name == "refcoco":
+             mapper = RefCOCODatasetMapper(cfg, True)
+             loaders['ref'] = build_detection_train_loader(cfg, dataset_name=dataset_name, mapper=mapper)
+         else:
+             mapper = None
+             loaders[dataset_name] = build_detection_train_loader(cfg, dataset_name=dataset_name, mapper=mapper)
+     # import ipdb; ipdb.set_trace()
+     if len(loaders) == 1 and not cfg['LOADER'].get('JOINT', False):
+         for k, v in loaders.items():
+             print("number of iterations per epoch: ", v, len(loaders[k]))
+         return list(loaders.values())[0]
+         # return loaders.values()['coco']
+         # return loaders['coco']
+     else:
+         return JointLoader(loaders, key_dataset=cfg['LOADER'].get('KEY_DATASET', 'coco'))
+
+
+ def build_evaluator(cfg, dataset_name, output_folder=None):
+     """
+     Create evaluator(s) for a given dataset.
+     This uses the special metadata "evaluator_type" associated with each
+     builtin dataset. For your own dataset, you can simply create an
+     evaluator manually in your script and do not have to worry about the
+     hacky if-else logic here.
+     """
+     if output_folder is None:
+         output_folder = os.path.join(cfg["OUTPUT_DIR"], "inference")
+     evaluator_list = []
+     evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type
+
+     # semantic segmentation
+     if evaluator_type in ["sem_seg", "ade20k_panoptic_seg"]:
+         evaluator_list.append(
+             SemSegEvaluator(
+                 dataset_name,
+                 distributed=True,
+                 output_dir=output_folder,
+             )
+         )
+     # instance segmentation
+     if evaluator_type == "coco":
+         evaluator_list.append(COCOEvaluator(dataset_name, output_dir=output_folder))
+
+     cfg_model_decoder_test = cfg["MODEL"]["DECODER"]["TEST"]
+     # panoptic segmentation
+     if evaluator_type in [
+         "coco_panoptic_seg",
+         "ade20k_panoptic_seg",
+         "cityscapes_panoptic_seg",
+         "mapillary_vistas_panoptic_seg",
+         "scannet_panoptic_seg",
+         "bdd_panoptic_pano"
+     ]:
+         if cfg_model_decoder_test["PANOPTIC_ON"]:
+             evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder))
+     # COCO
+     if (evaluator_type == "coco_panoptic_seg" and cfg_model_decoder_test["INSTANCE_ON"]) or evaluator_type == "object365_od":
+         evaluator_list.append(COCOEvaluator(dataset_name, output_dir=output_folder))
+     if (evaluator_type == "coco_panoptic_seg" and cfg_model_decoder_test["SEMANTIC_ON"]) or evaluator_type == "coco_sem_seg":
+         evaluator_list.append(SemSegEvaluator(dataset_name, distributed=True, output_dir=output_folder))
+     # Mapillary Vistas
+     if evaluator_type == "mapillary_vistas_panoptic_seg" and cfg_model_decoder_test["INSTANCE_ON"]:
+         evaluator_list.append(InstanceSegEvaluator(dataset_name, output_dir=output_folder))
+     if evaluator_type == "mapillary_vistas_panoptic_seg" and cfg_model_decoder_test["SEMANTIC_ON"]:
+         evaluator_list.append(SemSegEvaluator(dataset_name, distributed=True, output_dir=output_folder))
+     # Cityscapes
+     if evaluator_type == "cityscapes_instance":
+         assert (
+             torch.cuda.device_count() > comm.get_rank()
+         ), "CityscapesEvaluator currently do not work with multiple machines."
+         return CityscapesInstanceEvaluator(dataset_name)
+     if evaluator_type == "cityscapes_sem_seg":
+         assert (
+             torch.cuda.device_count() > comm.get_rank()
+         ), "CityscapesEvaluator currently do not work with multiple machines."
+         return CityscapesSemSegEvaluator(dataset_name)
+     if evaluator_type == "cityscapes_panoptic_seg":
+         if cfg_model_decoder_test["SEMANTIC_ON"]:
+             assert (
+                 torch.cuda.device_count() > comm.get_rank()
+             ), "CityscapesEvaluator currently do not work with multiple machines."
+             evaluator_list.append(CityscapesSemSegEvaluator(dataset_name))
+         if cfg_model_decoder_test["INSTANCE_ON"]:
+             assert (
+                 torch.cuda.device_count() > comm.get_rank()
+             ), "CityscapesEvaluator currently do not work with multiple machines."
+             evaluator_list.append(CityscapesInstanceEvaluator(dataset_name))
+     # ADE20K
+     if evaluator_type == "ade20k_panoptic_seg" and cfg_model_decoder_test["INSTANCE_ON"]:
+         evaluator_list.append(InstanceSegEvaluator(dataset_name, output_dir=output_folder))
+     # SEGINW
+     if evaluator_type == "seginw" and cfg_model_decoder_test["INSTANCE_ON"]:
+         evaluator_list.append(InstanceSegEvaluator(dataset_name, output_dir=output_folder))
+     # LVIS
+     if evaluator_type == "lvis":
+         return LVISEvaluator(dataset_name, output_dir=output_folder)
+     # Classification
+     if evaluator_type == "classification":
+         evaluator_list.append(ClassificationEvaluator(dataset_name, output_folder))
+     # Retrieval
+     if evaluator_type == "retrieval":
+         evaluator_list.append(RetrievalEvaluator(dataset_name, output_folder, cfg['MODEL']['DECODER']['RETRIEVAL']['ENSEMBLE']))
+     if evaluator_type == "captioning":
+         evaluator_list.append(CaptioningEvaluator(dataset_name, output_folder, MetadataCatalog.get(dataset_name).gt_json))
+     if evaluator_type in ["grounding_refcoco", "grounding_phrasecut"]:
+         evaluator_list.append(GroundingEvaluator(dataset_name))
+
+     if len(evaluator_list) == 0:
+         raise NotImplementedError(
+             "no Evaluator for the dataset {} with the type {}".format(
+                 dataset_name, evaluator_type
+             )
+         )
+     elif len(evaluator_list) == 1:
+         return evaluator_list[0]
+
+
+     return DatasetEvaluators(evaluator_list)
+
+
+ def build_optimizer(cls, cfg, model):
+     cfg_solver = cfg['SOLVER']
+     weight_decay_norm = cfg_solver['WEIGHT_DECAY_NORM']
+     weight_decay_embed = cfg_solver['WEIGHT_DECAY_EMBED']
+     weight_decay_bias = cfg_solver.get('WEIGHT_DECAY_BIAS', 0.0)
+
+     defaults = {}
+     defaults["lr"] = cfg_solver['BASE_LR']
+     defaults["weight_decay"] = cfg_solver['WEIGHT_DECAY']
+
+     norm_module_types = (
+         torch.nn.BatchNorm1d,
+         torch.nn.BatchNorm2d,
+         torch.nn.BatchNorm3d,
+         torch.nn.SyncBatchNorm,
+         # NaiveSyncBatchNorm inherits from BatchNorm2d
+         torch.nn.GroupNorm,
+         torch.nn.InstanceNorm1d,
+         torch.nn.InstanceNorm2d,
+         torch.nn.InstanceNorm3d,
+         torch.nn.LayerNorm,
+         torch.nn.LocalResponseNorm,
+     )
+
+     lr_multiplier = cfg['SOLVER']['LR_MULTIPLIER']
+
+     # for _module_name in model.module_names:
+     #     # parameters = self.raw_modules[module_name].get_training_parameters()
+     #     # self.optimizers[module_name] = optimizer_class(parameters, **optimizer_parameters)
+     #     # params = []
+     #     # for module_param_name, value in self.raw_modules[module_name].named_parameters(recurse=True):
+     params: List[Dict[str, Any]] = []
+     memo: Set[torch.nn.parameter.Parameter] = set()
+     for module_name, module in model.named_modules():
+         for module_param_name, value in module.named_parameters(recurse=False):
+             if not value.requires_grad:
+                 continue
+             # Avoid duplicating parameters
+             if value in memo:
+                 continue
+             memo.add(value)
+
+             hyperparams = copy.copy(defaults)
+
+             for key, lr_mul in lr_multiplier.items():
+                 if key in "{}.{}".format(module_name, module_param_name):
+                     hyperparams["lr"] = hyperparams["lr"] * lr_mul
+                     if is_main_process():
+                         logger.info("Modify Learning rate of {}: {}".format(
+                             "{}.{}".format(module_name, module_param_name), lr_mul))
+
+             if (
+                 "relative_position_bias_table" in module_param_name
+                 or "absolute_pos_embed" in module_param_name
+             ):
+                 hyperparams["weight_decay"] = 0.0
+             if isinstance(module, norm_module_types):
+                 hyperparams["weight_decay"] = weight_decay_norm
+             if isinstance(module, torch.nn.Embedding):
+                 hyperparams["weight_decay"] = weight_decay_embed
+             if "bias" in module_name:
+                 hyperparams["weight_decay"] = weight_decay_bias
+             params.append({"params": [value], **hyperparams})
+
+     def maybe_add_full_model_gradient_clipping(optim):
+         # detectron2 doesn't have full model gradient clipping now
+         clip_norm_val = cfg_solver['CLIP_GRADIENTS']['CLIP_VALUE']
+         enable = (
+             cfg_solver['CLIP_GRADIENTS']['ENABLED']
+             and cfg_solver['CLIP_GRADIENTS']['CLIP_TYPE'] == "full_model"
+             and clip_norm_val > 0.0
+         )
+
+         class FullModelGradientClippingOptimizer(optim):
+             def step(self, closure=None):
+                 all_params = itertools.chain(*[x["params"] for x in self.param_groups])
+                 torch.nn.utils.clip_grad_norm_(all_params, clip_norm_val)
+                 super().step(closure=closure)
+
+         return FullModelGradientClippingOptimizer if enable else optim
+
+     optimizer_type = cfg_solver['OPTIMIZER']
+     if optimizer_type == "SGD":
+         optimizer = maybe_add_full_model_gradient_clipping(torch.optim.SGD)(
+             params, cfg_solver['BASE_LR'], momentum=cfg_solver['MOMENTUM']
+         )
+     elif optimizer_type == "ADAMW":
+         optimizer = maybe_add_full_model_gradient_clipping(torch.optim.AdamW)(
+             params, cfg_solver['BASE_LR']
+         )
+     else:
+         raise NotImplementedError(f"no optimizer type {optimizer_type}")
+     return optimizer
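A minimal sketch (not part of the commit) of how the JointLoader above behaves: it zips one batch from every registered loader into a single dict per step, keyed by the prefix of each dataset name, while key_dataset only controls the reported length. The toy lists and dataset names below are hypothetical stand-ins for real detectron2 dataloaders:

loaders = {
    'coco_2017_train': [['coco-batch-0'], ['coco-batch-1']],
    'o365_train': [['o365-batch-0'], ['o365-batch-1']],
}
joint = JointLoader(loaders, key_dataset='coco')
print(len(joint))   # 2, the length of the 'coco' loader
for step in joint:
    # {'coco': ['coco-batch-0'], 'o365': ['o365-batch-0']}, then the next pair
    print(step)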
OpenSeeD/datasets/dataset_mappers/__init__.py ADDED
@@ -0,0 +1,14 @@
+ # Copyright (c) Facebook, Inc. and its affiliates.
+ from .coco_instance_new_baseline_dataset_mapper import COCOInstanceNewBaselineDatasetMapper
+ from .coco_panoptic_new_baseline_dataset_mapper import COCOPanopticNewBaselineDatasetMapper
+ from .mask_former_instance_dataset_mapper import MaskFormerInstanceDatasetMapper
+ from .mask_former_panoptic_dataset_mapper import MaskFormerPanopticDatasetMapper
+ from .mask_former_semantic_dataset_mapper import MaskFormerSemanticDatasetMapper
+ from .imagenet_dataset_mapper import ImageNetDatasetMapper
+ from .vlp_dataset_mapper import VLPreDatasetMapper
+ from .sunrgbd_dataset_mapper import SunRGBDSegDatasetMapper
+ from .scannet_dataset_mapper import ScanNetSegDatasetMapper
+ from .bdd_semseg_dataset_mapper import BDDSemDatasetMapper
+ from .scannet_pano_dataset_mapper import ScanNetPanoDatasetMapper
+ from .refcoco_dataset_mapper import RefCOCODatasetMapper
+ from .o365_instance_new_baseline_dataset_mapper import O365InstanceNewBaselineDatasetMapper
OpenSeeD/datasets/dataset_mappers/coco_instance_new_baseline_dataset_mapper.py ADDED
@@ -0,0 +1,191 @@
+ # Copyright (c) Facebook, Inc. and its affiliates.
+ # Modified by Bowen Cheng from https://github.com/facebookresearch/detr/blob/master/d2/detr/dataset_mapper.py
+ import copy
+ import logging
+
+ import numpy as np
+ import torch
+
+ from detectron2.data import detection_utils as utils
+ from detectron2.data import transforms as T
+ from detectron2.data.transforms import TransformGen
+ from detectron2.structures import BitMasks, Instances
+
+ from pycocotools import mask as coco_mask
+
+ from openseed.utils import configurable
+
+ __all__ = ["COCOInstanceNewBaselineDatasetMapper"]
+
+
+ def convert_coco_poly_to_mask(segmentations, height, width):
+     masks = []
+     for polygons in segmentations:
+         rles = coco_mask.frPyObjects(polygons, height, width)
+         mask = coco_mask.decode(rles)
+         if len(mask.shape) < 3:
+             mask = mask[..., None]
+         mask = torch.as_tensor(mask, dtype=torch.uint8)
+         mask = mask.any(dim=2)
+         masks.append(mask)
+     if masks:
+         masks = torch.stack(masks, dim=0)
+     else:
+         masks = torch.zeros((0, height, width), dtype=torch.uint8)
+     return masks
+
+
+ def build_transform_gen(cfg, is_train):
+     """
+     Create a list of default :class:`Augmentation` from config.
+     Now it includes resizing and flipping.
+     Returns:
+         list[Augmentation]
+     """
+     assert is_train, "Only support training augmentation"
+     cfg_input = cfg['INPUT']
+     image_size = cfg_input['IMAGE_SIZE']
+     min_scale = cfg_input['MIN_SCALE']
+     max_scale = cfg_input['MAX_SCALE']
+
+     augmentation = []
+
+     if cfg_input['RANDOM_FLIP'] != "none":
+         augmentation.append(
+             T.RandomFlip(
+                 horizontal=cfg_input['RANDOM_FLIP'] == "horizontal",
+                 vertical=cfg_input['RANDOM_FLIP'] == "vertical",
+             )
+         )
+
+     augmentation.extend([
+         T.ResizeScale(
+             min_scale=min_scale, max_scale=max_scale, target_height=image_size, target_width=image_size
+         ),
+         T.FixedSizeCrop(crop_size=(image_size, image_size)),
+     ])
+
+     return augmentation
+
+
+ # This is specifically designed for the COCO dataset.
+ class COCOInstanceNewBaselineDatasetMapper:
+     """
+     A callable which takes a dataset dict in Detectron2 Dataset format,
+     and map it into a format used by MaskFormer.
+
+     This dataset mapper applies the same transformation as DETR for COCO panoptic segmentation.
+
+     The callable currently does the following:
+
+     1. Read the image from "file_name"
+     2. Applies geometric transforms to the image and annotation
+     3. Find and applies suitable cropping to the image and annotation
+     4. Prepare image and annotation to Tensors
+     """
+
+     @configurable
+     def __init__(
+         self,
+         is_train=True,
+         *,
+         tfm_gens,
+         image_format,
+     ):
+         """
+         NOTE: this interface is experimental.
+         Args:
+             is_train: for training or inference
+             augmentations: a list of augmentations or deterministic transforms to apply
+             tfm_gens: data augmentation
+             image_format: an image format supported by :func:`detection_utils.read_image`.
+         """
+         self.tfm_gens = tfm_gens
+         logging.getLogger(__name__).info(
+             "[COCOInstanceNewBaselineDatasetMapper] Full TransformGens used in training: {}".format(str(self.tfm_gens))
+         )
+
+         self.img_format = image_format
+         self.is_train = is_train
+
+     @classmethod
+     def from_config(cls, cfg, is_train=True):
+         # Build augmentation
+         tfm_gens = build_transform_gen(cfg, is_train)
+
+         ret = {
+             "is_train": is_train,
+             "tfm_gens": tfm_gens,
+             "image_format": cfg['INPUT']['FORMAT'],
+         }
+         return ret
+
+     def __call__(self, dataset_dict):
+         """
+         Args:
+             dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
+
+         Returns:
+             dict: a format that builtin models in detectron2 accept
+         """
+         dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
+         image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
+         utils.check_image_size(dataset_dict, image)
+
+         # TODO: get padding mask
+         # by feeding a "segmentation mask" to the same transforms
+         padding_mask = np.ones(image.shape[:2])
+
+         image, transforms = T.apply_transform_gens(self.tfm_gens, image)
+         # the crop transformation has default padding value 0 for segmentation
+         padding_mask = transforms.apply_segmentation(padding_mask)
+         padding_mask = ~ padding_mask.astype(bool)
+
+         image_shape = image.shape[:2]  # h, w
+
+         # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
+         # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
+         # Therefore it's important to use torch.Tensor.
+         dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
+         dataset_dict["padding_mask"] = torch.as_tensor(np.ascontiguousarray(padding_mask))
+
+         if not self.is_train:
+             # USER: Modify this if you want to keep them for some reason.
+             dataset_dict.pop("annotations", None)
+             return dataset_dict
+
+         if "annotations" in dataset_dict:
+             # USER: Modify this if you want to keep them for some reason.
+             for anno in dataset_dict["annotations"]:
+                 # Let's always keep mask
+                 # if not self.mask_on:
+                 #     anno.pop("segmentation", None)
+                 anno.pop("keypoints", None)
+
+             # USER: Implement additional transformations if you have other types of data
+             annos = [
+                 utils.transform_instance_annotations(obj, transforms, image_shape)
+                 for obj in dataset_dict.pop("annotations")
+                 if obj.get("iscrowd", 0) == 0
+             ]
+             # NOTE: does not support BitMask due to augmentation
+             # Current BitMask cannot handle empty objects
+             instances = utils.annotations_to_instances(annos, image_shape)
+             # After transforms such as cropping are applied, the bounding box may no longer
+             # tightly bound the object. As an example, imagine a triangle object
+             # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
+             # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
+             # the intersection of original bounding box and the cropping box.
+             instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
+             # Need to filter empty instances first (due to augmentation)
+             instances = utils.filter_empty_instances(instances)
+             # Generate masks from polygon
+             h, w = instances.image_size
+             # image_size_xyxy = torch.as_tensor([w, h, w, h], dtype=torch.float)
+             if hasattr(instances, 'gt_masks'):
+                 gt_masks = instances.gt_masks
+                 gt_masks = convert_coco_poly_to_mask(gt_masks.polygons, h, w)
+                 instances.gt_masks = gt_masks
+             dataset_dict["instances"] = instances
+
+         return dataset_dict
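A quick sketch (not from the commit, assuming the module above is importable) of convert_coco_poly_to_mask on a toy input: one instance whose single polygon is a 2x2 square inside a 4x4 image, given in COCO's flat [x1, y1, x2, y2, ...] format:

import torch  # the mapper module above already imports torch and pycocotools

segmentations = [[[1.0, 1.0, 3.0, 1.0, 3.0, 3.0, 1.0, 3.0]]]  # one instance, one polygon
masks = convert_coco_poly_to_mask(segmentations, height=4, width=4)
print(masks.shape)  # torch.Size([1, 4, 4])
print(masks[0])     # boolean mask with the 2x2 square region filled in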
OpenSeeD/datasets/dataset_mappers/coco_panoptic_new_baseline_dataset_mapper.py ADDED
@@ -0,0 +1,166 @@
+ # Copyright (c) Facebook, Inc. and its affiliates.
+ # Modified by Bowen Cheng from https://github.com/facebookresearch/detr/blob/master/d2/detr/dataset_mapper.py
+
+ import copy
+ import logging
+
+ import numpy as np
+ import torch
+
+ from detectron2.config import configurable
+ from detectron2.data import detection_utils as utils
+ from detectron2.data import transforms as T
+ from detectron2.data.transforms import TransformGen
+ from detectron2.structures import BitMasks, Boxes, Instances
+
+ __all__ = ["COCOPanopticNewBaselineDatasetMapper"]
+
+
+ def build_transform_gen(cfg, is_train):
+     """
+     Create a list of default :class:`Augmentation` from config.
+     Now it includes resizing and flipping.
+     Returns:
+         list[Augmentation]
+     """
+     assert is_train, "Only support training augmentation"
+     image_size = cfg.INPUT.IMAGE_SIZE
+     min_scale = cfg.INPUT.MIN_SCALE
+     max_scale = cfg.INPUT.MAX_SCALE
+
+     augmentation = []
+
+     if cfg.INPUT.RANDOM_FLIP != "none":
+         augmentation.append(
+             T.RandomFlip(
+                 horizontal=cfg.INPUT.RANDOM_FLIP == "horizontal",
+                 vertical=cfg.INPUT.RANDOM_FLIP == "vertical",
+             )
+         )
+
+     augmentation.extend([
+         T.ResizeScale(
+             min_scale=min_scale, max_scale=max_scale, target_height=image_size, target_width=image_size
+         ),
+         T.FixedSizeCrop(crop_size=(image_size, image_size)),
+     ])
+
+     return augmentation
+
+
+ # This is specifically designed for the COCO dataset.
+ class COCOPanopticNewBaselineDatasetMapper:
+     """
+     A callable which takes a dataset dict in Detectron2 Dataset format,
+     and map it into a format used by MaskFormer.
+
+     This dataset mapper applies the same transformation as DETR for COCO panoptic segmentation.
+
+     The callable currently does the following:
+
+     1. Read the image from "file_name"
+     2. Applies geometric transforms to the image and annotation
+     3. Find and applies suitable cropping to the image and annotation
+     4. Prepare image and annotation to Tensors
+     """
+
+     @configurable
+     def __init__(
+         self,
+         is_train=True,
+         *,
+         tfm_gens,
+         image_format,
+     ):
+         """
+         NOTE: this interface is experimental.
+         Args:
+             is_train: for training or inference
+             augmentations: a list of augmentations or deterministic transforms to apply
+             crop_gen: crop augmentation
+             tfm_gens: data augmentation
+             image_format: an image format supported by :func:`detection_utils.read_image`.
+         """
+         self.tfm_gens = tfm_gens
+         logging.getLogger(__name__).info(
+             "[COCOPanopticNewBaselineDatasetMapper] Full TransformGens used in training: {}".format(
+                 str(self.tfm_gens)
+             )
+         )
+
+         self.img_format = image_format
+         self.is_train = is_train
+
+     @classmethod
+     def from_config(cls, cfg, is_train=True):
+         # Build augmentation
+         tfm_gens = build_transform_gen(cfg, is_train)
+
+         ret = {
+             "is_train": is_train,
+             "tfm_gens": tfm_gens,
+             "image_format": cfg.INPUT.FORMAT,
+         }
+         return ret
+
+     def __call__(self, dataset_dict):
+         """
+         Args:
+             dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
+
+         Returns:
+             dict: a format that builtin models in detectron2 accept
+         """
+         dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
+         image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
+         utils.check_image_size(dataset_dict, image)
+
+         image, transforms = T.apply_transform_gens(self.tfm_gens, image)
+         image_shape = image.shape[:2]  # h, w
+
+         # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
+         # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
+         # Therefore it's important to use torch.Tensor.
+         dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
+
+         if not self.is_train:
+             # USER: Modify this if you want to keep them for some reason.
+             dataset_dict.pop("annotations", None)
+             return dataset_dict
+
+         if "pan_seg_file_name" in dataset_dict:
+             pan_seg_gt = utils.read_image(dataset_dict.pop("pan_seg_file_name"), "RGB")
+             segments_info = dataset_dict["segments_info"]
+
+             # apply the same transformation to panoptic segmentation
+             pan_seg_gt = transforms.apply_segmentation(pan_seg_gt)
+
+             from panopticapi.utils import rgb2id
+
+             pan_seg_gt = rgb2id(pan_seg_gt)
+
+             instances = Instances(image_shape)
+             classes = []
+             masks = []
+             for segment_info in segments_info:
+                 class_id = segment_info["category_id"]
+                 if not segment_info["iscrowd"]:
+                     classes.append(class_id)
+                     masks.append(pan_seg_gt == segment_info["id"])
+
+             classes = np.array(classes)
+             instances.gt_classes = torch.tensor(classes, dtype=torch.int64)
+             if len(masks) == 0:
+                 # Some image does not have annotation (all ignored)
+                 instances.gt_masks = torch.zeros((0, pan_seg_gt.shape[-2], pan_seg_gt.shape[-1]))
+                 instances.gt_boxes = Boxes(torch.zeros((0, 4)))
+             else:
+                 masks = BitMasks(
+                     torch.stack([torch.from_numpy(np.ascontiguousarray(x.copy())) for x in masks])
+                 )
+                 instances.gt_masks = masks.tensor
+                 instances.gt_boxes = masks.get_bounding_boxes()
+
+             dataset_dict["instances"] = instances
+
+         return dataset_dict
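For reference, panopticapi's rgb2id (used above) unpacks the COCO panoptic PNG encoding, where each segment id is stored in the 24 bits of a pixel's color as id = R + 256*G + 256**2*B. A pure-numpy sketch of the same mapping (not the library's actual code):

import numpy as np

def rgb2id_sketch(color):
    # cast up before the multiply so uint8 values cannot overflow
    color = color.astype(np.uint32)
    return color[..., 0] + 256 * color[..., 1] + 256 * 256 * color[..., 2]

pixel = np.array([[[10, 1, 0]]], dtype=np.uint8)  # R=10, G=1, B=0
print(rgb2id_sketch(pixel))  # [[266]] == 10 + 256*1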
OpenSeeD/datasets/dataset_mappers/imagenet_dataset_mapper.py ADDED
@@ -0,0 +1,95 @@
+ # --------------------------------------------------------
+ # X-Decoder -- Generalized Decoding for Pixel, Image, and Language
+ # Copyright (c) 2022 Microsoft
+ # Licensed under The MIT License [see LICENSE for details]
+ # Modified by Xueyan Zou (xueyan@cs.wisc.edu)
+ # --------------------------------------------------------
+ # Copyright (c) Facebook, Inc. and its affiliates.
+ import copy
+ from PIL import Image
+ # import logging
+
+ import cv2
+ import numpy as np
+
+ import torch
+ from torchvision import transforms
+
+ from openseed.utils import configurable
+
+ __all__ = ["ImageNetDatasetMapper"]
+
+
+ # This is specifically designed for the COCO dataset.
+ class ImageNetDatasetMapper:
+     """
+     A callable which takes a dataset dict in Detectron2 Dataset format,
+     and map it into a format used by MaskFormer.
+
+     This dataset mapper applies the same transformation as DETR for COCO panoptic segmentation.
+
+     The callable currently does the following:
+
+     1. Read the image from "file_name"
+     2. Applies geometric transforms to the image and annotation
+     3. Find and applies suitable cropping to the image and annotation
+     4. Prepare image and annotation to Tensors
+     """
+
+     @configurable
+     def __init__(
+         self,
+         is_train=True,
+         size_train=None,
+         size_test=None,
+         size_crop=None,
+     ):
+         """
+         NOTE: this interface is experimental.
+         Args:
+             is_train: for training or inference
+             augmentations: a list of augmentations or deterministic transforms to apply
+             tfm_gens: data augmentation
+             image_format: an image format supported by :func:`detection_utils.read_image`.
+         """
+         self.is_train = is_train
+         self.size_train = size_train
+         self.size_test = size_test
+         self.size_crop = size_crop
+
+         t = []
+         t.append(transforms.Resize(size_crop, interpolation=Image.BICUBIC))
+         t.append(transforms.CenterCrop(size_test))
+         self.transform = transforms.Compose(t)
+
+     @classmethod
+     def from_config(cls, cfg, is_train=True):
+         ret = {
+             "is_train": is_train,
+             "size_train": cfg['INPUT']['SIZE_TRAIN'],
+             "size_test": cfg['INPUT']['SIZE_TEST'],
+             "size_crop": cfg['INPUT']['SIZE_CROP']
+         }
+         return ret
+
+     def __call__(self, dataset_dict):
+         """
+         Args:
+             dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
+
+         Returns:
+             dict: a format that builtin models in detectron2 accept
+         """
+         dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
+         file_name = dataset_dict['file_name']
+         image = Image.open(file_name).convert('RGB')
+
+         if self.is_train == False:
+             image = self.transform(image)
+         image = torch.from_numpy(np.asarray(image).copy())
+         image = image.permute(2,0,1)
+
+         dataset_dict['image'] = image
+         dataset_dict['height'] = image.shape[1]
+         dataset_dict['width'] = image.shape[2]
+         return dataset_dict
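A hedged usage sketch of the mapper above: elsewhere in this commit (build_eval_dataloader in build.py) it is constructed as ImageNetDatasetMapper(cfg, False) from a dict-style config, so a minimal call could look like the following. The size values and the file path are illustrative assumptions, not taken from the commit:

cfg = {'INPUT': {'SIZE_TRAIN': 224, 'SIZE_TEST': 224, 'SIZE_CROP': 256}}
mapper = ImageNetDatasetMapper(cfg, False)  # eval mode: resize to 256, center-crop to 224
sample = mapper({'file_name': '/data/imagenet/val/some_image.JPEG'})  # hypothetical path
# sample['image'] is a (3, H, W) uint8 tensor; 'height' and 'width' are filled from it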
OpenSeeD/datasets/dataset_mappers/lvis_dataset_mapper.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ # Copyright (c) Facebook, Inc. and its affiliates.
+ # Modified by Bowen Cheng from https://github.com/facebookresearch/detr/blob/master/d2/detr/dataset_mapper.py
+ import copy
+ import random
+
+ import scipy.io
+ import numpy as np
+ import torch
+ from PIL import Image
+
+ from torchvision import transforms
+
+ from pycocotools import mask
+ from detectron2.data import detection_utils as utils
+ from detectron2.data import transforms as T
+ from detectron2.data import MetadataCatalog
+
+ from ...Networks.Mask2Former.utils import configurable
+
+ __all__ = ["LVISDatasetMapper"]
+
+
+ def build_transform_gen(cfg, is_train):
+     """
+     Create a list of default :class:`Augmentation` from config.
+     Now it includes resizing and flipping.
+
+     Returns:
+         list[Augmentation]
+     """
+     assert is_train, "Only support training augmentation"
+     cfg_input = cfg['INPUT']
+     image_size = cfg_input['IMAGE_SIZE']
+     min_scale = cfg_input['MIN_SCALE']
+     max_scale = cfg_input['MAX_SCALE']
+
+     augmentation = []
+
+     if cfg_input['RANDOM_FLIP'] != "none":
+         augmentation.append(
+             T.RandomFlip(
+                 horizontal=cfg_input['RANDOM_FLIP'] == "horizontal",
+                 vertical=cfg_input['RANDOM_FLIP'] == "vertical",
+             )
+         )
+
+     augmentation.extend([
+         T.ResizeScale(
+             min_scale=min_scale, max_scale=max_scale, target_height=image_size, target_width=image_size
+         ),
+         T.FixedSizeCrop(crop_size=(image_size, image_size)),
+     ])
+
+     return augmentation
+
+
+ # This mapper is specifically designed for the LVIS dataset.
+ class LVISDatasetMapper:
+     """
+     A callable which takes a dataset dict in Detectron2 Dataset format,
+     and maps it into a format used by MaskFormer.
+
+     This dataset mapper applies the same transformation as DETR for COCO panoptic segmentation.
+
+     The callable currently does the following:
+
+     1. Reads the image from "file_name"
+     2. Applies geometric transforms to the image and annotations
+     3. Finds and applies suitable cropping to the image and annotations
+     4. Prepares the image and annotations as Tensors
+     """
+
+     @configurable
+     def __init__(
+         self,
+         is_train=True,
+         tfm_gens=None,
+         image_format=None,
+         min_size_test=None,
+         max_size_test=None,
+         mean=None,
+         std=None,
+         max_len=None,
+     ):
+         """
+         NOTE: this interface is experimental.
+         Args:
+             is_train: for training or inference
+             tfm_gens: data augmentation
+             image_format: an image format supported by :func:`detection_utils.read_image`.
+         """
+         self.tfm_gens = tfm_gens
+         self.img_format = image_format
+         self.is_train = is_train
+         self.min_size_test = min_size_test
+         self.max_size_test = max_size_test
+         self.pixel_mean = torch.tensor(mean)[:, None, None]
+         self.pixel_std = torch.tensor(std)[:, None, None]
+         self.max_grounding_num = max_len
+
+         t = []
+         t.append(transforms.Resize(self.min_size_test, interpolation=Image.BICUBIC))
+         self.transform = transforms.Compose(t)
+         self.categories = torch.load(MetadataCatalog.get('logistic').get('cat_root'))
+
+     @classmethod
+     def from_config(cls, cfg, is_train=True):
+         # Build augmentation
+         if is_train:
+             tfm_gens = build_transform_gen(cfg, is_train)
+         else:
+             tfm_gens = None
+
+         ret = {
+             "is_train": is_train,
+             "tfm_gens": tfm_gens,
+             "image_format": cfg['INPUT']['FORMAT'],
+             "min_size_test": cfg['INPUT']['MIN_SIZE_TEST'],
+             "max_size_test": cfg['INPUT']['MAX_SIZE_TEST'],
+             "mean": cfg['INPUT']['PIXEL_MEAN'],
+             "std": cfg['INPUT']['PIXEL_STD'],
+             "max_len": cfg['MODEL']['DECODER']['GROUNDING']['MAX_LEN'],
+         }
+         return ret
+
+     def __call__(self, dataset_dict):
+         """
+         Args:
+             dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
+
+         Returns:
+             dict: a format that builtin models in detectron2 accept
+         """
+         dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
+         file_name = dataset_dict['file_name']
+         if not self.is_train:
+             assert False, "Only support training."
+
+         image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
+         utils.check_image_size(dataset_dict, image)
+         # `tfms` avoids shadowing the torchvision `transforms` module imported above
+         image, tfms = T.apply_transform_gens(self.tfm_gens, image)
+         image_shape = image.shape[:2]  # h, w
+         dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
+
+         assert len(dataset_dict['instance']) > 0
+         masks_grd = []
+         texts_grd = []
+         boxes_grd = []
+         hash_grd = []
+         for inst, label in zip(dataset_dict['instance'], dataset_dict['labels']):
+             rle = mask.frPyObjects(inst, dataset_dict['height'], dataset_dict['width'])
+             m = mask.decode(rle)
+             # sometimes there are multiple binary maps (corresponding to multiple segments)
+             m = np.sum(m, axis=2)
+             m = m.astype(np.uint8)  # convert to np.uint8
+             m = tfms.apply_segmentation(m[:, :, None])[:, :, 0]
+             masks_grd += [m]
+             label_names = self.categories[label]
+             rand_id = random.randint(0, len(label_names) - 1)
+             texts_grd.append(label_names[rand_id].lower())
+             hash_grd.append(hash(label_names[rand_id].lower()))
+
+         indices = torch.randperm(len(hash_grd))[:self.max_grounding_num]
+         masks_grd = torch.from_numpy(np.stack(masks_grd))[indices]
+         boxes_grd = torch.tensor(boxes_grd)
+         texts_grd = np.array(texts_grd)[indices.numpy()].tolist()
+         hash_grd = np.array(hash_grd)[indices.numpy()].tolist()
+         groundings = {'masks': masks_grd, 'texts': texts_grd, 'hash': hash_grd, 'mode': 'text'}
+         dataset_dict["groundings"] = groundings
+         return dataset_dict
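
The grounding branch of LVISDatasetMapper.__call__ leans on pycocotools to turn polygon annotations into binary masks before the geometric transforms run. A minimal sketch of just that decode-and-merge step, with a made-up 4x4 image and a single square polygon:

import numpy as np
from pycocotools import mask as mask_utils

height, width = 4, 4
polygons = [[0.0, 0.0, 0.0, 3.0, 3.0, 3.0, 3.0, 0.0]]  # one part; real instances may have several
rles = mask_utils.frPyObjects(polygons, height, width)
m = mask_utils.decode(rles)          # (H, W, N): one channel per polygon part
m = m.sum(axis=2).astype(np.uint8)   # collapse the parts into a single H x W map
print(m.shape, m.sum())

After decoding, each instance's category is grounded to one randomly chosen name from its synonym list (self.categories) via random.randint, so the text target can vary across epochs for the same instance.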
OpenSeeD/datasets/dataset_mappers/mask_former_instance_dataset_mapper.py ADDED
@@ -0,0 +1,184 @@
+ # Copyright (c) Facebook, Inc. and its affiliates.
+ import copy
+ import logging
+
+ import numpy as np
+ import pycocotools.mask as mask_util
+ import torch
+ from torch.nn import functional as F
+
+ from detectron2.data import detection_utils as utils
+ from detectron2.data import transforms as T
+ from detectron2.projects.point_rend import ColorAugSSDTransform
+ from detectron2.structures import BitMasks, Instances, polygons_to_bitmask
+
+ from openseed.utils import configurable
+
+ __all__ = ["MaskFormerInstanceDatasetMapper"]
+
+
+ class MaskFormerInstanceDatasetMapper:
+     """
+     A callable which takes a dataset dict in Detectron2 Dataset format,
+     and maps it into a format used by MaskFormer for instance segmentation.
+
+     The callable currently does the following:
+
+     1. Reads the image from "file_name"
+     2. Applies geometric transforms to the image and annotations
+     3. Finds and applies suitable cropping to the image and annotations
+     4. Prepares the image and annotations as Tensors
+     """
+
+     @configurable
+     def __init__(
+         self,
+         is_train=True,
+         *,
+         augmentations,
+         image_format,
+         size_divisibility,
+     ):
+         """
+         NOTE: this interface is experimental.
+         Args:
+             is_train: for training or inference
+             augmentations: a list of augmentations or deterministic transforms to apply
+             image_format: an image format supported by :func:`detection_utils.read_image`.
+             size_divisibility: pad image size to be divisible by this value
+         """
+         self.is_train = is_train
+         self.tfm_gens = augmentations
+         self.img_format = image_format
+         self.size_divisibility = size_divisibility
+
+         logger = logging.getLogger(__name__)
+         mode = "training" if is_train else "inference"
+         logger.info(f"[{self.__class__.__name__}] Augmentations used in {mode}: {augmentations}")
+
+     @classmethod
+     def from_config(cls, cfg, is_train=True):
+         # Build augmentation
+         cfg_input = cfg['INPUT']
+         augs = [
+             T.ResizeShortestEdge(
+                 cfg_input['MIN_SIZE_TRAIN'],
+                 cfg_input['MAX_SIZE_TRAIN'],
+                 cfg_input['MIN_SIZE_TRAIN_SAMPLING'],
+             )
+         ]
+
+         cfg_input_crop = cfg_input['CROP']
+         if cfg_input_crop['ENABLED']:
+             augs.append(
+                 T.RandomCrop(
+                     cfg_input_crop['TYPE'],
+                     cfg_input_crop['SIZE'],
+                 )
+             )
+         if cfg_input['COLOR_AUG_SSD']:
+             augs.append(ColorAugSSDTransform(img_format=cfg_input['FORMAT']))
+         augs.append(T.RandomFlip())
+
+         ret = {
+             "is_train": is_train,
+             "augmentations": augs,
+             "image_format": cfg_input['FORMAT'],
+             "size_divisibility": cfg_input['SIZE_DIVISIBILITY'],
+         }
+         return ret
+
+     def __call__(self, dataset_dict):
+         """
+         Args:
+             dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
+
+         Returns:
+             dict: a format that builtin models in detectron2 accept
+         """
+         assert self.is_train, "MaskFormerInstanceDatasetMapper should only be used for training!"
+
+         dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
+         image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
+         utils.check_image_size(dataset_dict, image)
+
+         aug_input = T.AugInput(image)
+         aug_input, transforms = T.apply_transform_gens(self.tfm_gens, aug_input)
+         image = aug_input.image
+
+         # transform instance masks
+         assert "annotations" in dataset_dict
+         for anno in dataset_dict["annotations"]:
+             anno.pop("keypoints", None)
+
+         annos = [
+             utils.transform_instance_annotations(obj, transforms, image.shape[:2])
+             for obj in dataset_dict.pop("annotations")
+             if obj.get("iscrowd", 0) == 0
+         ]
+
+         if len(annos):
+             assert "segmentation" in annos[0]
+         segms = [obj["segmentation"] for obj in annos]
+         masks = []
+         for segm in segms:
+             if isinstance(segm, list):
+                 # polygon
+                 masks.append(polygons_to_bitmask(segm, *image.shape[:2]))
+             elif isinstance(segm, dict):
+                 # COCO RLE
+                 masks.append(mask_util.decode(segm))
+             elif isinstance(segm, np.ndarray):
+                 assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format(
+                     segm.ndim
+                 )
+                 # mask array
+                 masks.append(segm)
+             else:
+                 raise ValueError(
+                     "Cannot convert segmentation of type '{}' to BitMasks!"
+                     "Supported types are: polygons as list[list[float] or ndarray],"
+                     " COCO-style RLE as a dict, or a binary segmentation mask "
+                     " in a 2D numpy array of shape HxW.".format(type(segm))
+                 )
+
+         # Pad image and segmentation label here!
+         image = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
+         masks = [torch.from_numpy(np.ascontiguousarray(x)) for x in masks]
+
+         classes = [int(obj["category_id"]) for obj in annos]
+         classes = torch.tensor(classes, dtype=torch.int64)
+
+         if self.size_divisibility > 0:
+             image_size = (image.shape[-2], image.shape[-1])
+             padding_size = [
+                 0,
+                 self.size_divisibility - image_size[1],
+                 0,
+                 self.size_divisibility - image_size[0],
+             ]
+             # pad image
+             image = F.pad(image, padding_size, value=128).contiguous()
+             # pad masks
+             masks = [F.pad(x, padding_size, value=0).contiguous() for x in masks]
+
+         image_shape = (image.shape[-2], image.shape[-1])  # h, w
+
+         # PyTorch's dataloader is efficient on torch.Tensor due to shared memory,
+         # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
+         # Therefore it's important to use torch.Tensor.
+         dataset_dict["image"] = image
+
+         # Prepare per-category binary masks
+         instances = Instances(image_shape)
+         instances.gt_classes = classes
+         if len(masks) == 0:
+             # Some images do not have annotations (all ignored)
+             instances.gt_masks = torch.zeros((0, image.shape[-2], image.shape[-1]))
+         else:
+             masks = BitMasks(torch.stack(masks))
+             instances.gt_masks = masks.tensor
+
+         dataset_dict["instances"] = instances
+
+         return dataset_dict
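
Both MaskFormer mappers in this commit share the size-divisibility padding idiom above. A small sketch of just that step, under the mapper's implicit assumption that H and W do not already exceed SIZE_DIVISIBILITY (e.g. because a fixed-size crop ran first); the shapes here are illustrative:

import torch
from torch.nn import functional as F

size_divisibility = 512                 # illustrative value
image = torch.zeros(3, 480, 512)        # C, H, W
h, w = image.shape[-2:]
# F.pad's last-two-dims order is (left, right, top, bottom),
# so this pads only on the right and bottom edges.
padding = [0, size_divisibility - w, 0, size_divisibility - h]
image = F.pad(image, padding, value=128).contiguous()
assert image.shape[-2:] == (size_divisibility, size_divisibility)

If an image were larger than size_divisibility along either axis, the pad amounts would go negative and F.pad would crop instead; the mappers rely on the upstream augmentations to prevent that case.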
OpenSeeD/datasets/dataset_mappers/mask_former_panoptic_dataset_mapper.py ADDED
@@ -0,0 +1,168 @@
+ # Copyright (c) Facebook, Inc. and its affiliates.
+ import copy
+ import logging
+
+ import numpy as np
+ import torch
+ from torch.nn import functional as F
+
+ from detectron2.data import detection_utils as utils
+ from detectron2.data import transforms as T
+ from detectron2.structures import BitMasks, Instances
+
+ from .mask_former_semantic_dataset_mapper import MaskFormerSemanticDatasetMapper
+ from openseed.utils import configurable
+
+
+ __all__ = ["MaskFormerPanopticDatasetMapper"]
+
+
+ class MaskFormerPanopticDatasetMapper(MaskFormerSemanticDatasetMapper):
+     """
+     A callable which takes a dataset dict in Detectron2 Dataset format,
+     and maps it into a format used by MaskFormer for panoptic segmentation.
+
+     The callable currently does the following:
+
+     1. Reads the image from "file_name"
+     2. Applies geometric transforms to the image and annotations
+     3. Finds and applies suitable cropping to the image and annotations
+     4. Prepares the image and annotations as Tensors
+     """
+
+     @configurable
+     def __init__(
+         self,
+         is_train=True,
+         *,
+         augmentations,
+         image_format,
+         ignore_label,
+         size_divisibility,
+     ):
+         """
+         NOTE: this interface is experimental.
+         Args:
+             is_train: for training or inference
+             augmentations: a list of augmentations or deterministic transforms to apply
+             image_format: an image format supported by :func:`detection_utils.read_image`.
+             ignore_label: the label that is ignored during evaluation
+             size_divisibility: pad image size to be divisible by this value
+         """
+         super().__init__(
+             is_train,
+             augmentations=augmentations,
+             image_format=image_format,
+             ignore_label=ignore_label,
+             size_divisibility=size_divisibility,
+         )
+
+     def __call__(self, dataset_dict):
+         """
+         Args:
+             dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
+
+         Returns:
+             dict: a format that builtin models in detectron2 accept
+         """
+         assert self.is_train, "MaskFormerPanopticDatasetMapper should only be used for training!"
+
+         dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
+         image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
+         utils.check_image_size(dataset_dict, image)
+
+         # semantic segmentation
+         if "sem_seg_file_name" in dataset_dict:
+             # PyTorch transformation is not implemented for uint16, so convert it to double first
+             sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name")).astype("double")
+         else:
+             sem_seg_gt = None
+
+         # panoptic segmentation
+         if "pan_seg_file_name" in dataset_dict:
+             pan_seg_gt = utils.read_image(dataset_dict.pop("pan_seg_file_name"), "RGB")
+             segments_info = dataset_dict["segments_info"]
+         else:
+             pan_seg_gt = None
+             segments_info = None
+
+         if pan_seg_gt is None:
+             raise ValueError(
+                 "Cannot find 'pan_seg_file_name' for panoptic segmentation dataset {}.".format(
+                     dataset_dict["file_name"]
+                 )
+             )
+
+         aug_input = T.AugInput(image, sem_seg=sem_seg_gt)
+         aug_input, transforms = T.apply_transform_gens(self.tfm_gens, aug_input)
+         image = aug_input.image
+         if sem_seg_gt is not None:
+             sem_seg_gt = aug_input.sem_seg
+
+         # apply the same transformation to panoptic segmentation
+         pan_seg_gt = transforms.apply_segmentation(pan_seg_gt)
+
+         from panopticapi.utils import rgb2id
+
+         pan_seg_gt = rgb2id(pan_seg_gt)
+
+         # Pad image and segmentation label here!
+         image = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
+         if sem_seg_gt is not None:
+             sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
+         pan_seg_gt = torch.as_tensor(pan_seg_gt.astype("long"))
+
+         if self.size_divisibility > 0:
+             image_size = (image.shape[-2], image.shape[-1])
+             padding_size = [
+                 0,
+                 self.size_divisibility - image_size[1],
+                 0,
+                 self.size_divisibility - image_size[0],
+             ]
+             image = F.pad(image, padding_size, value=128).contiguous()
+             if sem_seg_gt is not None:
+                 sem_seg_gt = F.pad(sem_seg_gt, padding_size, value=self.ignore_label).contiguous()
+             pan_seg_gt = F.pad(
+                 pan_seg_gt, padding_size, value=0
+             ).contiguous()  # 0 is the VOID panoptic label
+
+         image_shape = (image.shape[-2], image.shape[-1])  # h, w
+
+         # PyTorch's dataloader is efficient on torch.Tensor due to shared memory,
+         # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
+         # Therefore it's important to use torch.Tensor.
+         dataset_dict["image"] = image
+         if sem_seg_gt is not None:
+             dataset_dict["sem_seg"] = sem_seg_gt.long()
+
+         if "annotations" in dataset_dict:
+             raise ValueError("Panoptic segmentation dataset should not have 'annotations'.")
+
+         # Prepare per-category binary masks
+         pan_seg_gt = pan_seg_gt.numpy()
+         instances = Instances(image_shape)
+         classes = []
+         masks = []
+         for segment_info in segments_info:
+             class_id = segment_info["category_id"]
+             if not segment_info["iscrowd"]:
+                 classes.append(class_id)
+                 masks.append(pan_seg_gt == segment_info["id"])
+
+         classes = np.array(classes)
+         instances.gt_classes = torch.tensor(classes, dtype=torch.int64)
+         if len(masks) == 0:
+             # Some images do not have annotations (all ignored)
+             instances.gt_masks = torch.zeros((0, pan_seg_gt.shape[-2], pan_seg_gt.shape[-1]))
+         else:
+             masks = BitMasks(
+                 torch.stack([torch.from_numpy(np.ascontiguousarray(x.copy())) for x in masks])
+             )
+             instances.gt_masks = masks.tensor
+             instances.gt_boxes = masks.get_bounding_boxes()
+
+         dataset_dict["instances"] = instances
+
+         return dataset_dict
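
The panoptic branch depends on panopticapi's RGB-to-id convention: each segment id is stored across the three color channels as id = R + 256·G + 256²·B. A toy sketch of the decode-and-split step, with rgb2id re-implemented inline (mirroring panopticapi.utils.rgb2id) so the snippet runs without panopticapi; all values are made up:

import numpy as np

def rgb2id(color):
    # same encoding panopticapi uses: little-endian id across R, G, B
    color = color.astype(np.uint32)
    return color[..., 0] + 256 * color[..., 1] + 256 * 256 * color[..., 2]

pan_seg_rgb = np.zeros((2, 2, 3), dtype=np.uint8)
pan_seg_rgb[0, 0] = (7, 0, 0)                 # segment id 7 at one pixel
pan_seg_gt = rgb2id(pan_seg_rgb)
segments_info = [{'id': 7, 'category_id': 3, 'iscrowd': 0}]
# one binary mask per non-crowd segment, exactly as in the loop above
masks = [pan_seg_gt == s['id'] for s in segments_info if not s['iscrowd']]
assert masks[0].sum() == 1

Crowd segments are skipped when building gt_masks, and any id not listed in segments_info (such as the 0/VOID padding value) simply never matches, so padded pixels contribute to no instance.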