AnishaNaik03 committed on
Commit
7a7e5aa
·
verified ·
1 Parent(s): 88cd4a4

Upload 440 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +2 -0
  2. detectron2/__pycache__/__init__.cpython-311.pyc +0 -0
  3. detectron2/checkpoint/__init__.py +10 -0
  4. detectron2/checkpoint/__pycache__/__init__.cpython-311.pyc +0 -0
  5. detectron2/checkpoint/__pycache__/c2_model_loading.cpython-311.pyc +0 -0
  6. detectron2/checkpoint/__pycache__/catalog.cpython-311.pyc +0 -0
  7. detectron2/checkpoint/__pycache__/detection_checkpoint.cpython-311.pyc +0 -0
  8. detectron2/checkpoint/c2_model_loading.py +406 -0
  9. detectron2/checkpoint/catalog.py +115 -0
  10. detectron2/checkpoint/detection_checkpoint.py +143 -0
  11. detectron2/config/__init__.py +24 -0
  12. detectron2/config/__pycache__/__init__.cpython-311.pyc +0 -0
  13. detectron2/config/__pycache__/compat.cpython-311.pyc +0 -0
  14. detectron2/config/__pycache__/config.cpython-311.pyc +0 -0
  15. detectron2/config/__pycache__/defaults.cpython-311.pyc +0 -0
  16. detectron2/config/__pycache__/instantiate.cpython-311.pyc +0 -0
  17. detectron2/config/__pycache__/lazy.cpython-311.pyc +0 -0
  18. detectron2/config/compat.py +229 -0
  19. detectron2/config/config.py +265 -0
  20. detectron2/config/defaults.py +656 -0
  21. detectron2/config/instantiate.py +88 -0
  22. detectron2/config/lazy.py +436 -0
  23. detectron2/data/__init__.py +19 -0
  24. detectron2/data/__pycache__/__init__.cpython-311.pyc +0 -0
  25. detectron2/data/__pycache__/build.cpython-311.pyc +0 -0
  26. detectron2/data/__pycache__/catalog.cpython-311.pyc +0 -0
  27. detectron2/data/__pycache__/common.cpython-311.pyc +0 -0
  28. detectron2/data/__pycache__/dataset_mapper.cpython-311.pyc +0 -0
  29. detectron2/data/__pycache__/detection_utils.cpython-311.pyc +0 -0
  30. detectron2/data/benchmark.py +225 -0
  31. detectron2/data/build.py +694 -0
  32. detectron2/data/catalog.py +236 -0
  33. detectron2/data/common.py +339 -0
  34. detectron2/data/dataset_mapper.py +191 -0
  35. detectron2/data/datasets/README.md +9 -0
  36. detectron2/data/datasets/__init__.py +9 -0
  37. detectron2/data/datasets/__pycache__/__init__.cpython-311.pyc +0 -0
  38. detectron2/data/datasets/__pycache__/builtin.cpython-311.pyc +0 -0
  39. detectron2/data/datasets/__pycache__/builtin_meta.cpython-311.pyc +0 -0
  40. detectron2/data/datasets/__pycache__/cityscapes.cpython-311.pyc +0 -0
  41. detectron2/data/datasets/__pycache__/cityscapes_panoptic.cpython-311.pyc +0 -0
  42. detectron2/data/datasets/__pycache__/coco.cpython-311.pyc +0 -0
  43. detectron2/data/datasets/__pycache__/coco_panoptic.cpython-311.pyc +0 -0
  44. detectron2/data/datasets/__pycache__/lvis.cpython-311.pyc +0 -0
  45. detectron2/data/datasets/__pycache__/lvis_v0_5_categories.cpython-311.pyc +3 -0
  46. detectron2/data/datasets/__pycache__/lvis_v1_categories.cpython-311.pyc +3 -0
  47. detectron2/data/datasets/__pycache__/lvis_v1_category_image_count.cpython-311.pyc +0 -0
  48. detectron2/data/datasets/__pycache__/pascal_voc.cpython-311.pyc +0 -0
  49. detectron2/data/datasets/builtin.py +259 -0
  50. detectron2/data/datasets/builtin_meta.py +350 -0
.gitattributes CHANGED
@@ -48,3 +48,5 @@ detectron2/detectron2/_C.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs mer
48
  detectron2/detectron2/data/datasets/__pycache__/lvis_v0_5_categories.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
49
  detectron2/detectron2/data/datasets/__pycache__/lvis_v1_categories.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
50
  detectron2/_C.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
 
 
 
48
  detectron2/detectron2/data/datasets/__pycache__/lvis_v0_5_categories.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
49
  detectron2/detectron2/data/datasets/__pycache__/lvis_v1_categories.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
50
  detectron2/_C.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
51
+ detectron2/data/datasets/__pycache__/lvis_v0_5_categories.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
52
+ detectron2/data/datasets/__pycache__/lvis_v1_categories.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
detectron2/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (294 Bytes). View file
 
detectron2/checkpoint/__init__.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright (c) Facebook, Inc. and its affiliates.
3
+ # File:
4
+
5
+
6
+ from . import catalog as _UNUSED # register the handler
7
+ from .detection_checkpoint import DetectionCheckpointer
8
+ from fvcore.common.checkpoint import Checkpointer, PeriodicCheckpointer
9
+
10
+ __all__ = ["Checkpointer", "PeriodicCheckpointer", "DetectionCheckpointer"]
detectron2/checkpoint/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (477 Bytes). View file
 
detectron2/checkpoint/__pycache__/c2_model_loading.cpython-311.pyc ADDED
Binary file (29.8 kB). View file
 
detectron2/checkpoint/__pycache__/catalog.cpython-311.pyc ADDED
Binary file (6.53 kB). View file
 
detectron2/checkpoint/__pycache__/detection_checkpoint.cpython-311.pyc ADDED
Binary file (8.53 kB). View file
 
detectron2/checkpoint/c2_model_loading.py ADDED
@@ -0,0 +1,406 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ import copy
3
+ import logging
4
+ import re
5
+ from typing import Dict, List
6
+ import torch
7
+
8
+
9
def convert_basic_c2_names(original_keys):
    """
    Apply some basic name conversion to names in C2 weights.
    It only deals with typical backbone models.

    Args:
        original_keys (list[str]): blob names from a Caffe2 checkpoint.

    Returns:
        list[str]: The same number of strings matching those in original_keys.
    """
    # Hard-coded special cases that follow no pattern.
    hardcoded = {"pred_b": "linear_b", "pred_w": "linear_w"}
    keys = [hardcoded.get(k, k) for k in copy.deepcopy(original_keys)]

    # C2 separates name components with "_"; detectron2 uses ".".
    keys = [k.replace("_", ".") for k in keys]

    # Ordered regex rewrites. Order matters: later patterns assume the
    # earlier ones have already been applied (e.g. the "_" -> "." step above).
    regex_rules = [
        (r"\.b$", ".bias"),
        (r"\.w$", ".weight"),
        # Uniform both bn and gn names to "norm"
        (r"bn\.s$", "norm.weight"),
        (r"bn\.bias$", "norm.bias"),
        (r"bn\.rm", "norm.running_mean"),
        (r"bn\.running.mean$", "norm.running_mean"),
        (r"bn\.riv$", "norm.running_var"),
        (r"bn\.running.var$", "norm.running_var"),
        (r"bn\.gamma$", "norm.weight"),
        (r"bn\.beta$", "norm.bias"),
        (r"gn\.s$", "norm.weight"),
        (r"gn\.bias$", "norm.bias"),
        # stem
        (r"^res\.conv1\.norm\.", "conv1.norm."),
        # to avoid mis-matching with "conv1" in other components (e.g. detection head)
        (r"^conv1\.", "stem.conv1."),
    ]
    for pattern, replacement in regex_rules:
        keys = [re.sub(pattern, replacement, k) for k in keys]

    # layer1-4 is used by torchvision, however we follow the C2 naming
    # strategy (res2-5), so no res<N> -> layer<N> rewrite is done here.

    # Residual blocks: C2 branch names -> detectron2 conv/shortcut names.
    branch_rules = [
        (".branch1.", ".shortcut."),
        (".branch2a.", ".conv1."),
        (".branch2b.", ".conv2."),
        (".branch2c.", ".conv3."),
    ]
    for old, new in branch_rules:
        keys = [k.replace(old, new) for k in keys]

    # DensePose substitutions
    keys = [re.sub("^body.conv.fcn", "body_conv_fcn", k) for k in keys]
    densepose_rules = [
        ("AnnIndex.lowres", "ann_index_lowres"),
        ("Index.UV.lowres", "index_uv_lowres"),
        ("U.lowres", "u_lowres"),
        ("V.lowres", "v_lowres"),
    ]
    for old, new in densepose_rules:
        keys = [k.replace(old, new) for k in keys]
    return keys
63
+
64
+
65
def convert_c2_detectron_names(weights):
    """
    Map Caffe2 Detectron weight names to Detectron2 names.

    Args:
        weights (dict): name -> tensor

    Returns:
        dict: detectron2 names -> tensor
        dict: detectron2 names -> C2 names
    """
    logger = logging.getLogger(__name__)
    logger.info("Renaming Caffe2 weights ......")
    original_keys = sorted(weights.keys())
    layer_keys = copy.deepcopy(original_keys)

    # First apply the generic backbone renames ("_" -> ".", bn/gn -> "norm",
    # residual-branch renaming, etc.); head-specific renames follow below.
    layer_keys = convert_basic_c2_names(layer_keys)

    # --------------------------------------------------------------------------
    # RPN hidden representation conv
    # --------------------------------------------------------------------------
    # FPN case
    # In the C2 model, the RPN hidden layer conv is defined for FPN level 2 and then
    # shared for all other levels, hence the appearance of "fpn2"
    layer_keys = [
        k.replace("conv.rpn.fpn2", "proposal_generator.rpn_head.conv") for k in layer_keys
    ]
    # Non-FPN case
    layer_keys = [k.replace("conv.rpn", "proposal_generator.rpn_head.conv") for k in layer_keys]

    # --------------------------------------------------------------------------
    # RPN box transformation conv
    # --------------------------------------------------------------------------
    # FPN case (see note above about "fpn2")
    layer_keys = [
        k.replace("rpn.bbox.pred.fpn2", "proposal_generator.rpn_head.anchor_deltas")
        for k in layer_keys
    ]
    layer_keys = [
        k.replace("rpn.cls.logits.fpn2", "proposal_generator.rpn_head.objectness_logits")
        for k in layer_keys
    ]
    # Non-FPN case
    layer_keys = [
        k.replace("rpn.bbox.pred", "proposal_generator.rpn_head.anchor_deltas") for k in layer_keys
    ]
    layer_keys = [
        k.replace("rpn.cls.logits", "proposal_generator.rpn_head.objectness_logits")
        for k in layer_keys
    ]

    # --------------------------------------------------------------------------
    # Fast R-CNN box head
    # --------------------------------------------------------------------------
    layer_keys = [re.sub("^bbox\\.pred", "bbox_pred", k) for k in layer_keys]
    layer_keys = [re.sub("^cls\\.score", "cls_score", k) for k in layer_keys]
    layer_keys = [re.sub("^fc6\\.", "box_head.fc1.", k) for k in layer_keys]
    layer_keys = [re.sub("^fc7\\.", "box_head.fc2.", k) for k in layer_keys]
    # 4conv1fc head tensor names: head_conv1_w, head_conv1_gn_s
    layer_keys = [re.sub("^head\\.conv", "box_head.conv", k) for k in layer_keys]

    # --------------------------------------------------------------------------
    # FPN lateral and output convolutions
    # --------------------------------------------------------------------------
    def fpn_map(name):
        """
        Look for keys with the following patterns:
        1) Starts with "fpn.inner."
           Example: "fpn.inner.res2.2.sum.lateral.weight"
           Meaning: These are lateral pathway convolutions
        2) Starts with "fpn.res"
           Example: "fpn.res2.2.sum.weight"
           Meaning: These are FPN output convolutions
        """
        splits = name.split(".")
        norm = ".norm" if "norm" in splits else ""
        if name.startswith("fpn.inner."):
            # splits example: ['fpn', 'inner', 'res2', '2', 'sum', 'lateral', 'weight']
            stage = int(splits[2][len("res") :])
            return "fpn_lateral{}{}.{}".format(stage, norm, splits[-1])
        elif name.startswith("fpn.res"):
            # splits example: ['fpn', 'res2', '2', 'sum', 'weight']
            stage = int(splits[1][len("res") :])
            return "fpn_output{}{}.{}".format(stage, norm, splits[-1])
        return name

    layer_keys = [fpn_map(k) for k in layer_keys]

    # --------------------------------------------------------------------------
    # Mask R-CNN mask head
    # --------------------------------------------------------------------------
    # roi_heads.StandardROIHeads case
    layer_keys = [k.replace(".[mask].fcn", "mask_head.mask_fcn") for k in layer_keys]
    layer_keys = [re.sub("^\\.mask\\.fcn", "mask_head.mask_fcn", k) for k in layer_keys]
    layer_keys = [k.replace("mask.fcn.logits", "mask_head.predictor") for k in layer_keys]
    # roi_heads.Res5ROIHeads case
    layer_keys = [k.replace("conv5.mask", "mask_head.deconv") for k in layer_keys]

    # --------------------------------------------------------------------------
    # Keypoint R-CNN head
    # --------------------------------------------------------------------------
    # interestingly, the keypoint head convs have blob names that are simply "conv_fcnX"
    layer_keys = [k.replace("conv.fcn", "roi_heads.keypoint_head.conv_fcn") for k in layer_keys]
    layer_keys = [
        k.replace("kps.score.lowres", "roi_heads.keypoint_head.score_lowres") for k in layer_keys
    ]
    layer_keys = [k.replace("kps.score.", "roi_heads.keypoint_head.score.") for k in layer_keys]

    # --------------------------------------------------------------------------
    # Done with replacements
    # --------------------------------------------------------------------------
    # The renaming must be a bijection on this checkpoint's keys.
    assert len(set(layer_keys)) == len(layer_keys)
    assert len(original_keys) == len(layer_keys)

    new_weights = {}
    new_keys_to_original_keys = {}
    for orig, renamed in zip(original_keys, layer_keys):
        new_keys_to_original_keys[renamed] = orig
        if renamed.startswith("bbox_pred.") or renamed.startswith("mask_head.predictor."):
            # remove the meaningless prediction weight for background class
            # (C2 puts background first: 4 box-delta channels for bbox_pred,
            # 1 mask channel for the mask predictor)
            new_start_idx = 4 if renamed.startswith("bbox_pred.") else 1
            new_weights[renamed] = weights[orig][new_start_idx:]
            logger.info(
                "Remove prediction weight for background class in {}. The shape changes from "
                "{} to {}.".format(
                    renamed, tuple(weights[orig].shape), tuple(new_weights[renamed].shape)
                )
            )
        elif renamed.startswith("cls_score."):
            # move weights of bg class from original index 0 to last index
            logger.info(
                "Move classification weights for background class in {} from index 0 to "
                "index {}.".format(renamed, weights[orig].shape[0] - 1)
            )
            new_weights[renamed] = torch.cat([weights[orig][1:], weights[orig][:1]])
        else:
            new_weights[renamed] = weights[orig]

    return new_weights, new_keys_to_original_keys
204
+
205
+
206
# Note the current matching is not symmetric.
# it assumes model_state_dict will have longer names.
def align_and_update_state_dicts(model_state_dict, ckpt_state_dict, c2_conversion=True):
    """
    Match names between the two state-dict, and returns a new chkpt_state_dict with names
    converted to match model_state_dict with heuristics. The returned dict can be later
    loaded with fvcore checkpointer.
    If `c2_conversion==True`, `ckpt_state_dict` is assumed to be a Caffe2
    model and will be renamed at first.

    Strategy: suppose that the models that we will create will have prefixes appended
    to each of its keys, for example due to an extra level of nesting that the original
    pre-trained weights from ImageNet won't contain. For example, model.state_dict()
    might return backbone[0].body.res2.conv1.weight, while the pre-trained model contains
    res2.conv1.weight. We thus want to match both parameters together.
    For that, we look for each model weight, look among all loaded keys if there is one
    that is a suffix of the current weight name, and use it if that's the case.
    If multiple matches exist, take the one with longest size
    of the corresponding name. For example, for the same model as before, the pretrained
    weight file can contain both res2.conv1.weight, as well as conv1.weight. In this case,
    we want to match backbone[0].body.conv1.weight to conv1.weight, and
    backbone[0].body.res2.conv1.weight to res2.conv1.weight.
    """
    model_keys = sorted(model_state_dict.keys())
    if c2_conversion:
        ckpt_state_dict, original_keys = convert_c2_detectron_names(ckpt_state_dict)
        # original_keys: the name in the original dict (before renaming)
    else:
        original_keys = {x: x for x in ckpt_state_dict.keys()}
    ckpt_keys = sorted(ckpt_state_dict.keys())

    def match(a, b):
        # Matched ckpt_key should be a complete (starts with '.') suffix.
        # For example, roi_heads.mesh_head.whatever_conv1 does not match conv1,
        # but matches whatever_conv1 or mesh_head.whatever_conv1.
        return a == b or a.endswith("." + b)

    # get a matrix of string matches, where each (i, j) entry correspond to the size of the
    # ckpt_key string, if it matches
    match_matrix = [len(j) if match(i, j) else 0 for i in model_keys for j in ckpt_keys]
    match_matrix = torch.as_tensor(match_matrix).view(len(model_keys), len(ckpt_keys))
    # use the matched one with longest size in case of multiple matches
    max_match_size, idxs = match_matrix.max(1)
    # remove indices that correspond to no-match
    idxs[max_match_size == 0] = -1

    logger = logging.getLogger(__name__)
    # matched_keys: matched checkpoint key --> matched model key
    matched_keys = {}
    result_state_dict = {}
    for idx_model, idx_ckpt in enumerate(idxs.tolist()):
        if idx_ckpt == -1:
            # this model key found no counterpart in the checkpoint
            continue
        key_model = model_keys[idx_model]
        key_ckpt = ckpt_keys[idx_ckpt]
        value_ckpt = ckpt_state_dict[key_ckpt]
        shape_in_model = model_state_dict[key_model].shape

        if shape_in_model != value_ckpt.shape:
            # names matched but shapes disagree: warn and skip this weight
            logger.warning(
                "Shape of {} in checkpoint is {}, while shape of {} in model is {}.".format(
                    key_ckpt, value_ckpt.shape, key_model, shape_in_model
                )
            )
            logger.warning(
                "{} will not be loaded. Please double check and see if this is desired.".format(
                    key_ckpt
                )
            )
            continue

        assert key_model not in result_state_dict
        result_state_dict[key_model] = value_ckpt
        if key_ckpt in matched_keys:  # already added to matched_keys
            logger.error(
                "Ambiguity found for {} in checkpoint!"
                "It matches at least two keys in the model ({} and {}).".format(
                    key_ckpt, key_model, matched_keys[key_ckpt]
                )
            )
            raise ValueError("Cannot match one checkpoint key to multiple keys in the model.")

        matched_keys[key_ckpt] = key_model

    # logging: summarize what was matched, grouped by submodule
    matched_model_keys = sorted(matched_keys.values())
    if len(matched_model_keys) == 0:
        logger.warning("No weights in checkpoint matched with model.")
        return ckpt_state_dict
    common_prefix = _longest_common_prefix(matched_model_keys)
    rev_matched_keys = {v: k for k, v in matched_keys.items()}
    # re-key original_keys by the model key (it was keyed by the renamed ckpt key)
    original_keys = {k: original_keys[rev_matched_keys[k]] for k in matched_model_keys}

    model_key_groups = _group_keys_by_module(matched_model_keys, original_keys)
    table = []
    memo = set()
    for key_model in matched_model_keys:
        if key_model in memo:
            continue
        if key_model in model_key_groups:
            group = model_key_groups[key_model]
            memo |= set(group)
            shapes = [tuple(model_state_dict[k].shape) for k in group]
            table.append(
                (
                    _longest_common_prefix([k[len(common_prefix) :] for k in group]) + "*",
                    _group_str([original_keys[k] for k in group]),
                    " ".join([str(x).replace(" ", "") for x in shapes]),
                )
            )
        else:
            key_checkpoint = original_keys[key_model]
            shape = str(tuple(model_state_dict[key_model].shape))
            table.append((key_model[len(common_prefix) :], key_checkpoint, shape))
    submodule_str = common_prefix[:-1] if common_prefix else "model"
    # NOTE(review): only len(table) is logged; the per-row contents of `table`
    # are built but never printed in this version of the function.
    logger.info(
        f"Following weights matched with submodule {submodule_str} - Total num: {len(table)}"
    )

    # keep checkpoint entries that matched nothing so they are not silently dropped
    unmatched_ckpt_keys = [k for k in ckpt_keys if k not in set(matched_keys.keys())]
    for k in unmatched_ckpt_keys:
        result_state_dict[k] = ckpt_state_dict[k]
    return result_state_dict
329
+
330
+
331
def _group_keys_by_module(keys: List[str], original_names: Dict[str, str]):
    """
    Params in the same submodule are grouped together.

    Args:
        keys: names of all parameters
        original_names: mapping from parameter name to their name in the checkpoint

    Returns:
        dict[name -> all other names in the same group]
    """

    def _parent_prefix(key):
        # "a.b.weight" -> "a.b." ; a top-level parameter has no parent module.
        dot = key.rfind(".")
        return key[: dot + 1] if dot >= 0 else None

    # Shortest prefixes first, so each key is claimed by the coarsest
    # qualifying submodule.
    prefixes = sorted(
        (p for p in (_parent_prefix(k) for k in keys) if p),
        key=len,
    )

    groups = {}
    for prefix in prefixes:
        members = [k for k in keys if k.startswith(prefix)]
        if len(members) <= 1:
            continue
        if not _longest_common_prefix_str([original_names[k] for k in members]):
            # don't group weights if original names don't share prefix
            continue
        for k in members:
            # first (shortest-prefix) group wins for each key
            groups.setdefault(k, members)
    return groups
369
+
370
+
371
+ def _longest_common_prefix(names: List[str]) -> str:
372
+ """
373
+ ["abc.zfg", "abc.zef"] -> "abc."
374
+ """
375
+ names = [n.split(".") for n in names]
376
+ m1, m2 = min(names), max(names)
377
+ ret = [a for a, b in zip(m1, m2) if a == b]
378
+ ret = ".".join(ret) + "." if len(ret) else ""
379
+ return ret
380
+
381
+
382
+ def _longest_common_prefix_str(names: List[str]) -> str:
383
+ m1, m2 = min(names), max(names)
384
+ lcp = []
385
+ for a, b in zip(m1, m2):
386
+ if a == b:
387
+ lcp.append(a)
388
+ else:
389
+ break
390
+ lcp = "".join(lcp)
391
+ return lcp
392
+
393
+
394
def _group_str(names: List[str]) -> str:
    """
    Turn "common1", "common2", "common3" into "common{1,2,3}"
    """
    prefix = _longest_common_prefix_str(names)
    suffixes = ",".join(n[len(prefix):] for n in names)
    out = prefix + "{" + suffixes + "}"

    # add some simplification for BN specifically
    out = out.replace("bn_{beta,running_mean,running_var,gamma}", "bn_*")
    out = out.replace("bn_beta,bn_running_mean,bn_running_var,bn_gamma", "bn_*")
    return out
detectron2/checkpoint/catalog.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ import logging
3
+
4
+ from detectron2.utils.file_io import PathHandler, PathManager
5
+
6
+
7
class ModelCatalog:
    """
    Store mappings from names to third-party models.

    Supports two name families, resolved by :meth:`get`:
    - "ImageNetPretrained/<alias>": backbone weights pre-trained on ImageNet.
    - "Caffe2Detectron/COCO/<id>/<config>": Detectron (v1) COCO baselines.
    """

    S3_C2_DETECTRON_PREFIX = "https://dl.fbaipublicfiles.com/detectron"

    # MSRA models have STRIDE_IN_1X1=True. False otherwise.
    # NOTE: all BN models here have fused BN into an affine layer.
    # As a result, you should only load them to a model with "FrozenBN".
    # Loading them to a model with regular BN or SyncBN is wrong.
    # Even when loaded to FrozenBN, it is still different from affine by an epsilon,
    # which should be negligible for training.
    # NOTE: all models here uses PIXEL_STD=[1,1,1]
    # NOTE: Most of the BN models here are no longer used. We use the
    # re-converted pre-trained models under detectron2 model zoo instead.
    C2_IMAGENET_MODELS = {
        "MSRA/R-50": "ImageNetPretrained/MSRA/R-50.pkl",
        "MSRA/R-101": "ImageNetPretrained/MSRA/R-101.pkl",
        "FAIR/R-50-GN": "ImageNetPretrained/47261647/R-50-GN.pkl",
        "FAIR/R-101-GN": "ImageNetPretrained/47592356/R-101-GN.pkl",
        "FAIR/X-101-32x8d": "ImageNetPretrained/20171220/X-101-32x8d.pkl",
        "FAIR/X-101-64x4d": "ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl",
        "FAIR/X-152-32x8d-IN5k": "ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl",
    }

    C2_DETECTRON_PATH_FORMAT = (
        "{prefix}/{url}/output/train/{dataset}/{type}/model_final.pkl"  # noqa B950
    )

    C2_DATASET_COCO = "coco_2014_train%3Acoco_2014_valminusminival"
    C2_DATASET_COCO_KEYPOINTS = "keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival"

    # format: {model_name} -> part of the url
    C2_DETECTRON_MODELS = {
        "35857197/e2e_faster_rcnn_R-50-C4_1x": "35857197/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml.01_33_49.iAX0mXvW",  # noqa B950
        "35857345/e2e_faster_rcnn_R-50-FPN_1x": "35857345/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml.01_36_30.cUF7QR7I",  # noqa B950
        "35857890/e2e_faster_rcnn_R-101-FPN_1x": "35857890/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml.01_38_50.sNxI7sX7",  # noqa B950
        "36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x": "36761737/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml.06_31_39.5MIHi1fZ",  # noqa B950
        "35858791/e2e_mask_rcnn_R-50-C4_1x": "35858791/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml.01_45_57.ZgkA7hPB",  # noqa B950
        "35858933/e2e_mask_rcnn_R-50-FPN_1x": "35858933/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml.01_48_14.DzEQe4wC",  # noqa B950
        "35861795/e2e_mask_rcnn_R-101-FPN_1x": "35861795/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml.02_31_37.KqyEK4tT",  # noqa B950
        "36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x": "36761843/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml.06_35_59.RZotkLKI",  # noqa B950
        "48616381/e2e_mask_rcnn_R-50-FPN_2x_gn": "GN/48616381/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn_0416.13_23_38.bTlTI97Q",  # noqa B950
        "37697547/e2e_keypoint_rcnn_R-50-FPN_1x": "37697547/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml.08_42_54.kdzV35ao",  # noqa B950
        "35998355/rpn_R-50-C4_1x": "35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L",  # noqa B950
        "35998814/rpn_R-50-FPN_1x": "35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179",  # noqa B950
        "36225147/fast_R-50-FPN_1x": "36225147/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml.08_39_09.L3obSdQ2",  # noqa B950
    }

    @staticmethod
    def get(name):
        """
        Resolve a catalog name to a downloadable URL.

        Args:
            name (str): e.g. "ImageNetPretrained/MSRA/R-50" or
                "Caffe2Detectron/COCO/35857197/e2e_faster_rcnn_R-50-C4_1x".

        Returns:
            str: full URL of the model file.

        Raises:
            RuntimeError: if ``name`` matches neither supported family.
        """
        if name.startswith("Caffe2Detectron/COCO"):
            return ModelCatalog._get_c2_detectron_baseline(name)
        if name.startswith("ImageNetPretrained/"):
            return ModelCatalog._get_c2_imagenet_pretrained(name)
        raise RuntimeError("model not present in the catalog: {}".format(name))

    @staticmethod
    def _get_c2_imagenet_pretrained(name):
        # Map "ImageNetPretrained/<alias>" to its path under the S3 prefix.
        prefix = ModelCatalog.S3_C2_DETECTRON_PREFIX
        name = name[len("ImageNetPretrained/") :]
        name = ModelCatalog.C2_IMAGENET_MODELS[name]
        url = "/".join([prefix, name])
        return url

    @staticmethod
    def _get_c2_detectron_baseline(name):
        # Map "Caffe2Detectron/COCO/<model_name>" to the Detectron v1 URL layout.
        name = name[len("Caffe2Detectron/COCO/") :]
        url = ModelCatalog.C2_DETECTRON_MODELS[name]
        if "keypoint_rcnn" in name:
            dataset = ModelCatalog.C2_DATASET_COCO_KEYPOINTS
        else:
            dataset = ModelCatalog.C2_DATASET_COCO

        # renamed local from `type` to avoid shadowing the builtin
        if "35998355/rpn_R-50-C4_1x" in name:
            # this one model is somehow different from others ..
            model_type = "rpn"
        else:
            model_type = "generalized_rcnn"

        # Detectron C2 models are stored in the structure defined in `C2_DETECTRON_PATH_FORMAT`.
        url = ModelCatalog.C2_DETECTRON_PATH_FORMAT.format(
            prefix=ModelCatalog.S3_C2_DETECTRON_PREFIX,
            url=url,
            type=model_type,
            dataset=dataset,
        )
        return url
93
+
94
+
95
class ModelCatalogHandler(PathHandler):
    """
    Resolve URL like catalog://.

    Registered with :class:`PathManager` below, so paths of the form
    ``catalog://<name>`` are translated to their real URL via
    :meth:`ModelCatalog.get` before being fetched.
    """

    PREFIX = "catalog://"

    def _get_supported_prefixes(self):
        # Tell PathManager which URI prefixes this handler owns.
        return [self.PREFIX]

    def _get_local_path(self, path, **kwargs):
        logger = logging.getLogger(__name__)
        # Strip "catalog://" and resolve the remaining name through the catalog.
        catalog_path = ModelCatalog.get(path[len(self.PREFIX) :])
        logger.info("Catalog entry {} points to {}".format(path, catalog_path))
        # Delegate download/caching to whatever handler serves the real URL.
        return PathManager.get_local_path(catalog_path, **kwargs)

    def _open(self, path, mode="r", **kwargs):
        # Open the locally-cached copy of the resolved catalog entry.
        return PathManager.open(self._get_local_path(path), mode, **kwargs)


# Import-time side effect: makes "catalog://" paths usable process-wide.
PathManager.register_handler(ModelCatalogHandler())
detectron2/checkpoint/detection_checkpoint.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ import logging
3
+ import os
4
+ import pickle
5
+ from urllib.parse import parse_qs, urlparse
6
+ import torch
7
+ from fvcore.common.checkpoint import Checkpointer
8
+ from torch.nn.parallel import DistributedDataParallel
9
+
10
+ import detectron2.utils.comm as comm
11
+ from detectron2.utils.file_io import PathManager
12
+
13
+ from .c2_model_loading import align_and_update_state_dicts
14
+
15
+
16
class DetectionCheckpointer(Checkpointer):
    """
    Same as :class:`Checkpointer`, but is able to:
    1. handle models in detectron & detectron2 model zoo, and apply conversions for legacy models.
    2. correctly load checkpoints that are only available on the master worker
    """

    def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables):
        """
        Args:
            model: the model to checkpoint.
            save_dir (str): directory for saving checkpoints.
            save_to_disk (bool or None): if None, only the main process saves
                (decided via ``comm.is_main_process()``).
            checkpointables: extra checkpointable objects, forwarded to
                :class:`Checkpointer`.
        """
        is_main_process = comm.is_main_process()
        super().__init__(
            model,
            save_dir,
            save_to_disk=is_main_process if save_to_disk is None else save_to_disk,
            **checkpointables,
        )
        self.path_manager = PathManager
        # Holds the parsed URL while `load()` is executing so `_load_file`
        # can read query parameters (e.g. "?matching_heuristics=True").
        self._parsed_url_during_load = None

    def load(self, path, *args, **kwargs):
        """
        Load a checkpoint, handling URL query parameters and the case where the
        file exists only on the main worker under DistributedDataParallel.
        """
        assert self._parsed_url_during_load is None
        need_sync = False
        logger = logging.getLogger(__name__)
        logger.info("[DetectionCheckpointer] Loading from {} ...".format(path))

        if path and isinstance(self.model, DistributedDataParallel):
            path = self.path_manager.get_local_path(path)
            has_file = os.path.isfile(path)
            all_has_file = comm.all_gather(has_file)
            if not all_has_file[0]:
                raise OSError(f"File {path} not found on main worker.")
            if not all(all_has_file):
                logger.warning(
                    f"Not all workers can read checkpoint {path}. "
                    "Training may fail to fully resume."
                )
                # TODO: broadcast the checkpoint file contents from main
                # worker, and load from it instead.
                need_sync = True
            if not has_file:
                path = None  # don't load if not readable

        if path:
            parsed_url = urlparse(path)
            self._parsed_url_during_load = parsed_url
            path = parsed_url._replace(query="").geturl()  # remove query from filename
            path = self.path_manager.get_local_path(path)
        ret = super().load(path, *args, **kwargs)

        if need_sync:
            logger.info("Broadcasting model states from main worker ...")
            self.model._sync_params_and_buffers()
        self._parsed_url_during_load = None  # reset to None
        return ret

    def _load_file(self, filename):
        """
        Read a checkpoint file, dispatching on extension:
        ".pkl" (Detectron2/Caffe2 zoo), ".pyth" (pycls), otherwise torch.load.
        """
        if filename.endswith(".pkl"):
            with PathManager.open(filename, "rb") as f:
                data = pickle.load(f, encoding="latin1")
            if "model" in data and "__author__" in data:
                # file is in Detectron2 model zoo format
                self.logger.info("Reading a file from '{}'".format(data["__author__"]))
                return data
            else:
                # assume file is from Caffe2 / Detectron1 model zoo
                if "blobs" in data:
                    # Detection models have "blobs", but ImageNet models don't
                    data = data["blobs"]
                data = {k: v for k, v in data.items() if not k.endswith("_momentum")}
                return {"model": data, "__author__": "Caffe2", "matching_heuristics": True}
        elif filename.endswith(".pyth"):
            # assume file is from pycls; no one else seems to use the ".pyth" extension
            with PathManager.open(filename, "rb") as f:
                data = torch.load(f)
            # fix: the message now names the file (the f-string previously had
            # no placeholder and printed "(unknown)")
            assert (
                "model_state" in data
            ), f"Cannot load .pyth file {filename}; pycls checkpoints must contain 'model_state'."
            model_state = {
                k: v
                for k, v in data["model_state"].items()
                if not k.endswith("num_batches_tracked")
            }
            return {"model": model_state, "__author__": "pycls", "matching_heuristics": True}

        loaded = self._torch_load(filename)
        if "model" not in loaded:
            loaded = {"model": loaded}
        assert self._parsed_url_during_load is not None, "`_load_file` must be called inside `load`"
        parsed_url = self._parsed_url_during_load
        queries = parse_qs(parsed_url.query)
        if queries.pop("matching_heuristics", "False") == ["True"]:
            loaded["matching_heuristics"] = True
        if len(queries) > 0:
            # fix: removed stray "f" before {queries} and typo "orginal"
            raise ValueError(
                f"Unsupported query remaining: {queries}, original filename: {parsed_url.geturl()}"
            )
        return loaded

    def _torch_load(self, f):
        # Thin hook so subclasses can override plain torch loading.
        return super()._load_file(f)

    def _load_model(self, checkpoint):
        """
        Load weights into the model, applying name-matching heuristics for
        legacy checkpoints, then prune known-benign incompatibility messages.
        """
        if checkpoint.get("matching_heuristics", False):
            self._convert_ndarray_to_tensor(checkpoint["model"])
            # convert weights by name-matching heuristics
            checkpoint["model"] = align_and_update_state_dicts(
                self.model.state_dict(),
                checkpoint["model"],
                c2_conversion=checkpoint.get("__author__", None) == "Caffe2",
            )
        # for non-caffe2 models, use standard ways to load it
        incompatible = super()._load_model(checkpoint)

        model_buffers = dict(self.model.named_buffers(recurse=False))
        for k in ["pixel_mean", "pixel_std"]:
            # Ignore missing key message about pixel_mean/std.
            # Though they may be missing in old checkpoints, they will be correctly
            # initialized from config anyway.
            if k in model_buffers:
                try:
                    incompatible.missing_keys.remove(k)
                except ValueError:
                    pass
        # iterate over a copy since we mutate the list while looping
        for k in incompatible.unexpected_keys[:]:
            # Ignore unexpected keys about cell anchors. They exist in old checkpoints
            # but now they are non-persistent buffers and will not be in new checkpoints.
            if "anchor_generator.cell_anchors" in k:
                incompatible.unexpected_keys.remove(k)
        return incompatible
detectron2/config/__init__.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (c) Facebook, Inc. and its affiliates.
# Public API of the detectron2.config package: yacs-style CfgNode helpers
# plus the newer LazyConfig system.
from .compat import downgrade_config, upgrade_config
from .config import CfgNode, get_cfg, global_cfg, set_global_cfg, configurable
from .instantiate import instantiate
from .lazy import LazyCall, LazyConfig

__all__ = [
    "CfgNode",
    "get_cfg",
    "global_cfg",
    "set_global_cfg",
    "downgrade_config",
    "upgrade_config",
    "configurable",
    "instantiate",
    "LazyCall",
    "LazyConfig",
]


from detectron2.utils.env import fixup_module_metadata

# NOTE(review): presumably rewrites the exported names' __module__ so docs and
# reprs point at this package rather than the submodules — confirm in
# detectron2.utils.env. Deleted afterwards to keep the namespace clean.
fixup_module_metadata(__name__, globals(), __all__)
del fixup_module_metadata
detectron2/config/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (827 Bytes). View file
 
detectron2/config/__pycache__/compat.cpython-311.pyc ADDED
Binary file (11.7 kB). View file
 
detectron2/config/__pycache__/config.cpython-311.pyc ADDED
Binary file (11.7 kB). View file
 
detectron2/config/__pycache__/defaults.cpython-311.pyc ADDED
Binary file (13.8 kB). View file
 
detectron2/config/__pycache__/instantiate.cpython-311.pyc ADDED
Binary file (4.76 kB). View file
 
detectron2/config/__pycache__/lazy.cpython-311.pyc ADDED
Binary file (23.5 kB). View file
 
detectron2/config/compat.py ADDED
@@ -0,0 +1,229 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ """
3
+ Backward compatibility of configs.
4
+
5
+ Instructions to bump version:
6
+ + It's not needed to bump version if new keys are added.
7
+ It's only needed when backward-incompatible changes happen
8
+ (i.e., some existing keys disappear, or the meaning of a key changes)
9
+ + To bump version, do the following:
10
+ 1. Increment _C.VERSION in defaults.py
11
+ 2. Add a converter in this file.
12
+
13
+ Each ConverterVX has a function "upgrade" which in-place upgrades config from X-1 to X,
14
+ and a function "downgrade" which in-place downgrades config from X to X-1
15
+
16
+ In each function, VERSION is left unchanged.
17
+
18
+ Each converter assumes that its input has the relevant keys
19
+ (i.e., the input is not a partial config).
20
+ 3. Run the tests (test_config.py) to make sure the upgrade & downgrade
21
+ functions are consistent.
22
+ """
23
+
24
+ import logging
25
+ from typing import List, Optional, Tuple
26
+
27
+ from .config import CfgNode as CN
28
+ from .defaults import _C
29
+
30
+ __all__ = ["upgrade_config", "downgrade_config"]
31
+
32
+
33
def upgrade_config(cfg: CN, to_version: Optional[int] = None) -> CN:
    """
    Upgrade a config from its current version to a newer version.

    Args:
        cfg (CfgNode):
        to_version (int): defaults to the latest version.
    """
    if to_version is None:
        to_version = _C.VERSION
    result = cfg.clone()
    assert result.VERSION <= to_version, "Cannot upgrade from v{} to v{}!".format(
        result.VERSION, to_version
    )
    # Apply ConverterV(x+1) repeatedly until the target version is reached.
    while result.VERSION < to_version:
        next_version = result.VERSION + 1
        globals()["ConverterV" + str(next_version)].upgrade(result)
        result.VERSION = next_version
    return result
53
+
54
+
55
def downgrade_config(cfg: CN, to_version: int) -> CN:
    """
    Downgrade a config from its current version to an older version.

    Args:
        cfg (CfgNode):
        to_version (int):

    Note:
        A general downgrade of arbitrary configs is not always possible due to the
        different functionalities in different versions.
        The purpose of downgrade is only to recover the defaults in old versions,
        allowing it to load an old partial yaml config.
        Therefore, the implementation only needs to fill in the default values
        in the old version when a general downgrade is not possible.
    """
    result = cfg.clone()
    assert result.VERSION >= to_version, "Cannot downgrade from v{} to v{}!".format(
        result.VERSION, to_version
    )
    # Apply ConverterV(x).downgrade repeatedly until the target version is reached.
    while result.VERSION > to_version:
        globals()["ConverterV" + str(result.VERSION)].downgrade(result)
        result.VERSION = result.VERSION - 1
    return result
80
+
81
+
82
def guess_version(cfg: CN, filename: str) -> int:
    """
    Guess the version of a partial config where the VERSION field is not specified.
    Returns the version, or the latest if cannot make a guess.

    This makes it easier for users to migrate.
    """
    logger = logging.getLogger(__name__)

    def _has(name: str) -> bool:
        # Walk the dotted path; absent at any level means the key is missing.
        node = cfg
        for part in name.split("."):
            if part not in node:
                return False
            node = node[part]
        return True

    # Most users' partial configs have "MODEL.WEIGHT", so guess on it
    ret = 1 if _has("MODEL.WEIGHT") or _has("TEST.AUG_ON") else None

    if ret is None:
        ret = _C.VERSION
        logger.warning(
            "Config '{}' has no VERSION. Assuming it to be compatible with latest v{}.".format(
                filename, ret
            )
        )
    else:
        logger.warning("Config '{}' has no VERSION. Assuming it to be v{}.".format(filename, ret))
    return ret
114
+
115
+
116
def _rename(cfg: CN, old: str, new: str) -> None:
    """
    Move the value at dotted key ``old`` to dotted key ``new``, in place.

    Intermediate nodes for ``new`` are created as needed. After deleting the
    old leaf, parent nodes that became empty are pruned recursively.
    """
    old_keys = old.split(".")
    new_keys = new.split(".")

    def _set(key_seq: List[str], val: str) -> None:
        # Create missing intermediate CN nodes on the way down, then assign the leaf.
        cur = cfg
        for k in key_seq[:-1]:
            if k not in cur:
                cur[k] = CN()
            cur = cur[k]
        cur[key_seq[-1]] = val

    def _get(key_seq: List[str]) -> CN:
        cur = cfg
        for k in key_seq:
            cur = cur[k]
        return cur

    def _del(key_seq: List[str]) -> None:
        cur = cfg
        for k in key_seq[:-1]:
            cur = cur[k]
        del cur[key_seq[-1]]
        # Recursively prune parents that are now empty, so renaming the last
        # key out of a namespace removes the namespace itself.
        if len(cur) == 0 and len(key_seq) > 1:
            _del(key_seq[:-1])

    _set(new_keys, _get(old_keys))
    _del(old_keys)
144
+
145
+
146
class _RenameConverter:
    """
    A converter that handles simple rename.

    Subclasses only need to populate ``RENAME``; ``upgrade``/``downgrade``
    apply the renames forward and in reverse.
    """

    RENAME: List[Tuple[str, str]] = []  # list of tuples of (old name, new name)

    @classmethod
    def upgrade(cls, cfg: CN) -> None:
        # Apply renames in declaration order.
        for old, new in cls.RENAME:
            _rename(cfg, old, new)

    @classmethod
    def downgrade(cls, cfg: CN) -> None:
        # Undo renames in reverse order, swapping source and destination.
        for old, new in cls.RENAME[::-1]:
            _rename(cfg, new, old)
162
+
163
+
164
class ConverterV1(_RenameConverter):
    # v0 -> v1: the RPN head name moved under the MODEL.RPN namespace.
    RENAME = [("MODEL.RPN_HEAD.NAME", "MODEL.RPN.HEAD_NAME")]
166
+
167
+
168
class ConverterV2(_RenameConverter):
    """
    A large bulk of rename, before public release.

    Beyond the simple renames in ``RENAME``, anchor settings need special
    handling: in v1 they lived under MODEL.RPN / MODEL.RETINANET depending on
    the meta-architecture, while v2 unifies them under MODEL.ANCHOR_GENERATOR.
    """

    RENAME = [
        ("MODEL.WEIGHT", "MODEL.WEIGHTS"),
        ("MODEL.PANOPTIC_FPN.SEMANTIC_LOSS_SCALE", "MODEL.SEM_SEG_HEAD.LOSS_WEIGHT"),
        ("MODEL.PANOPTIC_FPN.RPN_LOSS_SCALE", "MODEL.RPN.LOSS_WEIGHT"),
        ("MODEL.PANOPTIC_FPN.INSTANCE_LOSS_SCALE", "MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT"),
        ("MODEL.PANOPTIC_FPN.COMBINE_ON", "MODEL.PANOPTIC_FPN.COMBINE.ENABLED"),
        (
            "MODEL.PANOPTIC_FPN.COMBINE_OVERLAP_THRESHOLD",
            "MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH",
        ),
        (
            "MODEL.PANOPTIC_FPN.COMBINE_STUFF_AREA_LIMIT",
            "MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT",
        ),
        (
            "MODEL.PANOPTIC_FPN.COMBINE_INSTANCES_CONFIDENCE_THRESHOLD",
            "MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH",
        ),
        ("MODEL.ROI_HEADS.SCORE_THRESH", "MODEL.ROI_HEADS.SCORE_THRESH_TEST"),
        ("MODEL.ROI_HEADS.NMS", "MODEL.ROI_HEADS.NMS_THRESH_TEST"),
        ("MODEL.RETINANET.INFERENCE_SCORE_THRESHOLD", "MODEL.RETINANET.SCORE_THRESH_TEST"),
        ("MODEL.RETINANET.INFERENCE_TOPK_CANDIDATES", "MODEL.RETINANET.TOPK_CANDIDATES_TEST"),
        ("MODEL.RETINANET.INFERENCE_NMS_THRESHOLD", "MODEL.RETINANET.NMS_THRESH_TEST"),
        ("TEST.DETECTIONS_PER_IMG", "TEST.DETECTIONS_PER_IMAGE"),
        ("TEST.AUG_ON", "TEST.AUG.ENABLED"),
        ("TEST.AUG_MIN_SIZES", "TEST.AUG.MIN_SIZES"),
        ("TEST.AUG_MAX_SIZE", "TEST.AUG.MAX_SIZE"),
        ("TEST.AUG_FLIP", "TEST.AUG.FLIP"),
    ]

    @classmethod
    def upgrade(cls, cfg: CN) -> None:
        super().upgrade(cfg)

        # Keep whichever architecture's anchor settings are active and drop
        # the other's, since v2 has a single ANCHOR_GENERATOR namespace.
        if cfg.MODEL.META_ARCHITECTURE == "RetinaNet":
            _rename(
                cfg, "MODEL.RETINANET.ANCHOR_ASPECT_RATIOS", "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS"
            )
            _rename(cfg, "MODEL.RETINANET.ANCHOR_SIZES", "MODEL.ANCHOR_GENERATOR.SIZES")
            del cfg["MODEL"]["RPN"]["ANCHOR_SIZES"]
            del cfg["MODEL"]["RPN"]["ANCHOR_ASPECT_RATIOS"]
        else:
            _rename(cfg, "MODEL.RPN.ANCHOR_ASPECT_RATIOS", "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS")
            _rename(cfg, "MODEL.RPN.ANCHOR_SIZES", "MODEL.ANCHOR_GENERATOR.SIZES")
            del cfg["MODEL"]["RETINANET"]["ANCHOR_SIZES"]
            del cfg["MODEL"]["RETINANET"]["ANCHOR_ASPECT_RATIOS"]
            del cfg["MODEL"]["RETINANET"]["ANCHOR_STRIDES"]

    @classmethod
    def downgrade(cls, cfg: CN) -> None:
        super().downgrade(cfg)

        # Restore anchor settings under RPN, and mirror them into RETINANET
        # so the v1 defaults exist regardless of meta-architecture.
        _rename(cfg, "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS", "MODEL.RPN.ANCHOR_ASPECT_RATIOS")
        _rename(cfg, "MODEL.ANCHOR_GENERATOR.SIZES", "MODEL.RPN.ANCHOR_SIZES")
        cfg.MODEL.RETINANET.ANCHOR_ASPECT_RATIOS = cfg.MODEL.RPN.ANCHOR_ASPECT_RATIOS
        cfg.MODEL.RETINANET.ANCHOR_SIZES = cfg.MODEL.RPN.ANCHOR_SIZES
        cfg.MODEL.RETINANET.ANCHOR_STRIDES = []  # this is not used anywhere in any version
detectron2/config/config.py ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright (c) Facebook, Inc. and its affiliates.
3
+
4
+ import functools
5
+ import inspect
6
+ import logging
7
+ from fvcore.common.config import CfgNode as _CfgNode
8
+
9
+ from detectron2.utils.file_io import PathManager
10
+
11
+
12
class CfgNode(_CfgNode):
    """
    The same as `fvcore.common.config.CfgNode`, but different in:

    1. Use unsafe yaml loading by default.
       Note that this may lead to arbitrary code execution: you must not
       load a config file from untrusted sources before manually inspecting
       the content of the file.
    2. Support config versioning.
       When attempting to merge an old config, it will convert the old config automatically.

    .. automethod:: clone
    .. automethod:: freeze
    .. automethod:: defrost
    .. automethod:: is_frozen
    .. automethod:: load_yaml_with_base
    .. automethod:: merge_from_list
    .. automethod:: merge_from_other_cfg
    """

    @classmethod
    def _open_cfg(cls, filename):
        # Route file access through PathManager so remote/URI paths work.
        return PathManager.open(filename, "r")

    # Note that the default value of allow_unsafe is changed to True
    def merge_from_file(self, cfg_filename: str, allow_unsafe: bool = True) -> None:
        """
        Load content from the given config file and merge it into self.

        Args:
            cfg_filename: config filename
            allow_unsafe: allow unsafe yaml syntax
        """
        assert PathManager.isfile(cfg_filename), f"Config file '{cfg_filename}' does not exist!"
        loaded_cfg = self.load_yaml_with_base(cfg_filename, allow_unsafe=allow_unsafe)
        loaded_cfg = type(self)(loaded_cfg)

        # defaults.py needs to import CfgNode
        from .defaults import _C

        latest_ver = _C.VERSION
        assert (
            latest_ver == self.VERSION
        ), "CfgNode.merge_from_file is only allowed on a config object of latest version!"

        logger = logging.getLogger(__name__)

        loaded_ver = loaded_cfg.get("VERSION", None)
        if loaded_ver is None:
            from .compat import guess_version

            loaded_ver = guess_version(loaded_cfg, cfg_filename)
        assert loaded_ver <= self.VERSION, "Cannot merge a v{} config into a v{} config.".format(
            loaded_ver, self.VERSION
        )

        if loaded_ver == self.VERSION:
            self.merge_from_other_cfg(loaded_cfg)
        else:
            # compat.py needs to import CfgNode
            from .compat import upgrade_config, downgrade_config

            logger.warning(
                "Loading an old v{} config file '{}' by automatically upgrading to v{}. "
                "See docs/CHANGELOG.md for instructions to update your files.".format(
                    loaded_ver, cfg_filename, self.VERSION
                )
            )
            # To convert, first obtain a full config at an old version,
            # merge the partial old-version file into it, then upgrade the
            # merged result back to the latest version and replace self.
            old_self = downgrade_config(self, to_version=loaded_ver)
            old_self.merge_from_other_cfg(loaded_cfg)
            new_config = upgrade_config(old_self)
            self.clear()
            self.update(new_config)

    def dump(self, *args, **kwargs):
        """
        Returns:
            str: a yaml string representation of the config
        """
        # to make it show up in docs
        return super().dump(*args, **kwargs)
94
+
95
+
96
+ global_cfg = CfgNode()
97
+
98
+
99
def get_cfg() -> CfgNode:
    """
    Get a copy of the default config.

    Returns:
        a detectron2 CfgNode instance.
    """
    # Imported locally: defaults.py itself imports CfgNode from this module,
    # so a top-level import would be circular.
    from .defaults import _C

    return _C.clone()
109
+
110
+
111
def set_global_cfg(cfg: CfgNode) -> None:
    """
    Let the global config point to the given cfg.

    Assume that the given "cfg" has the key "KEY", after calling
    `set_global_cfg(cfg)`, the key can be accessed by:
    ::
        from detectron2.config import global_cfg
        print(global_cfg.KEY)

    By using a hacky global config, you can access these configs anywhere,
    without having to pass the config object or the values deep into the code.
    This is a hacky feature introduced for quick prototyping / research exploration.
    """
    global global_cfg
    # Mutate the existing object (clear + update) instead of rebinding, so
    # modules that already imported `global_cfg` observe the new contents.
    global_cfg.clear()
    global_cfg.update(cfg)
128
+
129
+
130
def configurable(init_func=None, *, from_config=None):
    """
    Decorate a function or a class's __init__ method so that it can be called
    with a :class:`CfgNode` object using a :func:`from_config` function that translates
    :class:`CfgNode` to arguments.

    Examples:
    ::
        # Usage 1: Decorator on __init__:
        class A:
            @configurable
            def __init__(self, a, b=2, c=3):
                pass

            @classmethod
            def from_config(cls, cfg):   # 'cfg' must be the first argument
                # Returns kwargs to be passed to __init__
                return {"a": cfg.A, "b": cfg.B}

        a1 = A(a=1, b=2)  # regular construction
        a2 = A(cfg)       # construct with a cfg
        a3 = A(cfg, b=3, c=4)  # construct with extra overwrite

        # Usage 2: Decorator on any function. Needs an extra from_config argument:
        @configurable(from_config=lambda cfg: {"a": cfg.A, "b": cfg.B})
        def a_func(a, b=2, c=3):
            pass

        a1 = a_func(a=1, b=2)  # regular call
        a2 = a_func(cfg)       # call with a cfg
        a3 = a_func(cfg, b=3, c=4)  # call with extra overwrite

    Args:
        init_func (callable): a class's ``__init__`` method in usage 1. The
            class must have a ``from_config`` classmethod which takes `cfg` as
            the first argument.
        from_config (callable): the from_config function in usage 2. It must take `cfg`
            as its first argument.
    """

    if init_func is not None:
        # Usage 1: bare decorator on __init__; from_config is looked up on the class.
        assert (
            inspect.isfunction(init_func)
            and from_config is None
            and init_func.__name__ == "__init__"
        ), "Incorrect use of @configurable. Check API documentation for examples."

        @functools.wraps(init_func)
        def wrapped(self, *args, **kwargs):
            try:
                # Look up on type(self) so subclasses can override from_config.
                from_config_func = type(self).from_config
            except AttributeError as e:
                raise AttributeError(
                    "Class with @configurable must have a 'from_config' classmethod."
                ) from e
            if not inspect.ismethod(from_config_func):
                raise TypeError("Class with @configurable must have a 'from_config' classmethod.")

            if _called_with_cfg(*args, **kwargs):
                explicit_args = _get_args_from_config(from_config_func, *args, **kwargs)
                init_func(self, **explicit_args)
            else:
                init_func(self, *args, **kwargs)

        return wrapped

    else:
        # Usage 2: decorator factory with an explicit from_config function.
        if from_config is None:
            return configurable  # @configurable() is made equivalent to @configurable
        assert inspect.isfunction(
            from_config
        ), "from_config argument of configurable must be a function!"

        def wrapper(orig_func):
            @functools.wraps(orig_func)
            def wrapped(*args, **kwargs):
                if _called_with_cfg(*args, **kwargs):
                    explicit_args = _get_args_from_config(from_config, *args, **kwargs)
                    return orig_func(**explicit_args)
                else:
                    return orig_func(*args, **kwargs)

            # Expose the translator so callers can reuse it programmatically.
            wrapped.from_config = from_config
            return wrapped

        return wrapper
216
+
217
+
218
+ def _get_args_from_config(from_config_func, *args, **kwargs):
219
+ """
220
+ Use `from_config` to obtain explicit arguments.
221
+
222
+ Returns:
223
+ dict: arguments to be used for cls.__init__
224
+ """
225
+ signature = inspect.signature(from_config_func)
226
+ if list(signature.parameters.keys())[0] != "cfg":
227
+ if inspect.isfunction(from_config_func):
228
+ name = from_config_func.__name__
229
+ else:
230
+ name = f"{from_config_func.__self__}.from_config"
231
+ raise TypeError(f"{name} must take 'cfg' as the first argument!")
232
+ support_var_arg = any(
233
+ param.kind in [param.VAR_POSITIONAL, param.VAR_KEYWORD]
234
+ for param in signature.parameters.values()
235
+ )
236
+ if support_var_arg: # forward all arguments to from_config, if from_config accepts them
237
+ ret = from_config_func(*args, **kwargs)
238
+ else:
239
+ # forward supported arguments to from_config
240
+ supported_arg_names = set(signature.parameters.keys())
241
+ extra_kwargs = {}
242
+ for name in list(kwargs.keys()):
243
+ if name not in supported_arg_names:
244
+ extra_kwargs[name] = kwargs.pop(name)
245
+ ret = from_config_func(*args, **kwargs)
246
+ # forward the other arguments to __init__
247
+ ret.update(extra_kwargs)
248
+ return ret
249
+
250
+
251
def _called_with_cfg(*args, **kwargs):
    """
    Returns:
        bool: whether the arguments contain CfgNode and should be considered
        forwarded to from_config.
    """
    from omegaconf import DictConfig

    cfg_types = (_CfgNode, DictConfig)
    if args and isinstance(args[0], cfg_types):
        return True
    # `from_config`'s first argument is forced to be "cfg", so checking the
    # positional head and a "cfg" keyword covers all cases.
    return isinstance(kwargs.pop("cfg", None), cfg_types)
detectron2/config/defaults.py ADDED
@@ -0,0 +1,656 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ from .config import CfgNode as CN
3
+
4
+ # NOTE: given the new config system
5
+ # (https://detectron2.readthedocs.io/en/latest/tutorials/lazyconfigs.html),
6
+ # we will stop adding new functionalities to default CfgNode.
7
+
8
+ # -----------------------------------------------------------------------------
9
+ # Convention about Training / Test specific parameters
10
+ # -----------------------------------------------------------------------------
11
+ # Whenever an argument can be either used for training or for testing, the
12
+ # corresponding name will be post-fixed by a _TRAIN for a training parameter,
13
+ # or _TEST for a test-specific parameter.
14
+ # For example, the number of images during training will be
15
+ # IMAGES_PER_BATCH_TRAIN, while the number of images for testing will be
16
+ # IMAGES_PER_BATCH_TEST
17
+
18
+ # -----------------------------------------------------------------------------
19
+ # Config definition
20
+ # -----------------------------------------------------------------------------
21
+
22
+ _C = CN()
23
+
24
+ # The version number, to upgrade from old configs to new ones if any
25
+ # changes happen. It's recommended to keep a VERSION in your config file.
26
+ _C.VERSION = 2
27
+
28
+ _C.MODEL = CN()
29
+ _C.MODEL.LOAD_PROPOSALS = False
30
+ _C.MODEL.MASK_ON = False
31
+ _C.MODEL.KEYPOINT_ON = False
32
+ _C.MODEL.DEVICE = "cuda"
33
+ _C.MODEL.META_ARCHITECTURE = "GeneralizedRCNN"
34
+
35
+ # Path (a file path, or URL like detectron2://.., https://..) to a checkpoint file
36
+ # to be loaded to the model. You can find available models in the model zoo.
37
+ _C.MODEL.WEIGHTS = ""
38
+
39
+ # Values to be used for image normalization (BGR order, since INPUT.FORMAT defaults to BGR).
40
+ # To train on images of different number of channels, just set different mean & std.
41
+ # Default values are the mean pixel value from ImageNet: [103.53, 116.28, 123.675]
42
+ _C.MODEL.PIXEL_MEAN = [103.530, 116.280, 123.675]
43
+ # When using pre-trained models in Detectron1 or any MSRA models,
44
+ # std has been absorbed into its conv1 weights, so the std needs to be set 1.
45
+ # Otherwise, you can use [57.375, 57.120, 58.395] (ImageNet std)
46
+ _C.MODEL.PIXEL_STD = [1.0, 1.0, 1.0]
47
+
48
+
49
+ # -----------------------------------------------------------------------------
50
+ # INPUT
51
+ # -----------------------------------------------------------------------------
52
+ _C.INPUT = CN()
53
+ # By default, {MIN,MAX}_SIZE options are used in transforms.ResizeShortestEdge.
54
+ # Please refer to ResizeShortestEdge for detailed definition.
55
+ # Size of the smallest side of the image during training
56
+ _C.INPUT.MIN_SIZE_TRAIN = (800,)
57
+ # Sample size of smallest side by choice or random selection from range give by
58
+ # INPUT.MIN_SIZE_TRAIN
59
+ _C.INPUT.MIN_SIZE_TRAIN_SAMPLING = "choice"
60
+ # Maximum size of the side of the image during training
61
+ _C.INPUT.MAX_SIZE_TRAIN = 1333
62
+ # Size of the smallest side of the image during testing. Set to zero to disable resize in testing.
63
+ _C.INPUT.MIN_SIZE_TEST = 800
64
+ # Maximum size of the side of the image during testing
65
+ _C.INPUT.MAX_SIZE_TEST = 1333
66
+ # Mode for flipping images used in data augmentation during training
67
+ # choose one of ["horizontal, "vertical", "none"]
68
+ _C.INPUT.RANDOM_FLIP = "horizontal"
69
+
70
+ # `True` if cropping is used for data augmentation during training
71
+ _C.INPUT.CROP = CN({"ENABLED": False})
72
+ # Cropping type. See documentation of `detectron2.data.transforms.RandomCrop` for explanation.
73
+ _C.INPUT.CROP.TYPE = "relative_range"
74
+ # Size of crop in range (0, 1] if CROP.TYPE is "relative" or "relative_range" and in number of
75
+ # pixels if CROP.TYPE is "absolute"
76
+ _C.INPUT.CROP.SIZE = [0.9, 0.9]
77
+
78
+
79
+ # Whether the model needs RGB, YUV, HSV etc.
80
+ # Should be one of the modes defined here, as we use PIL to read the image:
81
+ # https://pillow.readthedocs.io/en/stable/handbook/concepts.html#concept-modes
82
+ # with BGR being the one exception. One can set image format to BGR, we will
83
+ # internally use RGB for conversion and flip the channels over
84
+ _C.INPUT.FORMAT = "BGR"
85
+ # The ground truth mask format that the model will use.
86
+ # Mask R-CNN supports either "polygon" or "bitmask" as ground truth.
87
+ _C.INPUT.MASK_FORMAT = "polygon" # alternative: "bitmask"
88
+
89
+
90
+ # -----------------------------------------------------------------------------
91
+ # Dataset
92
+ # -----------------------------------------------------------------------------
93
+ _C.DATASETS = CN()
94
+ # List of the dataset names for training. Must be registered in DatasetCatalog
95
+ # Samples from these datasets will be merged and used as one dataset.
96
+ _C.DATASETS.TRAIN = ()
97
+ # List of the pre-computed proposal files for training, which must be consistent
98
+ # with datasets listed in DATASETS.TRAIN.
99
+ _C.DATASETS.PROPOSAL_FILES_TRAIN = ()
100
+ # Number of top scoring precomputed proposals to keep for training
101
+ _C.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN = 2000
102
+ # List of the dataset names for testing. Must be registered in DatasetCatalog
103
+ _C.DATASETS.TEST = ()
104
+ # List of the pre-computed proposal files for test, which must be consistent
105
+ # with datasets listed in DATASETS.TEST.
106
+ _C.DATASETS.PROPOSAL_FILES_TEST = ()
107
+ # Number of top scoring precomputed proposals to keep for test
108
+ _C.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST = 1000
109
+
110
+ # -----------------------------------------------------------------------------
111
+ # DataLoader
112
+ # -----------------------------------------------------------------------------
113
+ _C.DATALOADER = CN()
114
+ # Number of data loading threads
115
+ _C.DATALOADER.NUM_WORKERS = 4
116
+ # If True, each batch should contain only images for which the aspect ratio
117
+ # is compatible. This groups portrait images together, and landscape images
118
+ # are not batched with portrait images.
119
+ _C.DATALOADER.ASPECT_RATIO_GROUPING = True
120
+ # Options: TrainingSampler, RepeatFactorTrainingSampler
121
+ _C.DATALOADER.SAMPLER_TRAIN = "TrainingSampler"
122
+ # Repeat threshold for RepeatFactorTrainingSampler
123
+ _C.DATALOADER.REPEAT_THRESHOLD = 0.0
124
+ # if True, take square root when computing repeating factor
125
+ _C.DATALOADER.REPEAT_SQRT = True
126
+ # Tf True, when working on datasets that have instance annotations, the
127
+ # training dataloader will filter out images without associated annotations
128
+ _C.DATALOADER.FILTER_EMPTY_ANNOTATIONS = True
129
+
130
+ # ---------------------------------------------------------------------------- #
131
+ # Backbone options
132
+ # ---------------------------------------------------------------------------- #
133
+ _C.MODEL.BACKBONE = CN()
134
+
135
+ _C.MODEL.BACKBONE.NAME = "build_resnet_backbone"
136
+ # Freeze the first several stages so they are not trained.
137
+ # There are 5 stages in ResNet. The first is a convolution, and the following
138
+ # stages are each group of residual blocks.
139
+ _C.MODEL.BACKBONE.FREEZE_AT = 2
140
+
141
+
142
+ # ---------------------------------------------------------------------------- #
143
+ # FPN options
144
+ # ---------------------------------------------------------------------------- #
145
+ _C.MODEL.FPN = CN()
146
+ # Names of the input feature maps to be used by FPN
147
+ # They must have contiguous power of 2 strides
148
+ # e.g., ["res2", "res3", "res4", "res5"]
149
+ _C.MODEL.FPN.IN_FEATURES = []
150
+ _C.MODEL.FPN.OUT_CHANNELS = 256
151
+
152
+ # Options: "" (no norm), "GN"
153
+ _C.MODEL.FPN.NORM = ""
154
+
155
+ # Types for fusing the FPN top-down and lateral features. Can be either "sum" or "avg"
156
+ _C.MODEL.FPN.FUSE_TYPE = "sum"
157
+
158
+
159
+ # ---------------------------------------------------------------------------- #
160
+ # Proposal generator options
161
+ # ---------------------------------------------------------------------------- #
162
+ _C.MODEL.PROPOSAL_GENERATOR = CN()
163
+ # Current proposal generators include "RPN", "RRPN" and "PrecomputedProposals"
164
+ _C.MODEL.PROPOSAL_GENERATOR.NAME = "RPN"
165
+ # Proposal height and width both need to be greater than MIN_SIZE
166
+ # (a the scale used during training or inference)
167
+ _C.MODEL.PROPOSAL_GENERATOR.MIN_SIZE = 0
168
+
169
+
170
+ # ---------------------------------------------------------------------------- #
171
+ # Anchor generator options
172
+ # ---------------------------------------------------------------------------- #
173
+ _C.MODEL.ANCHOR_GENERATOR = CN()
174
+ # The generator can be any name in the ANCHOR_GENERATOR registry
175
+ _C.MODEL.ANCHOR_GENERATOR.NAME = "DefaultAnchorGenerator"
176
+ # Anchor sizes (i.e. sqrt of area) in absolute pixels w.r.t. the network input.
177
+ # Format: list[list[float]]. SIZES[i] specifies the list of sizes to use for
178
+ # IN_FEATURES[i]; len(SIZES) must be equal to len(IN_FEATURES) or 1.
179
+ # When len(SIZES) == 1, SIZES[0] is used for all IN_FEATURES.
180
+ _C.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64, 128, 256, 512]]
181
+ # Anchor aspect ratios. For each area given in `SIZES`, anchors with different aspect
182
+ # ratios are generated by an anchor generator.
183
# Format: list[list[float]]. ASPECT_RATIOS[i] specifies the list of aspect ratios (H/W)
# to use for IN_FEATURES[i]; len(ASPECT_RATIOS) == len(IN_FEATURES) must be true,
# or len(ASPECT_RATIOS) == 1 is true and aspect ratio list ASPECT_RATIOS[0] is used
# for all IN_FEATURES.
_C.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.5, 1.0, 2.0]]
# Anchor angles.
# list[list[float]], the angle in degrees, for each input feature map.
# ANGLES[i] specifies the list of angles for IN_FEATURES[i].
_C.MODEL.ANCHOR_GENERATOR.ANGLES = [[-90, 0, 90]]
# Relative offset between the center of the first anchor and the top-left corner of the image
# Value has to be in [0, 1). Recommend to use 0.5, which means half stride.
# The value is not expected to affect model accuracy.
_C.MODEL.ANCHOR_GENERATOR.OFFSET = 0.0

# ---------------------------------------------------------------------------- #
# RPN options
# ---------------------------------------------------------------------------- #
_C.MODEL.RPN = CN()
_C.MODEL.RPN.HEAD_NAME = "StandardRPNHead"  # used by RPN_HEAD_REGISTRY

# Names of the input feature maps to be used by RPN
# e.g., ["p2", "p3", "p4", "p5", "p6"] for FPN
_C.MODEL.RPN.IN_FEATURES = ["res4"]
# Remove RPN anchors that go outside the image by BOUNDARY_THRESH pixels
# Set to -1 or a large value, e.g. 100000, to disable pruning anchors
_C.MODEL.RPN.BOUNDARY_THRESH = -1
# IOU overlap ratios [BG_IOU_THRESHOLD, FG_IOU_THRESHOLD]
# Minimum overlap required between an anchor and ground-truth box for the
# (anchor, gt box) pair to be a positive example (IoU >= FG_IOU_THRESHOLD
# ==> positive RPN example: 1)
# Maximum overlap allowed between an anchor and ground-truth box for the
# (anchor, gt box) pair to be a negative example (IoU < BG_IOU_THRESHOLD
# ==> negative RPN example: 0)
# Anchors with overlap in between (BG_IOU_THRESHOLD <= IoU < FG_IOU_THRESHOLD)
# are ignored (-1)
_C.MODEL.RPN.IOU_THRESHOLDS = [0.3, 0.7]
_C.MODEL.RPN.IOU_LABELS = [0, -1, 1]
# Number of regions per image used to train RPN
_C.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 256
# Target fraction of foreground (positive) examples per RPN minibatch
_C.MODEL.RPN.POSITIVE_FRACTION = 0.5
# Options are: "smooth_l1", "giou", "diou", "ciou"
_C.MODEL.RPN.BBOX_REG_LOSS_TYPE = "smooth_l1"
_C.MODEL.RPN.BBOX_REG_LOSS_WEIGHT = 1.0
# Weights on (dx, dy, dw, dh) for normalizing RPN anchor regression targets
_C.MODEL.RPN.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
# The transition point from L1 to L2 loss. Set to 0.0 to make the loss simply L1.
_C.MODEL.RPN.SMOOTH_L1_BETA = 0.0
_C.MODEL.RPN.LOSS_WEIGHT = 1.0
# Number of top scoring RPN proposals to keep before applying NMS
# When FPN is used, this is *per FPN level* (not total)
_C.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 12000
_C.MODEL.RPN.PRE_NMS_TOPK_TEST = 6000
# Number of top scoring RPN proposals to keep after applying NMS
# When FPN is used, this limit is applied per level and then again to the union
# of proposals from all levels
# NOTE: When FPN is used, the meaning of this config is different from Detectron1.
# It means per-batch topk in Detectron1, but per-image topk here.
# See the "find_top_rpn_proposals" function for details.
_C.MODEL.RPN.POST_NMS_TOPK_TRAIN = 2000
_C.MODEL.RPN.POST_NMS_TOPK_TEST = 1000
# NMS threshold used on RPN proposals
_C.MODEL.RPN.NMS_THRESH = 0.7
# Set this to -1 to use the same number of output channels as input channels.
_C.MODEL.RPN.CONV_DIMS = [-1]

# ---------------------------------------------------------------------------- #
# ROI HEADS options
# ---------------------------------------------------------------------------- #
_C.MODEL.ROI_HEADS = CN()
_C.MODEL.ROI_HEADS.NAME = "Res5ROIHeads"
# Number of foreground classes
_C.MODEL.ROI_HEADS.NUM_CLASSES = 80
# Names of the input feature maps to be used by ROI heads
# Currently all heads (box, mask, ...) use the same input feature map list
# e.g., ["p2", "p3", "p4", "p5"] is commonly used for FPN
_C.MODEL.ROI_HEADS.IN_FEATURES = ["res4"]
# IOU overlap ratios [IOU_THRESHOLD]
# Overlap threshold for an RoI to be considered background (if < IOU_THRESHOLD)
# Overlap threshold for an RoI to be considered foreground (if >= IOU_THRESHOLD)
_C.MODEL.ROI_HEADS.IOU_THRESHOLDS = [0.5]
_C.MODEL.ROI_HEADS.IOU_LABELS = [0, 1]
# RoI minibatch size *per image* (number of regions of interest [ROIs]) during training
# Total number of RoIs per training minibatch =
#   ROI_HEADS.BATCH_SIZE_PER_IMAGE * SOLVER.IMS_PER_BATCH
# E.g., a common configuration is: 512 * 16 = 8192
_C.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
# Target fraction of RoI minibatch that is labeled foreground (i.e. class > 0)
_C.MODEL.ROI_HEADS.POSITIVE_FRACTION = 0.25

# Only used on test mode

# Minimum score threshold (assuming scores in a [0, 1] range); a value chosen to
# balance obtaining high recall with not having too many low precision
# detections that will slow down inference post processing steps (like NMS)
# A default threshold of 0.0 increases AP by ~0.2-0.3 but significantly slows down
# inference.
_C.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05
# Overlap threshold used for non-maximum suppression (suppress boxes with
# IoU >= this threshold)
_C.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.5
# If True, augment proposals with ground-truth boxes before sampling proposals to
# train ROI heads.
_C.MODEL.ROI_HEADS.PROPOSAL_APPEND_GT = True

# ---------------------------------------------------------------------------- #
# Box Head
# ---------------------------------------------------------------------------- #
_C.MODEL.ROI_BOX_HEAD = CN()
# C4 don't use head name option
# Options for non-C4 models: FastRCNNConvFCHead,
_C.MODEL.ROI_BOX_HEAD.NAME = ""
# Options are: "smooth_l1", "giou", "diou", "ciou"
_C.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_TYPE = "smooth_l1"
# The final scaling coefficient on the box regression loss, used to balance the magnitude of its
# gradients with other losses in the model. See also `MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT`.
_C.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_WEIGHT = 1.0
# Default weights on (dx, dy, dw, dh) for normalizing bbox regression targets
# These are empirically chosen to approximately lead to unit variance targets
_C.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10.0, 10.0, 5.0, 5.0)
# The transition point from L1 to L2 loss. Set to 0.0 to make the loss simply L1.
_C.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA = 0.0
_C.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION = 14
_C.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO = 0
# Type of pooling operation applied to the incoming feature map for each RoI
_C.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2"

_C.MODEL.ROI_BOX_HEAD.NUM_FC = 0
# Hidden layer dimension for FC layers in the RoI box head
_C.MODEL.ROI_BOX_HEAD.FC_DIM = 1024
_C.MODEL.ROI_BOX_HEAD.NUM_CONV = 0
# Channel dimension for Conv layers in the RoI box head
_C.MODEL.ROI_BOX_HEAD.CONV_DIM = 256
# Normalization method for the convolution layers.
# Options: "" (no norm), "GN", "SyncBN".
_C.MODEL.ROI_BOX_HEAD.NORM = ""
# Whether to use class agnostic for bbox regression
_C.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG = False
# If true, RoI heads use bounding boxes predicted by the box head rather than proposal boxes.
_C.MODEL.ROI_BOX_HEAD.TRAIN_ON_PRED_BOXES = False

# Federated loss can be used to improve the training of LVIS
_C.MODEL.ROI_BOX_HEAD.USE_FED_LOSS = False
# Sigmoid cross entropy is used with federated loss
_C.MODEL.ROI_BOX_HEAD.USE_SIGMOID_CE = False
# The power value applied to image_count when calculating frequency weight
_C.MODEL.ROI_BOX_HEAD.FED_LOSS_FREQ_WEIGHT_POWER = 0.5
# Number of classes to keep in total
_C.MODEL.ROI_BOX_HEAD.FED_LOSS_NUM_CLASSES = 50

# ---------------------------------------------------------------------------- #
# Cascaded Box Head
# ---------------------------------------------------------------------------- #
_C.MODEL.ROI_BOX_CASCADE_HEAD = CN()
# The number of cascade stages is implicitly defined by the length of the following two configs.
_C.MODEL.ROI_BOX_CASCADE_HEAD.BBOX_REG_WEIGHTS = (
    (10.0, 10.0, 5.0, 5.0),
    (20.0, 20.0, 10.0, 10.0),
    (30.0, 30.0, 15.0, 15.0),
)
_C.MODEL.ROI_BOX_CASCADE_HEAD.IOUS = (0.5, 0.6, 0.7)


# ---------------------------------------------------------------------------- #
# Mask Head
# ---------------------------------------------------------------------------- #
_C.MODEL.ROI_MASK_HEAD = CN()
_C.MODEL.ROI_MASK_HEAD.NAME = "MaskRCNNConvUpsampleHead"
_C.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION = 14
_C.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO = 0
_C.MODEL.ROI_MASK_HEAD.NUM_CONV = 0  # The number of convs in the mask head
_C.MODEL.ROI_MASK_HEAD.CONV_DIM = 256
# Normalization method for the convolution layers.
# Options: "" (no norm), "GN", "SyncBN".
_C.MODEL.ROI_MASK_HEAD.NORM = ""
# Whether to use class agnostic for mask prediction
_C.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK = False
# Type of pooling operation applied to the incoming feature map for each RoI
_C.MODEL.ROI_MASK_HEAD.POOLER_TYPE = "ROIAlignV2"


# ---------------------------------------------------------------------------- #
# Keypoint Head
# ---------------------------------------------------------------------------- #
_C.MODEL.ROI_KEYPOINT_HEAD = CN()
_C.MODEL.ROI_KEYPOINT_HEAD.NAME = "KRCNNConvDeconvUpsampleHead"
_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION = 14
_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO = 0
_C.MODEL.ROI_KEYPOINT_HEAD.CONV_DIMS = tuple(512 for _ in range(8))
_C.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = 17  # 17 is the number of keypoints in COCO.

# Images with too few (or no) keypoints are excluded from training.
_C.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE = 1
# Normalize by the total number of visible keypoints in the minibatch if True.
# Otherwise, normalize by the total number of keypoints that could ever exist
# in the minibatch.
# The keypoint softmax loss is only calculated on visible keypoints.
# Since the number of visible keypoints can vary significantly between
# minibatches, this has the effect of up-weighting the importance of
# minibatches with few visible keypoints. (Imagine the extreme case of
# only one visible keypoint versus N: in the case of N, each one
# contributes 1/N to the gradient compared to the single keypoint
# determining the gradient direction). Instead, we can normalize the
# loss by the total number of keypoints, if it were the case that all
# keypoints were visible in a full minibatch. (Returning to the example,
# this means that the one visible keypoint contributes as much as each
# of the N keypoints.)
_C.MODEL.ROI_KEYPOINT_HEAD.NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS = True
# Multi-task loss weight to use for keypoints
# Recommended values:
#   - use 1.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is True
#   - use 4.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is False
_C.MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT = 1.0
# Type of pooling operation applied to the incoming feature map for each RoI
_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_TYPE = "ROIAlignV2"

# ---------------------------------------------------------------------------- #
# Semantic Segmentation Head
# ---------------------------------------------------------------------------- #
_C.MODEL.SEM_SEG_HEAD = CN()
_C.MODEL.SEM_SEG_HEAD.NAME = "SemSegFPNHead"
_C.MODEL.SEM_SEG_HEAD.IN_FEATURES = ["p2", "p3", "p4", "p5"]
# Label in the semantic segmentation ground truth that is ignored, i.e., no loss is calculated for
# the corresponding pixel.
_C.MODEL.SEM_SEG_HEAD.IGNORE_VALUE = 255
# Number of classes in the semantic segmentation head
_C.MODEL.SEM_SEG_HEAD.NUM_CLASSES = 54
# Number of channels in the 3x3 convs inside semantic-FPN heads.
_C.MODEL.SEM_SEG_HEAD.CONVS_DIM = 128
# Outputs from semantic-FPN heads are up-scaled to the COMMON_STRIDE stride.
_C.MODEL.SEM_SEG_HEAD.COMMON_STRIDE = 4
# Normalization method for the convolution layers. Options: "" (no norm), "GN".
_C.MODEL.SEM_SEG_HEAD.NORM = "GN"
_C.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT = 1.0

_C.MODEL.PANOPTIC_FPN = CN()
# Scaling of all losses from instance detection / segmentation head.
_C.MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT = 1.0

# options when combining instance & semantic segmentation outputs
_C.MODEL.PANOPTIC_FPN.COMBINE = CN({"ENABLED": True})  # "COMBINE.ENABLED" is deprecated & not used
_C.MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH = 0.5
_C.MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT = 4096
_C.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = 0.5


# ---------------------------------------------------------------------------- #
# RetinaNet Head
# ---------------------------------------------------------------------------- #
_C.MODEL.RETINANET = CN()

# This is the number of foreground classes.
_C.MODEL.RETINANET.NUM_CLASSES = 80

_C.MODEL.RETINANET.IN_FEATURES = ["p3", "p4", "p5", "p6", "p7"]

# Convolutions to use in the cls and bbox tower
# NOTE: this doesn't include the last conv for logits
_C.MODEL.RETINANET.NUM_CONVS = 4

# IoU overlap ratio [bg, fg] for labeling anchors.
# Anchors with < bg are labeled negative (0)
# Anchors with >= bg and < fg are ignored (-1)
# Anchors with >= fg are labeled positive (1)
_C.MODEL.RETINANET.IOU_THRESHOLDS = [0.4, 0.5]
_C.MODEL.RETINANET.IOU_LABELS = [0, -1, 1]

# Prior prob for rare case (i.e. foreground) at the beginning of training.
# This is used to set the bias for the logits layer of the classifier subnet.
# This improves training stability in the case of heavy class imbalance.
_C.MODEL.RETINANET.PRIOR_PROB = 0.01

# Inference cls score threshold, only anchors with score > INFERENCE_TH are
# considered for inference (to improve speed)
_C.MODEL.RETINANET.SCORE_THRESH_TEST = 0.05
# Select topk candidates before NMS
_C.MODEL.RETINANET.TOPK_CANDIDATES_TEST = 1000
_C.MODEL.RETINANET.NMS_THRESH_TEST = 0.5

# Weights on (dx, dy, dw, dh) for normalizing Retinanet anchor regression targets
_C.MODEL.RETINANET.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0)

# Loss parameters
_C.MODEL.RETINANET.FOCAL_LOSS_GAMMA = 2.0
_C.MODEL.RETINANET.FOCAL_LOSS_ALPHA = 0.25
_C.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA = 0.1
# Options are: "smooth_l1", "giou", "diou", "ciou"
_C.MODEL.RETINANET.BBOX_REG_LOSS_TYPE = "smooth_l1"

# One of BN, SyncBN, FrozenBN, GN
# Only supports GN until unshared norm is implemented
_C.MODEL.RETINANET.NORM = ""


# ---------------------------------------------------------------------------- #
# ResNe[X]t options (ResNets = {ResNet, ResNeXt}
# Note that parts of a resnet may be used for both the backbone and the head
# These options apply to both
# ---------------------------------------------------------------------------- #
_C.MODEL.RESNETS = CN()

_C.MODEL.RESNETS.DEPTH = 50
_C.MODEL.RESNETS.OUT_FEATURES = ["res4"]  # res4 for C4 backbone, res2..5 for FPN backbone

# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt
_C.MODEL.RESNETS.NUM_GROUPS = 1

# Options: FrozenBN, GN, "SyncBN", "BN"
_C.MODEL.RESNETS.NORM = "FrozenBN"

# Baseline width of each group.
# Scaling this parameter will scale the width of all bottleneck layers.
_C.MODEL.RESNETS.WIDTH_PER_GROUP = 64

# Place the stride 2 conv on the 1x1 filter
# Use True only for the original MSRA ResNet; use False for C2 and Torch models
_C.MODEL.RESNETS.STRIDE_IN_1X1 = True

# Apply dilation in stage "res5"
_C.MODEL.RESNETS.RES5_DILATION = 1

# Output width of res2. Scaling this parameter will scale the width of all 1x1 convs in ResNet
# For R18 and R34, this needs to be set to 64
_C.MODEL.RESNETS.RES2_OUT_CHANNELS = 256
_C.MODEL.RESNETS.STEM_OUT_CHANNELS = 64

# Apply Deformable Convolution in stages
# Specify if apply deform_conv on Res2, Res3, Res4, Res5
_C.MODEL.RESNETS.DEFORM_ON_PER_STAGE = [False, False, False, False]
# Use True to use modulated deform_conv (DeformableV2, https://arxiv.org/abs/1811.11168);
# Use False for DeformableV1.
_C.MODEL.RESNETS.DEFORM_MODULATED = False
# Number of groups in deformable conv.
_C.MODEL.RESNETS.DEFORM_NUM_GROUPS = 1


# ---------------------------------------------------------------------------- #
# Solver
# ---------------------------------------------------------------------------- #
_C.SOLVER = CN()

# Options: WarmupMultiStepLR, WarmupCosineLR.
# See detectron2/solver/build.py for definition.
_C.SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR"

_C.SOLVER.MAX_ITER = 40000

_C.SOLVER.BASE_LR = 0.001
# The end lr, only used by WarmupCosineLR
_C.SOLVER.BASE_LR_END = 0.0

_C.SOLVER.MOMENTUM = 0.9

_C.SOLVER.NESTEROV = False

_C.SOLVER.WEIGHT_DECAY = 0.0001
# The weight decay that's applied to parameters of normalization layers
# (typically the affine transformation)
_C.SOLVER.WEIGHT_DECAY_NORM = 0.0

_C.SOLVER.GAMMA = 0.1
# The iteration number to decrease learning rate by GAMMA.
_C.SOLVER.STEPS = (30000,)
# Number of decays in WarmupStepWithFixedGammaLR schedule
_C.SOLVER.NUM_DECAYS = 3

_C.SOLVER.WARMUP_FACTOR = 1.0 / 1000
_C.SOLVER.WARMUP_ITERS = 1000
_C.SOLVER.WARMUP_METHOD = "linear"
# Whether to rescale the interval for the learning schedule after warmup
_C.SOLVER.RESCALE_INTERVAL = False

# Save a checkpoint after every this number of iterations
_C.SOLVER.CHECKPOINT_PERIOD = 5000

# Number of images per batch across all machines. This is also the number
# of training images per step (i.e. per iteration). If we use 16 GPUs
# and IMS_PER_BATCH = 32, each GPU will see 2 images per batch.
# May be adjusted automatically if REFERENCE_WORLD_SIZE is set.
_C.SOLVER.IMS_PER_BATCH = 16

# The reference number of workers (GPUs) this config is meant to train with.
# It takes no effect when set to 0.
# With a non-zero value, it will be used by DefaultTrainer to compute a desired
# per-worker batch size, and then scale the other related configs (total batch size,
# learning rate, etc) to match the per-worker batch size.
# See documentation of `DefaultTrainer.auto_scale_workers` for details:
_C.SOLVER.REFERENCE_WORLD_SIZE = 0

# Detectron v1 (and previous detection code) used a 2x higher LR and 0 WD for
# biases. This is not useful (at least for recent models). You should avoid
# changing these and they exist only to reproduce Detectron v1 training if
# desired.
_C.SOLVER.BIAS_LR_FACTOR = 1.0
_C.SOLVER.WEIGHT_DECAY_BIAS = None  # None means following WEIGHT_DECAY

# Gradient clipping
_C.SOLVER.CLIP_GRADIENTS = CN({"ENABLED": False})
# Type of gradient clipping, currently 2 values are supported:
# - "value": the absolute values of elements of each gradients are clipped
# - "norm": the norm of the gradient for each parameter is clipped thus
#   affecting all elements in the parameter
_C.SOLVER.CLIP_GRADIENTS.CLIP_TYPE = "value"
# Maximum absolute value used for clipping gradients
_C.SOLVER.CLIP_GRADIENTS.CLIP_VALUE = 1.0
# Floating point number p for L-p norm to be used with the "norm"
# gradient clipping type; for L-inf, please specify .inf
_C.SOLVER.CLIP_GRADIENTS.NORM_TYPE = 2.0

# Enable automatic mixed precision for training
# Note that this does not change model's inference behavior.
# To use AMP in inference, run inference under autocast()
_C.SOLVER.AMP = CN({"ENABLED": False})

# ---------------------------------------------------------------------------- #
# Specific test options
# ---------------------------------------------------------------------------- #
_C.TEST = CN()
# For end-to-end tests to verify the expected accuracy.
# Each item is [task, metric, value, tolerance]
# e.g.: [['bbox', 'AP', 38.5, 0.2]]
_C.TEST.EXPECTED_RESULTS = []
# The period (in terms of steps) to evaluate the model during training.
# Set to 0 to disable.
_C.TEST.EVAL_PERIOD = 0
# The sigmas used to calculate keypoint OKS. See http://cocodataset.org/#keypoints-eval
# When empty, it will use the defaults in COCO.
# Otherwise it should be a list[float] with the same length as ROI_KEYPOINT_HEAD.NUM_KEYPOINTS.
_C.TEST.KEYPOINT_OKS_SIGMAS = []
# Maximum number of detections to return per image during inference (100 is
# based on the limit established for the COCO dataset).
_C.TEST.DETECTIONS_PER_IMAGE = 100

_C.TEST.AUG = CN({"ENABLED": False})
_C.TEST.AUG.MIN_SIZES = (400, 500, 600, 700, 800, 900, 1000, 1100, 1200)
_C.TEST.AUG.MAX_SIZE = 4000
_C.TEST.AUG.FLIP = True

_C.TEST.PRECISE_BN = CN({"ENABLED": False})
_C.TEST.PRECISE_BN.NUM_ITER = 200

# ---------------------------------------------------------------------------- #
# Misc options
# ---------------------------------------------------------------------------- #
# Directory where output files are written
_C.OUTPUT_DIR = "./output"
# Set seed to negative to fully randomize everything.
# Set seed to positive to use a fixed seed. Note that a fixed seed increases
# reproducibility but does not guarantee fully deterministic behavior.
# Disabling all parallelism further increases reproducibility.
_C.SEED = -1
# Benchmark different cudnn algorithms.
# If input images have very different sizes, this option will have large overhead
# for about 10k iterations. It usually hurts total time, but can benefit for certain models.
# If input images have the same or similar sizes, benchmark is often helpful.
_C.CUDNN_BENCHMARK = False
# Option to set PyTorch matmul and CuDNN's float32 precision. When set to non-empty string,
# the corresponding precision ("highest", "high" or "medium") will be used. The highest
# precision will effectively disable tf32.
_C.FLOAT32_PRECISION = ""
# The period (in terms of steps) for minibatch visualization at train time.
# Set to 0 to disable.
_C.VIS_PERIOD = 0

# global config is for quick hack purposes.
# You can set them in command line or config files,
# and access it with:
#
#   from detectron2.config import global_cfg
#   print(global_cfg.HACK)
#
# Do not commit any configs into it.
_C.GLOBAL = CN()
_C.GLOBAL.HACK = 1.0
detectron2/config/instantiate.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ import collections.abc as abc
4
+ import dataclasses
5
+ import logging
6
+ from typing import Any
7
+
8
+ from detectron2.utils.registry import _convert_target_to_string, locate
9
+
10
+ __all__ = ["dump_dataclass", "instantiate"]
11
+
12
+
13
def dump_dataclass(obj: Any) -> dict:
    """
    Dump a dataclass recursively into a dict that can be later instantiated.

    Args:
        obj: a dataclass *instance* (not the dataclass type itself)

    Returns:
        dict: a mapping with "_target_" set to the dotted path of the dataclass
            type, plus one entry per dataclass field. Nested dataclasses (also
            inside lists/tuples) are dumped recursively.
    """
    assert dataclasses.is_dataclass(obj) and not isinstance(
        obj, type
    ), "dump_dataclass() requires an instance of a dataclass."
    ret = {"_target_": _convert_target_to_string(type(obj))}
    for f in dataclasses.fields(obj):
        v = getattr(obj, f.name)
        if dataclasses.is_dataclass(v):
            v = dump_dataclass(v)
        if isinstance(v, (list, tuple)):
            # NOTE: tuples are converted to lists here so the dump stays
            # compatible with plain-dict/omegaconf round-tripping.
            v = [dump_dataclass(x) if dataclasses.is_dataclass(x) else x for x in v]
        ret[f.name] = v
    return ret
35
+
36
+
37
def instantiate(cfg):
    """
    Recursively instantiate objects defined in dictionaries by
    "_target_" and arguments.

    Args:
        cfg: a dict-like object with "_target_" that defines the caller, and
            other keys that define the arguments

    Returns:
        object instantiated by cfg
    """
    from omegaconf import ListConfig, DictConfig, OmegaConf

    if isinstance(cfg, ListConfig):
        lst = [instantiate(x) for x in cfg]
        return ListConfig(lst, flags={"allow_objects": True})
    if isinstance(cfg, list):
        # Specialize for list, because many classes take
        # list[objects] as arguments, such as ResNet, DatasetMapper
        return [instantiate(x) for x in cfg]

    # If input is a DictConfig backed by dataclasses (i.e. omegaconf's structured config),
    # instantiate it to the actual dataclass.
    if isinstance(cfg, DictConfig) and dataclasses.is_dataclass(cfg._metadata.object_type):
        return OmegaConf.to_object(cfg)

    if isinstance(cfg, abc.Mapping) and "_target_" in cfg:
        # conceptually equivalent to hydra.utils.instantiate(cfg) with _convert_=all,
        # but faster: https://github.com/facebookresearch/hydra/issues/1200
        cfg = {k: instantiate(v) for k, v in cfg.items()}
        cls = cfg.pop("_target_")
        cls = instantiate(cls)  # "_target_" itself may be a nested config

        if isinstance(cls, str):
            cls_name = cls
            cls = locate(cls_name)
            assert cls is not None, cls_name
        else:
            try:
                cls_name = cls.__module__ + "." + cls.__qualname__
            except Exception:
                # target could be anything, so the above could fail
                cls_name = str(cls)
        assert callable(cls), f"_target_ {cls} does not define a callable object"
        try:
            return cls(**cfg)
        except TypeError:
            logger = logging.getLogger(__name__)
            logger.error(f"Error when instantiating {cls_name}!")
            raise
    return cfg  # return as-is if don't know what to do
detectron2/config/lazy.py ADDED
@@ -0,0 +1,436 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ import ast
4
+ import builtins
5
+ import collections.abc as abc
6
+ import importlib
7
+ import inspect
8
+ import logging
9
+ import os
10
+ import uuid
11
+ from contextlib import contextmanager
12
+ from copy import deepcopy
13
+ from dataclasses import is_dataclass
14
+ from typing import List, Tuple, Union
15
+ import cloudpickle
16
+ import yaml
17
+ from omegaconf import DictConfig, ListConfig, OmegaConf, SCMode
18
+
19
+ from detectron2.utils.file_io import PathManager
20
+ from detectron2.utils.registry import _convert_target_to_string
21
+
22
+ __all__ = ["LazyCall", "LazyConfig"]
23
+
24
+
25
class LazyCall:
    """
    Wrap a callable so that when it's called, the call will not be executed,
    but returns a dict that describes the call.

    LazyCall object has to be called with only keyword arguments. Positional
    arguments are not yet supported.

    Examples:
    ::
        from detectron2.config import instantiate, LazyCall

        layer_cfg = LazyCall(nn.Conv2d)(in_channels=32, out_channels=32)
        layer_cfg.out_channels = 64    # can edit it afterwards
        layer = instantiate(layer_cfg)
    """

    def __init__(self, target):
        # Accept callables, dotted-path strings, and mappings that define "_target_".
        if not (callable(target) or isinstance(target, (str, abc.Mapping))):
            raise TypeError(
                f"target of LazyCall must be a callable or defines a callable! Got {target}"
            )
        self._target = target

    def __call__(self, **kwargs):
        if is_dataclass(self._target):
            # omegaconf object cannot hold dataclass type
            # https://github.com/omry/omegaconf/issues/784
            target = _convert_target_to_string(self._target)
        else:
            target = self._target
        kwargs["_target_"] = target

        # "allow_objects" lets the DictConfig carry arbitrary python objects.
        return DictConfig(content=kwargs, flags={"allow_objects": True})
59
+
60
+
61
def _visit_dict_config(cfg, func):
    """
    Apply ``func`` recursively to every DictConfig contained in ``cfg``.

    Args:
        cfg: a DictConfig, a ListConfig, or any other object (ignored).
        func: a callable taking a DictConfig; called for side effects only.
    """
    if isinstance(cfg, DictConfig):
        func(cfg)
        for v in cfg.values():
            _visit_dict_config(v, func)
    elif isinstance(cfg, ListConfig):
        for v in cfg:
            _visit_dict_config(v, func)
72
+
73
+
74
def _validate_py_syntax(filename):
    """
    Parse ``filename`` with ``ast`` and raise SyntaxError if it is not valid Python.

    Raises:
        SyntaxError: chained from the original parse error; includes the filename
            so the user knows which config file is broken.
    """
    # see also https://github.com/open-mmlab/mmcv/blob/master/mmcv/utils/config.py
    with PathManager.open(filename, "r") as f:
        content = f.read()
    try:
        ast.parse(content)
    except SyntaxError as e:
        # Include the offending filename in the message (the previous message
        # was an f-string with no placeholder, so the filename was lost).
        raise SyntaxError(f"Config file {filename} has syntax error!") from e
82
+
83
+
84
def _cast_to_config(obj):
    """Wrap a plain dict in a DictConfig; return any other object unchanged."""
    if isinstance(obj, dict):
        return DictConfig(obj, flags={"allow_objects": True})
    return obj
89
+
90
+
91
+ _CFG_PACKAGE_NAME = "detectron2._cfg_loader"
92
+ """
93
+ A namespace to put all imported config into.
94
+ """
95
+
96
+
97
def _random_package_name(filename):
    """
    Generate a unique, throwaway package name for a config file so repeated
    loads never collide in ``sys.modules``-style caches.
    """
    return _CFG_PACKAGE_NAME + str(uuid.uuid4())[:4] + "." + os.path.basename(filename)
100
+
101
+
102
+ @contextmanager
103
+ def _patch_import():
104
+ """
105
+ Enhance relative import statements in config files, so that they:
106
+ 1. locate files purely based on relative location, regardless of packages.
107
+ e.g. you can import file without having __init__
108
+ 2. do not cache modules globally; modifications of module states has no side effect
109
+ 3. support other storage system through PathManager, so config files can be in the cloud
110
+ 4. imported dict are turned into omegaconf.DictConfig automatically
111
+ """
112
+ old_import = builtins.__import__
113
+
114
+ def find_relative_file(original_file, relative_import_path, level):
115
+ # NOTE: "from . import x" is not handled. Because then it's unclear
116
+ # if such import should produce `x` as a python module or DictConfig.
117
+ # This can be discussed further if needed.
118
+ relative_import_err = """
119
+ Relative import of directories is not allowed within config files.
120
+ Within a config file, relative import can only import other config files.
121
+ """.replace(
122
+ "\n", " "
123
+ )
124
+ if not len(relative_import_path):
125
+ raise ImportError(relative_import_err)
126
+
127
+ cur_file = os.path.dirname(original_file)
128
+ for _ in range(level - 1):
129
+ cur_file = os.path.dirname(cur_file)
130
+ cur_name = relative_import_path.lstrip(".")
131
+ for part in cur_name.split("."):
132
+ cur_file = os.path.join(cur_file, part)
133
+ if not cur_file.endswith(".py"):
134
+ cur_file += ".py"
135
+ if not PathManager.isfile(cur_file):
136
+ cur_file_no_suffix = cur_file[: -len(".py")]
137
+ if PathManager.isdir(cur_file_no_suffix):
138
+ raise ImportError(f"Cannot import from {cur_file_no_suffix}." + relative_import_err)
139
+ else:
140
+ raise ImportError(
141
+ f"Cannot import name {relative_import_path} from "
142
+ f"{original_file}: {cur_file} does not exist."
143
+ )
144
+ return cur_file
145
+
146
def new_import(name, globals=None, locals=None, fromlist=(), level=0):
    """
    Replacement for ``builtins.__import__`` while a config file is executing.

    Intercepts only relative imports (level != 0) whose importing module lives
    in the synthetic config package (``__package__`` starts with
    ``_CFG_PACKAGE_NAME``); everything else is forwarded to the saved
    ``old_import`` unchanged.
    """
    if (
        # Only deal with relative imports inside config files
        level != 0
        and globals is not None
        and (globals.get("__package__", "") or "").startswith(_CFG_PACKAGE_NAME)
    ):
        # Map the relative import to a concrete .py config file and check it parses.
        cur_file = find_relative_file(globals["__file__"], name, level)
        _validate_py_syntax(cur_file)
        # Build a throwaway module with a randomized name so repeated loads
        # of the same file do not collide in sys.modules semantics.
        spec = importlib.machinery.ModuleSpec(
            _random_package_name(cur_file), None, origin=cur_file
        )
        module = importlib.util.module_from_spec(spec)
        module.__file__ = cur_file
        # Execute the config file's source in the module namespace; compiling
        # with cur_file keeps the real filename in tracebacks.
        with PathManager.open(cur_file) as f:
            content = f.read()
        exec(compile(content, cur_file, "exec"), module.__dict__)
        # NOTE: this loop shadows the `name` parameter; harmless here because
        # the parameter is not used again before the return.
        for name in fromlist:  # turn imported dict into DictConfig automatically
            val = _cast_to_config(module.__dict__[name])
            module.__dict__[name] = val
        return module
    return old_import(name, globals, locals, fromlist=fromlist, level=level)
168
+
169
+ builtins.__import__ = new_import
170
+ yield new_import
171
+ builtins.__import__ = old_import
172
+
173
+
174
class LazyConfig:
    """
    Provide methods to save, load, and overrides an omegaconf config object
    which may contain definition of lazily-constructed objects.
    """

    @staticmethod
    def load_rel(filename: str, keys: Union[None, str, Tuple[str, ...]] = None):
        """
        Similar to :meth:`load()`, but load path relative to the caller's
        source file.

        This has the same functionality as a relative import, except that this method
        accepts filename as a string, so more characters are allowed in the filename.
        """
        caller_frame = inspect.stack()[1]
        caller_fname = caller_frame[0].f_code.co_filename
        assert caller_fname != "<string>", "load_rel Unable to find caller"
        caller_dir = os.path.dirname(caller_fname)
        filename = os.path.join(caller_dir, filename)
        return LazyConfig.load(filename, keys)

    @staticmethod
    def load(filename: str, keys: Union[None, str, Tuple[str, ...]] = None):
        """
        Load a config file.

        Args:
            filename: absolute path or relative path w.r.t. the current working directory
            keys: keys to load and return. If not given, return all keys
                (whose values are config objects) in a dict.

        Raises:
            ValueError: if the file is not a .py/.yaml/.yml file.
        """
        has_keys = keys is not None
        filename = filename.replace("/./", "/")  # redundant
        if os.path.splitext(filename)[1] not in [".py", ".yaml", ".yml"]:
            # FIX: the message previously contained no placeholder; include the
            # offending filename so the error is actionable.
            raise ValueError(f"Config file {filename} has to be a python or yaml file.")
        if filename.endswith(".py"):
            _validate_py_syntax(filename)

            with _patch_import():
                # Record the filename
                module_namespace = {
                    "__file__": filename,
                    "__package__": _random_package_name(filename),
                }
                with PathManager.open(filename) as f:
                    content = f.read()
                # Compile first with filename to:
                # 1. make filename appears in stacktrace
                # 2. make load_rel able to find its parent's (possibly remote) location
                exec(compile(content, filename, "exec"), module_namespace)

            ret = module_namespace
        else:
            with PathManager.open(filename) as f:
                # SECURITY: unsafe_load can construct arbitrary python objects;
                # only load trusted config files.
                obj = yaml.unsafe_load(f)
            ret = OmegaConf.create(obj, flags={"allow_objects": True})

        if has_keys:
            if isinstance(keys, str):
                return _cast_to_config(ret[keys])
            else:
                return tuple(_cast_to_config(ret[a]) for a in keys)
        else:
            if filename.endswith(".py"):
                # when not specified, only load those that are config objects
                ret = DictConfig(
                    {
                        name: _cast_to_config(value)
                        for name, value in ret.items()
                        if isinstance(value, (DictConfig, ListConfig, dict))
                        and not name.startswith("_")
                    },
                    flags={"allow_objects": True},
                )
            return ret

    @staticmethod
    def save(cfg, filename: str):
        """
        Save a config object to a yaml file.
        Note that when the config dictionary contains complex objects (e.g. lambda),
        it can't be saved to yaml. In that case we will print an error and
        attempt to save to a pkl file instead.

        Args:
            cfg: an omegaconf config object
            filename: yaml file name to save the config file
        """
        logger = logging.getLogger(__name__)
        try:
            cfg = deepcopy(cfg)
        except Exception:
            # Not deep-copyable: serialize the original object as-is.
            pass
        else:
            # if it's deep-copyable, then...
            def _replace_type_by_name(x):
                if "_target_" in x and callable(x._target_):
                    try:
                        x._target_ = _convert_target_to_string(x._target_)
                    except AttributeError:
                        pass

            # not necessary, but makes yaml looks nicer
            _visit_dict_config(cfg, _replace_type_by_name)

        save_pkl = False
        try:
            # FIX: renamed from `dict`, which shadowed the builtin.
            config_dict = OmegaConf.to_container(
                cfg,
                # Do not resolve interpolation when saving, i.e. do not turn ${a} into
                # actual values when saving.
                resolve=False,
                # Save structures (dataclasses) in a format that can be instantiated later.
                # Without this option, the type information of the dataclass will be erased.
                structured_config_mode=SCMode.INSTANTIATE,
            )
            dumped = yaml.dump(
                config_dict, default_flow_style=None, allow_unicode=True, width=9999
            )
            with PathManager.open(filename, "w") as f:
                f.write(dumped)

            try:
                _ = yaml.unsafe_load(dumped)  # test that it is loadable
            except Exception:
                # FIX: the message previously said "(unknown)" instead of the file name.
                logger.warning(
                    "The config contains objects that cannot serialize to a valid yaml. "
                    f"{filename} is human-readable but cannot be loaded."
                )
                save_pkl = True
        except Exception:
            logger.exception("Unable to serialize the config to yaml. Error:")
            save_pkl = True

        if save_pkl:
            new_filename = filename + ".pkl"
            try:
                # retry by pickle
                with PathManager.open(new_filename, "wb") as f:
                    cloudpickle.dump(cfg, f)
                logger.warning(f"Config is saved using cloudpickle at {new_filename}.")
            except Exception:
                # FIX: was a silent `pass`; log so a total save failure is visible.
                logger.exception(f"Unable to save config to {new_filename}.")

    @staticmethod
    def apply_overrides(cfg, overrides: List[str]):
        """
        In-place override contents of cfg.

        Args:
            cfg: an omegaconf config object
            overrides: list of strings in the format of "a=b" to override configs.
                See https://hydra.cc/docs/next/advanced/override_grammar/basic/
                for syntax.

        Returns:
            the cfg object
        """

        def safe_update(cfg, key, value):
            # Refuse to auto-create intermediate nodes through a non-config value.
            parts = key.split(".")
            for idx in range(1, len(parts)):
                prefix = ".".join(parts[:idx])
                v = OmegaConf.select(cfg, prefix, default=None)
                if v is None:
                    break
                if not OmegaConf.is_config(v):
                    raise KeyError(
                        f"Trying to update key {key}, but {prefix} "
                        f"is not a config, but has type {type(v)}."
                    )
            OmegaConf.update(cfg, key, value, merge=True)

        try:
            from hydra.core.override_parser.overrides_parser import OverridesParser

            has_hydra = True
        except ImportError:
            has_hydra = False

        if has_hydra:
            parser = OverridesParser.create()
            overrides = parser.parse_overrides(overrides)
            for o in overrides:
                key = o.key_or_group
                value = o.value()
                if o.is_delete():
                    # TODO support this
                    raise NotImplementedError("deletion is not yet a supported override")
                safe_update(cfg, key, value)
        else:
            # Fallback. Does not support all the features and error checking like hydra.
            for o in overrides:
                # FIX: split only on the first "=" so values may contain "=".
                key, value = o.split("=", 1)
                try:
                    value = ast.literal_eval(value)
                except (ValueError, SyntaxError):
                    # FIX: literal_eval raises ValueError/SyntaxError (never
                    # NameError) for non-literals; keep the raw string then.
                    pass
                safe_update(cfg, key, value)
        return cfg

    @staticmethod
    def to_py(cfg, prefix: str = "cfg."):
        """
        Try to convert a config object into Python-like psuedo code.

        Note that perfect conversion is not always possible. So the returned
        results are mainly meant to be human-readable, and not meant to be executed.

        Args:
            cfg: an omegaconf config object
            prefix: root name for the resulting code (default: "cfg.")

        Returns:
            str of formatted Python code
        """
        import black

        cfg = OmegaConf.to_container(cfg, resolve=True)

        def _to_str(obj, prefix=None, inside_call=False):
            if prefix is None:
                prefix = []
            if isinstance(obj, abc.Mapping) and "_target_" in obj:
                # Dict representing a function call
                target = _convert_target_to_string(obj.pop("_target_"))
                args = []
                for k, v in sorted(obj.items()):
                    args.append(f"{k}={_to_str(v, inside_call=True)}")
                args = ", ".join(args)
                call = f"{target}({args})"
                return "".join(prefix) + call
            elif isinstance(obj, abc.Mapping) and not inside_call:
                # Dict that is not inside a call is a list of top-level config objects that we
                # render as one object per line with dot separated prefixes
                key_list = []
                for k, v in sorted(obj.items()):
                    if isinstance(v, abc.Mapping) and "_target_" not in v:
                        key_list.append(_to_str(v, prefix=prefix + [k + "."]))
                    else:
                        key = "".join(prefix) + k
                        key_list.append(f"{key}={_to_str(v)}")
                return "\n".join(key_list)
            elif isinstance(obj, abc.Mapping):
                # Dict that is inside a call is rendered as a regular dict
                return (
                    "{"
                    + ",".join(
                        f"{repr(k)}: {_to_str(v, inside_call=inside_call)}"
                        for k, v in sorted(obj.items())
                    )
                    + "}"
                )
            elif isinstance(obj, list):
                return "[" + ",".join(_to_str(x, inside_call=inside_call) for x in obj) + "]"
            else:
                return repr(obj)

        py_str = _to_str(cfg, prefix=[prefix])
        try:
            return black.format_str(py_str, mode=black.Mode())
        except black.InvalidInput:
            # black cannot parse the pseudo-code: return it unformatted.
            return py_str
detectron2/data/__init__.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ from . import transforms # isort:skip
3
+
4
+ from .build import (
5
+ build_batch_data_loader,
6
+ build_detection_test_loader,
7
+ build_detection_train_loader,
8
+ get_detection_dataset_dicts,
9
+ load_proposals_into_dataset,
10
+ print_instances_class_histogram,
11
+ )
12
+ from .catalog import DatasetCatalog, MetadataCatalog, Metadata
13
+ from .common import DatasetFromList, MapDataset, ToIterableDataset
14
+ from .dataset_mapper import DatasetMapper
15
+
16
+ # ensure the builtin datasets are registered
17
+ from . import datasets, samplers # isort:skip
18
+
19
+ __all__ = [k for k in globals().keys() if not k.startswith("_")]
detectron2/data/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (1.26 kB). View file
 
detectron2/data/__pycache__/build.cpython-311.pyc ADDED
Binary file (34.5 kB). View file
 
detectron2/data/__pycache__/catalog.cpython-311.pyc ADDED
Binary file (10.8 kB). View file
 
detectron2/data/__pycache__/common.cpython-311.pyc ADDED
Binary file (18.4 kB). View file
 
detectron2/data/__pycache__/dataset_mapper.cpython-311.pyc ADDED
Binary file (8.89 kB). View file
 
detectron2/data/__pycache__/detection_utils.cpython-311.pyc ADDED
Binary file (32.1 kB). View file
 
detectron2/data/benchmark.py ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ import logging
3
+ import numpy as np
4
+ from itertools import count
5
+ from typing import List, Tuple
6
+ import torch
7
+ import tqdm
8
+ from fvcore.common.timer import Timer
9
+
10
+ from detectron2.utils import comm
11
+
12
+ from .build import build_batch_data_loader
13
+ from .common import DatasetFromList, MapDataset
14
+ from .samplers import TrainingSampler
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
+ class _EmptyMapDataset(torch.utils.data.Dataset):
20
+ """
21
+ Map anything to emptiness.
22
+ """
23
+
24
+ def __init__(self, dataset):
25
+ self.ds = dataset
26
+
27
+ def __len__(self):
28
+ return len(self.ds)
29
+
30
+ def __getitem__(self, idx):
31
+ _ = self.ds[idx]
32
+ return [0]
33
+
34
+
35
def iter_benchmark(
    iterator, num_iter: int, warmup: int = 5, max_time_seconds: float = 60
) -> Tuple[float, List[float]]:
    """
    Benchmark an iterator/iterable for `num_iter` iterations with an extra
    `warmup` iterations of warmup.
    End early if `max_time_seconds` time is spent on iterations.

    Returns:
        float: average time (seconds) per iteration
        list[float]: time spent on each iteration. Sometimes useful for further analysis.
    """
    num_iter, warmup = int(num_iter), int(warmup)

    it = iter(iterator)
    # Warmup iterations are not timed.
    for _ in range(warmup):
        next(it)

    timer = Timer()
    per_iter_times = []
    for step in tqdm.trange(num_iter):
        elapsed = timer.seconds()
        if elapsed > max_time_seconds:
            # Stop early; only `step` iterations were actually measured.
            num_iter = step
            break
        next(it)
        per_iter_times.append(timer.seconds() - elapsed)
    return timer.seconds() / num_iter, per_iter_times
63
+
64
+
65
class DataLoaderBenchmark:
    """
    Some common benchmarks that help understand perf bottleneck of a standard dataloader
    made of dataset, mapper and sampler.
    """

    def __init__(
        self,
        dataset,
        *,
        mapper,
        sampler=None,
        total_batch_size,
        num_workers=0,
        max_time_seconds: int = 90,
    ):
        """
        Args:
            max_time_seconds (int): maximum time to spent for each benchmark
            other args: same as in `build.py:build_detection_train_loader`
        """
        if isinstance(dataset, list):
            # Serialize list-of-dicts into a compact in-memory dataset, matching
            # what the real training loader does.
            dataset = DatasetFromList(dataset, copy=False, serialize=True)
        if sampler is None:
            sampler = TrainingSampler(len(dataset))

        self.dataset = dataset
        self.mapper = mapper
        self.sampler = sampler
        self.total_batch_size = total_batch_size
        self.num_workers = num_workers
        # Per-process batch size; assumes total_batch_size divides evenly
        # across the world size (same convention as build_batch_data_loader).
        self.per_gpu_batch_size = self.total_batch_size // comm.get_world_size()

        self.max_time_seconds = max_time_seconds

    def _benchmark(self, iterator, num_iter, warmup, msg=None):
        # Thin wrapper over iter_benchmark that optionally logs the result.
        avg, all_times = iter_benchmark(iterator, num_iter, warmup, self.max_time_seconds)
        if msg is not None:
            self._log_time(msg, avg, all_times)
        return avg, all_times

    def _log_time(self, msg, avg, all_times, distributed=False):
        # Log throughput (it/s) plus tail latency percentiles.
        # NOTE(review): np.percentile's `interpolation=` kwarg was renamed to
        # `method=` in NumPy 1.22 and removed in NumPy 2.0 — confirm the
        # pinned numpy version supports it.
        percentiles = [np.percentile(all_times, k, interpolation="nearest") for k in [1, 5, 95, 99]]
        if not distributed:
            logger.info(
                f"{msg}: avg={1.0/avg:.1f} it/s, "
                f"p1={percentiles[0]:.2g}s, p5={percentiles[1]:.2g}s, "
                f"p95={percentiles[2]:.2g}s, p99={percentiles[3]:.2g}s."
            )
            return
        # Distributed mode: gather everyone's stats, then only rank 0 logs
        # one line per GPU.
        avg_per_gpu = comm.all_gather(avg)
        percentiles_per_gpu = comm.all_gather(percentiles)
        if comm.get_rank() > 0:
            return
        for idx, avg, percentiles in zip(count(), avg_per_gpu, percentiles_per_gpu):
            logger.info(
                f"GPU{idx} {msg}: avg={1.0/avg:.1f} it/s, "
                f"p1={percentiles[0]:.2g}s, p5={percentiles[1]:.2g}s, "
                f"p95={percentiles[2]:.2g}s, p99={percentiles[3]:.2g}s."
            )

    def benchmark_dataset(self, num_iter, warmup=5):
        """
        Benchmark the speed of taking raw samples from the dataset.
        """

        def loader():
            # Endless stream of raw (unmapped) samples in sampler order.
            while True:
                for k in self.sampler:
                    yield self.dataset[k]

        self._benchmark(loader(), num_iter, warmup, "Dataset Alone")

    def benchmark_mapper(self, num_iter, warmup=5):
        """
        Benchmark the speed of taking raw samples from the dataset and map
        them in a single process.
        """

        def loader():
            # Same as benchmark_dataset, but includes the mapper cost.
            while True:
                for k in self.sampler:
                    yield self.mapper(self.dataset[k])

        self._benchmark(loader(), num_iter, warmup, "Single Process Mapper (sec/sample)")

    def benchmark_workers(self, num_iter, warmup=10):
        """
        Benchmark the dataloader by tuning num_workers to [0, 1, self.num_workers].
        """
        candidates = [0, 1]
        if self.num_workers not in candidates:
            candidates.append(self.num_workers)

        dataset = MapDataset(self.dataset, self.mapper)
        for n in candidates:
            loader = build_batch_data_loader(
                dataset,
                self.sampler,
                self.total_batch_size,
                num_workers=n,
            )
            # Scale iteration counts by worker count so each worker process
            # still performs roughly `num_iter` iterations.
            self._benchmark(
                iter(loader),
                num_iter * max(n, 1),
                warmup * max(n, 1),
                f"DataLoader ({n} workers, bs={self.per_gpu_batch_size})",
            )
            del loader

    def benchmark_IPC(self, num_iter, warmup=10):
        """
        Benchmark the dataloader where each worker outputs nothing. This
        eliminates the IPC overhead compared to the regular dataloader.

        PyTorch multiprocessing's IPC only optimizes for torch tensors.
        Large numpy arrays or other data structure may incur large IPC overhead.
        """
        n = self.num_workers
        # _EmptyMapDataset still runs the mapper but sends only a constant
        # back to the main process, isolating IPC cost.
        dataset = _EmptyMapDataset(MapDataset(self.dataset, self.mapper))
        loader = build_batch_data_loader(
            dataset, self.sampler, self.total_batch_size, num_workers=n
        )
        self._benchmark(
            iter(loader),
            num_iter * max(n, 1),
            warmup * max(n, 1),
            f"DataLoader ({n} workers, bs={self.per_gpu_batch_size}) w/o comm",
        )

    def benchmark_distributed(self, num_iter, warmup=10):
        """
        Benchmark the dataloader in each distributed worker, and log results of
        all workers. This helps understand the final performance as well as
        the variances among workers.

        It also prints startup time (first iter) of the dataloader.
        """
        gpu = comm.get_world_size()
        dataset = MapDataset(self.dataset, self.mapper)
        n = self.num_workers
        loader = build_batch_data_loader(
            dataset, self.sampler, self.total_batch_size, num_workers=n
        )

        # Time the first batch separately: it includes worker spawn and
        # prefetch warmup.
        timer = Timer()
        loader = iter(loader)
        next(loader)
        startup_time = timer.seconds()
        logger.info("Dataloader startup time: {:.2f} seconds".format(startup_time))

        comm.synchronize()

        avg, all_times = self._benchmark(loader, num_iter * max(n, 1), warmup * max(n, 1))
        del loader
        self._log_time(
            f"DataLoader ({gpu} GPUs x {n} workers, total bs={self.total_batch_size})",
            avg,
            all_times,
            True,
        )
detectron2/data/build.py ADDED
@@ -0,0 +1,694 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ import itertools
3
+ import logging
4
+ import numpy as np
5
+ import operator
6
+ import pickle
7
+ from collections import OrderedDict, defaultdict
8
+ from typing import Any, Callable, Dict, List, Optional, Union
9
+ import torch
10
+ import torch.utils.data as torchdata
11
+ from tabulate import tabulate
12
+ from termcolor import colored
13
+
14
+ from detectron2.config import configurable
15
+ from detectron2.structures import BoxMode
16
+ from detectron2.utils.comm import get_world_size
17
+ from detectron2.utils.env import seed_all_rng
18
+ from detectron2.utils.file_io import PathManager
19
+ from detectron2.utils.logger import _log_api_usage, log_first_n
20
+
21
+ from .catalog import DatasetCatalog, MetadataCatalog
22
+ from .common import AspectRatioGroupedDataset, DatasetFromList, MapDataset, ToIterableDataset
23
+ from .dataset_mapper import DatasetMapper
24
+ from .detection_utils import check_metadata_consistency
25
+ from .samplers import (
26
+ InferenceSampler,
27
+ RandomSubsetTrainingSampler,
28
+ RepeatFactorTrainingSampler,
29
+ TrainingSampler,
30
+ )
31
+
32
+ """
33
+ This file contains the default logic to build a dataloader for training or testing.
34
+ """
35
+
36
+ __all__ = [
37
+ "build_batch_data_loader",
38
+ "build_detection_train_loader",
39
+ "build_detection_test_loader",
40
+ "get_detection_dataset_dicts",
41
+ "load_proposals_into_dataset",
42
+ "print_instances_class_histogram",
43
+ ]
44
+
45
+
46
def filter_images_with_only_crowd_annotations(dataset_dicts):
    """
    Filter out images with none annotations or only crowd annotations
    (i.e., images without non-crowd annotations).
    A common training-time preprocessing on COCO dataset.

    Args:
        dataset_dicts (list[dict]): annotations in Detectron2 Dataset format.

    Returns:
        list[dict]: the same format, but filtered.
    """
    num_before = len(dataset_dicts)

    def has_non_crowd(annotations):
        # An annotation without "iscrowd" counts as non-crowd.
        return any(ann.get("iscrowd", 0) == 0 for ann in annotations)

    kept = [record for record in dataset_dicts if has_non_crowd(record["annotations"])]
    logging.getLogger(__name__).info(
        "Removed {} images with no usable annotations. {} images left.".format(
            num_before - len(kept), len(kept)
        )
    )
    return kept
75
+
76
+
77
def filter_images_with_few_keypoints(dataset_dicts, min_keypoints_per_image):
    """
    Filter out images with too few number of keypoints.

    Args:
        dataset_dicts (list[dict]): annotations in Detectron2 Dataset format.
        min_keypoints_per_image (int): images with fewer visible keypoints are dropped.

    Returns:
        list[dict]: the same format as dataset_dicts, but filtered.
    """
    num_before = len(dataset_dicts)

    def count_visible_keypoints(record):
        # Each keypoints field has the format [x1, y1, v1, ...], where v is visibility;
        # keypoints with v > 0 are counted.
        return sum(
            (np.array(ann["keypoints"][2::3]) > 0).sum()
            for ann in record["annotations"]
            if "keypoints" in ann
        )

    kept = [
        record
        for record in dataset_dicts
        if count_visible_keypoints(record) >= min_keypoints_per_image
    ]
    logging.getLogger(__name__).info(
        "Removed {} images with fewer than {} keypoints.".format(
            num_before - len(kept), min_keypoints_per_image
        )
    )
    return kept
109
+
110
+
111
def load_proposals_into_dataset(dataset_dicts, proposal_file):
    """
    Load precomputed object proposals into the dataset.

    The proposal file should be a pickled dict with the following keys:

    - "ids": list[int] or list[str], the image ids
    - "boxes": list[np.ndarray], each is an Nx4 array of boxes corresponding to the image id
    - "objectness_logits": list[np.ndarray], each is an N sized array of objectness scores
      corresponding to the boxes.
    - "bbox_mode": the BoxMode of the boxes array. Defaults to ``BoxMode.XYXY_ABS``.

    Args:
        dataset_dicts (list[dict]): annotations in Detectron2 Dataset format.
        proposal_file (str): file path of pre-computed proposals, in pkl format.

    Returns:
        list[dict]: the same format as dataset_dicts, but added proposal field.
    """
    logger = logging.getLogger(__name__)
    logger.info("Loading proposals from: {}".format(proposal_file))

    with PathManager.open(proposal_file, "rb") as f:
        proposals = pickle.load(f, encoding="latin1")

    # Rename the key names in D1 proposal files
    for old_key, new_key in (("indexes", "ids"), ("scores", "objectness_logits")):
        if old_key in proposals:
            proposals[new_key] = proposals.pop(old_key)

    # Fetch the indexes of all proposals that are in the dataset
    # Convert image_id to str since they could be int.
    img_ids = {str(record["image_id"]) for record in dataset_dicts}
    id_to_index = {
        str(img_id): idx
        for idx, img_id in enumerate(proposals["ids"])
        if str(img_id) in img_ids
    }

    if "bbox_mode" in proposals:
        bbox_mode = BoxMode(proposals["bbox_mode"])
    else:
        # Assuming default bbox_mode of precomputed proposals are 'XYXY_ABS'
        bbox_mode = BoxMode.XYXY_ABS

    for record in dataset_dicts:
        # Get the index of the proposal
        idx = id_to_index[str(record["image_id"])]
        scores = proposals["objectness_logits"][idx]
        # Sort the proposals in descending order of the scores
        order = scores.argsort()[::-1]
        record["proposal_boxes"] = proposals["boxes"][idx][order]
        record["proposal_objectness_logits"] = scores[order]
        record["proposal_bbox_mode"] = bbox_mode

    return dataset_dicts
163
+
164
+
165
def print_instances_class_histogram(dataset_dicts, class_names):
    """
    Log a table showing how many (non-crowd) instances of each class the dataset has.

    Args:
        dataset_dicts (list[dict]): list of dataset dicts.
        class_names (list[str]): list of class names (zero-indexed).
    """
    num_classes = len(class_names)
    hist_bins = np.arange(num_classes + 1)
    histogram = np.zeros((num_classes,), dtype=int)
    for entry in dataset_dicts:
        annos = entry["annotations"]
        # Crowd annotations are excluded from the statistics.
        classes = np.asarray(
            [x["category_id"] for x in annos if not x.get("iscrowd", 0)], dtype=int
        )
        if len(classes):
            assert classes.min() >= 0, f"Got an invalid category_id={classes.min()}"
            assert (
                classes.max() < num_classes
            ), f"Got an invalid category_id={classes.max()} for a dataset of {num_classes} classes"
            histogram += np.histogram(classes, bins=hist_bins)[0]

    # Each class occupies 2 columns (name, count); cap the table at 6 columns.
    N_COLS = min(6, len(class_names) * 2)

    def short_name(x):
        # make long class names shorter. useful for lvis
        if len(x) > 13:
            return x[:11] + ".."
        return x

    # Flat list of alternating [name, count] cells.
    data = list(
        itertools.chain(*[[short_name(class_names[i]), int(v)] for i, v in enumerate(histogram)])
    )
    total_num_instances = sum(data[1::2])
    # FIX: pad only up to the next multiple of N_COLS. The previous expression
    # `N_COLS - (len(data) % N_COLS)` appended a full row of None cells (an
    # empty table row) whenever the data already filled the last row exactly.
    data.extend([None] * (-len(data) % N_COLS))
    if num_classes > 1:
        data.extend(["total", total_num_instances])
    data = itertools.zip_longest(*[data[i::N_COLS] for i in range(N_COLS)])
    table = tabulate(
        data,
        headers=["category", "#instances"] * (N_COLS // 2),
        tablefmt="pipe",
        numalign="left",
        stralign="center",
    )
    log_first_n(
        logging.INFO,
        "Distribution of instances among all {} categories:\n".format(num_classes)
        + colored(table, "cyan"),
        key="message",
    )
215
+
216
+
217
def get_detection_dataset_dicts(
    names,
    filter_empty=True,
    min_keypoints=0,
    proposal_files=None,
    check_consistency=True,
):
    """
    Load and prepare dataset dicts for instance detection/segmentation and semantic segmentation.

    Args:
        names (str or list[str]): a dataset name or a list of dataset names
        filter_empty (bool): whether to filter out images without instance annotations
        min_keypoints (int): filter out images with fewer keypoints than
            `min_keypoints`. Set to 0 to do nothing.
        proposal_files (list[str]): if given, a list of object proposal files
            that match each dataset in `names`.
        check_consistency (bool): whether to check if datasets have consistent metadata.

    Returns:
        list[dict]: a list of dicts following the standard dataset dict format,
        or a torch Dataset if the catalog entries are already torch Datasets.
    """
    if isinstance(names, str):
        names = [names]
    assert len(names), names

    available_datasets = DatasetCatalog.keys()
    names_set = set(names)
    if not names_set.issubset(available_datasets):
        # NOTE(review): `names_set - available_datasets` assumes
        # DatasetCatalog.keys() supports set difference (returns a set/KeysView)
        # — confirm against the catalog implementation.
        logger = logging.getLogger(__name__)
        logger.warning(
            "The following dataset names are not registered in the DatasetCatalog: "
            f"{names_set - available_datasets}. "
            f"Available datasets are {available_datasets}"
        )

    dataset_dicts = [DatasetCatalog.get(dataset_name) for dataset_name in names]

    if isinstance(dataset_dicts[0], torchdata.Dataset):
        if len(dataset_dicts) > 1:
            # ConcatDataset does not work for iterable style dataset.
            # We could support concat for iterable as well, but it's often
            # not a good idea to concat iterables anyway.
            return torchdata.ConcatDataset(dataset_dicts)
        # NOTE(review): in this branch, filter_empty / min_keypoints /
        # proposal_files / check_consistency are silently ignored.
        return dataset_dicts[0]

    for dataset_name, dicts in zip(names, dataset_dicts):
        assert len(dicts), "Dataset '{}' is empty!".format(dataset_name)

    if proposal_files is not None:
        assert len(names) == len(proposal_files)
        # load precomputed proposals from proposal files
        dataset_dicts = [
            load_proposals_into_dataset(dataset_i_dicts, proposal_file)
            for dataset_i_dicts, proposal_file in zip(dataset_dicts, proposal_files)
        ]

    # Flatten the per-dataset lists into a single list of records.
    dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts))

    # Filtering only applies to instance-style datasets; the first record is
    # used as a proxy for the whole list.
    has_instances = "annotations" in dataset_dicts[0]
    if filter_empty and has_instances:
        dataset_dicts = filter_images_with_only_crowd_annotations(dataset_dicts)
    if min_keypoints > 0 and has_instances:
        dataset_dicts = filter_images_with_few_keypoints(dataset_dicts, min_keypoints)

    if check_consistency and has_instances:
        try:
            class_names = MetadataCatalog.get(names[0]).thing_classes
            check_metadata_consistency("thing_classes", names)
            print_instances_class_histogram(dataset_dicts, class_names)
        except AttributeError:  # class names are not available for this dataset
            pass

    assert len(dataset_dicts), "No valid data found in {}.".format(",".join(names))
    return dataset_dicts
292
+
293
+
294
def build_batch_data_loader(
    dataset,
    sampler,
    total_batch_size,
    *,
    aspect_ratio_grouping=False,
    num_workers=0,
    collate_fn=None,
    drop_last: bool = True,
    single_gpu_batch_size=None,
    prefetch_factor=2,
    persistent_workers=False,
    pin_memory=False,
    seed=None,
    **kwargs,
):
    """
    Build a batched dataloader on top of ``torch.utils.data.DataLoader``.

    Compared to a plain DataLoader this adds (1) optional aspect-ratio
    grouping and (2) "no batch collation" by default, which is the common
    setup for detection training.

    Args:
        dataset (torch.utils.data.Dataset): a pytorch map-style or iterable dataset.
        sampler (torch.utils.data.sampler.Sampler or None): a sampler that produces indices.
            Must be provided iff. ``dataset`` is a map-style dataset.
        total_batch_size, aspect_ratio_grouping, num_workers, collate_fn: see
            :func:`build_detection_train_loader`.
        single_gpu_batch_size: per-gpu/process batch size. Exactly one of
            ``single_gpu_batch_size`` and ``total_batch_size`` may be supplied;
            ``total_batch_size`` is the aggregate batch size across all gpus.
        drop_last (bool): if ``True``, the dataloader will drop incomplete batches.

    Returns:
        iterable[list]: each list has the batch size of the current GPU and
        contains elements produced by the dataset.
    """
    if single_gpu_batch_size:
        if total_batch_size:
            raise ValueError(
                """total_batch_size and single_gpu_batch_size are mutually incompatible.
                Please specify only one. """
            )
        batch_size = single_gpu_batch_size
    else:
        world_size = get_world_size()
        assert (
            total_batch_size > 0 and total_batch_size % world_size == 0
        ), "Total batch size ({}) must be divisible by the number of gpus ({}).".format(
            total_batch_size, world_size
        )
        batch_size = total_batch_size // world_size
    logger = logging.getLogger(__name__)
    logger.info("Making batched data loader with batch_size=%d", batch_size)

    # Map-style datasets are driven by the sampler and converted to an iterable.
    if isinstance(dataset, torchdata.IterableDataset):
        assert sampler is None, "sampler must be None if dataset is IterableDataset"
    else:
        dataset = ToIterableDataset(dataset, sampler, shard_chunk_size=batch_size)

    generator = None
    if seed is not None:
        generator = torch.Generator()
        generator.manual_seed(seed)

    if aspect_ratio_grouping:
        assert drop_last, "Aspect ratio grouping will drop incomplete batches."
        # The inner loader yields individual mapped dicts; batching is done
        # afterwards by AspectRatioGroupedDataset.
        element_loader = torchdata.DataLoader(
            dataset,
            num_workers=num_workers,
            collate_fn=operator.itemgetter(0),  # don't batch, but yield individual elements
            worker_init_fn=worker_init_reset_seed,
            prefetch_factor=prefetch_factor if num_workers > 0 else None,
            persistent_workers=persistent_workers,
            pin_memory=pin_memory,
            generator=generator,
            **kwargs,
        )
        grouped = AspectRatioGroupedDataset(element_loader, batch_size)
        if collate_fn is None:
            return grouped
        return MapDataset(grouped, collate_fn)

    return torchdata.DataLoader(
        dataset,
        batch_size=batch_size,
        drop_last=drop_last,
        num_workers=num_workers,
        collate_fn=trivial_batch_collator if collate_fn is None else collate_fn,
        worker_init_fn=worker_init_reset_seed,
        prefetch_factor=prefetch_factor if num_workers > 0 else None,
        persistent_workers=persistent_workers,
        pin_memory=pin_memory,
        generator=generator,
        **kwargs,
    )
391
+
392
+ def _get_train_datasets_repeat_factors(cfg) -> Dict[str, float]:
393
+ repeat_factors = cfg.DATASETS.TRAIN_REPEAT_FACTOR
394
+ assert all(len(tup) == 2 for tup in repeat_factors)
395
+ name_to_weight = defaultdict(lambda: 1, dict(repeat_factors))
396
+ # The sampling weights map should only contain datasets in train config
397
+ unrecognized = set(name_to_weight.keys()) - set(cfg.DATASETS.TRAIN)
398
+ assert not unrecognized, f"unrecognized datasets: {unrecognized}"
399
+ logger = logging.getLogger(__name__)
400
+ logger.info(f"Found repeat factors: {list(name_to_weight.items())}")
401
+
402
+ # pyre-fixme[7]: Expected `Dict[str, float]` but got `DefaultDict[typing.Any, int]`.
403
+ return name_to_weight
404
+
405
+
406
def _build_weighted_sampler(cfg, enable_category_balance=False):
    """
    Create a :class:`RepeatFactorTrainingSampler` whose per-sample repeat
    factors come from the per-dataset weights in
    ``cfg.DATASETS.TRAIN_REPEAT_FACTOR`` and, optionally, from per-category
    frequency balancing.

    Args:
        cfg: the full detectron2 config.
        enable_category_balance (bool): if True, multiply the dataset-level
            factors elementwise by category-frequency factors and rescale so
            the smallest factor is 1.

    Returns:
        RepeatFactorTrainingSampler
    """
    per_dataset_weight = _get_train_datasets_repeat_factors(cfg)
    # OrderedDict to guarantee values() iterates consistently with the weights.
    name_to_dicts = OrderedDict(
        {
            name: get_detection_dataset_dicts(
                [name],
                filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
                min_keypoints=(
                    cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
                    if cfg.MODEL.KEYPOINT_ON
                    else 0
                ),
                proposal_files=(
                    cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None
                ),
            )
            for name in cfg.DATASETS.TRAIN
        }
    )
    # Expand each dataset's weight to one entry per sample, then flatten.
    repeat_factors = list(
        itertools.chain.from_iterable(
            [per_dataset_weight[dsname]] * len(name_to_dicts[dsname])
            for dsname in cfg.DATASETS.TRAIN
        )
    )
    repeat_factors = torch.tensor(repeat_factors)
    logger = logging.getLogger(__name__)
    if enable_category_balance:
        # 1. Compute category-frequency repeat factors per dataset and flatten.
        # 2. Multiply elementwise with the dataset-frequency factors, then
        #    rescale so the minimum factor is 1.
        category_factors = list(
            itertools.chain.from_iterable(
                RepeatFactorTrainingSampler.repeat_factors_from_category_frequency(
                    dataset_dict, cfg.DATALOADER.REPEAT_THRESHOLD, sqrt=cfg.DATALOADER.REPEAT_SQRT
                )
                for dataset_dict in name_to_dicts.values()
            )
        )
        category_factors = torch.tensor(category_factors)
        repeat_factors = torch.mul(category_factors, repeat_factors)
        repeat_factors = repeat_factors / torch.min(repeat_factors)
        logger.info(
            "Using WeightedCategoryTrainingSampler with repeat_factors={}".format(
                cfg.DATASETS.TRAIN_REPEAT_FACTOR
            )
        )
    else:
        logger.info(
            "Using WeightedTrainingSampler with repeat_factors={}".format(
                cfg.DATASETS.TRAIN_REPEAT_FACTOR
            )
        )

    return RepeatFactorTrainingSampler(repeat_factors)
467
+
468
+
469
def _train_loader_from_config(cfg, mapper=None, *, dataset=None, sampler=None):
    """
    Translate a cfg into the keyword arguments of
    :func:`build_detection_train_loader`; explicit ``mapper``/``dataset``/
    ``sampler`` arguments override the config-derived defaults.
    """
    if dataset is None:
        dataset = get_detection_dataset_dicts(
            cfg.DATASETS.TRAIN,
            filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
            min_keypoints=(
                cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE if cfg.MODEL.KEYPOINT_ON else 0
            ),
            proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None,
        )
    _log_api_usage("dataset." + cfg.DATASETS.TRAIN[0])

    if mapper is None:
        mapper = DatasetMapper(cfg, True)

    if sampler is None:
        sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
        logger = logging.getLogger(__name__)
        if isinstance(dataset, torchdata.IterableDataset):
            logger.info("Not using any sampler since the dataset is IterableDataset.")
            sampler = None
        else:
            logger.info("Using training sampler {}".format(sampler_name))
            if sampler_name == "TrainingSampler":
                sampler = TrainingSampler(len(dataset), seed=cfg.SEED)
            elif sampler_name == "RepeatFactorTrainingSampler":
                repeat_factors = (
                    RepeatFactorTrainingSampler.repeat_factors_from_category_frequency(
                        dataset, cfg.DATALOADER.REPEAT_THRESHOLD, sqrt=cfg.DATALOADER.REPEAT_SQRT
                    )
                )
                sampler = RepeatFactorTrainingSampler(repeat_factors, seed=cfg.SEED)
            elif sampler_name == "RandomSubsetTrainingSampler":
                sampler = RandomSubsetTrainingSampler(
                    len(dataset), cfg.DATALOADER.RANDOM_SUBSET_RATIO
                )
            elif sampler_name == "WeightedTrainingSampler":
                sampler = _build_weighted_sampler(cfg)
            elif sampler_name == "WeightedCategoryTrainingSampler":
                sampler = _build_weighted_sampler(cfg, enable_category_balance=True)
            else:
                raise ValueError("Unknown training sampler: {}".format(sampler_name))

    return {
        "dataset": dataset,
        "sampler": sampler,
        "mapper": mapper,
        "total_batch_size": cfg.SOLVER.IMS_PER_BATCH,
        "aspect_ratio_grouping": cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        "num_workers": cfg.DATALOADER.NUM_WORKERS,
    }
518
+
519
+
520
@configurable(from_config=_train_loader_from_config)
def build_detection_train_loader(
    dataset,
    *,
    mapper,
    sampler=None,
    total_batch_size,
    aspect_ratio_grouping=True,
    num_workers=0,
    collate_fn=None,
    **kwargs,
):
    """
    Build a training dataloader for object detection with sensible defaults.

    Args:
        dataset (list or torch.utils.data.Dataset): a list of dataset dicts,
            or a pytorch dataset (map-style or iterable), e.g. from
            :func:`DatasetCatalog.get` or :func:`get_detection_dataset_dicts`.
        mapper (callable): takes a sample (dict) from ``dataset`` and returns
            the format consumed by the model. When built from a cfg, the
            default is ``DatasetMapper(cfg, is_train=True)``.
        sampler (torch.utils.data.sampler.Sampler or None): produces indices
            into ``dataset``. For a map-style dataset the default is
            :class:`TrainingSampler` (an infinite random shuffle coordinated
            across workers); must be None for an iterable dataset.
        total_batch_size (int): total batch size across all workers.
        aspect_ratio_grouping (bool): group images with similar aspect ratio
            for efficiency; requires each element to have "width" and "height".
        num_workers (int): number of parallel data loading workers.
        collate_fn: batching function, as in `torch.utils.data.DataLoader`.
            Defaults to no collation (a list of data), which is fine for small
            batches and simple structures; collating in the loader is more
            efficient for large batches of many small tensors.

    Returns:
        torch.utils.data.DataLoader: each output is a ``list[mapped_element]``
        of length ``total_batch_size / num_workers`` produced by ``mapper``.
    """
    if isinstance(dataset, list):
        dataset = DatasetFromList(dataset, copy=False)
    if mapper is not None:
        dataset = MapDataset(dataset, mapper)

    if isinstance(dataset, torchdata.IterableDataset):
        assert sampler is None, "sampler must be None if dataset is IterableDataset"
    else:
        if sampler is None:
            sampler = TrainingSampler(len(dataset))
        assert isinstance(sampler, torchdata.Sampler), f"Expect a Sampler but got {type(sampler)}"
    return build_batch_data_loader(
        dataset,
        sampler,
        total_batch_size,
        aspect_ratio_grouping=aspect_ratio_grouping,
        num_workers=num_workers,
        collate_fn=collate_fn,
        **kwargs,
    )
584
+
585
+
586
def _test_loader_from_config(cfg, dataset_name, mapper=None):
    """
    Translate a cfg plus an explicit ``dataset_name`` into the keyword
    arguments of :func:`build_detection_test_loader`. The name is an argument
    (instead of coming from cfg) because the standard practice is to evaluate
    each test set individually rather than combined.
    """
    if isinstance(dataset_name, str):
        dataset_name = [dataset_name]

    dataset = get_detection_dataset_dicts(
        dataset_name,
        filter_empty=False,
        proposal_files=(
            [
                cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(x)]
                for x in dataset_name
            ]
            if cfg.MODEL.LOAD_PROPOSALS
            else None
        ),
    )
    if mapper is None:
        mapper = DatasetMapper(cfg, False)
    # Iterable datasets cannot take a sampler.
    sampler = None
    if not isinstance(dataset, torchdata.IterableDataset):
        sampler = InferenceSampler(len(dataset))
    return {
        "dataset": dataset,
        "mapper": mapper,
        "num_workers": cfg.DATALOADER.NUM_WORKERS,
        "sampler": sampler,
    }
618
+
619
+
620
@configurable(from_config=_test_loader_from_config)
def build_detection_test_loader(
    dataset: Union[List[Any], torchdata.Dataset],
    *,
    mapper: Callable[[Dict[str, Any]], Any],
    sampler: Optional[torchdata.Sampler] = None,
    batch_size: int = 1,
    num_workers: int = 0,
    collate_fn: Optional[Callable[[List[Any]], Any]] = None,
) -> torchdata.DataLoader:
    """
    Build a test-time dataloader. Similar to
    :func:`build_detection_train_loader`, but defaults to batch size 1 and an
    :class:`InferenceSampler`, which coordinates all workers to produce the
    exact set of all samples exactly once.

    Args:
        dataset: a list of dataset dicts, or a pytorch dataset (map-style or
            iterable), e.g. from :func:`DatasetCatalog.get` or
            :func:`get_detection_dataset_dicts`.
        mapper: takes a sample (dict) from ``dataset`` and returns the format
            consumed by the model. When built from a cfg, the default is
            ``DatasetMapper(cfg, is_train=False)``.
        sampler: produces indices into ``dataset``. Defaults to
            :class:`InferenceSampler`, which splits the dataset across all
            workers; must be None for an iterable dataset.
        batch_size: batch size of the created loader. Defaults to 1 image per
            worker, the standard when reporting inference time in papers.
        num_workers: number of parallel data loading workers.
        collate_fn: same as the argument of `torch.utils.data.DataLoader`.
            Defaults to no collation (return a list of data).

    Returns:
        DataLoader: a torch DataLoader that loads the given detection dataset
        with test-time transformation and batching.

    Examples:
    ::
        data_loader = build_detection_test_loader(
            DatasetRegistry.get("my_test"),
            mapper=DatasetMapper(...))

        # or, instantiate with a CfgNode:
        data_loader = build_detection_test_loader(cfg, "my_test")
    """
    if isinstance(dataset, list):
        dataset = DatasetFromList(dataset, copy=False)
    if mapper is not None:
        dataset = MapDataset(dataset, mapper)
    if isinstance(dataset, torchdata.IterableDataset):
        assert sampler is None, "sampler must be None if dataset is IterableDataset"
    elif sampler is None:
        sampler = InferenceSampler(len(dataset))
    return torchdata.DataLoader(
        dataset,
        batch_size=batch_size,
        sampler=sampler,
        drop_last=False,
        num_workers=num_workers,
        collate_fn=trivial_batch_collator if collate_fn is None else collate_fn,
    )
683
+
684
+
685
def trivial_batch_collator(batch):
    """Identity collate function: return the list of samples unchanged."""
    return batch
690
+
691
+
692
def worker_init_reset_seed(worker_id):
    """Dataloader worker init hook: derive a distinct RNG seed per worker
    from torch's base seed and reseed all RNGs with it."""
    base_seed = torch.initial_seed() % 2**31
    seed_all_rng(base_seed + worker_id)
detectron2/data/catalog.py ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ import copy
3
+ import logging
4
+ import types
5
+ from collections import UserDict
6
+ from typing import List
7
+
8
+ from detectron2.utils.logger import log_first_n
9
+
10
+ __all__ = ["DatasetCatalog", "MetadataCatalog", "Metadata"]
11
+
12
+
13
class _DatasetCatalog(UserDict):
    """
    A global registry mapping dataset names (e.g. "coco_2014_train") to
    parameterless functions that parse the dataset and return its samples as
    ``list[dict]`` in the Detectron2 Dataset format (see DATASETS.md), as
    expected by the data loader functionality in
    `data/build.py,data/detection_transform.py`.

    Having this catalog makes it easy to choose different datasets by just
    using their string names in the config.
    """

    def register(self, name, func):
        """
        Args:
            name (str): the name that identifies a dataset, e.g. "coco_2014_train".
            func (callable): a callable which takes no arguments and returns a list of dicts.
                It must return the same results if called multiple times.
        """
        assert callable(func), "You must register a function with `DatasetCatalog.register`!"
        assert name not in self, "Dataset '{}' is already registered!".format(name)
        self[name] = func

    def get(self, name):
        """
        Call the function registered under ``name`` and return its result.

        Args:
            name (str): the name that identifies a dataset, e.g. "coco_2014_train".

        Returns:
            list[dict]: dataset annotations.

        Raises:
            KeyError: if no dataset was registered under ``name``.
        """
        try:
            loader = self[name]
        except KeyError as e:
            raise KeyError(
                "Dataset '{}' is not registered! Available datasets are: {}".format(
                    name, ", ".join(list(self.keys()))
                )
            ) from e
        return loader()

    def list(self) -> List[str]:
        """
        Return the names of all registered datasets.

        Returns:
            list[str]
        """
        return list(self.keys())

    def remove(self, name):
        """
        Alias of ``pop``.
        """
        self.pop(name)

    def __str__(self):
        return "DatasetCatalog(registered datasets: {})".format(", ".join(self.keys()))

    __repr__ = __str__


DatasetCatalog = _DatasetCatalog()
DatasetCatalog.__doc__ = (
    _DatasetCatalog.__doc__
    + """
    .. automethod:: detectron2.data.catalog.DatasetCatalog.register
    .. automethod:: detectron2.data.catalog.DatasetCatalog.get
"""
)
89
+
90
+
91
class Metadata(types.SimpleNamespace):
    """
    A simple attribute container for dataset metadata, made accessible
    globally via :class:`MetadataCatalog`.

    Examples:
    ::
        # somewhere when you load the data:
        MetadataCatalog.get("mydataset").thing_classes = ["person", "dog"]

        # somewhere when you print statistics or visualize:
        classes = MetadataCatalog.get("mydataset").thing_classes
    """

    # the name of the dataset
    # set default to N/A so that `self.name` in the errors will not trigger getattr again
    name: str = "N/A"

    # legacy key -> current key; reads/writes of old names are forwarded with a warning
    _RENAMED = {
        "class_names": "thing_classes",
        "dataset_id_to_contiguous_id": "thing_dataset_id_to_contiguous_id",
        "stuff_class_names": "stuff_classes",
    }

    def __getattr__(self, key):
        if key in self._RENAMED:
            log_first_n(
                logging.WARNING,
                "Metadata '{}' was renamed to '{}'!".format(key, self._RENAMED[key]),
                n=10,
            )
            return getattr(self, self._RENAMED[key])

        # "name" exists in every metadata, so more than one entry means some
        # metadata has been set and we can list the available keys.
        if len(self.__dict__) > 1:
            raise AttributeError(
                "Attribute '{}' does not exist in the metadata of dataset '{}'. Available "
                "keys are {}.".format(key, self.name, str(self.__dict__.keys()))
            )
        raise AttributeError(
            f"Attribute '{key}' does not exist in the metadata of dataset '{self.name}': "
            "metadata is empty."
        )

    def __setattr__(self, key, val):
        if key in self._RENAMED:
            log_first_n(
                logging.WARNING,
                "Metadata '{}' was renamed to '{}'!".format(key, self._RENAMED[key]),
                n=10,
            )
            setattr(self, self._RENAMED[key], val)

        # Keep metadata of the same name consistent: once set, a key may only
        # be re-assigned an equal value.
        try:
            existing = getattr(self, key)
            assert existing == val, (
                "Attribute '{}' in the metadata of '{}' cannot be set "
                "to a different value!\n{} != {}".format(key, self.name, existing, val)
            )
        except AttributeError:
            super().__setattr__(key, val)

    def as_dict(self):
        """
        Return all metadata as a (shallow-copied) dict; modifications to the
        returned dict do not reflect on this Metadata object.
        """
        return copy.copy(self.__dict__)

    def set(self, **kwargs):
        """
        Set multiple metadata attributes from kwargs; returns ``self`` for chaining.
        """
        for k, v in kwargs.items():
            setattr(self, k, v)
        return self

    def get(self, key, default=None):
        """
        Return ``getattr(self, key)`` if the attribute exists, else ``default``.
        """
        try:
            return getattr(self, key)
        except AttributeError:
            return default
179
+
180
+
181
class _MetadataCatalog(UserDict):
    """
    A global dictionary giving access to the :class:`Metadata` of each dataset.

    The metadata associated with a given name is a singleton: once created it
    stays alive and is returned by every later ``get(name)`` call.

    Like all globals, use sparingly — it is meant for knowledge that is
    constant and shared across the execution of the program, e.g. the class
    names in COCO.
    """

    def get(self, name):
        """
        Args:
            name (str): name of a dataset (e.g. coco_2014_train).

        Returns:
            Metadata: the :class:`Metadata` instance associated with ``name``,
            creating an empty one on first access.
        """
        assert len(name)
        meta = super().get(name, None)
        if meta is None:
            meta = self[name] = Metadata(name=name)
        return meta

    def list(self):
        """
        List all registered metadata.

        Returns:
            list[str]: keys (names of datasets) of all registered metadata
        """
        return list(self.keys())

    def remove(self, name):
        """
        Alias of ``pop``.
        """
        self.pop(name)

    def __str__(self):
        return "MetadataCatalog(registered metadata: {})".format(", ".join(self.keys()))

    __repr__ = __str__


MetadataCatalog = _MetadataCatalog()
MetadataCatalog.__doc__ = (
    _MetadataCatalog.__doc__
    + """
    .. automethod:: detectron2.data.catalog.MetadataCatalog.get
"""
)
+ )
detectron2/data/common.py ADDED
@@ -0,0 +1,339 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ import contextlib
3
+ import copy
4
+ import itertools
5
+ import logging
6
+ import numpy as np
7
+ import pickle
8
+ import random
9
+ from typing import Callable, Union
10
+ import torch
11
+ import torch.utils.data as data
12
+ from torch.utils.data.sampler import Sampler
13
+
14
+ from detectron2.utils.serialize import PicklableWrapper
15
+
16
+ __all__ = ["MapDataset", "DatasetFromList", "AspectRatioGroupedDataset", "ToIterableDataset"]
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
+ # copied from: https://docs.python.org/3/library/itertools.html#recipes
22
+ def _roundrobin(*iterables):
23
+ "roundrobin('ABC', 'D', 'EF') --> A D E B F C"
24
+ # Recipe credited to George Sakkis
25
+ num_active = len(iterables)
26
+ nexts = itertools.cycle(iter(it).__next__ for it in iterables)
27
+ while num_active:
28
+ try:
29
+ for next in nexts:
30
+ yield next()
31
+ except StopIteration:
32
+ # Remove the iterator we just exhausted from the cycle.
33
+ num_active -= 1
34
+ nexts = itertools.cycle(itertools.islice(nexts, num_active))
35
+
36
+
37
+ def _shard_iterator_dataloader_worker(iterable, chunk_size=1):
38
+ # Shard the iterable if we're currently inside pytorch dataloader worker.
39
+ worker_info = data.get_worker_info()
40
+ if worker_info is None or worker_info.num_workers == 1:
41
+ # do nothing
42
+ yield from iterable
43
+ else:
44
+ # worker0: 0, 1, ..., chunk_size-1, num_workers*chunk_size, num_workers*chunk_size+1, ...
45
+ # worker1: chunk_size, chunk_size+1, ...
46
+ # worker2: 2*chunk_size, 2*chunk_size+1, ...
47
+ # ...
48
+ yield from _roundrobin(
49
+ *[
50
+ itertools.islice(
51
+ iterable,
52
+ worker_info.id * chunk_size + chunk_i,
53
+ None,
54
+ worker_info.num_workers * chunk_size,
55
+ )
56
+ for chunk_i in range(chunk_size)
57
+ ]
58
+ )
59
+
60
+
61
class _MapIterableDataset(data.IterableDataset):
    """
    Apply a map function over the elements of an IterableDataset, dropping
    any element for which the function returns None.

    Similar to pytorch's MapIterDataPipe, but supports filtering when
    map_func returns None.

    This class is not public-facing; it is constructed by `MapDataset`.
    """

    def __init__(self, dataset, map_func):
        self._dataset = dataset
        self._map_func = PicklableWrapper(map_func)  # wrap so that a lambda will work

    def __len__(self):
        return len(self._dataset)

    def __iter__(self):
        for mapped in map(self._map_func, self._dataset):
            if mapped is not None:
                yield mapped
+ yield x
82
+
83
+
84
class MapDataset(data.Dataset):
    """
    Map a function over the elements in a dataset.
    """

    def __init__(self, dataset, map_func):
        """
        Args:
            dataset: a dataset where map function is applied. Can be either
                map-style or iterable dataset. When given an iterable dataset,
                the returned object will also be an iterable dataset.
            map_func: a callable which maps the element in dataset. map_func can
                return None to skip the data (e.g. in case of errors).
                How None is handled depends on the style of `dataset`.
                If `dataset` is map-style, it randomly tries other elements.
                If `dataset` is iterable, it skips the data and tries the next.
        """
        self._dataset = dataset
        self._map_func = PicklableWrapper(map_func)  # wrap so that a lambda will work

        self._rng = random.Random(42)
        # Pool of indices assumed to map successfully; used as fallback when
        # map_func returns None for the requested index.
        self._fallback_candidates = set(range(len(dataset)))

    def __new__(cls, dataset, map_func):
        # Dispatch to the iterable flavor when wrapping an IterableDataset.
        is_iterable = isinstance(dataset, data.IterableDataset)
        if is_iterable:
            return _MapIterableDataset(dataset, map_func)
        else:
            return super().__new__(cls)

    def __getnewargs__(self):
        return self._dataset, self._map_func

    def __len__(self):
        return len(self._dataset)

    def __getitem__(self, idx):
        retry_count = 0
        cur_idx = int(idx)

        while True:
            data = self._map_func(self._dataset[cur_idx])
            if data is not None:
                self._fallback_candidates.add(cur_idx)
                return data

            # _map_func fails for this idx, use a random new index from the pool
            retry_count += 1
            self._fallback_candidates.discard(cur_idx)
            # BUGFIX: random.sample() on a set was deprecated in Python 3.9 and
            # removed in 3.11 (TypeError: population must be a sequence), so
            # materialize the pool before sampling.
            cur_idx = self._rng.sample(list(self._fallback_candidates), k=1)[0]

            if retry_count >= 3:
                logger = logging.getLogger(__name__)
                logger.warning(
                    "Failed to apply `_map_func` for idx: {}, retry count: {}".format(
                        idx, retry_count
                    )
                )
+
143
+
144
class _TorchSerializedList:
    """
    A list-like object whose items are pickled and packed into one flat torch
    uint8 tensor. When a process using TorchSerializedList is launched with the
    "fork" start method, subprocesses read the same buffer without triggering
    copy-on-access; with "spawn/forkserver", the list is pickled by PyTorch's
    special ForkingPickler which moves the data to shared memory. Either way,
    parent and child processes share RAM for the list data, avoiding the issue
    in https://github.com/pytorch/pytorch/issues/13246.

    See also https://ppwwyyxx.com/blog/2022/Demystify-RAM-Usage-in-Multiprocess-DataLoader/
    on how it works.
    """

    def __init__(self, lst: list):
        self._lst = lst

        def _serialize(data):
            # protocol=-1 selects the highest available pickle protocol
            buffer = pickle.dumps(data, protocol=-1)
            return np.frombuffer(buffer, dtype=np.uint8)

        logger.info(
            "Serializing {} elements to byte tensors and concatenating them all ...".format(
                len(self._lst)
            )
        )
        serialized = [_serialize(x) for x in self._lst]
        # _addr[i] is the exclusive end offset of element i in the flat buffer.
        lengths = np.asarray([len(x) for x in serialized], dtype=np.int64)
        self._addr = torch.from_numpy(np.cumsum(lengths))
        self._lst = torch.from_numpy(np.concatenate(serialized))
        logger.info("Serialized dataset takes {:.2f} MiB".format(len(self._lst) / 1024**2))

    def __len__(self):
        return len(self._addr)

    def __getitem__(self, idx):
        start_addr = 0 if idx == 0 else self._addr[idx - 1].item()
        end_addr = self._addr[idx].item()
        raw = memoryview(self._lst[start_addr:end_addr].numpy())

        # @lint-ignore PYTHONPICKLEISBAD
        return pickle.loads(raw)
+ return pickle.loads(bytes)
187
+
188
+
189
+ _DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD = _TorchSerializedList
190
+
191
+
192
@contextlib.contextmanager
def set_default_dataset_from_list_serialize_method(new):
    """
    Context manager that temporarily replaces the default serialize method
    used when creating :class:`DatasetFromList`.

    Args:
        new: a callable taking a list and returning a serialized, indexable
            object (same contract as ``_TorchSerializedList``).
    """

    global _DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD
    orig = _DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD
    _DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD = new
    try:
        yield
    finally:
        # BUGFIX: restore the default even if the body raises; previously an
        # exception inside the `with` block left the override installed forever.
        _DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD = orig
+ _DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD = orig
203
+
204
+
205
class DatasetFromList(data.Dataset):
    """
    Wrap a list to a torch Dataset. It produces elements of the list as data.
    """

    def __init__(
        self,
        lst: list,
        copy: bool = True,
        serialize: Union[bool, Callable] = True,
    ):
        """
        Args:
            lst (list): a list which contains elements to produce.
            copy (bool): whether to deepcopy the element when producing it,
                so that the result can be modified in place without affecting the
                source in the list. Ignored when serialization is enabled.
            serialize (bool or callable): whether to serialize the storage to
                another backend. If `True`, the default serialize method will be
                used; if given a callable, that callable will be used as the
                serialize method.

        Raises:
            TypeError: if ``serialize`` is neither a bool nor a callable.
        """
        self._lst = lst
        self._copy = copy
        if not isinstance(serialize, (bool, Callable)):
            # BUGFIX: corrected typo in the error message ("serailzie").
            raise TypeError(f"Unsupported type for argument `serialize`: {serialize}")
        self._serialize = serialize is not False

        if self._serialize:
            serialize_method = (
                serialize
                if isinstance(serialize, Callable)
                else _DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD
            )
            logger.info(f"Serializing the dataset using: {serialize_method}")
            self._lst = serialize_method(self._lst)

    def __len__(self):
        return len(self._lst)

    def __getitem__(self, idx):
        # deepcopy only makes sense for the raw-list storage; serialized
        # storage already returns fresh objects from pickle.loads.
        if self._copy and not self._serialize:
            return copy.deepcopy(self._lst[idx])
        else:
            return self._lst[idx]
+ return self._lst[idx]
249
+
250
+
251
class ToIterableDataset(data.IterableDataset):
    """
    Adapt an old indices-based (also called map-style) dataset into an
    iterable-style dataset by driving it with a sampler.
    """

    def __init__(
        self,
        dataset: data.Dataset,
        sampler: Sampler,
        shard_sampler: bool = True,
        shard_chunk_size: int = 1,
    ):
        """
        Args:
            dataset: an old-style dataset with ``__getitem__``
            sampler: a cheap iterable that produces indices to be applied on ``dataset``.
            shard_sampler: whether to shard the sampler based on the current pytorch
                data loader worker id. When an IterableDataset is forked by pytorch's
                DataLoader into multiple workers, each worker is responsible for
                sharding its data based on worker id so that workers don't produce
                identical data.

                Most samplers (like our TrainingSampler) do not shard based on
                dataloader worker id, so this should usually be True. Set it to
                False only for samplers that are already sharded.
            shard_chunk_size: number of consecutive sampler indices assigned to a
                worker at a time when sharding.
        """
        assert not isinstance(dataset, data.IterableDataset), dataset
        assert isinstance(sampler, Sampler), sampler
        self.dataset = dataset
        self.sampler = sampler
        self.shard_sampler = shard_sampler
        self.shard_chunk_size = shard_chunk_size

    def __iter__(self):
        if self.shard_sampler:
            # With a map-style dataset, `DataLoader(dataset, sampler)` runs the
            # sampler in the main process only. But wrapping in an IterableDataset
            # makes every one of the N workers run the sampler, so each worker must
            # keep only 1/N of the ids. Samplers are assumed cheap to iterate, so
            # discarding ids in workers is fine.
            indices = _shard_iterator_dataloader_worker(self.sampler, self.shard_chunk_size)
        else:
            indices = self.sampler
        for idx in indices:
            yield self.dataset[idx]

    def __len__(self):
        return len(self.sampler)
+ return len(self.sampler)
300
+
301
+
302
class AspectRatioGroupedDataset(data.IterableDataset):
    """
    Batch elements whose aspect ratios fall on the same side of 1 together
    (landscape vs. portrait). Images in the same batch then need less padding,
    which improves training speed.

    The underlying dataset must produce dicts with "width" and "height" keys;
    this yields lists of such dicts of length ``batch_size``, all with similar
    aspect ratios.
    """

    def __init__(self, dataset, batch_size):
        """
        Args:
            dataset: an iterable. Each element must be a dict with keys
                "width" and "height", which will be used to batch data.
            batch_size (int):
        """
        self.dataset = dataset
        self.batch_size = batch_size
        # Hard-coded two aspect ratio groups: w > h (index 0) and w <= h (index 1).
        # More groups could be supported but don't seem useful in practice.
        self._buckets = [[] for _ in range(2)]

    def __iter__(self):
        for d in self.dataset:
            w, h = d["width"], d["height"]
            bucket = self._buckets[0 if w > h else 1]
            bucket.append(d)
            if len(bucket) == self.batch_size:
                batch = bucket[:]
                # Empty the bucket before yielding, because code after a
                # `yield` is not guaranteed to execute.
                del bucket[:]
                yield batch
detectron2/data/dataset_mapper.py ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ import copy
3
+ import logging
4
+ import numpy as np
5
+ from typing import List, Optional, Union
6
+ import torch
7
+
8
+ from detectron2.config import configurable
9
+
10
+ from . import detection_utils as utils
11
+ from . import transforms as T
12
+
13
+ """
14
+ This file contains the default mapping that's applied to "dataset dicts".
15
+ """
16
+
17
+ __all__ = ["DatasetMapper"]
18
+
19
+
20
class DatasetMapper:
    """
    A callable which takes a dataset dict in Detectron2 Dataset format,
    and map it into a format used by the model.

    This is the default callable to be used to map your dataset dict into training data.
    You may need to follow it to implement your own one for customized logic,
    such as a different way to read or transform images.
    See :doc:`/tutorials/data_loading` for details.

    The callable currently does the following:

    1. Read the image from "file_name"
    2. Applies cropping/geometric transforms to the image and annotations
    3. Prepare data and annotations to Tensor and :class:`Instances`
    """

    @configurable
    def __init__(
        self,
        is_train: bool,
        *,
        augmentations: List[Union[T.Augmentation, T.Transform]],
        image_format: str,
        use_instance_mask: bool = False,
        use_keypoint: bool = False,
        instance_mask_format: str = "polygon",
        keypoint_hflip_indices: Optional[np.ndarray] = None,
        precomputed_proposal_topk: Optional[int] = None,
        recompute_boxes: bool = False,
    ):
        """
        NOTE: this interface is experimental.

        Args:
            is_train: whether it's used in training or inference
            augmentations: a list of augmentations or deterministic transforms to apply
            image_format: an image format supported by :func:`detection_utils.read_image`.
            use_instance_mask: whether to process instance segmentation annotations, if available
            use_keypoint: whether to process keypoint annotations if available
            instance_mask_format: one of "polygon" or "bitmask". Process instance segmentation
                masks into this format.
            keypoint_hflip_indices: see :func:`detection_utils.create_keypoint_hflip_indices`
            precomputed_proposal_topk: if given, will load pre-computed
                proposals from dataset_dict and keep the top k proposals for each image.
            recompute_boxes: whether to overwrite bounding box annotations
                by computing tight bounding boxes from instance mask annotations.
        """
        # Tight boxes can only be derived from masks, so masks must be loaded.
        if recompute_boxes:
            assert use_instance_mask, "recompute_boxes requires instance masks"
        # fmt: off
        self.is_train = is_train
        self.augmentations = T.AugmentationList(augmentations)
        self.image_format = image_format
        self.use_instance_mask = use_instance_mask
        self.instance_mask_format = instance_mask_format
        self.use_keypoint = use_keypoint
        self.keypoint_hflip_indices = keypoint_hflip_indices
        self.proposal_topk = precomputed_proposal_topk
        self.recompute_boxes = recompute_boxes
        # fmt: on
        logger = logging.getLogger(__name__)
        mode = "training" if is_train else "inference"
        logger.info(f"[DatasetMapper] Augmentations used in {mode}: {augmentations}")

    @classmethod
    def from_config(cls, cfg, is_train: bool = True):
        """Build the keyword arguments for :meth:`__init__` from a config
        object; consumed by the ``@configurable`` decorator."""
        augs = utils.build_augmentation(cfg, is_train)
        if cfg.INPUT.CROP.ENABLED and is_train:
            # Crop first so subsequent geometric transforms see the cropped image.
            augs.insert(0, T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE))
            # Cropping can invalidate the stored boxes; recompute them from masks.
            recompute_boxes = cfg.MODEL.MASK_ON
        else:
            recompute_boxes = False

        ret = {
            "is_train": is_train,
            "augmentations": augs,
            "image_format": cfg.INPUT.FORMAT,
            "use_instance_mask": cfg.MODEL.MASK_ON,
            "instance_mask_format": cfg.INPUT.MASK_FORMAT,
            "use_keypoint": cfg.MODEL.KEYPOINT_ON,
            "recompute_boxes": recompute_boxes,
        }

        if cfg.MODEL.KEYPOINT_ON:
            ret["keypoint_hflip_indices"] = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN)

        if cfg.MODEL.LOAD_PROPOSALS:
            ret["precomputed_proposal_topk"] = (
                cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN
                if is_train
                else cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST
            )
        return ret

    def _transform_annotations(self, dataset_dict, transforms, image_shape):
        """Apply ``transforms`` to the per-object annotations and store the
        result as an ``Instances`` object under ``dataset_dict["instances"]``."""
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.use_instance_mask:
                anno.pop("segmentation", None)
            if not self.use_keypoint:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        # Crowd annotations (iscrowd != 0) are dropped here.
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
            )
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.instance_mask_format
        )

        # After transforms such as cropping are applied, the bounding box may no longer
        # tightly bound the object. As an example, imagine a triangle object
        # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
        # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
        # the intersection of original bounding box and the cropping box.
        if self.recompute_boxes:
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
        utils.check_image_size(dataset_dict, image)

        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            # Read as single-channel ("L") and drop the trailing channel axis.
            sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
        else:
            sem_seg_gt = None

        # Apply the same geometric transforms to the image and the semantic GT.
        aug_input = T.AugInput(image, sem_seg=sem_seg_gt)
        transforms = self.augmentations(aug_input)
        image, sem_seg_gt = aug_input.image, aug_input.sem_seg

        image_shape = image.shape[:2]  # h, w
        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
        if sem_seg_gt is not None:
            dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

        # USER: Remove if you don't use pre-computed proposals.
        # Most users would not need this feature.
        if self.proposal_topk is not None:
            utils.transform_proposals(
                dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk
            )

        if not self.is_train:
            # USER: Modify this if you want to keep them for some reason.
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            self._transform_annotations(dataset_dict, transforms, image_shape)

        return dataset_dict
detectron2/data/datasets/README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ ### Common Datasets
4
+
5
+ The datasets implemented here do not need to load the data into the final format.
6
+ They should provide the minimal data structure needed to use the dataset, so it can be very efficient.
7
+
8
+ For example, for an image dataset, just provide the file names and labels, but don't read the images.
9
+ Let the downstream decide how to read.
detectron2/data/datasets/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
# Copyright (c) Facebook, Inc. and its affiliates.
from .coco import load_coco_json, load_sem_seg, register_coco_instances, convert_to_coco_json
from .coco_panoptic import register_coco_panoptic, register_coco_panoptic_separated
from .lvis import load_lvis_json, register_lvis_instances, get_lvis_instances_meta
from .pascal_voc import load_voc_instances, register_pascal_voc
from . import builtin as _builtin  # ensure the builtin datasets are registered


# Re-export every public name imported above; the leading-underscore alias
# keeps the `builtin` module itself out of the public API.
__all__ = [k for k in globals().keys() if not k.startswith("_")]
detectron2/data/datasets/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (1.1 kB). View file
 
detectron2/data/datasets/__pycache__/builtin.cpython-311.pyc ADDED
Binary file (11.3 kB). View file
 
detectron2/data/datasets/__pycache__/builtin_meta.cpython-311.pyc ADDED
Binary file (21.1 kB). View file
 
detectron2/data/datasets/__pycache__/cityscapes.cpython-311.pyc ADDED
Binary file (16.7 kB). View file
 
detectron2/data/datasets/__pycache__/cityscapes_panoptic.cpython-311.pyc ADDED
Binary file (9.52 kB). View file
 
detectron2/data/datasets/__pycache__/coco.cpython-311.pyc ADDED
Binary file (29.8 kB). View file
 
detectron2/data/datasets/__pycache__/coco_panoptic.cpython-311.pyc ADDED
Binary file (11.4 kB). View file
 
detectron2/data/datasets/__pycache__/lvis.cpython-311.pyc ADDED
Binary file (12.9 kB). View file
 
detectron2/data/datasets/__pycache__/lvis_v0_5_categories.cpython-311.pyc ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f78d618393dda851251dec67a12ffcd7a3e3092d4fd6fdf912213c137ec2aef7
3
+ size 269004
detectron2/data/datasets/__pycache__/lvis_v1_categories.cpython-311.pyc ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f02029d25d9cd8351c594670b7b1f7cc0c61b6cfb2bf0f896b363abc9f33832
3
+ size 263476
detectron2/data/datasets/__pycache__/lvis_v1_category_image_count.cpython-311.pyc ADDED
Binary file (71.9 kB). View file
 
detectron2/data/datasets/__pycache__/pascal_voc.cpython-311.pyc ADDED
Binary file (4.81 kB). View file
 
detectron2/data/datasets/builtin.py ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright (c) Facebook, Inc. and its affiliates.
3
+
4
+
5
+ """
6
+ This file registers pre-defined datasets at hard-coded paths, and their metadata.
7
+
8
+ We hard-code metadata for common datasets. This will enable:
9
+ 1. Consistency check when loading the datasets
10
+ 2. Use models on these standard datasets directly and run demos,
11
+ without having to download the dataset annotations
12
+
13
+ We hard-code some paths to the dataset that's assumed to
14
+ exist in "./datasets/".
15
+
16
+ Users SHOULD NOT use this file to create new dataset / metadata for new dataset.
17
+ To add new dataset, refer to the tutorial "docs/DATASETS.md".
18
+ """
19
+
20
+ import os
21
+
22
+ from detectron2.data import DatasetCatalog, MetadataCatalog
23
+
24
+ from .builtin_meta import ADE20K_SEM_SEG_CATEGORIES, _get_builtin_metadata
25
+ from .cityscapes import load_cityscapes_instances, load_cityscapes_semantic
26
+ from .cityscapes_panoptic import register_all_cityscapes_panoptic
27
+ from .coco import load_sem_seg, register_coco_instances
28
+ from .coco_panoptic import register_coco_panoptic, register_coco_panoptic_separated
29
+ from .lvis import get_lvis_instances_meta, register_lvis_instances
30
+ from .pascal_voc import register_pascal_voc
31
+
32
+ # ==== Predefined datasets and splits for COCO ==========
33
+
34
+ _PREDEFINED_SPLITS_COCO = {}
35
+ _PREDEFINED_SPLITS_COCO["coco"] = {
36
+ "coco_2014_train": ("coco/train2014", "coco/annotations/instances_train2014.json"),
37
+ "coco_2014_val": ("coco/val2014", "coco/annotations/instances_val2014.json"),
38
+ "coco_2014_minival": ("coco/val2014", "coco/annotations/instances_minival2014.json"),
39
+ "coco_2014_valminusminival": (
40
+ "coco/val2014",
41
+ "coco/annotations/instances_valminusminival2014.json",
42
+ ),
43
+ "coco_2017_train": ("coco/train2017", "coco/annotations/instances_train2017.json"),
44
+ "coco_2017_val": ("coco/val2017", "coco/annotations/instances_val2017.json"),
45
+ "coco_2017_test": ("coco/test2017", "coco/annotations/image_info_test2017.json"),
46
+ "coco_2017_test-dev": ("coco/test2017", "coco/annotations/image_info_test-dev2017.json"),
47
+ "coco_2017_val_100": ("coco/val2017", "coco/annotations/instances_val2017_100.json"),
48
+ }
49
+
50
+ _PREDEFINED_SPLITS_COCO["coco_person"] = {
51
+ "keypoints_coco_2014_train": (
52
+ "coco/train2014",
53
+ "coco/annotations/person_keypoints_train2014.json",
54
+ ),
55
+ "keypoints_coco_2014_val": ("coco/val2014", "coco/annotations/person_keypoints_val2014.json"),
56
+ "keypoints_coco_2014_minival": (
57
+ "coco/val2014",
58
+ "coco/annotations/person_keypoints_minival2014.json",
59
+ ),
60
+ "keypoints_coco_2014_valminusminival": (
61
+ "coco/val2014",
62
+ "coco/annotations/person_keypoints_valminusminival2014.json",
63
+ ),
64
+ "keypoints_coco_2017_train": (
65
+ "coco/train2017",
66
+ "coco/annotations/person_keypoints_train2017.json",
67
+ ),
68
+ "keypoints_coco_2017_val": ("coco/val2017", "coco/annotations/person_keypoints_val2017.json"),
69
+ "keypoints_coco_2017_val_100": (
70
+ "coco/val2017",
71
+ "coco/annotations/person_keypoints_val2017_100.json",
72
+ ),
73
+ }
74
+
75
+
76
+ _PREDEFINED_SPLITS_COCO_PANOPTIC = {
77
+ "coco_2017_train_panoptic": (
78
+ # This is the original panoptic annotation directory
79
+ "coco/panoptic_train2017",
80
+ "coco/annotations/panoptic_train2017.json",
81
+ # This directory contains semantic annotations that are
82
+ # converted from panoptic annotations.
83
+ # It is used by PanopticFPN.
84
+ # You can use the script at detectron2/datasets/prepare_panoptic_fpn.py
85
+ # to create these directories.
86
+ "coco/panoptic_stuff_train2017",
87
+ ),
88
+ "coco_2017_val_panoptic": (
89
+ "coco/panoptic_val2017",
90
+ "coco/annotations/panoptic_val2017.json",
91
+ "coco/panoptic_stuff_val2017",
92
+ ),
93
+ "coco_2017_val_100_panoptic": (
94
+ "coco/panoptic_val2017_100",
95
+ "coco/annotations/panoptic_val2017_100.json",
96
+ "coco/panoptic_stuff_val2017_100",
97
+ ),
98
+ }
99
+
100
+
101
def register_all_coco(root):
    """
    Register every pre-defined COCO split (instances and person keypoints),
    plus the panoptic variants, assuming the data lives under ``root``.
    """
    for meta_key, splits in _PREDEFINED_SPLITS_COCO.items():
        for name, (image_root, json_file) in splits.items():
            # Pre-defined datasets are assumed to live in `./datasets`;
            # annotation URIs (containing "://") are used as-is.
            json_path = json_file if "://" in json_file else os.path.join(root, json_file)
            register_coco_instances(
                name,
                _get_builtin_metadata(meta_key),
                json_path,
                os.path.join(root, image_root),
            )

    for prefix, (panoptic_root, panoptic_json, semantic_root) in _PREDEFINED_SPLITS_COCO_PANOPTIC.items():
        # Reuse image root / instance json from the matching instances split,
        # which was registered in the loop above.
        instances_meta = MetadataCatalog.get(prefix[: -len("_panoptic")])
        image_root = instances_meta.image_root
        instances_json = instances_meta.json_file
        panoptic_root_abs = os.path.join(root, panoptic_root)
        panoptic_json_abs = os.path.join(root, panoptic_json)
        # The "separated" version of COCO panoptic segmentation dataset,
        # e.g. used by Panoptic FPN. The semantic annotations are converted
        # from panoptic ones (see detectron2/datasets/prepare_panoptic_fpn.py).
        register_coco_panoptic_separated(
            prefix,
            _get_builtin_metadata("coco_panoptic_separated"),
            image_root,
            panoptic_root_abs,
            panoptic_json_abs,
            os.path.join(root, semantic_root),
            instances_json,
        )
        # The "standard" version of COCO panoptic segmentation dataset,
        # e.g. used by Panoptic-DeepLab.
        register_coco_panoptic(
            prefix,
            _get_builtin_metadata("coco_panoptic_standard"),
            image_root,
            panoptic_root_abs,
            panoptic_json_abs,
            instances_json,
        )
140
+
141
+
142
# ==== Predefined datasets and splits for LVIS ==========


# Maps LVIS version name -> {split name: (image root, annotation json)},
# with paths relative to the dataset root. LVIS reuses the COCO images.
_PREDEFINED_SPLITS_LVIS = {
    "lvis_v1": {
        "lvis_v1_train": ("coco/", "lvis/lvis_v1_train.json"),
        "lvis_v1_val": ("coco/", "lvis/lvis_v1_val.json"),
        "lvis_v1_test_dev": ("coco/", "lvis/lvis_v1_image_info_test_dev.json"),
        "lvis_v1_test_challenge": ("coco/", "lvis/lvis_v1_image_info_test_challenge.json"),
    },
    "lvis_v0.5": {
        "lvis_v0.5_train": ("coco/", "lvis/lvis_v0.5_train.json"),
        "lvis_v0.5_val": ("coco/", "lvis/lvis_v0.5_val.json"),
        "lvis_v0.5_val_rand_100": ("coco/", "lvis/lvis_v0.5_val_rand_100.json"),
        "lvis_v0.5_test": ("coco/", "lvis/lvis_v0.5_image_info_test.json"),
    },
    "lvis_v0.5_cocofied": {
        "lvis_v0.5_train_cocofied": ("coco/", "lvis/lvis_v0.5_train_cocofied.json"),
        "lvis_v0.5_val_cocofied": ("coco/", "lvis/lvis_v0.5_val_cocofied.json"),
    },
}
163
+
164
+
165
def register_all_lvis(root):
    """Register all pre-defined LVIS annotation splits under ``root``."""
    for version, splits in _PREDEFINED_SPLITS_LVIS.items():
        for name, (image_root, json_file) in splits.items():
            # Keep URI annotation paths as-is; join local ones with the root.
            if "://" in json_file:
                json_path = json_file
            else:
                json_path = os.path.join(root, json_file)
            register_lvis_instances(
                name,
                get_lvis_instances_meta(version),
                json_path,
                os.path.join(root, image_root),
            )
174
+
175
+
176
# ==== Predefined splits for raw cityscapes images ===========
# The "{task}" placeholder is filled in by register_all_cityscapes with
# "instance_seg" or "sem_seg"; values are (image dir, fine-annotation dir)
# relative to the dataset root.
_RAW_CITYSCAPES_SPLITS = {
    "cityscapes_fine_{task}_train": ("cityscapes/leftImg8bit/train/", "cityscapes/gtFine/train/"),
    "cityscapes_fine_{task}_val": ("cityscapes/leftImg8bit/val/", "cityscapes/gtFine/val/"),
    "cityscapes_fine_{task}_test": ("cityscapes/leftImg8bit/test/", "cityscapes/gtFine/test/"),
}
182
+
183
+
184
def register_all_cityscapes(root):
    """
    Register the raw Cityscapes instance- and semantic-segmentation splits.

    Args:
        root (str): directory that contains the "cityscapes/" folder.
    """
    for key, (image_dir, gt_dir) in _RAW_CITYSCAPES_SPLITS.items():
        meta = _get_builtin_metadata("cityscapes")
        image_dir = os.path.join(root, image_dir)
        gt_dir = os.path.join(root, gt_dir)

        inst_key = key.format(task="instance_seg")
        # Bind image_dir/gt_dir as lambda default arguments so each lambda
        # captures the *current* loop values instead of the last iteration's
        # (Python closures are late-binding).
        DatasetCatalog.register(
            inst_key,
            lambda x=image_dir, y=gt_dir: load_cityscapes_instances(
                x, y, from_json=True, to_polygons=True
            ),
        )
        MetadataCatalog.get(inst_key).set(
            image_dir=image_dir, gt_dir=gt_dir, evaluator_type="cityscapes_instance", **meta
        )

        sem_key = key.format(task="sem_seg")
        DatasetCatalog.register(
            sem_key, lambda x=image_dir, y=gt_dir: load_cityscapes_semantic(x, y)
        )
        MetadataCatalog.get(sem_key).set(
            image_dir=image_dir,
            gt_dir=gt_dir,
            evaluator_type="cityscapes_sem_seg",
            ignore_label=255,
            **meta,
        )
212
+
213
+
214
+ # ==== Predefined splits for PASCAL VOC ===========
215
def register_all_pascal_voc(root):
    """Register the standard PASCAL VOC 2007/2012 detection splits under ``root``."""
    for year in ("2007", "2012"):
        for split in ("trainval", "train", "val", "test"):
            if year == "2012" and split == "test":
                # No VOC2012 test split is registered here.
                continue
            name = f"voc_{year}_{split}"
            register_pascal_voc(name, os.path.join(root, f"VOC{year}"), split, int(year))
            MetadataCatalog.get(name).evaluator_type = "pascal_voc"
229
+
230
+
231
def register_all_ade20k(root):
    """Register the ADE20K semantic-segmentation train/val splits under ``root``."""
    base = os.path.join(root, "ADEChallengeData2016")
    for split, subdir in (("train", "training"), ("val", "validation")):
        image_dir = os.path.join(base, "images", subdir)
        gt_dir = os.path.join(base, "annotations_detectron2", subdir)
        key = f"ade20k_sem_seg_{split}"
        # Default-argument binding freezes the current loop values in the lambda.
        DatasetCatalog.register(
            key, lambda x=image_dir, y=gt_dir: load_sem_seg(y, x, gt_ext="png", image_ext="jpg")
        )
        MetadataCatalog.get(key).set(
            stuff_classes=list(ADE20K_SEM_SEG_CATEGORIES),
            image_root=image_dir,
            sem_seg_root=gt_dir,
            evaluator_type="sem_seg",
            ignore_label=255,
        )
247
+
248
+
249
# True for open source;
# Internally at fb, we register them elsewhere
if __name__.endswith(".builtin"):
    # Registration happens as an import side effect of this module.
    # Assume pre-defined datasets live in `./datasets` unless overridden
    # by the DETECTRON2_DATASETS environment variable.
    _root = os.path.expanduser(os.getenv("DETECTRON2_DATASETS", "datasets"))
    register_all_coco(_root)
    register_all_lvis(_root)
    register_all_cityscapes(_root)
    register_all_cityscapes_panoptic(_root)
    register_all_pascal_voc(_root)
    register_all_ade20k(_root)
detectron2/data/datasets/builtin_meta.py ADDED
@@ -0,0 +1,350 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright (c) Facebook, Inc. and its affiliates.
3
+
4
+ """
5
+ Note:
6
+ For your custom dataset, there is no need to hard-code metadata anywhere in the code.
7
+ For example, for COCO-format dataset, metadata will be obtained automatically
8
+ when calling `load_coco_json`. For other dataset, metadata may also be obtained in other ways
9
+ during loading.
10
+
11
+ However, we hard-coded metadata for a few common dataset here.
12
+ The only goal is to allow users who don't have these dataset to use pre-trained models.
13
+ Users don't have to download a COCO json (which contains metadata), in order to visualize a
14
+ COCO model (with correct class names and colors).
15
+ """
16
+
17
+
18
+ # All coco categories, together with their nice-looking visualization colors
19
+ # It's from https://github.com/cocodataset/panopticapi/blob/master/panoptic_coco_categories.json
20
+ COCO_CATEGORIES = [
21
+ {"color": [220, 20, 60], "isthing": 1, "id": 1, "name": "person"},
22
+ {"color": [119, 11, 32], "isthing": 1, "id": 2, "name": "bicycle"},
23
+ {"color": [0, 0, 142], "isthing": 1, "id": 3, "name": "car"},
24
+ {"color": [0, 0, 230], "isthing": 1, "id": 4, "name": "motorcycle"},
25
+ {"color": [106, 0, 228], "isthing": 1, "id": 5, "name": "airplane"},
26
+ {"color": [0, 60, 100], "isthing": 1, "id": 6, "name": "bus"},
27
+ {"color": [0, 80, 100], "isthing": 1, "id": 7, "name": "train"},
28
+ {"color": [0, 0, 70], "isthing": 1, "id": 8, "name": "truck"},
29
+ {"color": [0, 0, 192], "isthing": 1, "id": 9, "name": "boat"},
30
+ {"color": [250, 170, 30], "isthing": 1, "id": 10, "name": "traffic light"},
31
+ {"color": [100, 170, 30], "isthing": 1, "id": 11, "name": "fire hydrant"},
32
+ {"color": [220, 220, 0], "isthing": 1, "id": 13, "name": "stop sign"},
33
+ {"color": [175, 116, 175], "isthing": 1, "id": 14, "name": "parking meter"},
34
+ {"color": [250, 0, 30], "isthing": 1, "id": 15, "name": "bench"},
35
+ {"color": [165, 42, 42], "isthing": 1, "id": 16, "name": "bird"},
36
+ {"color": [255, 77, 255], "isthing": 1, "id": 17, "name": "cat"},
37
+ {"color": [0, 226, 252], "isthing": 1, "id": 18, "name": "dog"},
38
+ {"color": [182, 182, 255], "isthing": 1, "id": 19, "name": "horse"},
39
+ {"color": [0, 82, 0], "isthing": 1, "id": 20, "name": "sheep"},
40
+ {"color": [120, 166, 157], "isthing": 1, "id": 21, "name": "cow"},
41
+ {"color": [110, 76, 0], "isthing": 1, "id": 22, "name": "elephant"},
42
+ {"color": [174, 57, 255], "isthing": 1, "id": 23, "name": "bear"},
43
+ {"color": [199, 100, 0], "isthing": 1, "id": 24, "name": "zebra"},
44
+ {"color": [72, 0, 118], "isthing": 1, "id": 25, "name": "giraffe"},
45
+ {"color": [255, 179, 240], "isthing": 1, "id": 27, "name": "backpack"},
46
+ {"color": [0, 125, 92], "isthing": 1, "id": 28, "name": "umbrella"},
47
+ {"color": [209, 0, 151], "isthing": 1, "id": 31, "name": "handbag"},
48
+ {"color": [188, 208, 182], "isthing": 1, "id": 32, "name": "tie"},
49
+ {"color": [0, 220, 176], "isthing": 1, "id": 33, "name": "suitcase"},
50
+ {"color": [255, 99, 164], "isthing": 1, "id": 34, "name": "frisbee"},
51
+ {"color": [92, 0, 73], "isthing": 1, "id": 35, "name": "skis"},
52
+ {"color": [133, 129, 255], "isthing": 1, "id": 36, "name": "snowboard"},
53
+ {"color": [78, 180, 255], "isthing": 1, "id": 37, "name": "sports ball"},
54
+ {"color": [0, 228, 0], "isthing": 1, "id": 38, "name": "kite"},
55
+ {"color": [174, 255, 243], "isthing": 1, "id": 39, "name": "baseball bat"},
56
+ {"color": [45, 89, 255], "isthing": 1, "id": 40, "name": "baseball glove"},
57
+ {"color": [134, 134, 103], "isthing": 1, "id": 41, "name": "skateboard"},
58
+ {"color": [145, 148, 174], "isthing": 1, "id": 42, "name": "surfboard"},
59
+ {"color": [255, 208, 186], "isthing": 1, "id": 43, "name": "tennis racket"},
60
+ {"color": [197, 226, 255], "isthing": 1, "id": 44, "name": "bottle"},
61
+ {"color": [171, 134, 1], "isthing": 1, "id": 46, "name": "wine glass"},
62
+ {"color": [109, 63, 54], "isthing": 1, "id": 47, "name": "cup"},
63
+ {"color": [207, 138, 255], "isthing": 1, "id": 48, "name": "fork"},
64
+ {"color": [151, 0, 95], "isthing": 1, "id": 49, "name": "knife"},
65
+ {"color": [9, 80, 61], "isthing": 1, "id": 50, "name": "spoon"},
66
+ {"color": [84, 105, 51], "isthing": 1, "id": 51, "name": "bowl"},
67
+ {"color": [74, 65, 105], "isthing": 1, "id": 52, "name": "banana"},
68
+ {"color": [166, 196, 102], "isthing": 1, "id": 53, "name": "apple"},
69
+ {"color": [208, 195, 210], "isthing": 1, "id": 54, "name": "sandwich"},
70
+ {"color": [255, 109, 65], "isthing": 1, "id": 55, "name": "orange"},
71
+ {"color": [0, 143, 149], "isthing": 1, "id": 56, "name": "broccoli"},
72
+ {"color": [179, 0, 194], "isthing": 1, "id": 57, "name": "carrot"},
73
+ {"color": [209, 99, 106], "isthing": 1, "id": 58, "name": "hot dog"},
74
+ {"color": [5, 121, 0], "isthing": 1, "id": 59, "name": "pizza"},
75
+ {"color": [227, 255, 205], "isthing": 1, "id": 60, "name": "donut"},
76
+ {"color": [147, 186, 208], "isthing": 1, "id": 61, "name": "cake"},
77
+ {"color": [153, 69, 1], "isthing": 1, "id": 62, "name": "chair"},
78
+ {"color": [3, 95, 161], "isthing": 1, "id": 63, "name": "couch"},
79
+ {"color": [163, 255, 0], "isthing": 1, "id": 64, "name": "potted plant"},
80
+ {"color": [119, 0, 170], "isthing": 1, "id": 65, "name": "bed"},
81
+ {"color": [0, 182, 199], "isthing": 1, "id": 67, "name": "dining table"},
82
+ {"color": [0, 165, 120], "isthing": 1, "id": 70, "name": "toilet"},
83
+ {"color": [183, 130, 88], "isthing": 1, "id": 72, "name": "tv"},
84
+ {"color": [95, 32, 0], "isthing": 1, "id": 73, "name": "laptop"},
85
+ {"color": [130, 114, 135], "isthing": 1, "id": 74, "name": "mouse"},
86
+ {"color": [110, 129, 133], "isthing": 1, "id": 75, "name": "remote"},
87
+ {"color": [166, 74, 118], "isthing": 1, "id": 76, "name": "keyboard"},
88
+ {"color": [219, 142, 185], "isthing": 1, "id": 77, "name": "cell phone"},
89
+ {"color": [79, 210, 114], "isthing": 1, "id": 78, "name": "microwave"},
90
+ {"color": [178, 90, 62], "isthing": 1, "id": 79, "name": "oven"},
91
+ {"color": [65, 70, 15], "isthing": 1, "id": 80, "name": "toaster"},
92
+ {"color": [127, 167, 115], "isthing": 1, "id": 81, "name": "sink"},
93
+ {"color": [59, 105, 106], "isthing": 1, "id": 82, "name": "refrigerator"},
94
+ {"color": [142, 108, 45], "isthing": 1, "id": 84, "name": "book"},
95
+ {"color": [196, 172, 0], "isthing": 1, "id": 85, "name": "clock"},
96
+ {"color": [95, 54, 80], "isthing": 1, "id": 86, "name": "vase"},
97
+ {"color": [128, 76, 255], "isthing": 1, "id": 87, "name": "scissors"},
98
+ {"color": [201, 57, 1], "isthing": 1, "id": 88, "name": "teddy bear"},
99
+ {"color": [246, 0, 122], "isthing": 1, "id": 89, "name": "hair drier"},
100
+ {"color": [191, 162, 208], "isthing": 1, "id": 90, "name": "toothbrush"},
101
+ {"color": [255, 255, 128], "isthing": 0, "id": 92, "name": "banner"},
102
+ {"color": [147, 211, 203], "isthing": 0, "id": 93, "name": "blanket"},
103
+ {"color": [150, 100, 100], "isthing": 0, "id": 95, "name": "bridge"},
104
+ {"color": [168, 171, 172], "isthing": 0, "id": 100, "name": "cardboard"},
105
+ {"color": [146, 112, 198], "isthing": 0, "id": 107, "name": "counter"},
106
+ {"color": [210, 170, 100], "isthing": 0, "id": 109, "name": "curtain"},
107
+ {"color": [92, 136, 89], "isthing": 0, "id": 112, "name": "door-stuff"},
108
+ {"color": [218, 88, 184], "isthing": 0, "id": 118, "name": "floor-wood"},
109
+ {"color": [241, 129, 0], "isthing": 0, "id": 119, "name": "flower"},
110
+ {"color": [217, 17, 255], "isthing": 0, "id": 122, "name": "fruit"},
111
+ {"color": [124, 74, 181], "isthing": 0, "id": 125, "name": "gravel"},
112
+ {"color": [70, 70, 70], "isthing": 0, "id": 128, "name": "house"},
113
+ {"color": [255, 228, 255], "isthing": 0, "id": 130, "name": "light"},
114
+ {"color": [154, 208, 0], "isthing": 0, "id": 133, "name": "mirror-stuff"},
115
+ {"color": [193, 0, 92], "isthing": 0, "id": 138, "name": "net"},
116
+ {"color": [76, 91, 113], "isthing": 0, "id": 141, "name": "pillow"},
117
+ {"color": [255, 180, 195], "isthing": 0, "id": 144, "name": "platform"},
118
+ {"color": [106, 154, 176], "isthing": 0, "id": 145, "name": "playingfield"},
119
+ {"color": [230, 150, 140], "isthing": 0, "id": 147, "name": "railroad"},
120
+ {"color": [60, 143, 255], "isthing": 0, "id": 148, "name": "river"},
121
+ {"color": [128, 64, 128], "isthing": 0, "id": 149, "name": "road"},
122
+ {"color": [92, 82, 55], "isthing": 0, "id": 151, "name": "roof"},
123
+ {"color": [254, 212, 124], "isthing": 0, "id": 154, "name": "sand"},
124
+ {"color": [73, 77, 174], "isthing": 0, "id": 155, "name": "sea"},
125
+ {"color": [255, 160, 98], "isthing": 0, "id": 156, "name": "shelf"},
126
+ {"color": [255, 255, 255], "isthing": 0, "id": 159, "name": "snow"},
127
+ {"color": [104, 84, 109], "isthing": 0, "id": 161, "name": "stairs"},
128
+ {"color": [169, 164, 131], "isthing": 0, "id": 166, "name": "tent"},
129
+ {"color": [225, 199, 255], "isthing": 0, "id": 168, "name": "towel"},
130
+ {"color": [137, 54, 74], "isthing": 0, "id": 171, "name": "wall-brick"},
131
+ {"color": [135, 158, 223], "isthing": 0, "id": 175, "name": "wall-stone"},
132
+ {"color": [7, 246, 231], "isthing": 0, "id": 176, "name": "wall-tile"},
133
+ {"color": [107, 255, 200], "isthing": 0, "id": 177, "name": "wall-wood"},
134
+ {"color": [58, 41, 149], "isthing": 0, "id": 178, "name": "water-other"},
135
+ {"color": [183, 121, 142], "isthing": 0, "id": 180, "name": "window-blind"},
136
+ {"color": [255, 73, 97], "isthing": 0, "id": 181, "name": "window-other"},
137
+ {"color": [107, 142, 35], "isthing": 0, "id": 184, "name": "tree-merged"},
138
+ {"color": [190, 153, 153], "isthing": 0, "id": 185, "name": "fence-merged"},
139
+ {"color": [146, 139, 141], "isthing": 0, "id": 186, "name": "ceiling-merged"},
140
+ {"color": [70, 130, 180], "isthing": 0, "id": 187, "name": "sky-other-merged"},
141
+ {"color": [134, 199, 156], "isthing": 0, "id": 188, "name": "cabinet-merged"},
142
+ {"color": [209, 226, 140], "isthing": 0, "id": 189, "name": "table-merged"},
143
+ {"color": [96, 36, 108], "isthing": 0, "id": 190, "name": "floor-other-merged"},
144
+ {"color": [96, 96, 96], "isthing": 0, "id": 191, "name": "pavement-merged"},
145
+ {"color": [64, 170, 64], "isthing": 0, "id": 192, "name": "mountain-merged"},
146
+ {"color": [152, 251, 152], "isthing": 0, "id": 193, "name": "grass-merged"},
147
+ {"color": [208, 229, 228], "isthing": 0, "id": 194, "name": "dirt-merged"},
148
+ {"color": [206, 186, 171], "isthing": 0, "id": 195, "name": "paper-merged"},
149
+ {"color": [152, 161, 64], "isthing": 0, "id": 196, "name": "food-other-merged"},
150
+ {"color": [116, 112, 0], "isthing": 0, "id": 197, "name": "building-other-merged"},
151
+ {"color": [0, 114, 143], "isthing": 0, "id": 198, "name": "rock-merged"},
152
+ {"color": [102, 102, 156], "isthing": 0, "id": 199, "name": "wall-other-merged"},
153
+ {"color": [250, 141, 255], "isthing": 0, "id": 200, "name": "rug-merged"},
154
+ ]
155
+
156
# The 17 COCO person keypoints: "nose" followed by a left/right pair for each
# symmetric body part, in COCO annotation order.
COCO_PERSON_KEYPOINT_NAMES = ("nose",) + tuple(
    side + part
    for part in ("eye", "ear", "shoulder", "elbow", "wrist", "hip", "knee", "ankle")
    for side in ("left_", "right_")
)
+
170
# Pairs of keypoints that should be exchanged under horizontal flipping.
# Every left_* keypoint swaps with its right_* mirror counterpart; "nose"
# has no pair and therefore does not appear here.
COCO_PERSON_KEYPOINT_FLIP_MAP = tuple(
    ("left_" + part, "right_" + part)
    for part in ("eye", "ear", "shoulder", "elbow", "wrist", "hip", "knee", "ankle")
)
+
182
+ # rules for pairs of keypoints to draw a line between, and the line color to use.
183
+ KEYPOINT_CONNECTION_RULES = [
184
+ # face
185
+ ("left_ear", "left_eye", (102, 204, 255)),
186
+ ("right_ear", "right_eye", (51, 153, 255)),
187
+ ("left_eye", "nose", (102, 0, 204)),
188
+ ("nose", "right_eye", (51, 102, 255)),
189
+ # upper-body
190
+ ("left_shoulder", "right_shoulder", (255, 128, 0)),
191
+ ("left_shoulder", "left_elbow", (153, 255, 204)),
192
+ ("right_shoulder", "right_elbow", (128, 229, 255)),
193
+ ("left_elbow", "left_wrist", (153, 255, 153)),
194
+ ("right_elbow", "right_wrist", (102, 255, 224)),
195
+ # lower-body
196
+ ("left_hip", "right_hip", (255, 102, 0)),
197
+ ("left_hip", "left_knee", (255, 255, 77)),
198
+ ("right_hip", "right_knee", (153, 255, 204)),
199
+ ("left_knee", "left_ankle", (191, 255, 128)),
200
+ ("right_knee", "right_ankle", (255, 195, 77)),
201
+ ]
202
+
203
# All Cityscapes categories, together with their nice-looking visualization colors
# It's from https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/helpers/labels.py # noqa
# Stored compactly as (color, isthing, dataset id, name) rows; "trainId" is the
# row's position in the table, which is contiguous in [0, 19).
CITYSCAPES_CATEGORIES = [
    {"color": color, "isthing": isthing, "id": label_id, "trainId": train_id, "name": name}
    for train_id, (color, isthing, label_id, name) in enumerate(
        [
            ((128, 64, 128), 0, 7, "road"),
            ((244, 35, 232), 0, 8, "sidewalk"),
            ((70, 70, 70), 0, 11, "building"),
            ((102, 102, 156), 0, 12, "wall"),
            ((190, 153, 153), 0, 13, "fence"),
            ((153, 153, 153), 0, 17, "pole"),
            ((250, 170, 30), 0, 19, "traffic light"),
            ((220, 220, 0), 0, 20, "traffic sign"),
            ((107, 142, 35), 0, 21, "vegetation"),
            ((152, 251, 152), 0, 22, "terrain"),
            ((70, 130, 180), 0, 23, "sky"),
            ((220, 20, 60), 1, 24, "person"),
            ((255, 0, 0), 1, 25, "rider"),
            ((0, 0, 142), 1, 26, "car"),
            ((0, 0, 70), 1, 27, "truck"),
            ((0, 60, 100), 1, 28, "bus"),
            ((0, 80, 100), 1, 31, "train"),
            ((0, 0, 230), 1, 32, "motorcycle"),
            ((119, 11, 32), 1, 33, "bicycle"),
        ]
    )
]
+
227
# fmt: off
# ADE20K semantic-segmentation class names; the position of a name in this list
# is presumably its contiguous training id — TODO confirm against
# `prepare_ade20k_sem_seg.py`.
# NOTE(review): "window " keeps a trailing space as written — verify whether
# downstream consumers depend on it before normalizing.
ADE20K_SEM_SEG_CATEGORIES = [
    "wall", "building", "sky", "floor", "tree", "ceiling", "road, route", "bed", "window ", "grass", "cabinet", "sidewalk, pavement", "person", "earth, ground", "door", "table", "mountain, mount", "plant", "curtain", "chair", "car", "water", "painting, picture", "sofa", "shelf", "house", "sea", "mirror", "rug", "field", "armchair", "seat", "fence", "desk", "rock, stone", "wardrobe, closet, press", "lamp", "tub", "rail", "cushion", "base, pedestal, stand", "box", "column, pillar", "signboard, sign", "chest of drawers, chest, bureau, dresser", "counter", "sand", "sink", "skyscraper", "fireplace", "refrigerator, icebox", "grandstand, covered stand", "path", "stairs", "runway", "case, display case, showcase, vitrine", "pool table, billiard table, snooker table", "pillow", "screen door, screen", "stairway, staircase", "river", "bridge, span", "bookcase", "blind, screen", "coffee table", "toilet, can, commode, crapper, pot, potty, stool, throne", "flower", "book", "hill", "bench", "countertop", "stove", "palm, palm tree", "kitchen island", "computer", "swivel chair", "boat", "bar", "arcade machine", "hovel, hut, hutch, shack, shanty", "bus", "towel", "light", "truck", "tower", "chandelier", "awning, sunshade, sunblind", "street lamp", "booth", "tv", "plane", "dirt track", "clothes", "pole", "land, ground, soil", "bannister, banister, balustrade, balusters, handrail", "escalator, moving staircase, moving stairway", "ottoman, pouf, pouffe, puff, hassock", "bottle", "buffet, counter, sideboard", "poster, posting, placard, notice, bill, card", "stage", "van", "ship", "fountain", "conveyer belt, conveyor belt, conveyer, conveyor, transporter", "canopy", "washer, automatic washer, washing machine", "plaything, toy", "pool", "stool", "barrel, cask", "basket, handbasket", "falls", "tent", "bag", "minibike, motorbike", "cradle", "oven", "ball", "food, solid food", "step, stair", "tank, storage tank", "trade name", "microwave", "pot", "animal", "bicycle", "lake", "dishwasher", "screen", "blanket, cover", "sculpture", "hood, exhaust hood", "sconce", "vase", "traffic light", "tray", "trash can", "fan", "pier", "crt screen", "plate", "monitor", "bulletin board", "shower", "radiator", "glass, drinking glass", "clock", "flag", # noqa
]
# After processed by `prepare_ade20k_sem_seg.py`, id 255 means ignore
# fmt: on
+
234
+
235
def _get_coco_instances_meta():
    """
    Return metadata for COCO instance detection: class names, visualization
    colors, and the mapping from the non-contiguous COCO category ids onto
    contiguous ids in [0, 79].
    """
    things = [cat for cat in COCO_CATEGORIES if cat["isthing"] == 1]
    thing_ids = [cat["id"] for cat in things]
    assert len(thing_ids) == 80, len(thing_ids)
    # Mapping from the incontiguous COCO category id to an id in [0, 79]
    return {
        "thing_dataset_id_to_contiguous_id": {
            dataset_id: contiguous_id for contiguous_id, dataset_id in enumerate(thing_ids)
        },
        "thing_classes": [cat["name"] for cat in things],
        "thing_colors": [cat["color"] for cat in things],
    }
+
249
+
250
def _get_coco_panoptic_separated_meta():
    """
    Returns metadata for "separated" version of the panoptic segmentation dataset.
    """
    stuff_cats = [cat for cat in COCO_CATEGORIES if cat["isthing"] == 0]
    stuff_ids = [cat["id"] for cat in stuff_cats]
    assert len(stuff_ids) == 53, len(stuff_ids)

    # For semantic segmentation: map dataset ids (used when processing results)
    # to contiguous stuff ids in [0, 53] (used in models).  Contiguous id 0 is
    # reserved for the extra "thing" category: when converting COCO panoptic
    # annotations to semantic annotations, every "thing" pixel is labeled 0.
    stuff_dataset_id_to_contiguous_id = {
        dataset_id: contiguous_id + 1 for contiguous_id, dataset_id in enumerate(stuff_ids)
    }
    stuff_dataset_id_to_contiguous_id[0] = 0

    # 54 display names: the "things" super-category plus the 53 stuff
    # categories, with the "-other"/"-merged" suffixes stripped.
    stuff_classes = ["things"] + [
        cat["name"].replace("-other", "").replace("-merged", "") for cat in stuff_cats
    ]

    # NOTE: the color for "things" was picked arbitrarily.
    stuff_colors = [[82, 18, 128]] + [cat["color"] for cat in stuff_cats]

    ret = {
        "stuff_dataset_id_to_contiguous_id": stuff_dataset_id_to_contiguous_id,
        "stuff_classes": stuff_classes,
        "stuff_colors": stuff_colors,
    }
    ret.update(_get_coco_instances_meta())
    return ret
+
282
+
283
+ def _get_builtin_metadata(dataset_name):
284
+ if dataset_name == "coco":
285
+ return _get_coco_instances_meta()
286
+ if dataset_name == "coco_panoptic_separated":
287
+ return _get_coco_panoptic_separated_meta()
288
+ elif dataset_name == "coco_panoptic_standard":
289
+ meta = {}
290
+ # The following metadata maps contiguous id from [0, #thing categories +
291
+ # #stuff categories) to their names and colors. We have to replica of the
292
+ # same name and color under "thing_*" and "stuff_*" because the current
293
+ # visualization function in D2 handles thing and class classes differently
294
+ # due to some heuristic used in Panoptic FPN. We keep the same naming to
295
+ # enable reusing existing visualization functions.
296
+ thing_classes = [k["name"] for k in COCO_CATEGORIES]
297
+ thing_colors = [k["color"] for k in COCO_CATEGORIES]
298
+ stuff_classes = [k["name"] for k in COCO_CATEGORIES]
299
+ stuff_colors = [k["color"] for k in COCO_CATEGORIES]
300
+
301
+ meta["thing_classes"] = thing_classes
302
+ meta["thing_colors"] = thing_colors
303
+ meta["stuff_classes"] = stuff_classes
304
+ meta["stuff_colors"] = stuff_colors
305
+
306
+ # Convert category id for training:
307
+ # category id: like semantic segmentation, it is the class id for each
308
+ # pixel. Since there are some classes not used in evaluation, the category
309
+ # id is not always contiguous and thus we have two set of category ids:
310
+ # - original category id: category id in the original dataset, mainly
311
+ # used for evaluation.
312
+ # - contiguous category id: [0, #classes), in order to train the linear
313
+ # softmax classifier.
314
+ thing_dataset_id_to_contiguous_id = {}
315
+ stuff_dataset_id_to_contiguous_id = {}
316
+
317
+ for i, cat in enumerate(COCO_CATEGORIES):
318
+ if cat["isthing"]:
319
+ thing_dataset_id_to_contiguous_id[cat["id"]] = i
320
+ else:
321
+ stuff_dataset_id_to_contiguous_id[cat["id"]] = i
322
+
323
+ meta["thing_dataset_id_to_contiguous_id"] = thing_dataset_id_to_contiguous_id
324
+ meta["stuff_dataset_id_to_contiguous_id"] = stuff_dataset_id_to_contiguous_id
325
+
326
+ return meta
327
+ elif dataset_name == "coco_person":
328
+ return {
329
+ "thing_classes": ["person"],
330
+ "keypoint_names": COCO_PERSON_KEYPOINT_NAMES,
331
+ "keypoint_flip_map": COCO_PERSON_KEYPOINT_FLIP_MAP,
332
+ "keypoint_connection_rules": KEYPOINT_CONNECTION_RULES,
333
+ }
334
+ elif dataset_name == "cityscapes":
335
+ # fmt: off
336
+ CITYSCAPES_THING_CLASSES = [
337
+ "person", "rider", "car", "truck",
338
+ "bus", "train", "motorcycle", "bicycle",
339
+ ]
340
+ CITYSCAPES_STUFF_CLASSES = [
341
+ "road", "sidewalk", "building", "wall", "fence", "pole", "traffic light",
342
+ "traffic sign", "vegetation", "terrain", "sky", "person", "rider", "car",
343
+ "truck", "bus", "train", "motorcycle", "bicycle",
344
+ ]
345
+ # fmt: on
346
+ return {
347
+ "thing_classes": CITYSCAPES_THING_CLASSES,
348
+ "stuff_classes": CITYSCAPES_STUFF_CLASSES,
349
+ }
350
+ raise KeyError("No built-in metadata for dataset {}".format(dataset_name))