b3h-young123 committed on
Commit
481ec5f
·
verified ·
1 Parent(s): 37508df

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Leffa/3rdparty/densepose/__init__.py +20 -0
  2. Leffa/3rdparty/densepose/config.py +277 -0
  3. Leffa/3rdparty/densepose/converters/__init__.py +15 -0
  4. Leffa/3rdparty/densepose/converters/base.py +93 -0
  5. Leffa/3rdparty/densepose/converters/builtin.py +31 -0
  6. Leffa/3rdparty/densepose/converters/chart_output_hflip.py +71 -0
  7. Leffa/3rdparty/densepose/converters/chart_output_to_chart_result.py +188 -0
  8. Leffa/3rdparty/densepose/converters/hflip.py +34 -0
  9. Leffa/3rdparty/densepose/converters/segm_to_mask.py +150 -0
  10. Leffa/3rdparty/densepose/converters/to_chart_result.py +70 -0
  11. Leffa/3rdparty/densepose/converters/to_mask.py +49 -0
  12. Leffa/3rdparty/densepose/engine/__init__.py +3 -0
  13. Leffa/3rdparty/densepose/engine/trainer.py +258 -0
  14. Leffa/3rdparty/densepose/modeling/__init__.py +13 -0
  15. Leffa/3rdparty/densepose/modeling/build.py +87 -0
  16. Leffa/3rdparty/densepose/modeling/confidence.py +73 -0
  17. Leffa/3rdparty/densepose/modeling/densepose_checkpoint.py +35 -0
  18. Leffa/3rdparty/densepose/modeling/filter.py +94 -0
  19. Leffa/3rdparty/densepose/modeling/hrfpn.py +182 -0
  20. Leffa/3rdparty/densepose/modeling/hrnet.py +474 -0
  21. Leffa/3rdparty/densepose/modeling/inference.py +44 -0
  22. Leffa/3rdparty/densepose/modeling/losses/__init__.py +14 -0
  23. Leffa/3rdparty/densepose/modeling/losses/chart.py +291 -0
  24. Leffa/3rdparty/densepose/modeling/losses/chart_with_confidences.py +209 -0
  25. Leffa/3rdparty/densepose/modeling/losses/cse.py +115 -0
  26. Leffa/3rdparty/densepose/modeling/losses/cycle_pix2shape.py +152 -0
  27. Leffa/3rdparty/densepose/modeling/losses/cycle_shape2shape.py +117 -0
  28. Leffa/3rdparty/densepose/modeling/losses/embed.py +119 -0
  29. Leffa/3rdparty/densepose/modeling/losses/embed_utils.py +137 -0
  30. Leffa/3rdparty/densepose/modeling/losses/mask.py +125 -0
  31. Leffa/3rdparty/densepose/modeling/losses/mask_or_segm.py +77 -0
  32. Leffa/3rdparty/densepose/modeling/losses/registry.py +5 -0
  33. Leffa/3rdparty/densepose/modeling/losses/soft_embed.py +133 -0
  34. Leffa/3rdparty/densepose/modeling/losses/utils.py +443 -0
  35. Leffa/3rdparty/densepose/modeling/predictors/__init__.py +9 -0
  36. Leffa/3rdparty/densepose/modeling/predictors/chart.py +94 -0
  37. Leffa/3rdparty/densepose/modeling/predictors/chart_confidence.py +174 -0
  38. Leffa/3rdparty/densepose/modeling/predictors/chart_with_confidence.py +15 -0
  39. Leffa/3rdparty/densepose/modeling/predictors/cse.py +70 -0
  40. Leffa/3rdparty/densepose/modeling/predictors/cse_confidence.py +115 -0
  41. Leffa/3rdparty/densepose/modeling/predictors/cse_with_confidence.py +15 -0
  42. Leffa/3rdparty/densepose/modeling/predictors/registry.py +5 -0
  43. Leffa/3rdparty/densepose/modeling/roi_heads/__init__.py +6 -0
  44. Leffa/3rdparty/densepose/modeling/roi_heads/deeplab.py +263 -0
  45. Leffa/3rdparty/densepose/modeling/roi_heads/registry.py +5 -0
  46. Leffa/3rdparty/densepose/modeling/roi_heads/roi_head.py +218 -0
  47. Leffa/3rdparty/densepose/modeling/roi_heads/v1convx.py +64 -0
  48. Leffa/3rdparty/densepose/modeling/test_time_augmentation.py +207 -0
  49. Leffa/3rdparty/densepose/modeling/utils.py +11 -0
  50. Leffa/3rdparty/densepose/utils/__init__.py +0 -0
Leffa/3rdparty/densepose/__init__.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ from .data.datasets import builtin # just to register data
3
+ from .converters import builtin as builtin_converters # register converters
4
+ from .config import (
5
+ add_densepose_config,
6
+ add_densepose_head_config,
7
+ add_hrnet_config,
8
+ add_dataset_category_config,
9
+ add_bootstrap_config,
10
+ load_bootstrap_config,
11
+ )
12
+ from .structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData
13
+ from .evaluation import DensePoseCOCOEvaluator
14
+ from .modeling.roi_heads import DensePoseROIHeads
15
+ from .modeling.test_time_augmentation import (
16
+ DensePoseGeneralizedRCNNWithTTA,
17
+ DensePoseDatasetMapperTTA,
18
+ )
19
+ from .utils.transform import load_from_cfg
20
+ from .modeling.hrfpn import build_hrfpn_backbone
Leffa/3rdparty/densepose/config.py ADDED
@@ -0,0 +1,277 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright (c) Facebook, Inc. and its affiliates.
3
+ # pyre-ignore-all-errors
4
+
5
+ from detectron2.config import CfgNode as CN
6
+
7
+
8
def add_dataset_category_config(cfg: CN) -> None:
    """
    Register category-related dataset options on the given config:
    category whitelisting, category remapping, and the class-to-mesh
    name mapping.
    """
    datasets = cfg.DATASETS
    # open nodes: concrete keys are supplied per dataset in yaml configs
    datasets.CATEGORY_MAPS = CN(new_allowed=True)
    datasets.WHITELISTED_CATEGORIES = CN(new_allowed=True)
    # mapping from detection class to mesh name
    datasets.CLASS_TO_MESH_NAME_MAPPING = CN(new_allowed=True)
19
+
20
+
21
def add_evaluation_config(cfg: CN) -> None:
    """Register DensePose-specific evaluation options."""
    evaluation = CN()
    # evaluator type, possible values:
    # - "iou": evaluator for models that produce iou data
    # - "cse": evaluator for models that produce cse data
    evaluation.TYPE = "iou"
    # storage for DensePose results, possible values:
    # - "none": everything kept in the predictions dictionary
    #           (memory intensive; historically the default)
    # - "ram":  per-process RAM storage, reduced to a single process
    #           on later stages; less memory intensive
    # - "file": per-process file-based storage; least memory intensive,
    #           but may bottleneck on file system accesses
    evaluation.STORAGE = "none"
    # minimum IOU threshold: the lower it is, the more matches are
    # produced (and the higher the AP score)
    evaluation.MIN_IOU_THRESHOLD = 0.5
    # non-distributed inference is slower but can avoid RAM OOM
    evaluation.DISTRIBUTED_INFERENCE = True
    # evaluate mesh alignment based on vertex embeddings (CSE only)
    evaluation.EVALUATE_MESH_ALIGNMENT = False
    # meshes to compute mesh alignment for
    evaluation.MESH_ALIGNMENT_MESH_NAMES = []
    cfg.DENSEPOSE_EVALUATION = evaluation
48
+
49
+
50
def add_bootstrap_config(cfg: CN) -> None:
    """Register options that control bootstrapped (self-training) datasets."""
    cfg.BOOTSTRAP_DATASETS = []
    bootstrap_model = CN()
    # checkpoint used to produce pseudo-labels
    bootstrap_model.WEIGHTS = ""
    bootstrap_model.DEVICE = "cuda"
    cfg.BOOTSTRAP_MODEL = bootstrap_model
57
+
58
+
59
def get_bootstrap_dataset_config() -> CN:
    """
    Return the default config node for a single bootstrap dataset entry:
    dataset name, loader-mixing ratio, image loading, inference batching,
    data sampling and filtering.
    """
    node = CN()
    node.DATASET = ""
    # ratio used to mix data loaders
    node.RATIO = 0.1

    # image loader
    image_loader = CN(new_allowed=True)
    image_loader.TYPE = ""
    image_loader.BATCH_SIZE = 4
    image_loader.NUM_WORKERS = 4
    image_loader.CATEGORIES = []
    image_loader.MAX_COUNT_PER_CATEGORY = 1_000_000
    image_loader.CATEGORY_TO_CLASS_MAPPING = CN(new_allowed=True)
    node.IMAGE_LOADER = image_loader

    # inference
    inference = CN()
    # batch size for model inputs
    inference.INPUT_BATCH_SIZE = 4
    # batch size to group model outputs
    inference.OUTPUT_BATCH_SIZE = 2
    node.INFERENCE = inference

    # sampled data
    data_sampler = CN(new_allowed=True)
    data_sampler.TYPE = ""
    data_sampler.USE_GROUND_TRUTH_CATEGORIES = False
    node.DATA_SAMPLER = data_sampler

    # filter
    data_filter = CN(new_allowed=True)
    data_filter.TYPE = ""
    node.FILTER = data_filter

    return node
86
+
87
+
88
def load_bootstrap_config(cfg: CN) -> None:
    """
    Normalize bootstrap dataset entries.

    Bootstrap datasets are given as a list of `dict` that are not
    automatically converted into CfgNode; turn each entry into a CfgNode
    merged on top of the default bootstrap dataset config so all entries
    comply with the specification.
    """
    if not cfg.BOOTSTRAP_DATASETS:
        return

    def _to_cfgnode(entry) -> CN:
        # start from the defaults so that missing keys are filled in
        node = get_bootstrap_dataset_config().clone()
        node.merge_from_other_cfg(CN(entry))
        return node

    cfg.BOOTSTRAP_DATASETS = [_to_cfgnode(entry) for entry in cfg.BOOTSTRAP_DATASETS]
103
+
104
+
105
def add_densepose_head_cse_config(cfg: CN) -> None:
    """
    Register configuration options for Continuous Surface Embeddings (CSE).
    """
    cse = CN()
    # dimensionality D of the embedding space
    cse.EMBED_SIZE = 16
    # embedder specifications for various mesh IDs
    cse.EMBEDDERS = CN(new_allowed=True)
    # normalization coefficient for embedding distances
    cse.EMBEDDING_DIST_GAUSS_SIGMA = 0.01
    # normalization coefficient for geodesic distances
    cse.GEODESIC_DIST_GAUSS_SIGMA = 0.01
    # embedding loss weight
    cse.EMBED_LOSS_WEIGHT = 0.6
    # embedding loss name, currently supported options:
    # - EmbeddingLoss: cross-entropy on vertex labels
    # - SoftEmbeddingLoss: cross-entropy on vertex labels combined with
    #   a Gaussian penalty on the distance between vertices
    cse.EMBED_LOSS_NAME = "EmbeddingLoss"
    # optimizer hyperparameters
    cse.FEATURES_LR_FACTOR = 1.0
    cse.EMBEDDING_LR_FACTOR = 1.0

    # shape-to-shape cycle consistency loss
    shape_cycle = CN({"ENABLED": False})
    shape_cycle.WEIGHT = 0.025
    # norm type used for loss computation
    shape_cycle.NORM_P = 2
    # normalization term for embedding similarity matrices
    shape_cycle.TEMPERATURE = 0.05
    # maximum number of vertices to include into the loss:
    # <= 0 means all vertices, > 0 means a random subset of that size
    shape_cycle.MAX_NUM_VERTICES = 4936
    cse.SHAPE_TO_SHAPE_CYCLE_LOSS = shape_cycle

    # pixel-to-shape cycle consistency loss
    pix_cycle = CN({"ENABLED": False})
    pix_cycle.WEIGHT = 0.0001
    # norm type used for loss computation
    pix_cycle.NORM_P = 2
    # map images to all meshes and back (if False, only gt meshes from the batch)
    pix_cycle.USE_ALL_MESHES_NOT_GT_ONLY = False
    # randomly select at most this many pixels per instance; <= 0 means all
    pix_cycle.NUM_PIXELS_TO_SAMPLE = 100
    # normalization factor for pixel-to-pixel distances
    # (higher value = smoother distribution)
    pix_cycle.PIXEL_SIGMA = 5.0
    pix_cycle.TEMPERATURE_PIXEL_TO_VERTEX = 0.05
    pix_cycle.TEMPERATURE_VERTEX_TO_PIXEL = 0.05
    cse.PIX_TO_SHAPE_CYCLE_LOSS = pix_cycle

    cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE = cse
156
+
157
+
158
def add_densepose_head_config(cfg: CN) -> None:
    """
    Register configuration options for the DensePose head.
    """
    cfg.MODEL.DENSEPOSE_ON = True

    head = CN()
    head.NAME = ""
    head.NUM_STACKED_CONVS = 8
    # number of parts used for point labels
    head.NUM_PATCHES = 24
    head.DECONV_KERNEL = 4
    head.CONV_HEAD_DIM = 512
    head.CONV_HEAD_KERNEL = 3
    head.UP_SCALE = 2
    head.HEATMAP_SIZE = 112
    head.POOLER_TYPE = "ROIAlignV2"
    head.POOLER_RESOLUTION = 28
    head.POOLER_SAMPLING_RATIO = 2
    head.NUM_COARSE_SEGM_CHANNELS = 2  # 15 or 2
    # overlap threshold for an RoI to be considered foreground
    # (if >= FG_IOU_THRESHOLD)
    head.FG_IOU_THRESHOLD = 0.7
    # loss weight for annotation masks (14 parts)
    head.INDEX_WEIGHTS = 5.0
    # loss weight for surface parts (24 parts)
    head.PART_WEIGHTS = 1.0
    # loss weight for UV regression
    head.POINT_REGRESSION_WEIGHTS = 0.01
    # coarse segmentation is trained using instance segmentation task data
    head.COARSE_SEGM_TRAINED_BY_MASKS = False
    # decoder options
    head.DECODER_ON = True
    head.DECODER_NUM_CLASSES = 256
    head.DECODER_CONV_DIMS = 256
    head.DECODER_NORM = ""
    head.DECODER_COMMON_STRIDE = 4
    # DeepLab head options
    head.DEEPLAB = CN()
    head.DEEPLAB.NORM = "GN"
    head.DEEPLAB.NONLOCAL_ON = 0
    # predictor class name, must be registered in DENSEPOSE_PREDICTOR_REGISTRY;
    # some registered predictors:
    # - "DensePoseChartPredictor": segmentation and UV coordinates
    #   for predefined charts
    # - "DensePoseChartWithConfidencePredictor": segmentation, UV coordinates
    #   and associated confidences for predefined charts (default)
    # - "DensePoseEmbeddingWithConfidencePredictor": segmentation, embeddings
    #   and associated confidences for CSE
    head.PREDICTOR_NAME = "DensePoseChartWithConfidencePredictor"
    # loss class name, must be registered in DENSEPOSE_LOSS_REGISTRY;
    # some registered losses:
    # - "DensePoseChartLoss": chart-based models estimating segmentation
    #   and UV coordinates
    # - "DensePoseChartWithConfidenceLoss": same, plus the corresponding
    #   confidences (default)
    head.LOSS_NAME = "DensePoseChartWithConfidenceLoss"
    # enable learning UV confidences (variances) along with the actual values
    head.UV_CONFIDENCE = CN({"ENABLED": False})
    # UV confidence lower bound
    head.UV_CONFIDENCE.EPSILON = 0.01
    # statistical model type for confidence learning, possible values:
    # - "iid_iso": statistically independent identically distributed
    #   residuals with isotropic covariance
    # - "indep_aniso": statistically independent residuals with
    #   anisotropic covariances
    head.UV_CONFIDENCE.TYPE = "iid_iso"
    # enable learning segmentation confidences (variances) with the values
    head.SEGM_CONFIDENCE = CN({"ENABLED": False})
    # segmentation confidence lower bound
    head.SEGM_CONFIDENCE.EPSILON = 0.01
    cfg.MODEL.ROI_DENSEPOSE_HEAD = head

    # list of angles for rotation in data augmentation during training
    cfg.INPUT.ROTATION_ANGLES = [0]
    cfg.TEST.AUG.ROTATION_ANGLES = ()  # rotation TTA

    add_densepose_head_cse_config(cfg)
235
+
236
+
237
def add_hrnet_config(cfg: CN) -> None:
    """
    Register configuration options for the HRNet backbone
    (HigherHRNet-w32 defaults).
    """
    hrnet = CN()
    hrnet.STEM_INPLANES = 64

    # per-stage spec: (num_modules, num_branches, num_blocks, num_channels)
    stage_specs = {
        "STAGE2": (1, 2, [4, 4], [32, 64]),
        "STAGE3": (4, 3, [4, 4, 4], [32, 64, 128]),
        "STAGE4": (3, 4, [4, 4, 4, 4], [32, 64, 128, 256]),
    }
    for stage_name, (num_modules, num_branches, num_blocks, num_channels) in stage_specs.items():
        stage = CN()
        stage.NUM_MODULES = num_modules
        stage.NUM_BRANCHES = num_branches
        stage.BLOCK = "BASIC"
        stage.NUM_BLOCKS = num_blocks
        stage.NUM_CHANNELS = num_channels
        stage.FUSE_METHOD = "SUM"
        setattr(hrnet, stage_name, stage)

    hrnet.HRFPN = CN()
    hrnet.HRFPN.OUT_CHANNELS = 256
    cfg.MODEL.HRNET = hrnet
270
+
271
+
272
def add_densepose_config(cfg: CN) -> None:
    """Register all DensePose configuration options on the given config."""
    for register in (
        add_densepose_head_config,
        add_hrnet_config,
        add_bootstrap_config,
        add_dataset_category_config,
        add_evaluation_config,
    ):
        register(cfg)
Leffa/3rdparty/densepose/converters/__init__.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from .hflip import HFlipConverter
4
+ from .to_mask import ToMaskConverter
5
+ from .to_chart_result import ToChartResultConverter, ToChartResultConverterWithConfidences
6
+ from .segm_to_mask import (
7
+ predictor_output_with_fine_and_coarse_segm_to_mask,
8
+ predictor_output_with_coarse_segm_to_mask,
9
+ resample_fine_and_coarse_segm_to_bbox,
10
+ )
11
+ from .chart_output_to_chart_result import (
12
+ densepose_chart_predictor_output_to_result,
13
+ densepose_chart_predictor_output_to_result_with_confidences,
14
+ )
15
+ from .chart_output_hflip import densepose_chart_predictor_output_hflip
Leffa/3rdparty/densepose/converters/base.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from typing import Any, Tuple, Type
4
+ import torch
5
+
6
+
7
class BaseConverter:
    """
    Base class for data converters.

    A converter maps instances of various source types to one destination
    type. Each source type registers its own conversion callable; a
    registration is inherited by all descendants of the registered type.
    Subclasses declare the class attributes `registry` (dict) and
    `dst_type`.
    """

    @classmethod
    def register(cls, from_type: Type, converter: Any = None):
        """
        Register a conversion callable for the specified source type.

        Can be invoked directly with a callable, or used as a decorator
        (when `converter` is None).

        Args:
            from_type (type): type to register the converter for; all
                instances of this type (and its subclasses) use it
            converter (callable): converter to be registered for the given
                type; if None, the returned wrapper acts as a decorator
        """
        if converter is not None:
            cls._do_register(from_type, converter)

        def decorator(fn: Any) -> Any:
            cls._do_register(from_type, fn)
            return fn

        return decorator

    @classmethod
    def _do_register(cls, from_type: Type, converter: Any):
        # `registry` is declared by subclasses
        cls.registry[from_type] = converter  # pyre-ignore[16]

    @classmethod
    def _lookup_converter(cls, from_type: Type) -> Any:
        """
        Recursively look up a converter for the given type, walking its
        base classes. A converter found on a base class is cached on
        `from_type` itself to save on further lookups.

        Args:
            from_type: type for which to find a converter
        Return:
            the registered conversion callable, or None if no suitable
            registry entry was found
        """
        registry = cls.registry  # pyre-ignore[16]
        if from_type in registry:
            return registry[from_type]
        for base in from_type.__bases__:
            found = cls._lookup_converter(base)
            if found is not None:
                cls._do_register(from_type, found)
                return found
        return None

    @classmethod
    def convert(cls, instance: Any, *args, **kwargs):
        """
        Convert `instance` to the destination type using the converter
        registered for its type. The lookup recurses over base classes,
        so derived classes need no explicit registration.

        Args:
            instance: source instance to convert to the destination type
        Return:
            An instance of the destination type obtained from the source
        Raises:
            KeyError: if no suitable converter was found
        """
        src_type = type(instance)
        converter = cls._lookup_converter(src_type)
        if converter is not None:
            return converter(instance, *args, **kwargs)
        dst = cls.dst_type  # pyre-ignore[16]
        target_desc = "itself" if dst is None else dst
        raise KeyError(f"Could not find converter from {src_type} to {target_desc}")
85
+
86
+
87
IntTupleBox = Tuple[int, int, int, int]


def make_int_box(box: torch.Tensor) -> IntTupleBox:
    """
    Convert a 4-element box tensor into a tuple of 4 Python ints
    (values are truncated towards zero via `Tensor.long`).
    """
    x0, y0, x1, y1 = box.long().tolist()
    return (x0, y0, x1, y1)
Leffa/3rdparty/densepose/converters/builtin.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from ..structures import DensePoseChartPredictorOutput, DensePoseEmbeddingPredictorOutput
4
+ from . import (
5
+ HFlipConverter,
6
+ ToChartResultConverter,
7
+ ToChartResultConverterWithConfidences,
8
+ ToMaskConverter,
9
+ densepose_chart_predictor_output_hflip,
10
+ densepose_chart_predictor_output_to_result,
11
+ densepose_chart_predictor_output_to_result_with_confidences,
12
+ predictor_output_with_coarse_segm_to_mask,
13
+ predictor_output_with_fine_and_coarse_segm_to_mask,
14
+ )
15
+
16
# Wire up the default converters: bind each predictor output type to the
# function that converts it (to masks, chart results, or a horizontally
# flipped output). Importing this module is sufficient to perform the
# registrations (see the package __init__, which imports it for that purpose).
ToMaskConverter.register(
    DensePoseChartPredictorOutput, predictor_output_with_fine_and_coarse_segm_to_mask
)
ToMaskConverter.register(
    DensePoseEmbeddingPredictorOutput, predictor_output_with_coarse_segm_to_mask
)

ToChartResultConverter.register(
    DensePoseChartPredictorOutput, densepose_chart_predictor_output_to_result
)

ToChartResultConverterWithConfidences.register(
    DensePoseChartPredictorOutput, densepose_chart_predictor_output_to_result_with_confidences
)

HFlipConverter.register(DensePoseChartPredictorOutput, densepose_chart_predictor_output_hflip)
Leffa/3rdparty/densepose/converters/chart_output_hflip.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ from dataclasses import fields
3
+ import torch
4
+
5
+ from densepose.structures import DensePoseChartPredictorOutput, DensePoseTransformData
6
+
7
+
8
def densepose_chart_predictor_output_hflip(
    densepose_predictor_output: DensePoseChartPredictorOutput,
    transform_data: DensePoseTransformData,
) -> DensePoseChartPredictorOutput:
    """
    Change to take into account a Horizontal flip:
    flip all tensor fields spatially, fix IUV semantics via the symmetry
    tables, fix coarse segmentation channel semantics, and return a new
    predictor output instance of the same type.
    """
    # empty outputs have nothing to flip
    if len(densepose_predictor_output) > 0:

        PredictorOutput = type(densepose_predictor_output)
        output_dict = {}

        # Spatially mirror every tensor field along the width axis (dim 3).
        # NOTE(review): this mutates the input dataclass in place before the
        # new instance is constructed below.
        for field in fields(densepose_predictor_output):
            field_value = getattr(densepose_predictor_output, field.name)
            # flip tensors
            if isinstance(field_value, torch.Tensor):
                setattr(densepose_predictor_output, field.name, torch.flip(field_value, [3]))

        # after the spatial flip, remap U/V values and permute part channels
        # so that left/right-symmetric parts swap
        densepose_predictor_output = _flip_iuv_semantics_tensor(
            densepose_predictor_output, transform_data
        )
        # permute coarse segmentation channels (only when part-based)
        densepose_predictor_output = _flip_segm_semantics_tensor(
            densepose_predictor_output, transform_data
        )

        # rebuild a fresh instance from the (mutated) field values
        for field in fields(densepose_predictor_output):
            output_dict[field.name] = getattr(densepose_predictor_output, field.name)

        return PredictorOutput(**output_dict)
    else:
        return densepose_predictor_output
39
+
40
+
41
def _flip_iuv_semantics_tensor(
    densepose_predictor_output: DensePoseChartPredictorOutput,
    dp_transform_data: DensePoseTransformData,
) -> DensePoseChartPredictorOutput:
    """
    Fix the semantics of (already spatially flipped) IUV tensors: remap
    U/V values through the precomputed symmetry lookup tables and permute
    part channels to their mirrored counterparts. Mutates
    `densepose_predictor_output` in place and returns it.
    """
    point_label_symmetries = dp_transform_data.point_label_symmetries
    uv_symmetries = dp_transform_data.uv_symmetries

    N, C, H, W = densepose_predictor_output.u.shape
    # quantize U/V into 256 bins to index the symmetry lookup tables;
    # channel 0 (background) is skipped
    u_loc = (densepose_predictor_output.u[:, 1:, :, :].clamp(0, 1) * 255).long()
    v_loc = (densepose_predictor_output.v[:, 1:, :, :].clamp(0, 1) * 255).long()
    # per-part channel index broadcast over batch and spatial dimensions
    Iindex = torch.arange(C - 1, device=densepose_predictor_output.u.device)[
        None, :, None, None
    ].expand(N, C - 1, H, W)
    # look up flipped U/V values; the tables are indexed by (part, v, u)
    densepose_predictor_output.u[:, 1:, :, :] = uv_symmetries["U_transforms"][Iindex, v_loc, u_loc]
    densepose_predictor_output.v[:, 1:, :, :] = uv_symmetries["V_transforms"][Iindex, v_loc, u_loc]

    # permute part channels so that left/right-symmetric parts swap
    for el in ["fine_segm", "u", "v"]:
        densepose_predictor_output.__dict__[el] = densepose_predictor_output.__dict__[el][
            :, point_label_symmetries, :, :
        ]
    return densepose_predictor_output
62
+
63
+
64
+ def _flip_segm_semantics_tensor(
65
+ densepose_predictor_output: DensePoseChartPredictorOutput, dp_transform_data
66
+ ):
67
+ if densepose_predictor_output.coarse_segm.shape[1] > 2:
68
+ densepose_predictor_output.coarse_segm = densepose_predictor_output.coarse_segm[
69
+ :, dp_transform_data.mask_label_symmetries, :, :
70
+ ]
71
+ return densepose_predictor_output
Leffa/3rdparty/densepose/converters/chart_output_to_chart_result.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from typing import Dict
4
+ import torch
5
+ from torch.nn import functional as F
6
+
7
+ from detectron2.structures.boxes import Boxes, BoxMode
8
+
9
+ from ..structures import (
10
+ DensePoseChartPredictorOutput,
11
+ DensePoseChartResult,
12
+ DensePoseChartResultWithConfidences,
13
+ )
14
+ from . import resample_fine_and_coarse_segm_to_bbox
15
+ from .base import IntTupleBox, make_int_box
16
+
17
+
18
def resample_uv_tensors_to_bbox(
    u: torch.Tensor,
    v: torch.Tensor,
    labels: torch.Tensor,
    box_xywh_abs: "IntTupleBox",
) -> torch.Tensor:
    """
    Resample U and V coordinate estimates to the given bounding box.

    Args:
        u (tensor [1, C, H, W] of float): U coordinates
        v (tensor [1, C, H, W] of float): V coordinates
        labels (tensor [H, W] of long): labels obtained by resampling
            segmentation outputs for the given bounding box
        box_xywh_abs (tuple of 4 int): absolute XYWH bounding box
    Return:
        tensor [2, H, W] of float with resampled U (channel 0) and
        V (channel 1), taken per pixel from the channel matching its label
    """
    _, _, w, h = box_xywh_abs
    h, w = max(int(h), 1), max(int(w), 1)
    u_resampled = F.interpolate(u, (h, w), mode="bilinear", align_corners=False)
    v_resampled = F.interpolate(v, (h, w), mode="bilinear", align_corners=False)
    uv = torch.zeros([2, h, w], dtype=torch.float32, device=u.device)
    num_channels = u_resampled.size(1)
    for channel, coords in enumerate((u_resampled, v_resampled)):
        # channel 0 is background: its pixels keep the zero fill
        for part_id in range(1, num_channels):
            part_mask = labels == part_id
            uv[channel][part_mask] = coords[0, part_id][part_mask]
    return uv


def resample_uv_to_bbox(
    predictor_output: "DensePoseChartPredictorOutput",
    labels: torch.Tensor,
    box_xywh_abs: "IntTupleBox",
) -> torch.Tensor:
    """
    Resample the U and V coordinate estimates of a predictor output to
    the given bounding box.

    Args:
        predictor_output (DensePoseChartPredictorOutput): DensePose predictor
            output holding the `u` and `v` tensors to be resampled
        labels (tensor [H, W] of long): labels obtained by resampling
            segmentation outputs for the given bounding box
        box_xywh_abs (tuple of 4 int): absolute XYWH bounding box
    Return:
        Resampled U and V coordinates - a tensor [2, H, W] of float
    """
    u, v = predictor_output.u, predictor_output.v
    return resample_uv_tensors_to_bbox(u, v, labels, box_xywh_abs)
+
72
+
73
def densepose_chart_predictor_output_to_result(
    predictor_output: DensePoseChartPredictorOutput, boxes: Boxes
) -> DensePoseChartResult:
    """
    Convert a DensePose chart predictor output to a chart result.

    Args:
        predictor_output (DensePoseChartPredictorOutput): DensePose predictor
            output to be converted to results, must contain only 1 output
        boxes (Boxes): bounding box that corresponds to the predictor output,
            must contain only 1 bounding box
    Return:
        DensePose chart-based result (DensePoseChartResult)
    """
    assert len(predictor_output) == 1 and len(boxes) == 1, (
        f"Predictor output to result conversion can operate only single outputs"
        f", got {len(predictor_output)} predictor outputs and {len(boxes)} boxes"
    )

    # convert the (single) box to integer absolute XYWH coordinates
    boxes_xywh_abs = BoxMode.convert(boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    box_xywh = make_int_box(boxes_xywh_abs[0])

    labels = resample_fine_and_coarse_segm_to_bbox(predictor_output, box_xywh).squeeze(0)
    return DensePoseChartResult(
        labels=labels,
        uv=resample_uv_to_bbox(predictor_output, labels, box_xywh),
    )
+
100
+
101
def resample_confidences_to_bbox(
    predictor_output: "DensePoseChartPredictorOutput",
    labels: torch.Tensor,
    box_xywh_abs: "IntTupleBox",
) -> Dict[str, torch.Tensor]:
    """
    Resamples confidences for the given bounding box

    Args:
        predictor_output (DensePoseChartPredictorOutput): DensePose predictor
            output to be resampled
        labels (tensor [H, W] of long): labels obtained by resampling segmentation
            outputs for the given bounding box
        box_xywh_abs (tuple of 4 int): bounding box that corresponds to predictor outputs
    Return:
        Resampled confidences - a dict of [H, W] tensors of float; entries for
        confidences absent from the predictor output are left as None
    """
    x, y, w, h = box_xywh_abs
    w = max(int(w), 1)
    h = max(int(h), 1)

    confidence_names = [
        "sigma_1",
        "sigma_2",
        "kappa_u",
        "kappa_v",
        "fine_segm_confidence",
        "coarse_segm_confidence",
    ]
    confidence_results = {key: None for key in confidence_names}
    # only resample the confidences that the predictor actually produced
    present_names = [
        key for key in confidence_names if getattr(predictor_output, key) is not None
    ]
    n_part_channels = predictor_output.u.size(1)
    confidence_base = torch.zeros([h, w], dtype=torch.float32, device=predictor_output.u.device)

    for key in present_names:
        resampled_confidence = F.interpolate(
            getattr(predictor_output, key),
            (h, w),
            mode="bilinear",
            align_corners=False,
        )
        # whether the confidence is part-based is a per-key invariant:
        # decide once instead of re-checking inside the per-part loop
        # (also avoids a wasted clone in the non-part-based case)
        if resampled_confidence.size(1) == n_part_channels:
            # part-based: assign data from the channel that corresponds
            # to each pixel's part label
            result = confidence_base.clone()
            for part_id in range(1, n_part_channels):
                result[labels == part_id] = resampled_confidence[0, part_id][labels == part_id]
        else:
            # confidence is not part-based, fill the data with the first channel
            # (targeted for segmentation confidences that have only 1 channel)
            result = resampled_confidence[0, 0]

        confidence_results[key] = result

    return confidence_results  # pyre-ignore[7]
+
161
+
162
def densepose_chart_predictor_output_to_result_with_confidences(
    predictor_output: DensePoseChartPredictorOutput, boxes: Boxes
) -> DensePoseChartResultWithConfidences:
    """
    Convert a DensePose chart predictor output (with confidences) to a
    chart result with confidences.

    Args:
        predictor_output (DensePoseChartPredictorOutput): DensePose predictor
            output with confidences to be converted to results, must contain
            only 1 output
        boxes (Boxes): bounding box that corresponds to the predictor output,
            must contain only 1 bounding box
    Return:
        DensePose chart-based result with confidences
        (DensePoseChartResultWithConfidences)
    """
    assert len(predictor_output) == 1 and len(boxes) == 1, (
        f"Predictor output to result conversion can operate only single outputs"
        f", got {len(predictor_output)} predictor outputs and {len(boxes)} boxes"
    )

    # convert the (single) box to integer absolute XYWH coordinates
    boxes_xywh_abs = BoxMode.convert(boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    box_xywh = make_int_box(boxes_xywh_abs[0])

    labels = resample_fine_and_coarse_segm_to_bbox(predictor_output, box_xywh).squeeze(0)
    uv = resample_uv_to_bbox(predictor_output, labels, box_xywh)
    confidences = resample_confidences_to_bbox(predictor_output, labels, box_xywh)
    return DensePoseChartResultWithConfidences(labels=labels, uv=uv, **confidences)
Leffa/3rdparty/densepose/converters/hflip.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from typing import Any
4
+
5
+ from .base import BaseConverter
6
+
7
+
8
+ class HFlipConverter(BaseConverter):
9
+ """
10
+ Converts various DensePose predictor outputs to DensePose results.
11
+ Each DensePose predictor output type has to register its convertion strategy.
12
+ """
13
+
14
+ registry = {}
15
+ dst_type = None
16
+
17
+ @classmethod
18
+ # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
19
+ # inconsistently.
20
+ def convert(cls, predictor_outputs: Any, transform_data: Any, *args, **kwargs):
21
+ """
22
+ Performs an horizontal flip on DensePose predictor outputs.
23
+ Does recursive lookup for base classes, so there's no need
24
+ for explicit registration for derived classes.
25
+
26
+ Args:
27
+ predictor_outputs: DensePose predictor output to be converted to BitMasks
28
+ transform_data: Anything useful for the flip
29
+ Return:
30
+ An instance of the same type as predictor_outputs
31
+ """
32
+ return super(HFlipConverter, cls).convert(
33
+ predictor_outputs, transform_data, *args, **kwargs
34
+ )
Leffa/3rdparty/densepose/converters/segm_to_mask.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from typing import Any
4
+ import torch
5
+ from torch.nn import functional as F
6
+
7
+ from detectron2.structures import BitMasks, Boxes, BoxMode
8
+
9
+ from .base import IntTupleBox, make_int_box
10
+ from .to_mask import ImageSizeType
11
+
12
+
13
+ def resample_coarse_segm_tensor_to_bbox(coarse_segm: torch.Tensor, box_xywh_abs: IntTupleBox):
14
+ """
15
+ Resample coarse segmentation tensor to the given
16
+ bounding box and derive labels for each pixel of the bounding box
17
+
18
+ Args:
19
+ coarse_segm: float tensor of shape [1, K, Hout, Wout]
20
+ box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
21
+ corner coordinates, width (W) and height (H)
22
+ Return:
23
+ Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
24
+ """
25
+ x, y, w, h = box_xywh_abs
26
+ w = max(int(w), 1)
27
+ h = max(int(h), 1)
28
+ labels = F.interpolate(coarse_segm, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
29
+ return labels
30
+
31
+
32
+ def resample_fine_and_coarse_segm_tensors_to_bbox(
33
+ fine_segm: torch.Tensor, coarse_segm: torch.Tensor, box_xywh_abs: IntTupleBox
34
+ ):
35
+ """
36
+ Resample fine and coarse segmentation tensors to the given
37
+ bounding box and derive labels for each pixel of the bounding box
38
+
39
+ Args:
40
+ fine_segm: float tensor of shape [1, C, Hout, Wout]
41
+ coarse_segm: float tensor of shape [1, K, Hout, Wout]
42
+ box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
43
+ corner coordinates, width (W) and height (H)
44
+ Return:
45
+ Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
46
+ """
47
+ x, y, w, h = box_xywh_abs
48
+ w = max(int(w), 1)
49
+ h = max(int(h), 1)
50
+ # coarse segmentation
51
+ coarse_segm_bbox = F.interpolate(
52
+ coarse_segm,
53
+ (h, w),
54
+ mode="bilinear",
55
+ align_corners=False,
56
+ ).argmax(dim=1)
57
+ # combined coarse and fine segmentation
58
+ labels = (
59
+ F.interpolate(fine_segm, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
60
+ * (coarse_segm_bbox > 0).long()
61
+ )
62
+ return labels
63
+
64
+
65
+ def resample_fine_and_coarse_segm_to_bbox(predictor_output: Any, box_xywh_abs: IntTupleBox):
66
+ """
67
+ Resample fine and coarse segmentation outputs from a predictor to the given
68
+ bounding box and derive labels for each pixel of the bounding box
69
+
70
+ Args:
71
+ predictor_output: DensePose predictor output that contains segmentation
72
+ results to be resampled
73
+ box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
74
+ corner coordinates, width (W) and height (H)
75
+ Return:
76
+ Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
77
+ """
78
+ return resample_fine_and_coarse_segm_tensors_to_bbox(
79
+ predictor_output.fine_segm,
80
+ predictor_output.coarse_segm,
81
+ box_xywh_abs,
82
+ )
83
+
84
+
85
+ def predictor_output_with_coarse_segm_to_mask(
86
+ predictor_output: Any, boxes: Boxes, image_size_hw: ImageSizeType
87
+ ) -> BitMasks:
88
+ """
89
+ Convert predictor output with coarse and fine segmentation to a mask.
90
+ Assumes that predictor output has the following attributes:
91
+ - coarse_segm (tensor of size [N, D, H, W]): coarse segmentation
92
+ unnormalized scores for N instances; D is the number of coarse
93
+ segmentation labels, H and W is the resolution of the estimate
94
+
95
+ Args:
96
+ predictor_output: DensePose predictor output to be converted to mask
97
+ boxes (Boxes): bounding boxes that correspond to the DensePose
98
+ predictor outputs
99
+ image_size_hw (tuple [int, int]): image height Himg and width Wimg
100
+ Return:
101
+ BitMasks that contain a bool tensor of size [N, Himg, Wimg] with
102
+ a mask of the size of the image for each instance
103
+ """
104
+ H, W = image_size_hw
105
+ boxes_xyxy_abs = boxes.tensor.clone()
106
+ boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
107
+ N = len(boxes_xywh_abs)
108
+ masks = torch.zeros((N, H, W), dtype=torch.bool, device=boxes.tensor.device)
109
+ for i in range(len(boxes_xywh_abs)):
110
+ box_xywh = make_int_box(boxes_xywh_abs[i])
111
+ box_mask = resample_coarse_segm_tensor_to_bbox(predictor_output[i].coarse_segm, box_xywh)
112
+ x, y, w, h = box_xywh
113
+ masks[i, y : y + h, x : x + w] = box_mask
114
+
115
+ return BitMasks(masks)
116
+
117
+
118
+ def predictor_output_with_fine_and_coarse_segm_to_mask(
119
+ predictor_output: Any, boxes: Boxes, image_size_hw: ImageSizeType
120
+ ) -> BitMasks:
121
+ """
122
+ Convert predictor output with coarse and fine segmentation to a mask.
123
+ Assumes that predictor output has the following attributes:
124
+ - coarse_segm (tensor of size [N, D, H, W]): coarse segmentation
125
+ unnormalized scores for N instances; D is the number of coarse
126
+ segmentation labels, H and W is the resolution of the estimate
127
+ - fine_segm (tensor of size [N, C, H, W]): fine segmentation
128
+ unnormalized scores for N instances; C is the number of fine
129
+ segmentation labels, H and W is the resolution of the estimate
130
+
131
+ Args:
132
+ predictor_output: DensePose predictor output to be converted to mask
133
+ boxes (Boxes): bounding boxes that correspond to the DensePose
134
+ predictor outputs
135
+ image_size_hw (tuple [int, int]): image height Himg and width Wimg
136
+ Return:
137
+ BitMasks that contain a bool tensor of size [N, Himg, Wimg] with
138
+ a mask of the size of the image for each instance
139
+ """
140
+ H, W = image_size_hw
141
+ boxes_xyxy_abs = boxes.tensor.clone()
142
+ boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
143
+ N = len(boxes_xywh_abs)
144
+ masks = torch.zeros((N, H, W), dtype=torch.bool, device=boxes.tensor.device)
145
+ for i in range(len(boxes_xywh_abs)):
146
+ box_xywh = make_int_box(boxes_xywh_abs[i])
147
+ labels_i = resample_fine_and_coarse_segm_to_bbox(predictor_output[i], box_xywh)
148
+ x, y, w, h = box_xywh
149
+ masks[i, y : y + h, x : x + w] = labels_i > 0
150
+ return BitMasks(masks)
Leffa/3rdparty/densepose/converters/to_chart_result.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from typing import Any
4
+
5
+ from detectron2.structures import Boxes
6
+
7
+ from ..structures import DensePoseChartResult, DensePoseChartResultWithConfidences
8
+ from .base import BaseConverter
9
+
10
+
11
+ class ToChartResultConverter(BaseConverter):
12
+ """
13
+ Converts various DensePose predictor outputs to DensePose results.
14
+ Each DensePose predictor output type has to register its convertion strategy.
15
+ """
16
+
17
+ registry = {}
18
+ dst_type = DensePoseChartResult
19
+
20
+ @classmethod
21
+ # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
22
+ # inconsistently.
23
+ def convert(cls, predictor_outputs: Any, boxes: Boxes, *args, **kwargs) -> DensePoseChartResult:
24
+ """
25
+ Convert DensePose predictor outputs to DensePoseResult using some registered
26
+ converter. Does recursive lookup for base classes, so there's no need
27
+ for explicit registration for derived classes.
28
+
29
+ Args:
30
+ densepose_predictor_outputs: DensePose predictor output to be
31
+ converted to BitMasks
32
+ boxes (Boxes): bounding boxes that correspond to the DensePose
33
+ predictor outputs
34
+ Return:
35
+ An instance of DensePoseResult. If no suitable converter was found, raises KeyError
36
+ """
37
+ return super(ToChartResultConverter, cls).convert(predictor_outputs, boxes, *args, **kwargs)
38
+
39
+
40
+ class ToChartResultConverterWithConfidences(BaseConverter):
41
+ """
42
+ Converts various DensePose predictor outputs to DensePose results.
43
+ Each DensePose predictor output type has to register its convertion strategy.
44
+ """
45
+
46
+ registry = {}
47
+ dst_type = DensePoseChartResultWithConfidences
48
+
49
+ @classmethod
50
+ # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
51
+ # inconsistently.
52
+ def convert(
53
+ cls, predictor_outputs: Any, boxes: Boxes, *args, **kwargs
54
+ ) -> DensePoseChartResultWithConfidences:
55
+ """
56
+ Convert DensePose predictor outputs to DensePoseResult with confidences
57
+ using some registered converter. Does recursive lookup for base classes,
58
+ so there's no need for explicit registration for derived classes.
59
+
60
+ Args:
61
+ densepose_predictor_outputs: DensePose predictor output with confidences
62
+ to be converted to BitMasks
63
+ boxes (Boxes): bounding boxes that correspond to the DensePose
64
+ predictor outputs
65
+ Return:
66
+ An instance of DensePoseResult. If no suitable converter was found, raises KeyError
67
+ """
68
+ return super(ToChartResultConverterWithConfidences, cls).convert(
69
+ predictor_outputs, boxes, *args, **kwargs
70
+ )
Leffa/3rdparty/densepose/converters/to_mask.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from typing import Any, Tuple
4
+
5
+ from detectron2.structures import BitMasks, Boxes
6
+
7
+ from .base import BaseConverter
8
+
9
+ ImageSizeType = Tuple[int, int]
10
+
11
+
12
+ class ToMaskConverter(BaseConverter):
13
+ """
14
+ Converts various DensePose predictor outputs to masks
15
+ in bit mask format (see `BitMasks`). Each DensePose predictor output type
16
+ has to register its convertion strategy.
17
+ """
18
+
19
+ registry = {}
20
+ dst_type = BitMasks
21
+
22
+ @classmethod
23
+ # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
24
+ # inconsistently.
25
+ def convert(
26
+ cls,
27
+ densepose_predictor_outputs: Any,
28
+ boxes: Boxes,
29
+ image_size_hw: ImageSizeType,
30
+ *args,
31
+ **kwargs
32
+ ) -> BitMasks:
33
+ """
34
+ Convert DensePose predictor outputs to BitMasks using some registered
35
+ converter. Does recursive lookup for base classes, so there's no need
36
+ for explicit registration for derived classes.
37
+
38
+ Args:
39
+ densepose_predictor_outputs: DensePose predictor output to be
40
+ converted to BitMasks
41
+ boxes (Boxes): bounding boxes that correspond to the DensePose
42
+ predictor outputs
43
+ image_size_hw (tuple [int, int]): image height and width
44
+ Return:
45
+ An instance of `BitMasks`. If no suitable converter was found, raises KeyError
46
+ """
47
+ return super(ToMaskConverter, cls).convert(
48
+ densepose_predictor_outputs, boxes, image_size_hw, *args, **kwargs
49
+ )
Leffa/3rdparty/densepose/engine/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from .trainer import Trainer
Leffa/3rdparty/densepose/engine/trainer.py ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2
+
3
+ import logging
4
+ import os
5
+ from collections import OrderedDict
6
+ from typing import List, Optional, Union
7
+ import torch
8
+ from torch import nn
9
+
10
+ from detectron2.checkpoint import DetectionCheckpointer
11
+ from detectron2.config import CfgNode
12
+ from detectron2.engine import DefaultTrainer
13
+ from detectron2.evaluation import (
14
+ DatasetEvaluator,
15
+ DatasetEvaluators,
16
+ inference_on_dataset,
17
+ print_csv_format,
18
+ )
19
+ from detectron2.solver.build import get_default_optimizer_params, maybe_add_gradient_clipping
20
+ from detectron2.utils import comm
21
+ from detectron2.utils.events import EventWriter, get_event_storage
22
+
23
+ from densepose import DensePoseDatasetMapperTTA, DensePoseGeneralizedRCNNWithTTA, load_from_cfg
24
+ from densepose.data import (
25
+ DatasetMapper,
26
+ build_combined_loader,
27
+ build_detection_test_loader,
28
+ build_detection_train_loader,
29
+ build_inference_based_loaders,
30
+ has_inference_based_loaders,
31
+ )
32
+ from densepose.evaluation.d2_evaluator_adapter import Detectron2COCOEvaluatorAdapter
33
+ from densepose.evaluation.evaluator import DensePoseCOCOEvaluator, build_densepose_evaluator_storage
34
+ from densepose.modeling.cse import Embedder
35
+
36
+
37
+ class SampleCountingLoader:
38
+ def __init__(self, loader):
39
+ self.loader = loader
40
+
41
+ def __iter__(self):
42
+ it = iter(self.loader)
43
+ storage = get_event_storage()
44
+ while True:
45
+ try:
46
+ batch = next(it)
47
+ num_inst_per_dataset = {}
48
+ for data in batch:
49
+ dataset_name = data["dataset"]
50
+ if dataset_name not in num_inst_per_dataset:
51
+ num_inst_per_dataset[dataset_name] = 0
52
+ num_inst = len(data["instances"])
53
+ num_inst_per_dataset[dataset_name] += num_inst
54
+ for dataset_name in num_inst_per_dataset:
55
+ storage.put_scalar(f"batch/{dataset_name}", num_inst_per_dataset[dataset_name])
56
+ yield batch
57
+ except StopIteration:
58
+ break
59
+
60
+
61
+ class SampleCountMetricPrinter(EventWriter):
62
+ def __init__(self):
63
+ self.logger = logging.getLogger(__name__)
64
+
65
+ def write(self):
66
+ storage = get_event_storage()
67
+ batch_stats_strs = []
68
+ for key, buf in storage.histories().items():
69
+ if key.startswith("batch/"):
70
+ batch_stats_strs.append(f"{key} {buf.avg(20)}")
71
+ self.logger.info(", ".join(batch_stats_strs))
72
+
73
+
74
+ class Trainer(DefaultTrainer):
75
+ @classmethod
76
+ def extract_embedder_from_model(cls, model: nn.Module) -> Optional[Embedder]:
77
+ if isinstance(model, nn.parallel.DistributedDataParallel):
78
+ model = model.module
79
+ if hasattr(model, "roi_heads") and hasattr(model.roi_heads, "embedder"):
80
+ return model.roi_heads.embedder
81
+ return None
82
+
83
+ # TODO: the only reason to copy the base class code here is to pass the embedder from
84
+ # the model to the evaluator; that should be refactored to avoid unnecessary copy-pasting
85
+ @classmethod
86
+ def test(
87
+ cls,
88
+ cfg: CfgNode,
89
+ model: nn.Module,
90
+ evaluators: Optional[Union[DatasetEvaluator, List[DatasetEvaluator]]] = None,
91
+ ):
92
+ """
93
+ Args:
94
+ cfg (CfgNode):
95
+ model (nn.Module):
96
+ evaluators (DatasetEvaluator, list[DatasetEvaluator] or None): if None, will call
97
+ :meth:`build_evaluator`. Otherwise, must have the same length as
98
+ ``cfg.DATASETS.TEST``.
99
+
100
+ Returns:
101
+ dict: a dict of result metrics
102
+ """
103
+ logger = logging.getLogger(__name__)
104
+ if isinstance(evaluators, DatasetEvaluator):
105
+ evaluators = [evaluators]
106
+ if evaluators is not None:
107
+ assert len(cfg.DATASETS.TEST) == len(evaluators), "{} != {}".format(
108
+ len(cfg.DATASETS.TEST), len(evaluators)
109
+ )
110
+
111
+ results = OrderedDict()
112
+ for idx, dataset_name in enumerate(cfg.DATASETS.TEST):
113
+ data_loader = cls.build_test_loader(cfg, dataset_name)
114
+ # When evaluators are passed in as arguments,
115
+ # implicitly assume that evaluators can be created before data_loader.
116
+ if evaluators is not None:
117
+ evaluator = evaluators[idx]
118
+ else:
119
+ try:
120
+ embedder = cls.extract_embedder_from_model(model)
121
+ evaluator = cls.build_evaluator(cfg, dataset_name, embedder=embedder)
122
+ except NotImplementedError:
123
+ logger.warn(
124
+ "No evaluator found. Use `DefaultTrainer.test(evaluators=)`, "
125
+ "or implement its `build_evaluator` method."
126
+ )
127
+ results[dataset_name] = {}
128
+ continue
129
+ if cfg.DENSEPOSE_EVALUATION.DISTRIBUTED_INFERENCE or comm.is_main_process():
130
+ results_i = inference_on_dataset(model, data_loader, evaluator)
131
+ else:
132
+ results_i = {}
133
+ results[dataset_name] = results_i
134
+ if comm.is_main_process():
135
+ assert isinstance(
136
+ results_i, dict
137
+ ), "Evaluator must return a dict on the main process. Got {} instead.".format(
138
+ results_i
139
+ )
140
+ logger.info("Evaluation results for {} in csv format:".format(dataset_name))
141
+ print_csv_format(results_i)
142
+
143
+ if len(results) == 1:
144
+ results = list(results.values())[0]
145
+ return results
146
+
147
+ @classmethod
148
+ def build_evaluator(
149
+ cls,
150
+ cfg: CfgNode,
151
+ dataset_name: str,
152
+ output_folder: Optional[str] = None,
153
+ embedder: Optional[Embedder] = None,
154
+ ) -> DatasetEvaluators:
155
+ if output_folder is None:
156
+ output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
157
+ evaluators = []
158
+ distributed = cfg.DENSEPOSE_EVALUATION.DISTRIBUTED_INFERENCE
159
+ # Note: we currently use COCO evaluator for both COCO and LVIS datasets
160
+ # to have compatible metrics. LVIS bbox evaluator could also be used
161
+ # with an adapter to properly handle filtered / mapped categories
162
+ # evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type
163
+ # if evaluator_type == "coco":
164
+ # evaluators.append(COCOEvaluator(dataset_name, output_dir=output_folder))
165
+ # elif evaluator_type == "lvis":
166
+ # evaluators.append(LVISEvaluator(dataset_name, output_dir=output_folder))
167
+ evaluators.append(
168
+ Detectron2COCOEvaluatorAdapter(
169
+ dataset_name, output_dir=output_folder, distributed=distributed
170
+ )
171
+ )
172
+ if cfg.MODEL.DENSEPOSE_ON:
173
+ storage = build_densepose_evaluator_storage(cfg, output_folder)
174
+ evaluators.append(
175
+ DensePoseCOCOEvaluator(
176
+ dataset_name,
177
+ distributed,
178
+ output_folder,
179
+ evaluator_type=cfg.DENSEPOSE_EVALUATION.TYPE,
180
+ min_iou_threshold=cfg.DENSEPOSE_EVALUATION.MIN_IOU_THRESHOLD,
181
+ storage=storage,
182
+ embedder=embedder,
183
+ should_evaluate_mesh_alignment=cfg.DENSEPOSE_EVALUATION.EVALUATE_MESH_ALIGNMENT,
184
+ mesh_alignment_mesh_names=cfg.DENSEPOSE_EVALUATION.MESH_ALIGNMENT_MESH_NAMES,
185
+ )
186
+ )
187
+ return DatasetEvaluators(evaluators)
188
+
189
+ @classmethod
190
+ def build_optimizer(cls, cfg: CfgNode, model: nn.Module):
191
+ params = get_default_optimizer_params(
192
+ model,
193
+ base_lr=cfg.SOLVER.BASE_LR,
194
+ weight_decay_norm=cfg.SOLVER.WEIGHT_DECAY_NORM,
195
+ bias_lr_factor=cfg.SOLVER.BIAS_LR_FACTOR,
196
+ weight_decay_bias=cfg.SOLVER.WEIGHT_DECAY_BIAS,
197
+ overrides={
198
+ "features": {
199
+ "lr": cfg.SOLVER.BASE_LR * cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.FEATURES_LR_FACTOR,
200
+ },
201
+ "embeddings": {
202
+ "lr": cfg.SOLVER.BASE_LR * cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDING_LR_FACTOR,
203
+ },
204
+ },
205
+ )
206
+ optimizer = torch.optim.SGD(
207
+ params,
208
+ cfg.SOLVER.BASE_LR,
209
+ momentum=cfg.SOLVER.MOMENTUM,
210
+ nesterov=cfg.SOLVER.NESTEROV,
211
+ weight_decay=cfg.SOLVER.WEIGHT_DECAY,
212
+ )
213
+ # pyre-fixme[6]: For 2nd param expected `Type[Optimizer]` but got `SGD`.
214
+ return maybe_add_gradient_clipping(cfg, optimizer)
215
+
216
+ @classmethod
217
+ def build_test_loader(cls, cfg: CfgNode, dataset_name):
218
+ return build_detection_test_loader(cfg, dataset_name, mapper=DatasetMapper(cfg, False))
219
+
220
+ @classmethod
221
+ def build_train_loader(cls, cfg: CfgNode):
222
+ data_loader = build_detection_train_loader(cfg, mapper=DatasetMapper(cfg, True))
223
+ if not has_inference_based_loaders(cfg):
224
+ return data_loader
225
+ model = cls.build_model(cfg)
226
+ model.to(cfg.BOOTSTRAP_MODEL.DEVICE)
227
+ DetectionCheckpointer(model).resume_or_load(cfg.BOOTSTRAP_MODEL.WEIGHTS, resume=False)
228
+ inference_based_loaders, ratios = build_inference_based_loaders(cfg, model)
229
+ loaders = [data_loader] + inference_based_loaders
230
+ ratios = [1.0] + ratios
231
+ combined_data_loader = build_combined_loader(cfg, loaders, ratios)
232
+ sample_counting_loader = SampleCountingLoader(combined_data_loader)
233
+ return sample_counting_loader
234
+
235
+ def build_writers(self):
236
+ writers = super().build_writers()
237
+ writers.append(SampleCountMetricPrinter())
238
+ return writers
239
+
240
+ @classmethod
241
+ def test_with_TTA(cls, cfg: CfgNode, model):
242
+ logger = logging.getLogger("detectron2.trainer")
243
+ # In the end of training, run an evaluation with TTA
244
+ # Only support some R-CNN models.
245
+ logger.info("Running inference with test-time augmentation ...")
246
+ transform_data = load_from_cfg(cfg)
247
+ model = DensePoseGeneralizedRCNNWithTTA(
248
+ cfg, model, transform_data, DensePoseDatasetMapperTTA(cfg)
249
+ )
250
+ evaluators = [
251
+ cls.build_evaluator(
252
+ cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA")
253
+ )
254
+ for name in cfg.DATASETS.TEST
255
+ ]
256
+ res = cls.test(cfg, model, evaluators) # pyre-ignore[6]
257
+ res = OrderedDict({k + "_TTA": v for k, v in res.items()})
258
+ return res
Leffa/3rdparty/densepose/modeling/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from .confidence import DensePoseConfidenceModelConfig, DensePoseUVConfidenceType
4
+ from .filter import DensePoseDataFilter
5
+ from .inference import densepose_inference
6
+ from .utils import initialize_module_params
7
+ from .build import (
8
+ build_densepose_data_filter,
9
+ build_densepose_embedder,
10
+ build_densepose_head,
11
+ build_densepose_losses,
12
+ build_densepose_predictor,
13
+ )
Leffa/3rdparty/densepose/modeling/build.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from typing import Optional
4
+ from torch import nn
5
+
6
+ from detectron2.config import CfgNode
7
+
8
+ from .cse.embedder import Embedder
9
+ from .filter import DensePoseDataFilter
10
+
11
+
12
+ def build_densepose_predictor(cfg: CfgNode, input_channels: int):
13
+ """
14
+ Create an instance of DensePose predictor based on configuration options.
15
+
16
+ Args:
17
+ cfg (CfgNode): configuration options
18
+ input_channels (int): input tensor size along the channel dimension
19
+ Return:
20
+ An instance of DensePose predictor
21
+ """
22
+ from .predictors import DENSEPOSE_PREDICTOR_REGISTRY
23
+
24
+ predictor_name = cfg.MODEL.ROI_DENSEPOSE_HEAD.PREDICTOR_NAME
25
+ return DENSEPOSE_PREDICTOR_REGISTRY.get(predictor_name)(cfg, input_channels)
26
+
27
+
28
+ def build_densepose_data_filter(cfg: CfgNode):
29
+ """
30
+ Build DensePose data filter which selects data for training
31
+
32
+ Args:
33
+ cfg (CfgNode): configuration options
34
+
35
+ Return:
36
+ Callable: list(Tensor), list(Instances) -> list(Tensor), list(Instances)
37
+ An instance of DensePose filter, which takes feature tensors and proposals
38
+ as an input and returns filtered features and proposals
39
+ """
40
+ dp_filter = DensePoseDataFilter(cfg)
41
+ return dp_filter
42
+
43
+
44
+ def build_densepose_head(cfg: CfgNode, input_channels: int):
45
+ """
46
+ Build DensePose head based on configurations options
47
+
48
+ Args:
49
+ cfg (CfgNode): configuration options
50
+ input_channels (int): input tensor size along the channel dimension
51
+ Return:
52
+ An instance of DensePose head
53
+ """
54
+ from .roi_heads.registry import ROI_DENSEPOSE_HEAD_REGISTRY
55
+
56
+ head_name = cfg.MODEL.ROI_DENSEPOSE_HEAD.NAME
57
+ return ROI_DENSEPOSE_HEAD_REGISTRY.get(head_name)(cfg, input_channels)
58
+
59
+
60
+ def build_densepose_losses(cfg: CfgNode):
61
+ """
62
+ Build DensePose loss based on configurations options
63
+
64
+ Args:
65
+ cfg (CfgNode): configuration options
66
+ Return:
67
+ An instance of DensePose loss
68
+ """
69
+ from .losses import DENSEPOSE_LOSS_REGISTRY
70
+
71
+ loss_name = cfg.MODEL.ROI_DENSEPOSE_HEAD.LOSS_NAME
72
+ return DENSEPOSE_LOSS_REGISTRY.get(loss_name)(cfg)
73
+
74
+
75
+ def build_densepose_embedder(cfg: CfgNode) -> Optional[nn.Module]:
76
+ """
77
+ Build embedder used to embed mesh vertices into an embedding space.
78
+ Embedder contains sub-embedders, one for each mesh ID.
79
+
80
+ Args:
81
+ cfg (cfgNode): configuration options
82
+ Return:
83
+ Embedding module
84
+ """
85
+ if cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDERS:
86
+ return Embedder(cfg)
87
+ return None
Leffa/3rdparty/densepose/modeling/confidence.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from dataclasses import dataclass
4
+ from enum import Enum
5
+
6
+ from detectron2.config import CfgNode
7
+
8
+
9
+ class DensePoseUVConfidenceType(Enum):
10
+ """
11
+ Statistical model type for confidence learning, possible values:
12
+ - "iid_iso": statistically independent identically distributed residuals
13
+ with anisotropic covariance
14
+ - "indep_aniso": statistically independent residuals with anisotropic
15
+ covariances
16
+ For details, see:
17
+ N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning
18
+ Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019
19
+ """
20
+
21
+ # fmt: off
22
+ IID_ISO = "iid_iso"
23
+ INDEP_ANISO = "indep_aniso"
24
+ # fmt: on
25
+
26
+
27
+ @dataclass
28
+ class DensePoseUVConfidenceConfig:
29
+ """
30
+ Configuration options for confidence on UV data
31
+ """
32
+
33
+ enabled: bool = False
34
+ # lower bound on UV confidences
35
+ epsilon: float = 0.01
36
+ type: DensePoseUVConfidenceType = DensePoseUVConfidenceType.IID_ISO
37
+
38
+
39
+ @dataclass
40
+ class DensePoseSegmConfidenceConfig:
41
+ """
42
+ Configuration options for confidence on segmentation
43
+ """
44
+
45
+ enabled: bool = False
46
+ # lower bound on confidence values
47
+ epsilon: float = 0.01
48
+
49
+
50
+ @dataclass
51
+ class DensePoseConfidenceModelConfig:
52
+ """
53
+ Configuration options for confidence models
54
+ """
55
+
56
+ # confidence for U and V values
57
+ uv_confidence: DensePoseUVConfidenceConfig
58
+ # segmentation confidence
59
+ segm_confidence: DensePoseSegmConfidenceConfig
60
+
61
+ @staticmethod
62
+ def from_cfg(cfg: CfgNode) -> "DensePoseConfidenceModelConfig":
63
+ return DensePoseConfidenceModelConfig(
64
+ uv_confidence=DensePoseUVConfidenceConfig(
65
+ enabled=cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.ENABLED,
66
+ epsilon=cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.EPSILON,
67
+ type=DensePoseUVConfidenceType(cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.TYPE),
68
+ ),
69
+ segm_confidence=DensePoseSegmConfidenceConfig(
70
+ enabled=cfg.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE.ENABLED,
71
+ epsilon=cfg.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE.EPSILON,
72
+ ),
73
+ )
Leffa/3rdparty/densepose/modeling/densepose_checkpoint.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ from collections import OrderedDict
3
+
4
+ from detectron2.checkpoint import DetectionCheckpointer
5
+
6
+
7
+ def _rename_HRNet_weights(weights):
8
+ # We detect and rename HRNet weights for DensePose. 1956 and 1716 are values that are
9
+ # common to all HRNet pretrained weights, and should be enough to accurately identify them
10
+ if (
11
+ len(weights["model"].keys()) == 1956
12
+ and len([k for k in weights["model"].keys() if k.startswith("stage")]) == 1716
13
+ ):
14
+ hrnet_weights = OrderedDict()
15
+ for k in weights["model"].keys():
16
+ hrnet_weights["backbone.bottom_up." + str(k)] = weights["model"][k]
17
+ return {"model": hrnet_weights}
18
+ else:
19
+ return weights
20
+
21
+
22
+ class DensePoseCheckpointer(DetectionCheckpointer):
23
+ """
24
+ Same as :class:`DetectionCheckpointer`, but is able to handle HRNet weights
25
+ """
26
+
27
+ def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables):
28
+ super().__init__(model, save_dir, save_to_disk=save_to_disk, **checkpointables)
29
+
30
+ def _load_file(self, filename: str) -> object:
31
+ """
32
+ Adding hrnet support
33
+ """
34
+ weights = super()._load_file(filename)
35
+ return _rename_HRNet_weights(weights)
Leffa/3rdparty/densepose/modeling/filter.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from typing import List
4
+ import torch
5
+
6
+ from detectron2.config import CfgNode
7
+ from detectron2.structures import Instances
8
+ from detectron2.structures.boxes import matched_pairwise_iou
9
+
10
+
11
+ class DensePoseDataFilter:
12
+ def __init__(self, cfg: CfgNode):
13
+ self.iou_threshold = cfg.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD
14
+ self.keep_masks = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS
15
+
16
+ @torch.no_grad()
17
+ def __call__(self, features: List[torch.Tensor], proposals_with_targets: List[Instances]):
18
+ """
19
+ Filters proposals with targets to keep only the ones relevant for
20
+ DensePose training
21
+
22
+ Args:
23
+ features (list[Tensor]): input data as a list of features,
24
+ each feature is a tensor. Axis 0 represents the number of
25
+ images `N` in the input data; axes 1-3 are channels,
26
+ height, and width, which may vary between features
27
+ (e.g., if a feature pyramid is used).
28
+ proposals_with_targets (list[Instances]): length `N` list of
29
+ `Instances`. The i-th `Instances` contains instances
30
+ (proposals, GT) for the i-th input image,
31
+ Returns:
32
+ list[Tensor]: filtered features
33
+ list[Instances]: filtered proposals
34
+ """
35
+ proposals_filtered = []
36
+ # TODO: the commented out code was supposed to correctly deal with situations
37
+ # where no valid DensePose GT is available for certain images. The corresponding
38
+ # image features were sliced and proposals were filtered. This led to performance
39
+ # deterioration, both in terms of runtime and in terms of evaluation results.
40
+ #
41
+ # feature_mask = torch.ones(
42
+ # len(proposals_with_targets),
43
+ # dtype=torch.bool,
44
+ # device=features[0].device if len(features) > 0 else torch.device("cpu"),
45
+ # )
46
+ for i, proposals_per_image in enumerate(proposals_with_targets):
47
+ if not proposals_per_image.has("gt_densepose") and (
48
+ not proposals_per_image.has("gt_masks") or not self.keep_masks
49
+ ):
50
+ # feature_mask[i] = 0
51
+ continue
52
+ gt_boxes = proposals_per_image.gt_boxes
53
+ est_boxes = proposals_per_image.proposal_boxes
54
+ # apply match threshold for densepose head
55
+ iou = matched_pairwise_iou(gt_boxes, est_boxes)
56
+ iou_select = iou > self.iou_threshold
57
+ proposals_per_image = proposals_per_image[iou_select] # pyre-ignore[6]
58
+
59
+ N_gt_boxes = len(proposals_per_image.gt_boxes)
60
+ assert N_gt_boxes == len(proposals_per_image.proposal_boxes), (
61
+ f"The number of GT boxes {N_gt_boxes} is different from the "
62
+ f"number of proposal boxes {len(proposals_per_image.proposal_boxes)}"
63
+ )
64
+ # filter out any target without suitable annotation
65
+ if self.keep_masks:
66
+ gt_masks = (
67
+ proposals_per_image.gt_masks
68
+ if hasattr(proposals_per_image, "gt_masks")
69
+ else [None] * N_gt_boxes
70
+ )
71
+ else:
72
+ gt_masks = [None] * N_gt_boxes
73
+ gt_densepose = (
74
+ proposals_per_image.gt_densepose
75
+ if hasattr(proposals_per_image, "gt_densepose")
76
+ else [None] * N_gt_boxes
77
+ )
78
+ assert len(gt_masks) == N_gt_boxes
79
+ assert len(gt_densepose) == N_gt_boxes
80
+ selected_indices = [
81
+ i
82
+ for i, (dp_target, mask_target) in enumerate(zip(gt_densepose, gt_masks))
83
+ if (dp_target is not None) or (mask_target is not None)
84
+ ]
85
+ # if not len(selected_indices):
86
+ # feature_mask[i] = 0
87
+ # continue
88
+ if len(selected_indices) != N_gt_boxes:
89
+ proposals_per_image = proposals_per_image[selected_indices] # pyre-ignore[6]
90
+ assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.proposal_boxes)
91
+ proposals_filtered.append(proposals_per_image)
92
+ # features_filtered = [feature[feature_mask] for feature in features]
93
+ # return features_filtered, proposals_filtered
94
+ return features, proposals_filtered
Leffa/3rdparty/densepose/modeling/hrfpn.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ """
3
+ MIT License
4
+ Copyright (c) 2019 Microsoft
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+ The above copyright notice and this permission notice shall be included in all
12
+ copies or substantial portions of the Software.
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ SOFTWARE.
20
+ """
21
+
22
+ import torch
23
+ import torch.nn as nn
24
+ import torch.nn.functional as F
25
+
26
+ from detectron2.layers import ShapeSpec
27
+ from detectron2.modeling.backbone import BACKBONE_REGISTRY
28
+ from detectron2.modeling.backbone.backbone import Backbone
29
+
30
+ from .hrnet import build_pose_hrnet_backbone
31
+
32
+
33
+ class HRFPN(Backbone):
34
+ """HRFPN (High Resolution Feature Pyramids)
35
+ Transforms outputs of HRNet backbone so they are suitable for the ROI_heads
36
+ arXiv: https://arxiv.org/abs/1904.04514
37
+ Adapted from https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/necks/hrfpn.py
38
+ Args:
39
+ bottom_up: (list) output of HRNet
40
+ in_features (list): names of the input features (output of HRNet)
41
+ in_channels (list): number of channels for each branch
42
+ out_channels (int): output channels of feature pyramids
43
+ n_out_features (int): number of output stages
44
+ pooling (str): pooling for generating feature pyramids (from {MAX, AVG})
45
+ share_conv (bool): Have one conv per output, or share one with all the outputs
46
+ """
47
+
48
+ def __init__(
49
+ self,
50
+ bottom_up,
51
+ in_features,
52
+ n_out_features,
53
+ in_channels,
54
+ out_channels,
55
+ pooling="AVG",
56
+ share_conv=False,
57
+ ):
58
+ super(HRFPN, self).__init__()
59
+ assert isinstance(in_channels, list)
60
+ self.bottom_up = bottom_up
61
+ self.in_features = in_features
62
+ self.n_out_features = n_out_features
63
+ self.in_channels = in_channels
64
+ self.out_channels = out_channels
65
+ self.num_ins = len(in_channels)
66
+ self.share_conv = share_conv
67
+
68
+ if self.share_conv:
69
+ self.fpn_conv = nn.Conv2d(
70
+ in_channels=out_channels, out_channels=out_channels, kernel_size=3, padding=1
71
+ )
72
+ else:
73
+ self.fpn_conv = nn.ModuleList()
74
+ for _ in range(self.n_out_features):
75
+ self.fpn_conv.append(
76
+ nn.Conv2d(
77
+ in_channels=out_channels,
78
+ out_channels=out_channels,
79
+ kernel_size=3,
80
+ padding=1,
81
+ )
82
+ )
83
+
84
+ # Custom change: Replaces a simple bilinear interpolation
85
+ self.interp_conv = nn.ModuleList()
86
+ for i in range(len(self.in_features)):
87
+ self.interp_conv.append(
88
+ nn.Sequential(
89
+ nn.ConvTranspose2d(
90
+ in_channels=in_channels[i],
91
+ out_channels=in_channels[i],
92
+ kernel_size=4,
93
+ stride=2**i,
94
+ padding=0,
95
+ output_padding=0,
96
+ bias=False,
97
+ ),
98
+ nn.BatchNorm2d(in_channels[i], momentum=0.1),
99
+ nn.ReLU(inplace=True),
100
+ )
101
+ )
102
+
103
+ # Custom change: Replaces a couple (reduction conv + pooling) by one conv
104
+ self.reduction_pooling_conv = nn.ModuleList()
105
+ for i in range(self.n_out_features):
106
+ self.reduction_pooling_conv.append(
107
+ nn.Sequential(
108
+ nn.Conv2d(sum(in_channels), out_channels, kernel_size=2**i, stride=2**i),
109
+ nn.BatchNorm2d(out_channels, momentum=0.1),
110
+ nn.ReLU(inplace=True),
111
+ )
112
+ )
113
+
114
+ if pooling == "MAX":
115
+ self.pooling = F.max_pool2d
116
+ else:
117
+ self.pooling = F.avg_pool2d
118
+
119
+ self._out_features = []
120
+ self._out_feature_channels = {}
121
+ self._out_feature_strides = {}
122
+
123
+ for i in range(self.n_out_features):
124
+ self._out_features.append("p%d" % (i + 1))
125
+ self._out_feature_channels.update({self._out_features[-1]: self.out_channels})
126
+ self._out_feature_strides.update({self._out_features[-1]: 2 ** (i + 2)})
127
+
128
+ # default init_weights for conv(msra) and norm in ConvModule
129
+ def init_weights(self):
130
+ for m in self.modules():
131
+ if isinstance(m, nn.Conv2d):
132
+ nn.init.kaiming_normal_(m.weight, a=1)
133
+ nn.init.constant_(m.bias, 0)
134
+
135
+ def forward(self, inputs):
136
+ bottom_up_features = self.bottom_up(inputs)
137
+ assert len(bottom_up_features) == len(self.in_features)
138
+ inputs = [bottom_up_features[f] for f in self.in_features]
139
+
140
+ outs = []
141
+ for i in range(len(inputs)):
142
+ outs.append(self.interp_conv[i](inputs[i]))
143
+ shape_2 = min(o.shape[2] for o in outs)
144
+ shape_3 = min(o.shape[3] for o in outs)
145
+ out = torch.cat([o[:, :, :shape_2, :shape_3] for o in outs], dim=1)
146
+ outs = []
147
+ for i in range(self.n_out_features):
148
+ outs.append(self.reduction_pooling_conv[i](out))
149
+ for i in range(len(outs)): # Make shapes consistent
150
+ outs[-1 - i] = outs[-1 - i][
151
+ :, :, : outs[-1].shape[2] * 2**i, : outs[-1].shape[3] * 2**i
152
+ ]
153
+ outputs = []
154
+ for i in range(len(outs)):
155
+ if self.share_conv:
156
+ outputs.append(self.fpn_conv(outs[i]))
157
+ else:
158
+ outputs.append(self.fpn_conv[i](outs[i]))
159
+
160
+ assert len(self._out_features) == len(outputs)
161
+ return dict(zip(self._out_features, outputs))
162
+
163
+
164
+ @BACKBONE_REGISTRY.register()
165
+ def build_hrfpn_backbone(cfg, input_shape: ShapeSpec) -> HRFPN:
166
+
167
+ in_channels = cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS
168
+ in_features = ["p%d" % (i + 1) for i in range(cfg.MODEL.HRNET.STAGE4.NUM_BRANCHES)]
169
+ n_out_features = len(cfg.MODEL.ROI_HEADS.IN_FEATURES)
170
+ out_channels = cfg.MODEL.HRNET.HRFPN.OUT_CHANNELS
171
+ hrnet = build_pose_hrnet_backbone(cfg, input_shape)
172
+ hrfpn = HRFPN(
173
+ hrnet,
174
+ in_features,
175
+ n_out_features,
176
+ in_channels,
177
+ out_channels,
178
+ pooling="AVG",
179
+ share_conv=False,
180
+ )
181
+
182
+ return hrfpn
Leffa/3rdparty/densepose/modeling/hrnet.py ADDED
@@ -0,0 +1,474 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ # ------------------------------------------------------------------------------
3
+ # Copyright (c) Microsoft
4
+ # Licensed under the MIT License.
5
+ # Written by Bin Xiao (leoxiaobin@gmail.com)
6
+ # Modified by Bowen Cheng (bcheng9@illinois.edu)
7
+ # Adapted from https://github.com/HRNet/Higher-HRNet-Human-Pose-Estimation/blob/master/lib/models/pose_higher_hrnet.py # noqa
8
+ # ------------------------------------------------------------------------------
9
+
10
+ from __future__ import absolute_import, division, print_function
11
+ import logging
12
+ import torch.nn as nn
13
+
14
+ from detectron2.layers import ShapeSpec
15
+ from detectron2.modeling.backbone import BACKBONE_REGISTRY
16
+ from detectron2.modeling.backbone.backbone import Backbone
17
+
18
+ BN_MOMENTUM = 0.1
19
+ logger = logging.getLogger(__name__)
20
+
21
+ __all__ = ["build_pose_hrnet_backbone", "PoseHigherResolutionNet"]
22
+
23
+
24
+ def conv3x3(in_planes, out_planes, stride=1):
25
+ """3x3 convolution with padding"""
26
+ return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)
27
+
28
+
29
+ class BasicBlock(nn.Module):
30
+ expansion = 1
31
+
32
+ def __init__(self, inplanes, planes, stride=1, downsample=None):
33
+ super(BasicBlock, self).__init__()
34
+ self.conv1 = conv3x3(inplanes, planes, stride)
35
+ self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
36
+ self.relu = nn.ReLU(inplace=True)
37
+ self.conv2 = conv3x3(planes, planes)
38
+ self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
39
+ self.downsample = downsample
40
+ self.stride = stride
41
+
42
+ def forward(self, x):
43
+ residual = x
44
+
45
+ out = self.conv1(x)
46
+ out = self.bn1(out)
47
+ out = self.relu(out)
48
+
49
+ out = self.conv2(out)
50
+ out = self.bn2(out)
51
+
52
+ if self.downsample is not None:
53
+ residual = self.downsample(x)
54
+
55
+ out += residual
56
+ out = self.relu(out)
57
+
58
+ return out
59
+
60
+
61
+ class Bottleneck(nn.Module):
62
+ expansion = 4
63
+
64
+ def __init__(self, inplanes, planes, stride=1, downsample=None):
65
+ super(Bottleneck, self).__init__()
66
+ self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
67
+ self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
68
+ self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
69
+ self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
70
+ self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
71
+ self.bn3 = nn.BatchNorm2d(planes * self.expansion, momentum=BN_MOMENTUM)
72
+ self.relu = nn.ReLU(inplace=True)
73
+ self.downsample = downsample
74
+ self.stride = stride
75
+
76
+ def forward(self, x):
77
+ residual = x
78
+
79
+ out = self.conv1(x)
80
+ out = self.bn1(out)
81
+ out = self.relu(out)
82
+
83
+ out = self.conv2(out)
84
+ out = self.bn2(out)
85
+ out = self.relu(out)
86
+
87
+ out = self.conv3(out)
88
+ out = self.bn3(out)
89
+
90
+ if self.downsample is not None:
91
+ residual = self.downsample(x)
92
+
93
+ out += residual
94
+ out = self.relu(out)
95
+
96
+ return out
97
+
98
+
99
+ class HighResolutionModule(nn.Module):
100
+ """HighResolutionModule
101
+ Building block of the PoseHigherResolutionNet (see lower)
102
+ arXiv: https://arxiv.org/abs/1908.10357
103
+ Args:
104
+ num_branches (int): number of branches of the module
105
+ blocks (str): type of block of the module
106
+ num_blocks (int): number of blocks of the module
107
+ num_inchannels (int): number of input channels of the module
108
+ num_channels (list): number of channels of each branch
109
+ multi_scale_output (bool): only used by the last module of PoseHigherResolutionNet
110
+ """
111
+
112
+ def __init__(
113
+ self,
114
+ num_branches,
115
+ blocks,
116
+ num_blocks,
117
+ num_inchannels,
118
+ num_channels,
119
+ multi_scale_output=True,
120
+ ):
121
+ super(HighResolutionModule, self).__init__()
122
+ self._check_branches(num_branches, blocks, num_blocks, num_inchannels, num_channels)
123
+
124
+ self.num_inchannels = num_inchannels
125
+ self.num_branches = num_branches
126
+
127
+ self.multi_scale_output = multi_scale_output
128
+
129
+ self.branches = self._make_branches(num_branches, blocks, num_blocks, num_channels)
130
+ self.fuse_layers = self._make_fuse_layers()
131
+ self.relu = nn.ReLU(True)
132
+
133
+ def _check_branches(self, num_branches, blocks, num_blocks, num_inchannels, num_channels):
134
+ if num_branches != len(num_blocks):
135
+ error_msg = "NUM_BRANCHES({}) <> NUM_BLOCKS({})".format(num_branches, len(num_blocks))
136
+ logger.error(error_msg)
137
+ raise ValueError(error_msg)
138
+
139
+ if num_branches != len(num_channels):
140
+ error_msg = "NUM_BRANCHES({}) <> NUM_CHANNELS({})".format(
141
+ num_branches, len(num_channels)
142
+ )
143
+ logger.error(error_msg)
144
+ raise ValueError(error_msg)
145
+
146
+ if num_branches != len(num_inchannels):
147
+ error_msg = "NUM_BRANCHES({}) <> NUM_INCHANNELS({})".format(
148
+ num_branches, len(num_inchannels)
149
+ )
150
+ logger.error(error_msg)
151
+ raise ValueError(error_msg)
152
+
153
+ def _make_one_branch(self, branch_index, block, num_blocks, num_channels, stride=1):
154
+ downsample = None
155
+ if (
156
+ stride != 1
157
+ or self.num_inchannels[branch_index] != num_channels[branch_index] * block.expansion
158
+ ):
159
+ downsample = nn.Sequential(
160
+ nn.Conv2d(
161
+ self.num_inchannels[branch_index],
162
+ num_channels[branch_index] * block.expansion,
163
+ kernel_size=1,
164
+ stride=stride,
165
+ bias=False,
166
+ ),
167
+ nn.BatchNorm2d(num_channels[branch_index] * block.expansion, momentum=BN_MOMENTUM),
168
+ )
169
+
170
+ layers = []
171
+ layers.append(
172
+ block(self.num_inchannels[branch_index], num_channels[branch_index], stride, downsample)
173
+ )
174
+ self.num_inchannels[branch_index] = num_channels[branch_index] * block.expansion
175
+ for _ in range(1, num_blocks[branch_index]):
176
+ layers.append(block(self.num_inchannels[branch_index], num_channels[branch_index]))
177
+
178
+ return nn.Sequential(*layers)
179
+
180
+ def _make_branches(self, num_branches, block, num_blocks, num_channels):
181
+ branches = []
182
+
183
+ for i in range(num_branches):
184
+ branches.append(self._make_one_branch(i, block, num_blocks, num_channels))
185
+
186
+ return nn.ModuleList(branches)
187
+
188
+ def _make_fuse_layers(self):
189
+ if self.num_branches == 1:
190
+ return None
191
+
192
+ num_branches = self.num_branches
193
+ num_inchannels = self.num_inchannels
194
+ fuse_layers = []
195
+ for i in range(num_branches if self.multi_scale_output else 1):
196
+ fuse_layer = []
197
+ for j in range(num_branches):
198
+ if j > i:
199
+ fuse_layer.append(
200
+ nn.Sequential(
201
+ nn.Conv2d(num_inchannels[j], num_inchannels[i], 1, 1, 0, bias=False),
202
+ nn.BatchNorm2d(num_inchannels[i]),
203
+ nn.Upsample(scale_factor=2 ** (j - i), mode="nearest"),
204
+ )
205
+ )
206
+ elif j == i:
207
+ fuse_layer.append(None)
208
+ else:
209
+ conv3x3s = []
210
+ for k in range(i - j):
211
+ if k == i - j - 1:
212
+ num_outchannels_conv3x3 = num_inchannels[i]
213
+ conv3x3s.append(
214
+ nn.Sequential(
215
+ nn.Conv2d(
216
+ num_inchannels[j],
217
+ num_outchannels_conv3x3,
218
+ 3,
219
+ 2,
220
+ 1,
221
+ bias=False,
222
+ ),
223
+ nn.BatchNorm2d(num_outchannels_conv3x3),
224
+ )
225
+ )
226
+ else:
227
+ num_outchannels_conv3x3 = num_inchannels[j]
228
+ conv3x3s.append(
229
+ nn.Sequential(
230
+ nn.Conv2d(
231
+ num_inchannels[j],
232
+ num_outchannels_conv3x3,
233
+ 3,
234
+ 2,
235
+ 1,
236
+ bias=False,
237
+ ),
238
+ nn.BatchNorm2d(num_outchannels_conv3x3),
239
+ nn.ReLU(True),
240
+ )
241
+ )
242
+ fuse_layer.append(nn.Sequential(*conv3x3s))
243
+ fuse_layers.append(nn.ModuleList(fuse_layer))
244
+
245
+ return nn.ModuleList(fuse_layers)
246
+
247
+ def get_num_inchannels(self):
248
+ return self.num_inchannels
249
+
250
+ def forward(self, x):
251
+ if self.num_branches == 1:
252
+ return [self.branches[0](x[0])]
253
+
254
+ for i in range(self.num_branches):
255
+ x[i] = self.branches[i](x[i])
256
+
257
+ x_fuse = []
258
+
259
+ for i in range(len(self.fuse_layers)):
260
+ y = x[0] if i == 0 else self.fuse_layers[i][0](x[0])
261
+ for j in range(1, self.num_branches):
262
+ if i == j:
263
+ y = y + x[j]
264
+ else:
265
+ z = self.fuse_layers[i][j](x[j])[:, :, : y.shape[2], : y.shape[3]]
266
+ y = y + z
267
+ x_fuse.append(self.relu(y))
268
+
269
+ return x_fuse
270
+
271
+
272
+ blocks_dict = {"BASIC": BasicBlock, "BOTTLENECK": Bottleneck}
273
+
274
+
275
+ class PoseHigherResolutionNet(Backbone):
276
+ """PoseHigherResolutionNet
277
+ Composed of several HighResolutionModule tied together with ConvNets
278
+ Adapted from the GitHub version to fit with HRFPN and the Detectron2 infrastructure
279
+ arXiv: https://arxiv.org/abs/1908.10357
280
+ """
281
+
282
+ def __init__(self, cfg, **kwargs):
283
+ self.inplanes = cfg.MODEL.HRNET.STEM_INPLANES
284
+ super(PoseHigherResolutionNet, self).__init__()
285
+
286
+ # stem net
287
+ self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
288
+ self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
289
+ self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, bias=False)
290
+ self.bn2 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
291
+ self.relu = nn.ReLU(inplace=True)
292
+ self.layer1 = self._make_layer(Bottleneck, 64, 4)
293
+
294
+ self.stage2_cfg = cfg.MODEL.HRNET.STAGE2
295
+ num_channels = self.stage2_cfg.NUM_CHANNELS
296
+ block = blocks_dict[self.stage2_cfg.BLOCK]
297
+ num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
298
+ self.transition1 = self._make_transition_layer([256], num_channels)
299
+ self.stage2, pre_stage_channels = self._make_stage(self.stage2_cfg, num_channels)
300
+
301
+ self.stage3_cfg = cfg.MODEL.HRNET.STAGE3
302
+ num_channels = self.stage3_cfg.NUM_CHANNELS
303
+ block = blocks_dict[self.stage3_cfg.BLOCK]
304
+ num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
305
+ self.transition2 = self._make_transition_layer(pre_stage_channels, num_channels)
306
+ self.stage3, pre_stage_channels = self._make_stage(self.stage3_cfg, num_channels)
307
+
308
+ self.stage4_cfg = cfg.MODEL.HRNET.STAGE4
309
+ num_channels = self.stage4_cfg.NUM_CHANNELS
310
+ block = blocks_dict[self.stage4_cfg.BLOCK]
311
+ num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
312
+ self.transition3 = self._make_transition_layer(pre_stage_channels, num_channels)
313
+ self.stage4, pre_stage_channels = self._make_stage(
314
+ self.stage4_cfg, num_channels, multi_scale_output=True
315
+ )
316
+
317
+ self._out_features = []
318
+ self._out_feature_channels = {}
319
+ self._out_feature_strides = {}
320
+
321
+ for i in range(cfg.MODEL.HRNET.STAGE4.NUM_BRANCHES):
322
+ self._out_features.append("p%d" % (i + 1))
323
+ self._out_feature_channels.update(
324
+ {self._out_features[-1]: cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS[i]}
325
+ )
326
+ self._out_feature_strides.update({self._out_features[-1]: 1})
327
+
328
+ def _get_deconv_cfg(self, deconv_kernel):
329
+ if deconv_kernel == 4:
330
+ padding = 1
331
+ output_padding = 0
332
+ elif deconv_kernel == 3:
333
+ padding = 1
334
+ output_padding = 1
335
+ elif deconv_kernel == 2:
336
+ padding = 0
337
+ output_padding = 0
338
+
339
+ return deconv_kernel, padding, output_padding
340
+
341
+ def _make_transition_layer(self, num_channels_pre_layer, num_channels_cur_layer):
342
+ num_branches_cur = len(num_channels_cur_layer)
343
+ num_branches_pre = len(num_channels_pre_layer)
344
+
345
+ transition_layers = []
346
+ for i in range(num_branches_cur):
347
+ if i < num_branches_pre:
348
+ if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
349
+ transition_layers.append(
350
+ nn.Sequential(
351
+ nn.Conv2d(
352
+ num_channels_pre_layer[i],
353
+ num_channels_cur_layer[i],
354
+ 3,
355
+ 1,
356
+ 1,
357
+ bias=False,
358
+ ),
359
+ nn.BatchNorm2d(num_channels_cur_layer[i]),
360
+ nn.ReLU(inplace=True),
361
+ )
362
+ )
363
+ else:
364
+ transition_layers.append(None)
365
+ else:
366
+ conv3x3s = []
367
+ for j in range(i + 1 - num_branches_pre):
368
+ inchannels = num_channels_pre_layer[-1]
369
+ outchannels = (
370
+ num_channels_cur_layer[i] if j == i - num_branches_pre else inchannels
371
+ )
372
+ conv3x3s.append(
373
+ nn.Sequential(
374
+ nn.Conv2d(inchannels, outchannels, 3, 2, 1, bias=False),
375
+ nn.BatchNorm2d(outchannels),
376
+ nn.ReLU(inplace=True),
377
+ )
378
+ )
379
+ transition_layers.append(nn.Sequential(*conv3x3s))
380
+
381
+ return nn.ModuleList(transition_layers)
382
+
383
+ def _make_layer(self, block, planes, blocks, stride=1):
384
+ downsample = None
385
+ if stride != 1 or self.inplanes != planes * block.expansion:
386
+ downsample = nn.Sequential(
387
+ nn.Conv2d(
388
+ self.inplanes,
389
+ planes * block.expansion,
390
+ kernel_size=1,
391
+ stride=stride,
392
+ bias=False,
393
+ ),
394
+ nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
395
+ )
396
+
397
+ layers = []
398
+ layers.append(block(self.inplanes, planes, stride, downsample))
399
+ self.inplanes = planes * block.expansion
400
+ for _ in range(1, blocks):
401
+ layers.append(block(self.inplanes, planes))
402
+
403
+ return nn.Sequential(*layers)
404
+
405
+ def _make_stage(self, layer_config, num_inchannels, multi_scale_output=True):
406
+ num_modules = layer_config["NUM_MODULES"]
407
+ num_branches = layer_config["NUM_BRANCHES"]
408
+ num_blocks = layer_config["NUM_BLOCKS"]
409
+ num_channels = layer_config["NUM_CHANNELS"]
410
+ block = blocks_dict[layer_config["BLOCK"]]
411
+
412
+ modules = []
413
+ for i in range(num_modules):
414
+ # multi_scale_output is only used by the last module
415
+ if not multi_scale_output and i == num_modules - 1:
416
+ reset_multi_scale_output = False
417
+ else:
418
+ reset_multi_scale_output = True
419
+
420
+ modules.append(
421
+ HighResolutionModule(
422
+ num_branches,
423
+ block,
424
+ num_blocks,
425
+ num_inchannels,
426
+ num_channels,
427
+ reset_multi_scale_output,
428
+ )
429
+ )
430
+ num_inchannels = modules[-1].get_num_inchannels()
431
+
432
+ return nn.Sequential(*modules), num_inchannels
433
+
434
+ def forward(self, x):
435
+ x = self.conv1(x)
436
+ x = self.bn1(x)
437
+ x = self.relu(x)
438
+ x = self.conv2(x)
439
+ x = self.bn2(x)
440
+ x = self.relu(x)
441
+ x = self.layer1(x)
442
+
443
+ x_list = []
444
+ for i in range(self.stage2_cfg.NUM_BRANCHES):
445
+ if self.transition1[i] is not None:
446
+ x_list.append(self.transition1[i](x))
447
+ else:
448
+ x_list.append(x)
449
+ y_list = self.stage2(x_list)
450
+
451
+ x_list = []
452
+ for i in range(self.stage3_cfg.NUM_BRANCHES):
453
+ if self.transition2[i] is not None:
454
+ x_list.append(self.transition2[i](y_list[-1]))
455
+ else:
456
+ x_list.append(y_list[i])
457
+ y_list = self.stage3(x_list)
458
+
459
+ x_list = []
460
+ for i in range(self.stage4_cfg.NUM_BRANCHES):
461
+ if self.transition3[i] is not None:
462
+ x_list.append(self.transition3[i](y_list[-1]))
463
+ else:
464
+ x_list.append(y_list[i])
465
+ y_list = self.stage4(x_list)
466
+
467
+ assert len(self._out_features) == len(y_list)
468
+ return dict(zip(self._out_features, y_list)) # final_outputs
469
+
470
+
471
+ @BACKBONE_REGISTRY.register()
472
+ def build_pose_hrnet_backbone(cfg, input_shape: ShapeSpec):
473
+ model = PoseHigherResolutionNet(cfg)
474
+ return model
Leffa/3rdparty/densepose/modeling/inference.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ from dataclasses import fields
3
+ from typing import Any, List
4
+ import torch
5
+
6
+ from detectron2.structures import Instances
7
+
8
+
9
+ def densepose_inference(densepose_predictor_output: Any, detections: List[Instances]) -> None:
10
+ """
11
+ Splits DensePose predictor outputs into chunks, each chunk corresponds to
12
+ detections on one image. Predictor output chunks are stored in `pred_densepose`
13
+ attribute of the corresponding `Instances` object.
14
+
15
+ Args:
16
+ densepose_predictor_output: a dataclass instance (can be of different types,
17
+ depending on predictor used for inference). Each field can be `None`
18
+ (if the corresponding output was not inferred) or a tensor of size
19
+ [N, ...], where N = N_1 + N_2 + .. + N_k is a total number of
20
+ detections on all images, N_1 is the number of detections on image 1,
21
+ N_2 is the number of detections on image 2, etc.
22
+ detections: a list of objects of type `Instance`, k-th object corresponds
23
+ to detections on k-th image.
24
+ """
25
+ k = 0
26
+ for detection_i in detections:
27
+ if densepose_predictor_output is None:
28
+ # don't add `pred_densepose` attribute
29
+ continue
30
+ n_i = detection_i.__len__()
31
+
32
+ PredictorOutput = type(densepose_predictor_output)
33
+ output_i_dict = {}
34
+ # we assume here that `densepose_predictor_output` is a dataclass object
35
+ for field in fields(densepose_predictor_output):
36
+ field_value = getattr(densepose_predictor_output, field.name)
37
+ # slice tensors
38
+ if isinstance(field_value, torch.Tensor):
39
+ output_i_dict[field.name] = field_value[k : k + n_i]
40
+ # leave others as is
41
+ else:
42
+ output_i_dict[field.name] = field_value
43
+ detection_i.pred_densepose = PredictorOutput(**output_i_dict)
44
+ k += n_i
Leffa/3rdparty/densepose/modeling/losses/__init__.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from .chart import DensePoseChartLoss
4
+ from .chart_with_confidences import DensePoseChartWithConfidenceLoss
5
+ from .cse import DensePoseCseLoss
6
+ from .registry import DENSEPOSE_LOSS_REGISTRY
7
+
8
+
9
+ __all__ = [
10
+ "DensePoseChartLoss",
11
+ "DensePoseChartWithConfidenceLoss",
12
+ "DensePoseCseLoss",
13
+ "DENSEPOSE_LOSS_REGISTRY",
14
+ ]
Leffa/3rdparty/densepose/modeling/losses/chart.py ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from typing import Any, List
4
+ import torch
5
+ from torch.nn import functional as F
6
+
7
+ from detectron2.config import CfgNode
8
+ from detectron2.structures import Instances
9
+
10
+ from .mask_or_segm import MaskOrSegmentationLoss
11
+ from .registry import DENSEPOSE_LOSS_REGISTRY
12
+ from .utils import (
13
+ BilinearInterpolationHelper,
14
+ ChartBasedAnnotationsAccumulator,
15
+ LossDict,
16
+ extract_packed_annotations_from_matches,
17
+ )
18
+
19
+
20
+ @DENSEPOSE_LOSS_REGISTRY.register()
21
+ class DensePoseChartLoss:
22
+ """
23
+ DensePose loss for chart-based training. A mesh is split into charts,
24
+ each chart is given a label (I) and parametrized by 2 coordinates referred to
25
+ as U and V. Ground truth consists of a number of points annotated with
26
+ I, U and V values and coarse segmentation S defined for all pixels of the
27
+ object bounding box. In some cases (see `COARSE_SEGM_TRAINED_BY_MASKS`),
28
+ semantic segmentation annotations can be used as ground truth inputs as well.
29
+
30
+ Estimated values are tensors:
31
+ * U coordinates, tensor of shape [N, C, S, S]
32
+ * V coordinates, tensor of shape [N, C, S, S]
33
+ * fine segmentation estimates, tensor of shape [N, C, S, S] with raw unnormalized
34
+ scores for each fine segmentation label at each location
35
+ * coarse segmentation estimates, tensor of shape [N, D, S, S] with raw unnormalized
36
+ scores for each coarse segmentation label at each location
37
+ where N is the number of detections, C is the number of fine segmentation
38
+ labels, S is the estimate size ( = width = height) and D is the number of
39
+ coarse segmentation channels.
40
+
41
+ The losses are:
42
+ * regression (smooth L1) loss for U and V coordinates
43
+ * cross entropy loss for fine (I) and coarse (S) segmentations
44
+ Each loss has an associated weight
45
+ """
46
+
47
+ def __init__(self, cfg: CfgNode):
48
+ """
49
+ Initialize chart-based loss from configuration options
50
+
51
+ Args:
52
+ cfg (CfgNode): configuration options
53
+ """
54
+ # fmt: off
55
+ self.heatmap_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE
56
+ self.w_points = cfg.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS
57
+ self.w_part = cfg.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS
58
+ self.w_segm = cfg.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS
59
+ self.n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
60
+ # fmt: on
61
+ self.segm_trained_by_masks = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS
62
+ self.segm_loss = MaskOrSegmentationLoss(cfg)
63
+
64
+ def __call__(
65
+ self, proposals_with_gt: List[Instances], densepose_predictor_outputs: Any, **kwargs
66
+ ) -> LossDict:
67
+ """
68
+ Produce chart-based DensePose losses
69
+
70
+ Args:
71
+ proposals_with_gt (list of Instances): detections with associated ground truth data
72
+ densepose_predictor_outputs: an object of a dataclass that contains predictor outputs
73
+ with estimated values; assumed to have the following attributes:
74
+ * coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S]
75
+ * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
76
+ * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
77
+ * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
78
+ where N is the number of detections, C is the number of fine segmentation
79
+ labels, S is the estimate size ( = width = height) and D is the number of
80
+ coarse segmentation channels.
81
+
82
+ Return:
83
+ dict: str -> tensor: dict of losses with the following entries:
84
+ * `loss_densepose_U`: smooth L1 loss for U coordinate estimates
85
+ * `loss_densepose_V`: smooth L1 loss for V coordinate estimates
86
+ * `loss_densepose_I`: cross entropy for raw unnormalized scores for fine
87
+ segmentation estimates given ground truth labels;
88
+ * `loss_densepose_S`: cross entropy for raw unnormalized scores for coarse
89
+ segmentation estimates given ground truth labels;
90
+ """
91
+ # densepose outputs are computed for all images and all bounding boxes;
92
+ # i.e. if a batch has 4 images with (3, 1, 2, 1) proposals respectively,
93
+ # the outputs will have size(0) == 3+1+2+1 == 7
94
+
95
+ if not len(proposals_with_gt):
96
+ return self.produce_fake_densepose_losses(densepose_predictor_outputs)
97
+
98
+ accumulator = ChartBasedAnnotationsAccumulator()
99
+ packed_annotations = extract_packed_annotations_from_matches(proposals_with_gt, accumulator)
100
+
101
+ # NOTE: we need to keep the same computation graph on all the GPUs to
102
+ # perform reduction properly. Hence even if we have no data on one
103
+ # of the GPUs, we still need to generate the computation graph.
104
+ # Add fake (zero) loss in the form Tensor.sum() * 0
105
+ if packed_annotations is None:
106
+ return self.produce_fake_densepose_losses(densepose_predictor_outputs)
107
+
108
+ h, w = densepose_predictor_outputs.u.shape[2:]
109
+ interpolator = BilinearInterpolationHelper.from_matches(
110
+ packed_annotations,
111
+ (h, w),
112
+ )
113
+
114
+ j_valid_fg = interpolator.j_valid * ( # pyre-ignore[16]
115
+ packed_annotations.fine_segm_labels_gt > 0
116
+ )
117
+ # pyre-fixme[6]: For 1st param expected `Tensor` but got `int`.
118
+ if not torch.any(j_valid_fg):
119
+ return self.produce_fake_densepose_losses(densepose_predictor_outputs)
120
+
121
+ losses_uv = self.produce_densepose_losses_uv(
122
+ proposals_with_gt,
123
+ densepose_predictor_outputs,
124
+ packed_annotations,
125
+ interpolator,
126
+ j_valid_fg, # pyre-ignore[6]
127
+ )
128
+
129
+ losses_segm = self.produce_densepose_losses_segm(
130
+ proposals_with_gt,
131
+ densepose_predictor_outputs,
132
+ packed_annotations,
133
+ interpolator,
134
+ j_valid_fg, # pyre-ignore[6]
135
+ )
136
+
137
+ return {**losses_uv, **losses_segm}
138
+
139
def produce_fake_densepose_losses(self, densepose_predictor_outputs: Any) -> LossDict:
    """
    Fake (zero-valued) losses for fine segmentation and U/V coordinates,
    used when a batch contains no suitable ground truth. The values are 0,
    but they reference the predictor output tensors so that
    `DistributedDataParallel` sees the same computation graph on every GPU
    and can perform gradient reduction properly.

    Args:
        densepose_predictor_outputs: DensePose predictor outputs, an object
            of a dataclass assumed to have the following attributes:
            * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
            * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
            * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
    Return:
        dict: str -> tensor: dict of losses with the following entries:
            * `loss_densepose_U`: has value 0
            * `loss_densepose_V`: has value 0
            * `loss_densepose_I`: has value 0
            * `loss_densepose_S`: has value 0
    """
    fake_losses: LossDict = {}
    fake_losses.update(self.produce_fake_densepose_losses_uv(densepose_predictor_outputs))
    fake_losses.update(self.produce_fake_densepose_losses_segm(densepose_predictor_outputs))
    return fake_losses
163
+
164
def produce_fake_densepose_losses_uv(self, densepose_predictor_outputs: Any) -> LossDict:
    """
    Fake (zero-valued) U/V losses, used when a batch contains no suitable
    ground truth. Each loss multiplies a full-tensor sum by zero so that
    every element of the corresponding estimate participates in the
    computation graph — this keeps `DistributedDataParallel` graphs
    identical across GPUs while contributing no gradient signal.

    Args:
        densepose_predictor_outputs: DensePose predictor outputs, an object
            of a dataclass assumed to have the following attributes:
            * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
            * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
    Return:
        dict: str -> tensor: dict of losses with the following entries:
            * `loss_densepose_U`: has value 0
            * `loss_densepose_V`: has value 0
    """
    u_est = densepose_predictor_outputs.u
    v_est = densepose_predictor_outputs.v
    return {
        "loss_densepose_U": u_est.sum() * 0,
        "loss_densepose_V": v_est.sum() * 0,
    }
186
+
187
def produce_fake_densepose_losses_segm(self, densepose_predictor_outputs: Any) -> LossDict:
    """
    Fake (zero-valued) fine / coarse segmentation losses, used when a batch
    contains no suitable ground truth. The fine-segmentation term touches
    the `fine_segm` tensor (sum * 0) and the coarse term is delegated to
    `self.segm_loss.fake_value`, so that `DistributedDataParallel` keeps an
    identical computation graph on all GPUs.

    Args:
        densepose_predictor_outputs: DensePose predictor outputs, an object
            of a dataclass assumed to have the following attributes:
            * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
            * coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S]
    Return:
        dict: str -> tensor: dict of losses with the following entries:
            * `loss_densepose_I`: has value 0
            * `loss_densepose_S`: has value 0, added only if `segm_trained_by_masks` is False
    """
    zero_fine_segm = densepose_predictor_outputs.fine_segm.sum() * 0
    return {
        "loss_densepose_I": zero_fine_segm,
        "loss_densepose_S": self.segm_loss.fake_value(densepose_predictor_outputs),
    }
210
+
211
def produce_densepose_losses_uv(
    self,
    proposals_with_gt: List[Instances],
    densepose_predictor_outputs: Any,
    packed_annotations: Any,
    interpolator: BilinearInterpolationHelper,
    j_valid_fg: torch.Tensor,
) -> LossDict:
    """
    Losses for U/V coordinates: sum-reduced smooth L1 between coordinate
    estimates interpolated at annotated points and the ground truth values,
    scaled by `self.w_points`.

    Args:
        proposals_with_gt (list of Instances): detections with associated ground truth data
        densepose_predictor_outputs: DensePose predictor outputs, an object
            of a dataclass assumed to have the following attributes:
            * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
            * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
        packed_annotations: packed GT annotations with `u_gt` / `v_gt` tensors
        interpolator (BilinearInterpolationHelper): evaluates estimates at annotated points
        j_valid_fg (torch.Tensor): boolean mask of valid foreground annotated points
    Return:
        dict: str -> tensor: dict of losses with the following entries:
            * `loss_densepose_U`: smooth L1 loss for U coordinate estimates
            * `loss_densepose_V`: smooth L1 loss for V coordinate estimates
    """

    def _coordinate_loss(estimates: torch.Tensor, ground_truth: torch.Tensor) -> torch.Tensor:
        # Interpolate estimates at annotated points, keep valid FG points only,
        # then compare against the matching GT subset.
        est_at_points = interpolator.extract_at_points(estimates)[j_valid_fg]
        gt_at_points = ground_truth[j_valid_fg]
        return F.smooth_l1_loss(est_at_points, gt_at_points, reduction="sum") * self.w_points

    return {
        "loss_densepose_U": _coordinate_loss(densepose_predictor_outputs.u, packed_annotations.u_gt),
        "loss_densepose_V": _coordinate_loss(densepose_predictor_outputs.v, packed_annotations.v_gt),
    }
242
+
243
def produce_densepose_losses_segm(
    self,
    proposals_with_gt: List[Instances],
    densepose_predictor_outputs: Any,
    packed_annotations: Any,
    interpolator: BilinearInterpolationHelper,
    j_valid_fg: torch.Tensor,
) -> LossDict:
    """
    Losses for fine / coarse segmentation: cross-entropy
    for segmentation unnormalized scores given ground truth labels at
    annotated points for fine segmentation and dense mask annotations
    for coarse segmentation.

    Args:
        proposals_with_gt (list of Instances): detections with associated ground truth data
        densepose_predictor_outputs: DensePose predictor outputs, an object
            of a dataclass that is assumed to have the following attributes:
            * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
            * coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S]
    Return:
        dict: str -> tensor: dict of losses with the following entries:
            * `loss_densepose_I`: cross entropy for raw unnormalized scores for fine
                segmentation estimates given ground truth labels
            * `loss_densepose_S`: cross entropy for raw unnormalized scores for coarse
                segmentation estimates given ground truth labels;
                may be included if coarse segmentation is only trained
                using DensePose ground truth; if additional supervision through
                instance segmentation data is performed (`segm_trained_by_masks` is True),
                this loss is handled by `produce_mask_losses` instead
    """
    # GT fine-segmentation labels at points where interpolation is valid
    # (j_valid includes background points, unlike j_valid_fg).
    fine_segm_gt = packed_annotations.fine_segm_labels_gt[
        interpolator.j_valid  # pyre-ignore[16]
    ]
    # Interpolate the per-channel scores at the annotated points; the
    # [:, None] weight broadcast applies the same bilinear weights to every
    # fine-segmentation channel (slice_fine_segm=slice(None) keeps all C channels).
    fine_segm_est = interpolator.extract_at_points(
        densepose_predictor_outputs.fine_segm,
        slice_fine_segm=slice(None),
        w_ylo_xlo=interpolator.w_ylo_xlo[:, None],  # pyre-ignore[16]
        w_ylo_xhi=interpolator.w_ylo_xhi[:, None],  # pyre-ignore[16]
        w_yhi_xlo=interpolator.w_yhi_xlo[:, None],  # pyre-ignore[16]
        w_yhi_xhi=interpolator.w_yhi_xhi[:, None],  # pyre-ignore[16]
    )[interpolator.j_valid, :]
    return {
        # Fine segmentation: per-point cross-entropy weighted by w_part.
        "loss_densepose_I": F.cross_entropy(fine_segm_est, fine_segm_gt.long()) * self.w_part,
        # Coarse segmentation: delegated to segm_loss, weighted by w_segm.
        "loss_densepose_S": self.segm_loss(
            proposals_with_gt, densepose_predictor_outputs, packed_annotations
        )
        * self.w_segm,
    }
Leffa/3rdparty/densepose/modeling/losses/chart_with_confidences.py ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ import math
3
+ from typing import Any, List
4
+ import torch
5
+ from torch import nn
6
+ from torch.nn import functional as F
7
+
8
+ from detectron2.config import CfgNode
9
+ from detectron2.structures import Instances
10
+
11
+ from .. import DensePoseConfidenceModelConfig, DensePoseUVConfidenceType
12
+ from .chart import DensePoseChartLoss
13
+ from .registry import DENSEPOSE_LOSS_REGISTRY
14
+ from .utils import BilinearInterpolationHelper, LossDict
15
+
16
+
17
@DENSEPOSE_LOSS_REGISTRY.register()
class DensePoseChartWithConfidenceLoss(DensePoseChartLoss):
    """
    Chart-based DensePose loss with optional UV confidence modeling.
    When UV confidence is enabled, the separate U/V smooth L1 losses of the
    base class are replaced by a single negative-log-likelihood term
    (`loss_densepose_UV`) computed by a Gaussian UV loss module selected from
    the confidence model configuration.
    """

    def __init__(self, cfg: CfgNode):
        super().__init__(cfg)
        self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg)
        # Pick the confidence-aware UV loss implementation by configured type.
        if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO:
            self.uv_loss_with_confidences = IIDIsotropicGaussianUVLoss(
                self.confidence_model_cfg.uv_confidence.epsilon
            )
        elif self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.INDEP_ANISO:
            self.uv_loss_with_confidences = IndepAnisotropicGaussianUVLoss(
                self.confidence_model_cfg.uv_confidence.epsilon
            )
        # NOTE(review): if uv_confidence is enabled with a type other than the two
        # above, `uv_loss_with_confidences` is never assigned — confirm that config
        # validation upstream rules this combination out.

    def produce_fake_densepose_losses_uv(self, densepose_predictor_outputs: Any) -> LossDict:
        """
        Overrides fake losses for fine segmentation and U/V coordinates to
        include computation graphs for additional confidence parameters.
        These are used when no suitable ground truth data was found in a batch.
        The loss has a value 0 and is primarily used to construct the computation graph,
        so that `DistributedDataParallel` has similar graphs on all GPUs and can
        perform reduction properly.

        Args:
            densepose_predictor_outputs: DensePose predictor outputs, an object
                of a dataclass that is assumed to have the following attributes:
                * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
                * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
                * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
        Return:
            dict: str -> tensor: dict of losses with the following entries:
                * `loss_densepose_U`: has value 0
                * `loss_densepose_V`: has value 0
                * `loss_densepose_I`: has value 0
        """
        conf_type = self.confidence_model_cfg.uv_confidence.type
        if self.confidence_model_cfg.uv_confidence.enabled:
            # Zero loss that still references u/v (and the confidence outputs
            # relevant to the configured type) to keep the DDP graph consistent.
            loss_uv = (
                densepose_predictor_outputs.u.sum() + densepose_predictor_outputs.v.sum()
            ) * 0
            if conf_type == DensePoseUVConfidenceType.IID_ISO:
                loss_uv += densepose_predictor_outputs.sigma_2.sum() * 0
            elif conf_type == DensePoseUVConfidenceType.INDEP_ANISO:
                loss_uv += (
                    densepose_predictor_outputs.sigma_2.sum()
                    + densepose_predictor_outputs.kappa_u.sum()
                    + densepose_predictor_outputs.kappa_v.sum()
                ) * 0
            return {"loss_densepose_UV": loss_uv}
        else:
            # Confidence disabled: fall back to the base-class fake U/V losses.
            return super().produce_fake_densepose_losses_uv(densepose_predictor_outputs)

    def produce_densepose_losses_uv(
        self,
        proposals_with_gt: List[Instances],
        densepose_predictor_outputs: Any,
        packed_annotations: Any,
        interpolator: BilinearInterpolationHelper,
        j_valid_fg: torch.Tensor,
    ) -> LossDict:
        """
        Compute the UV loss. With confidence modeling enabled, returns a single
        `loss_densepose_UV` NLL term from the confidence-aware loss module;
        otherwise delegates to the base-class smooth L1 U/V losses.
        """
        conf_type = self.confidence_model_cfg.uv_confidence.type
        if self.confidence_model_cfg.uv_confidence.enabled:
            # Interpolate estimates and confidences at valid foreground points.
            u_gt = packed_annotations.u_gt[j_valid_fg]
            u_est = interpolator.extract_at_points(densepose_predictor_outputs.u)[j_valid_fg]
            v_gt = packed_annotations.v_gt[j_valid_fg]
            v_est = interpolator.extract_at_points(densepose_predictor_outputs.v)[j_valid_fg]
            sigma_2_est = interpolator.extract_at_points(densepose_predictor_outputs.sigma_2)[
                j_valid_fg
            ]
            if conf_type == DensePoseUVConfidenceType.IID_ISO:
                return {
                    "loss_densepose_UV": (
                        self.uv_loss_with_confidences(u_est, v_est, sigma_2_est, u_gt, v_gt)
                        * self.w_points
                    )
                }
            elif conf_type in [DensePoseUVConfidenceType.INDEP_ANISO]:
                kappa_u_est = interpolator.extract_at_points(densepose_predictor_outputs.kappa_u)[
                    j_valid_fg
                ]
                kappa_v_est = interpolator.extract_at_points(densepose_predictor_outputs.kappa_v)[
                    j_valid_fg
                ]
                return {
                    "loss_densepose_UV": (
                        self.uv_loss_with_confidences(
                            u_est, v_est, sigma_2_est, kappa_u_est, kappa_v_est, u_gt, v_gt
                        )
                        * self.w_points
                    )
                }
        # Fallback: confidence disabled (or an unrecognized confidence type) —
        # use the plain smooth L1 U/V losses from the base class.
        return super().produce_densepose_losses_uv(
            proposals_with_gt,
            densepose_predictor_outputs,
            packed_annotations,
            interpolator,
            j_valid_fg,
        )
117
+
118
+
119
class IIDIsotropicGaussianUVLoss(nn.Module):
    """
    Negative log likelihood for i.i.d. residuals with isotropic covariance:
    $Sigma_i = sigma_i^2 I$
    The loss is:
    $1/2 sum_{i=1}^n (log(2 pi) + 2 log sigma_i^2 + ||delta_i||^2 / sigma_i^2)$,
    where $delta_i=(u - u', v - v')$ is the 2D difference between estimated
    and ground truth UV values.
    For details, see:
    N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning
    Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019
    """

    def __init__(self, sigma_lower_bound: float):
        super().__init__()
        # Lower bound keeps the variance away from the degenerate
        # solution sigma -> 0.
        self.sigma_lower_bound = sigma_lower_bound
        self.log2pi = math.log(2 * math.pi)

    def forward(
        self,
        u: torch.Tensor,
        v: torch.Tensor,
        sigma_u: torch.Tensor,
        target_u: torch.Tensor,
        target_v: torch.Tensor,
    ):
        # sigma_i^2 = softplus(raw confidence) + lower bound (always positive).
        variance = F.softplus(sigma_u) + self.sigma_lower_bound
        # ||delta_i||^2 — squared residual norm per point.
        residual_sq = (u - target_u) ** 2 + (v - target_v) ** 2
        # Per-point NLL from the formula in the class docstring.
        per_point_nll = 0.5 * (
            self.log2pi + 2 * torch.log(variance) + residual_sq / variance
        )
        return per_point_nll.sum()
155
+
156
+
157
class IndepAnisotropicGaussianUVLoss(nn.Module):
    """
    Negative log likelihood for independent residuals with anisotropic covariances:
    $Sigma_i = sigma_i^2 I + r_i r_i^T$
    The loss is:
    $1/2 sum_{i=1}^n (log(2 pi)
      + log sigma_i^2 (sigma_i^2 + ||r_i||^2)
      + ||delta_i||^2 / sigma_i^2
      - <delta_i, r_i>^2 / (sigma_i^2 * (sigma_i^2 + ||r_i||^2)))$,
    where $delta_i=(u - u', v - v')$ is the 2D difference between estimated
    and ground truth UV values.
    For details, see:
    N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning
    Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019
    """

    def __init__(self, sigma_lower_bound: float):
        super().__init__()
        # Lower bound keeps the isotropic variance term strictly positive.
        self.sigma_lower_bound = sigma_lower_bound
        self.log2pi = math.log(2 * math.pi)

    def forward(
        self,
        u: torch.Tensor,
        v: torch.Tensor,
        sigma_u: torch.Tensor,
        kappa_u_est: torch.Tensor,
        kappa_v_est: torch.Tensor,
        target_u: torch.Tensor,
        target_v: torch.Tensor,
    ):
        # sigma_i^2 = softplus(raw confidence) + lower bound.
        variance = F.softplus(sigma_u) + self.sigma_lower_bound
        # ||r_i||^2 — squared norm of the anisotropy direction.
        r_norm_sq = kappa_u_est**2 + kappa_v_est**2
        delta_u = u - target_u
        delta_v = v - target_v
        # ||delta_i||^2 — squared residual norm.
        delta_norm_sq = delta_u**2 + delta_v**2
        # <delta_i, r_i> and its square.
        delta_dot_r = delta_u * kappa_u_est + delta_v * kappa_v_est
        delta_dot_r_sq = delta_dot_r**2
        # sigma_i^2 * (sigma_i^2 + ||r_i||^2) — determinant-like denominator.
        denom = variance * (variance + r_norm_sq)
        per_point_nll = 0.5 * (
            self.log2pi
            + torch.log(denom)
            + delta_norm_sq / variance
            - delta_dot_r_sq / denom
        )
        return per_point_nll.sum()
Leffa/3rdparty/densepose/modeling/losses/cse.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2
+
3
+ from typing import Any, List
4
+ from torch import nn
5
+
6
+ from detectron2.config import CfgNode
7
+ from detectron2.structures import Instances
8
+
9
+ from .cycle_pix2shape import PixToShapeCycleLoss
10
+ from .cycle_shape2shape import ShapeToShapeCycleLoss
11
+ from .embed import EmbeddingLoss
12
+ from .embed_utils import CseAnnotationsAccumulator
13
+ from .mask_or_segm import MaskOrSegmentationLoss
14
+ from .registry import DENSEPOSE_LOSS_REGISTRY
15
+ from .soft_embed import SoftEmbeddingLoss
16
+ from .utils import BilinearInterpolationHelper, LossDict, extract_packed_annotations_from_matches
17
+
18
+
19
@DENSEPOSE_LOSS_REGISTRY.register()
class DensePoseCseLoss:
    """
    Loss for continuous-surface-embedding (CSE) DensePose training. Combines
    a coarse segmentation loss with per-mesh embedding losses and, when
    enabled in the config, shape-to-shape and pixel-to-shape cycle losses.
    """

    # Local mapping from class name to embedding-loss implementation,
    # selected by cfg ...CSE.EMBED_LOSS_NAME.
    _EMBED_LOSS_REGISTRY = {
        EmbeddingLoss.__name__: EmbeddingLoss,
        SoftEmbeddingLoss.__name__: SoftEmbeddingLoss,
    }

    def __init__(self, cfg: CfgNode):
        """
        Initialize CSE loss from configuration options

        Args:
            cfg (CfgNode): configuration options
        """
        # Loss weights for the segmentation and embedding terms.
        self.w_segm = cfg.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS
        self.w_embed = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_LOSS_WEIGHT
        self.segm_loss = MaskOrSegmentationLoss(cfg)
        self.embed_loss = DensePoseCseLoss.create_embed_loss(cfg)
        # Optional cycle losses — their weights and modules are only created
        # when the corresponding config flag is enabled.
        self.do_shape2shape = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.ENABLED
        if self.do_shape2shape:
            self.w_shape2shape = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.WEIGHT
            self.shape2shape_loss = ShapeToShapeCycleLoss(cfg)
        self.do_pix2shape = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.ENABLED
        if self.do_pix2shape:
            self.w_pix2shape = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.WEIGHT
            self.pix2shape_loss = PixToShapeCycleLoss(cfg)

    @classmethod
    def create_embed_loss(cls, cfg: CfgNode):
        """Instantiate the embedding loss selected by the config."""
        # registry not used here, since embedding losses are currently local
        # and are not used anywhere else
        return cls._EMBED_LOSS_REGISTRY[cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_LOSS_NAME](cfg)

    def __call__(
        self,
        proposals_with_gt: List[Instances],
        densepose_predictor_outputs: Any,
        embedder: nn.Module,
    ) -> LossDict:
        """
        Compute all enabled CSE losses for a batch.

        Args:
            proposals_with_gt (list of Instances): detections with associated ground truth
            densepose_predictor_outputs: predictor outputs; assumed to expose an
                `embedding` tensor of shape [N, D, S, S]
            embedder (nn.Module): computes vertex embeddings for different meshes
        Return:
            dict: str -> tensor with `loss_densepose_S`, per-mesh
            `loss_densepose_E<meshid>` entries and, when enabled,
            `loss_shape2shape` / `loss_pix2shape`.
        """
        # No GT in the batch at all -> produce zero-valued losses that still
        # reference the outputs (presumably to keep DDP graphs consistent —
        # same pattern as the chart-based loss).
        if not len(proposals_with_gt):
            return self.produce_fake_losses(densepose_predictor_outputs, embedder)
        accumulator = CseAnnotationsAccumulator()
        packed_annotations = extract_packed_annotations_from_matches(proposals_with_gt, accumulator)
        if packed_annotations is None:
            return self.produce_fake_losses(densepose_predictor_outputs, embedder)
        # Bilinear interpolation of estimates at annotated points, on the
        # predictor output grid of size (h, w).
        h, w = densepose_predictor_outputs.embedding.shape[2:]
        interpolator = BilinearInterpolationHelper.from_matches(
            packed_annotations,
            (h, w),
        )
        meshid_to_embed_losses = self.embed_loss(
            proposals_with_gt,
            densepose_predictor_outputs,
            packed_annotations,
            interpolator,
            embedder,
        )
        # One weighted embedding loss entry per mesh id.
        embed_loss_dict = {
            f"loss_densepose_E{meshid}": self.w_embed * meshid_to_embed_losses[meshid]
            for meshid in meshid_to_embed_losses
        }
        all_loss_dict = {
            "loss_densepose_S": self.w_segm
            * self.segm_loss(proposals_with_gt, densepose_predictor_outputs, packed_annotations),
            **embed_loss_dict,
        }
        if self.do_shape2shape:
            all_loss_dict["loss_shape2shape"] = self.w_shape2shape * self.shape2shape_loss(embedder)
        if self.do_pix2shape:
            all_loss_dict["loss_pix2shape"] = self.w_pix2shape * self.pix2shape_loss(
                proposals_with_gt, densepose_predictor_outputs, packed_annotations, embedder
            )
        return all_loss_dict

    def produce_fake_losses(
        self, densepose_predictor_outputs: Any, embedder: nn.Module
    ) -> LossDict:
        """
        Zero-valued losses mirroring the structure of the real loss dict,
        used when the batch contains no usable ground truth. Note: fake
        embedding losses are keyed by mesh *name* and are not weighted,
        unlike the real per-mesh-id entries above.
        """
        meshname_to_embed_losses = self.embed_loss.fake_values(
            densepose_predictor_outputs, embedder=embedder
        )
        embed_loss_dict = {
            f"loss_densepose_E{mesh_name}": meshname_to_embed_losses[mesh_name]
            for mesh_name in meshname_to_embed_losses
        }
        all_loss_dict = {
            "loss_densepose_S": self.segm_loss.fake_value(densepose_predictor_outputs),
            **embed_loss_dict,
        }
        if self.do_shape2shape:
            all_loss_dict["loss_shape2shape"] = self.shape2shape_loss.fake_value(embedder)
        if self.do_pix2shape:
            all_loss_dict["loss_pix2shape"] = self.pix2shape_loss.fake_value(
                densepose_predictor_outputs, embedder
            )
        return all_loss_dict
Leffa/3rdparty/densepose/modeling/losses/cycle_pix2shape.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2
+
3
+ from typing import Any, List
4
+ import torch
5
+ from torch import nn
6
+ from torch.nn import functional as F
7
+
8
+ from detectron2.config import CfgNode
9
+ from detectron2.structures import Instances
10
+
11
+ from densepose.data.meshes.catalog import MeshCatalog
12
+ from densepose.modeling.cse.utils import normalize_embeddings, squared_euclidean_distance_matrix
13
+
14
+ from .embed_utils import PackedCseAnnotations
15
+ from .mask import extract_data_for_mask_loss_from_matches
16
+
17
+
18
def _create_pixel_dist_matrix(grid_size: int) -> torch.Tensor:
    """
    Squared Euclidean distances between all pairs of cells of a
    `grid_size` x `grid_size` pixel grid, as a
    [grid_size^2, grid_size^2] tensor.
    """
    axis = torch.arange(grid_size)
    # Flattened (row, col) coordinates; entry i corresponds to
    # row = i // grid_size, col = i % grid_size.
    # NOTE: relies on torch.meshgrid's default 'ij' indexing.
    coords = (
        torch.stack(torch.meshgrid(axis, axis), -1).reshape((grid_size * grid_size, 2)).float()
    )
    return squared_euclidean_distance_matrix(coords, coords)
28
+
29
+
30
+ def _sample_fg_pixels_randperm(fg_mask: torch.Tensor, sample_size: int) -> torch.Tensor:
31
+ fg_mask_flattened = fg_mask.reshape((-1,))
32
+ num_pixels = int(fg_mask_flattened.sum().item())
33
+ fg_pixel_indices = fg_mask_flattened.nonzero(as_tuple=True)[0]
34
+ if (sample_size <= 0) or (num_pixels <= sample_size):
35
+ return fg_pixel_indices
36
+ sample_indices = torch.randperm(num_pixels, device=fg_mask.device)[:sample_size]
37
+ return fg_pixel_indices[sample_indices]
38
+
39
+
40
+ def _sample_fg_pixels_multinomial(fg_mask: torch.Tensor, sample_size: int) -> torch.Tensor:
41
+ fg_mask_flattened = fg_mask.reshape((-1,))
42
+ num_pixels = int(fg_mask_flattened.sum().item())
43
+ if (sample_size <= 0) or (num_pixels <= sample_size):
44
+ return fg_mask_flattened.nonzero(as_tuple=True)[0]
45
+ return fg_mask_flattened.float().multinomial(sample_size, replacement=False)
46
+
47
+
48
class PixToShapeCycleLoss(nn.Module):
    """
    Cycle loss for pixel-vertex correspondence
    """

    def __init__(self, cfg: CfgNode):
        super().__init__()
        self.shape_names = list(cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDERS.keys())
        self.embed_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE
        # Norm order used when reducing the (distance * cycle-probability) matrix.
        self.norm_p = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.NORM_P
        self.use_all_meshes_not_gt_only = (
            cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.USE_ALL_MESHES_NOT_GT_ONLY
        )
        self.num_pixels_to_sample = (
            cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.NUM_PIXELS_TO_SAMPLE
        )
        # NOTE(review): pix_sigma is read from the config but never used in this
        # class body — confirm whether it is intentionally unused.
        self.pix_sigma = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.PIXEL_SIGMA
        self.temperature_pix_to_vertex = (
            cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.TEMPERATURE_PIXEL_TO_VERTEX
        )
        self.temperature_vertex_to_pix = (
            cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.TEMPERATURE_VERTEX_TO_PIXEL
        )
        # Precomputed squared pixel-to-pixel distances on the output grid.
        self.pixel_dists = _create_pixel_dist_matrix(cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE)

    def forward(
        self,
        proposals_with_gt: List[Instances],
        densepose_predictor_outputs: Any,
        packed_annotations: PackedCseAnnotations,
        embedder: nn.Module,
    ):
        """
        Args:
            proposals_with_gt (list of Instances): detections with associated
                ground truth data; each item corresponds to instances detected
                on 1 image; the number of items corresponds to the number of
                images in a batch
            densepose_predictor_outputs: an object of a dataclass that contains predictor
                outputs with estimated values; assumed to have the following attributes:
                * embedding - embedding estimates, tensor of shape [N, D, S, S], where
                  N = number of instances (= sum N_i, where N_i is the number of
                  instances on image i)
                  D = embedding space dimensionality (MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE)
                  S = output size (width and height)
            packed_annotations (PackedCseAnnotations): contains various data useful
                for loss computation, each data is packed into a single tensor
            embedder (nn.Module): module that computes vertex embeddings for different meshes
        """
        pix_embeds = densepose_predictor_outputs.embedding
        if self.pixel_dists.device != pix_embeds.device:
            # should normally be done only once
            self.pixel_dists = self.pixel_dists.to(device=pix_embeds.device)
        with torch.no_grad():
            mask_loss_data = extract_data_for_mask_loss_from_matches(
                proposals_with_gt, densepose_predictor_outputs.coarse_segm
            )
        # GT masks - tensor of shape [N, S, S] of int64
        masks_gt = mask_loss_data.masks_gt.long()  # pyre-ignore[16]
        assert len(pix_embeds) == len(masks_gt), (
            f"Number of instances with embeddings {len(pix_embeds)} != "
            f"number of instances with GT masks {len(masks_gt)}"
        )
        losses = []
        # Either all configured meshes or only those present in the GT annotations.
        mesh_names = (
            self.shape_names
            if self.use_all_meshes_not_gt_only
            else [
                MeshCatalog.get_mesh_name(mesh_id.item())
                for mesh_id in packed_annotations.vertex_mesh_ids_gt.unique()
            ]
        )
        for pixel_embeddings, mask_gt in zip(pix_embeds, masks_gt):
            # pixel_embeddings [D, S, S]
            # mask_gt [S, S]
            for mesh_name in mesh_names:
                mesh_vertex_embeddings = embedder(mesh_name)
                # pixel indices [M]
                pixel_indices_flattened = _sample_fg_pixels_randperm(
                    mask_gt, self.num_pixels_to_sample
                )
                # pixel distances [M, M]
                # NOTE(review): torch.meshgrid here uses the default indexing,
                # which newer torch versions warn about — confirm torch version policy.
                pixel_dists = self.pixel_dists.to(pixel_embeddings.device)[
                    torch.meshgrid(pixel_indices_flattened, pixel_indices_flattened)
                ]
                # pixel embeddings [M, D]
                pixel_embeddings_sampled = normalize_embeddings(
                    pixel_embeddings.reshape((self.embed_size, -1))[:, pixel_indices_flattened].T
                )
                # pixel-vertex similarity [M, K]
                sim_matrix = pixel_embeddings_sampled.mm(mesh_vertex_embeddings.T)
                # Soft pixel->vertex and vertex->pixel assignments; their product
                # gives the probability of returning to each pixel after a cycle.
                c_pix_vertex = F.softmax(sim_matrix / self.temperature_pix_to_vertex, dim=1)
                c_vertex_pix = F.softmax(sim_matrix.T / self.temperature_vertex_to_pix, dim=1)
                c_cycle = c_pix_vertex.mm(c_vertex_pix)
                # Penalize cycles that land far (in pixel distance) from the start.
                loss_cycle = torch.norm(pixel_dists * c_cycle, p=self.norm_p)
                losses.append(loss_cycle)

        if len(losses) == 0:
            # No instances/meshes contributed — zero loss that still touches
            # the embeddings to keep the computation graph intact.
            return pix_embeds.sum() * 0
        return torch.stack(losses, dim=0).mean()

    def fake_value(self, densepose_predictor_outputs: Any, embedder: nn.Module):
        # Zero-valued loss referencing all embedder outputs and the pixel
        # embeddings, used when no suitable GT is available.
        losses = [embedder(mesh_name).sum() * 0 for mesh_name in embedder.mesh_names]
        losses.append(densepose_predictor_outputs.embedding.sum() * 0)
        return torch.mean(torch.stack(losses))
Leffa/3rdparty/densepose/modeling/losses/cycle_shape2shape.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2
+
3
+ import random
4
+ from typing import Tuple
5
+ import torch
6
+ from torch import nn
7
+ from torch.nn import functional as F
8
+
9
+ from detectron2.config import CfgNode
10
+
11
+ from densepose.structures.mesh import create_mesh
12
+
13
+ from .utils import sample_random_indices
14
+
15
+
16
class ShapeToShapeCycleLoss(nn.Module):
    """
    Cycle Loss for Shapes.
    Inspired by:
    "Mapping in a Cycle: Sinkhorn Regularized Unsupervised Learning for Point Cloud Shapes".
    """

    def __init__(self, cfg: CfgNode):
        super().__init__()
        self.shape_names = list(cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDERS.keys())
        # All unordered pairs of distinct mesh names, visited in shuffled
        # round-robin order by _sample_random_pair.
        self.all_shape_pairs = [
            (x, y) for i, x in enumerate(self.shape_names) for y in self.shape_names[i + 1 :]
        ]
        random.shuffle(self.all_shape_pairs)
        self.cur_pos = 0  # cursor into all_shape_pairs for round-robin sampling
        self.norm_p = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.NORM_P
        self.temperature = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.TEMPERATURE
        self.max_num_vertices = (
            cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.MAX_NUM_VERTICES
        )

    def _sample_random_pair(self) -> Tuple[str, str]:
        """
        Produce a random pair of different mesh names

        Return:
            tuple(str, str): a pair of different mesh names
        """
        # Reshuffle and restart once every pair has been visited.
        if self.cur_pos >= len(self.all_shape_pairs):
            random.shuffle(self.all_shape_pairs)
            self.cur_pos = 0
        shape_pair = self.all_shape_pairs[self.cur_pos]
        self.cur_pos += 1
        return shape_pair

    def forward(self, embedder: nn.Module):
        """
        Do a forward pass with a random pair (src, dst) pair of shapes
        Args:
            embedder (nn.Module): module that computes vertex embeddings for different meshes
        """
        src_mesh_name, dst_mesh_name = self._sample_random_pair()
        return self._forward_one_pair(embedder, src_mesh_name, dst_mesh_name)

    def fake_value(self, embedder: nn.Module):
        """Zero-valued loss referencing all embedder outputs (keeps the graph intact)."""
        losses = []
        for mesh_name in embedder.mesh_names:
            losses.append(embedder(mesh_name).sum() * 0)
        return torch.mean(torch.stack(losses))

    def _get_embeddings_and_geodists_for_mesh(
        self, embedder: nn.Module, mesh_name: str
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Produces embeddings and geodesic distance tensors for a given mesh. May subsample
        the mesh, if it contains too many vertices (controlled by
        SHAPE_CYCLE_LOSS_MAX_NUM_VERTICES parameter).
        Args:
            embedder (nn.Module): module that computes embeddings for mesh vertices
            mesh_name (str): mesh name
        Return:
            embeddings (torch.Tensor of size [N, D]): embeddings for selected mesh
                vertices (N = number of selected vertices, D = embedding space dim)
            geodists (torch.Tensor of size [N, N]): geodesic distances for the selected
                mesh vertices (N = number of selected vertices)
        """
        embeddings = embedder(mesh_name)
        # Returns None when no subsampling is needed (mesh small enough).
        indices = sample_random_indices(
            embeddings.shape[0], self.max_num_vertices, embeddings.device
        )
        mesh = create_mesh(mesh_name, embeddings.device)
        geodists = mesh.geodists
        if indices is not None:
            embeddings = embeddings[indices]
            # NOTE(review): torch.meshgrid uses the default indexing here,
            # which newer torch versions warn about — confirm torch version policy.
            geodists = geodists[torch.meshgrid(indices, indices)]
        return embeddings, geodists

    def _forward_one_pair(
        self, embedder: nn.Module, mesh_name_1: str, mesh_name_2: str
    ) -> torch.Tensor:
        """
        Do a forward pass with a selected pair of meshes
        Args:
            embedder (nn.Module): module that computes vertex embeddings for different meshes
            mesh_name_1 (str): first mesh name
            mesh_name_2 (str): second mesh name
        Return:
            Tensor containing the loss value
        """
        embeddings_1, geodists_1 = self._get_embeddings_and_geodists_for_mesh(embedder, mesh_name_1)
        embeddings_2, geodists_2 = self._get_embeddings_and_geodists_for_mesh(embedder, mesh_name_2)
        # Cross-mesh vertex similarity and the soft assignments in both directions.
        sim_matrix_12 = embeddings_1.mm(embeddings_2.T)

        c_12 = F.softmax(sim_matrix_12 / self.temperature, dim=1)
        c_21 = F.softmax(sim_matrix_12.T / self.temperature, dim=1)
        # Round-trip (cycle) assignment matrices 1->2->1 and 2->1->2.
        c_11 = c_12.mm(c_21)
        c_22 = c_21.mm(c_12)

        # Penalize cycles whose endpoints are geodesically far from the start.
        loss_cycle_11 = torch.norm(geodists_1 * c_11, p=self.norm_p)
        loss_cycle_22 = torch.norm(geodists_2 * c_22, p=self.norm_p)

        return loss_cycle_11 + loss_cycle_22
Leffa/3rdparty/densepose/modeling/losses/embed.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2
+
3
+ from typing import Any, Dict, List
4
+ import torch
5
+ from torch import nn
6
+ from torch.nn import functional as F
7
+
8
+ from detectron2.config import CfgNode
9
+ from detectron2.structures import Instances
10
+
11
+ from densepose.data.meshes.catalog import MeshCatalog
12
+ from densepose.modeling.cse.utils import normalize_embeddings, squared_euclidean_distance_matrix
13
+
14
+ from .embed_utils import PackedCseAnnotations
15
+ from .utils import BilinearInterpolationHelper
16
+
17
+
18
class EmbeddingLoss:
    """
    Computes losses for estimated embeddings given annotated vertices.
    Instances in a minibatch that correspond to the same mesh are grouped
    together. For each group, loss is computed as cross-entropy for
    unnormalized scores given ground truth mesh vertex ids.
    Scores are based on squared distances between estimated vertex embeddings
    and mesh vertex embeddings.
    """

    def __init__(self, cfg: CfgNode):
        """
        Initialize embedding loss from config

        Args:
            cfg (CfgNode): configuration options
        """
        # Gaussian sigma used to convert squared embedding distances into logits
        self.embdist_gauss_sigma = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDING_DIST_GAUSS_SIGMA

    def __call__(
        self,
        proposals_with_gt: List[Instances],
        densepose_predictor_outputs: Any,
        packed_annotations: PackedCseAnnotations,
        interpolator: BilinearInterpolationHelper,
        embedder: nn.Module,
    ) -> Dict[str, torch.Tensor]:
        """
        Produces losses for estimated embeddings given annotated vertices.
        Embeddings for all the vertices of a mesh are computed by the embedder.
        Embeddings for observed pixels are estimated by a predictor.
        Losses are computed as cross-entropy for squared distances between
        observed vertex embeddings and all mesh vertex embeddings given
        ground truth vertex IDs.

        Args:
            proposals_with_gt (list of Instances): detections with associated
                ground truth data; each item corresponds to instances detected
                on 1 image; the number of items corresponds to the number of
                images in a batch
            densepose_predictor_outputs: an object of a dataclass that contains predictor
                outputs with estimated values; assumed to have the following attributes:
                * embedding - embedding estimates, tensor of shape [N, D, S, S], where
                  N = number of instances (= sum N_i, where N_i is the number of
                  instances on image i)
                  D = embedding space dimensionality (MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE)
                  S = output size (width and height)
            packed_annotations (PackedCseAnnotations): contains various data useful
                for loss computation, each data is packed into a single tensor
            interpolator (BilinearInterpolationHelper): bilinear interpolation helper
            embedder (nn.Module): module that computes vertex embeddings for different meshes
        Return:
            dict(str -> tensor): losses keyed by mesh name
            (the dict is keyed by mesh names, not mesh ids - the previous
            annotation `Dict[int, ...]` did not match the actual keys)
        """
        losses = {}
        for mesh_id_tensor in packed_annotations.vertex_mesh_ids_gt.unique():
            mesh_id = mesh_id_tensor.item()
            mesh_name = MeshCatalog.get_mesh_name(mesh_id)
            # valid points are those that fall into estimated bbox
            # and correspond to the current mesh
            j_valid = interpolator.j_valid * (  # pyre-ignore[16]
                packed_annotations.vertex_mesh_ids_gt == mesh_id
            )
            if not torch.any(j_valid):
                continue
            # extract estimated embeddings for valid points
            # -> tensor [J, D]
            vertex_embeddings_i = normalize_embeddings(
                interpolator.extract_at_points(
                    densepose_predictor_outputs.embedding,
                    slice_fine_segm=slice(None),
                    w_ylo_xlo=interpolator.w_ylo_xlo[:, None],  # pyre-ignore[16]
                    w_ylo_xhi=interpolator.w_ylo_xhi[:, None],  # pyre-ignore[16]
                    w_yhi_xlo=interpolator.w_yhi_xlo[:, None],  # pyre-ignore[16]
                    w_yhi_xhi=interpolator.w_yhi_xhi[:, None],  # pyre-ignore[16]
                )[j_valid, :]
            )
            # extract vertex ids for valid points
            # -> tensor [J]
            vertex_indices_i = packed_annotations.vertex_ids_gt[j_valid]
            # embeddings for all mesh vertices
            # -> tensor [K, D]
            mesh_vertex_embeddings = embedder(mesh_name)
            # unnormalized scores for valid points: scaled negative squared
            # distances act as logits (closer vertex => higher score)
            # -> tensor [J, K]
            scores = squared_euclidean_distance_matrix(
                vertex_embeddings_i, mesh_vertex_embeddings
            ) / (-self.embdist_gauss_sigma)
            losses[mesh_name] = F.cross_entropy(scores, vertex_indices_i, ignore_index=-1)

        # ensure every mesh known to the embedder contributes to the graph,
        # so DistributedDataParallel reductions stay consistent across GPUs
        for mesh_name in embedder.mesh_names:
            if mesh_name not in losses:
                losses[mesh_name] = self.fake_value(
                    densepose_predictor_outputs, embedder, mesh_name
                )
        return losses

    def fake_values(self, densepose_predictor_outputs: Any, embedder: nn.Module):
        """
        Zero-valued losses for all meshes, used when a batch carries no
        suitable ground truth; keeps the computation graph shape stable.
        """
        losses = {}
        for mesh_name in embedder.mesh_names:
            losses[mesh_name] = self.fake_value(densepose_predictor_outputs, embedder, mesh_name)
        return losses

    def fake_value(self, densepose_predictor_outputs: Any, embedder: nn.Module, mesh_name: str):
        """
        Zero-valued loss that still touches both the predictor outputs and the
        embedder, so gradients flow (as zeros) through both modules.
        """
        return densepose_predictor_outputs.embedding.sum() * 0 + embedder(mesh_name).sum() * 0
Leffa/3rdparty/densepose/modeling/losses/embed_utils.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any, Optional
5
+ import torch
6
+
7
+ from detectron2.structures import BoxMode, Instances
8
+
9
+ from .utils import AnnotationsAccumulator
10
+
11
+
12
@dataclass
class PackedCseAnnotations:
    """
    CSE ground truth annotations for a whole batch, packed into flat tensors
    (produced by `CseAnnotationsAccumulator.pack`).
    """

    # normalized point coordinates within their GT boxes
    x_gt: torch.Tensor
    y_gt: torch.Tensor
    # coarse segmentation; None unless every annotated instance carries one
    coarse_segm_gt: Optional[torch.Tensor]
    # per-point mesh id and vertex id within that mesh
    vertex_mesh_ids_gt: torch.Tensor
    vertex_ids_gt: torch.Tensor
    # per-detection GT and estimated boxes in XYWH format
    bbox_xywh_gt: torch.Tensor
    bbox_xywh_est: torch.Tensor
    # per-point index of its box among boxes that carry DensePose data
    point_bbox_with_dp_indices: torch.Tensor
    # per-point index of its box among all detections in the batch
    point_bbox_indices: torch.Tensor
    # indices (among all detections) of boxes that carry DensePose data
    bbox_indices: torch.Tensor
24
+
25
+
26
class CseAnnotationsAccumulator(AnnotationsAccumulator):
    """
    Accumulates annotations by batches that correspond to objects detected on
    individual images. Can pack them together into single tensors.

    Point-level data is collected only for detections that carry DensePose
    annotations; two running counters keep track of detection indices, one
    over all detections and one over detections with DensePose data.
    """

    def __init__(self):
        # per-point data (extended once per DensePose-annotated detection)
        self.x_gt = []
        self.y_gt = []
        self.s_gt = []
        self.vertex_mesh_ids_gt = []
        self.vertex_ids_gt = []
        # per-detection data (only for DensePose-annotated detections)
        self.bbox_xywh_gt = []
        self.bbox_xywh_est = []
        self.point_bbox_with_dp_indices = []
        self.point_bbox_indices = []
        self.bbox_indices = []
        # running index over detections with DensePose data
        self.nxt_bbox_with_dp_index = 0
        # running index over all detections in the batch
        self.nxt_bbox_index = 0

    def accumulate(self, instances_one_image: Instances):
        """
        Accumulate instances data for one image

        Args:
            instances_one_image (Instances): instances data to accumulate
        """
        # boxes arrive in XYXY; everything downstream works in XYWH
        boxes_xywh_est = BoxMode.convert(
            instances_one_image.proposal_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
        )
        boxes_xywh_gt = BoxMode.convert(
            instances_one_image.gt_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
        )
        n_matches = len(boxes_xywh_gt)
        assert n_matches == len(
            boxes_xywh_est
        ), f"Got {len(boxes_xywh_est)} proposal boxes and {len(boxes_xywh_gt)} GT boxes"
        if not n_matches:
            # no detection - GT matches
            return
        if (
            not hasattr(instances_one_image, "gt_densepose")
            or instances_one_image.gt_densepose is None
        ):
            # no densepose GT for the detections, just increase the bbox index
            self.nxt_bbox_index += n_matches
            return
        for box_xywh_est, box_xywh_gt, dp_gt in zip(
            boxes_xywh_est, boxes_xywh_gt, instances_one_image.gt_densepose
        ):
            if (dp_gt is not None) and (len(dp_gt.x) > 0):
                # pyre-fixme[6]: For 1st argument expected `Tensor` but got `float`.
                # pyre-fixme[6]: For 2nd argument expected `Tensor` but got `float`.
                self._do_accumulate(box_xywh_gt, box_xywh_est, dp_gt)
            # the all-detections index advances whether or not DP data exists,
            # so that point_bbox_indices can address the full batch
            self.nxt_bbox_index += 1

    def _do_accumulate(self, box_xywh_gt: torch.Tensor, box_xywh_est: torch.Tensor, dp_gt: Any):
        """
        Accumulate instances data for one image, given that the data is not empty

        Args:
            box_xywh_gt (tensor): GT bounding box
            box_xywh_est (tensor): estimated bounding box
            dp_gt: GT densepose data with the following attributes:
                - x: normalized X coordinates
                - y: normalized Y coordinates
                - segm: tensor of size [S, S] with coarse segmentation (optional)
                - vertex_ids: tensor of annotated vertex ids (one per point)
                - mesh_id: id of the mesh the vertices belong to
        """
        self.x_gt.append(dp_gt.x)
        self.y_gt.append(dp_gt.y)
        # coarse segmentation is optional; `pack` later drops it entirely
        # unless every accumulated instance provided one
        if hasattr(dp_gt, "segm"):
            self.s_gt.append(dp_gt.segm.unsqueeze(0))
        self.vertex_ids_gt.append(dp_gt.vertex_ids)
        self.vertex_mesh_ids_gt.append(torch.full_like(dp_gt.vertex_ids, dp_gt.mesh_id))
        self.bbox_xywh_gt.append(box_xywh_gt.view(-1, 4))
        self.bbox_xywh_est.append(box_xywh_est.view(-1, 4))
        self.point_bbox_with_dp_indices.append(
            torch.full_like(dp_gt.vertex_ids, self.nxt_bbox_with_dp_index)
        )
        self.point_bbox_indices.append(torch.full_like(dp_gt.vertex_ids, self.nxt_bbox_index))
        self.bbox_indices.append(self.nxt_bbox_index)
        self.nxt_bbox_with_dp_index += 1

    def pack(self) -> Optional[PackedCseAnnotations]:
        """
        Pack data into tensors

        Returns None when nothing was accumulated.
        """
        if not len(self.x_gt):
            # TODO:
            # returning proper empty annotations would require
            # creating empty tensors of appropriate shape and
            # type on an appropriate device;
            # we return None so far to indicate empty annotations
            return None
        return PackedCseAnnotations(
            x_gt=torch.cat(self.x_gt, 0),
            y_gt=torch.cat(self.y_gt, 0),
            vertex_mesh_ids_gt=torch.cat(self.vertex_mesh_ids_gt, 0),
            vertex_ids_gt=torch.cat(self.vertex_ids_gt, 0),
            # ignore segmentation annotations, if not all the instances contain those
            coarse_segm_gt=torch.cat(self.s_gt, 0)
            if len(self.s_gt) == len(self.bbox_xywh_gt)
            else None,
            bbox_xywh_gt=torch.cat(self.bbox_xywh_gt, 0),
            bbox_xywh_est=torch.cat(self.bbox_xywh_est, 0),
            point_bbox_with_dp_indices=torch.cat(self.point_bbox_with_dp_indices, 0),
            point_bbox_indices=torch.cat(self.point_bbox_indices, 0),
            bbox_indices=torch.as_tensor(
                self.bbox_indices, dtype=torch.long, device=self.x_gt[0].device
            ),
        )
Leffa/3rdparty/densepose/modeling/losses/mask.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any, Iterable, List, Optional
5
+ import torch
6
+ from torch.nn import functional as F
7
+
8
+ from detectron2.structures import Instances
9
+
10
+
11
@dataclass
class DataForMaskLoss:
    """
    Contains mask GT and estimated data for proposals from multiple images.

    Both fields remain None when no GT masks were found for the proposals
    (see `extract_data_for_mask_loss_from_matches`).
    """

    # tensor of size (K, H, W) containing GT labels
    masks_gt: Optional[torch.Tensor] = None
    # tensor of size (K, C, H, W) containing estimated scores
    masks_est: Optional[torch.Tensor] = None
21
+
22
+
23
def extract_data_for_mask_loss_from_matches(
    proposals_targets: Iterable[Instances], estimated_segm: torch.Tensor
) -> DataForMaskLoss:
    """
    Extract data for mask loss from instances that contain matched GT and
    estimated bounding boxes.

    Args:
        proposals_targets: Iterable[Instances]
            matched GT and estimated results, each item in the iterable
            corresponds to data in 1 image
        estimated_segm: tensor(K, C, S, S) of float - raw unnormalized
            segmentation scores, here S is the size to which GT masks are
            to be resized
    Return:
        DataForMaskLoss with attributes:
            masks_est: tensor(K, C, S, S) of float - class scores
            masks_gt: tensor(K, S, S) - GT labels obtained by cropping and
                resizing instance masks to the proposal boxes
        (both attributes stay None if no GT masks were found)
    """
    data = DataForMaskLoss()
    masks_gt = []
    # GT masks are resized to the (square) spatial size of the estimates
    assert estimated_segm.shape[2] == estimated_segm.shape[3], (
        f"Expected estimated segmentation to have a square shape, "
        f"but the actual shape is {estimated_segm.shape[2:]}"
    )
    mask_size = estimated_segm.shape[2]
    num_proposals = sum(inst.proposal_boxes.tensor.size(0) for inst in proposals_targets)
    num_estimated = estimated_segm.shape[0]
    assert (
        num_proposals == num_estimated
    ), "The number of proposals {} must be equal to the number of estimates {}".format(
        num_proposals, num_estimated
    )

    for proposals_targets_per_image in proposals_targets:
        n_i = proposals_targets_per_image.proposal_boxes.tensor.size(0)
        if not n_i:
            continue
        # crop full-image GT masks to the proposal boxes and resize
        # them to the estimate resolution
        gt_masks_per_image = proposals_targets_per_image.gt_masks.crop_and_resize(
            proposals_targets_per_image.proposal_boxes.tensor, mask_size
        ).to(device=estimated_segm.device)
        masks_gt.append(gt_masks_per_image)
    if masks_gt:
        data.masks_est = estimated_segm
        data.masks_gt = torch.cat(masks_gt, dim=0)
    return data
69
+
70
+
71
class MaskLoss:
    """
    Mask loss as cross-entropy for raw unnormalized scores given ground truth labels.
    Mask ground truth labels are defined for the whole image and not only the
    bounding box of interest. They are stored as objects that are assumed to implement
    the `crop_and_resize` interface (e.g. BitMasks, PolygonMasks).
    """

    def __call__(
        self, proposals_with_gt: List[Instances], densepose_predictor_outputs: Any
    ) -> torch.Tensor:
        """
        Compute segmentation loss as cross-entropy between raw unnormalized
        coarse segmentation scores and ground truth labels derived from masks.

        Args:
            proposals_with_gt (list of Instances): detections with associated ground truth data
            densepose_predictor_outputs: an object of a dataclass that contains predictor outputs
                with estimated values; assumed to have the following attribute:
                * coarse_segm (tensor of shape [N, D, S, S]): coarse segmentation estimates
                    as raw unnormalized scores, where N is the number of detections,
                    S is the estimate size ( = width = height) and D is the number of
                    coarse segmentation channels.
        Return:
            Cross entropy for raw unnormalized scores for coarse segmentation given
            ground truth labels from masks
        """
        if not len(proposals_with_gt):
            return self.fake_value(densepose_predictor_outputs)
        # densepose outputs are stacked over all images and all bounding boxes:
        # a batch of 4 images with (3, 1, 2, 1) proposals yields size(0) == 7;
        # GT extraction needs no gradients
        with torch.no_grad():
            loss_data = extract_data_for_mask_loss_from_matches(
                proposals_with_gt, densepose_predictor_outputs.coarse_segm
            )
        if loss_data.masks_gt is None or loss_data.masks_est is None:
            return self.fake_value(densepose_predictor_outputs)
        return F.cross_entropy(loss_data.masks_est, loss_data.masks_gt.long())

    def fake_value(self, densepose_predictor_outputs: Any) -> torch.Tensor:
        """
        Fake segmentation loss used when no suitable ground truth data
        was found in a batch. The loss has a value 0 and is primarily used to
        construct the computation graph, so that `DistributedDataParallel`
        has similar graphs on all GPUs and can perform reduction properly.

        Args:
            densepose_predictor_outputs: DensePose predictor outputs, an object
                of a dataclass that is assumed to have `coarse_segm`
                attribute
        Return:
            Zero value loss with proper computation graph
        """
        return densepose_predictor_outputs.coarse_segm.sum() * 0
Leffa/3rdparty/densepose/modeling/losses/mask_or_segm.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2
+
3
+ from typing import Any, List
4
+
5
+ import torch
6
+
7
+ from detectron2.config import CfgNode
8
+ from detectron2.structures import Instances
9
+
10
+ from .mask import MaskLoss
11
+ from .segm import SegmentationLoss
12
+
13
+
14
class MaskOrSegmentationLoss:
    """
    Cross-entropy loss on raw unnormalized coarse segmentation scores.
    Ground truth labels come either from mask annotations or from coarse
    segmentation annotations, depending on the config value
    MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS.
    """

    def __init__(self, cfg: CfgNode):
        """
        Initialize segmentation loss from configuration options

        Args:
            cfg (CfgNode): configuration options
        """
        self.segm_trained_by_masks = (
            cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS
        )
        # the mask-based loss is only instantiated when it will be used;
        # the segmentation loss is always available
        if self.segm_trained_by_masks:
            self.mask_loss = MaskLoss()
        self.segm_loss = SegmentationLoss(cfg)

    def __call__(
        self,
        proposals_with_gt: List[Instances],
        densepose_predictor_outputs: Any,
        packed_annotations: Any,
    ) -> torch.Tensor:
        """
        Compute segmentation loss as cross-entropy between aligned unnormalized
        score estimates and ground truth; with ground truth given
        either by masks, or by coarse segmentation annotations.

        Args:
            proposals_with_gt (list of Instances): detections with associated ground truth data
            densepose_predictor_outputs: an object of a dataclass that contains predictor outputs
                with estimated values; assumed to have the following attributes:
                * coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S]
            packed_annotations: packed annotations for efficient loss computation
        Return:
            tensor: loss value as cross-entropy for raw unnormalized scores
                given ground truth labels
        """
        if not self.segm_trained_by_masks:
            return self.segm_loss(
                proposals_with_gt, densepose_predictor_outputs, packed_annotations
            )
        return self.mask_loss(proposals_with_gt, densepose_predictor_outputs)

    def fake_value(self, densepose_predictor_outputs: Any) -> torch.Tensor:
        """
        Fake segmentation loss used when no suitable ground truth data
        was found in a batch. The loss has a value 0 and is primarily used to
        construct the computation graph, so that `DistributedDataParallel`
        has similar graphs on all GPUs and can perform reduction properly.

        Args:
            densepose_predictor_outputs: DensePose predictor outputs, an object
                of a dataclass that is assumed to have `coarse_segm`
                attribute
        Return:
            Zero value loss with proper computation graph
        """
        return densepose_predictor_outputs.coarse_segm.sum() * 0
Leffa/3rdparty/densepose/modeling/losses/registry.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# Copyright (c) Facebook, Inc. and its affiliates.

from detectron2.utils.registry import Registry

# Registry for DensePose loss implementations; loss classes register here
# and are presumably looked up by name from the config — confirm at call sites.
DENSEPOSE_LOSS_REGISTRY = Registry("DENSEPOSE_LOSS")
Leffa/3rdparty/densepose/modeling/losses/soft_embed.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2
+
3
+ from typing import Any, Dict, List
4
+ import torch
5
+ from torch import nn
6
+ from torch.nn import functional as F
7
+
8
+ from detectron2.config import CfgNode
9
+ from detectron2.structures import Instances
10
+
11
+ from densepose.data.meshes.catalog import MeshCatalog
12
+ from densepose.modeling.cse.utils import normalize_embeddings, squared_euclidean_distance_matrix
13
+ from densepose.structures.mesh import create_mesh
14
+
15
+ from .embed_utils import PackedCseAnnotations
16
+ from .utils import BilinearInterpolationHelper
17
+
18
+
19
class SoftEmbeddingLoss:
    """
    Computes losses for estimated embeddings given annotated vertices.
    Instances in a minibatch that correspond to the same mesh are grouped
    together. For each group, loss is computed as cross-entropy for
    unnormalized scores given ground truth mesh vertex ids.
    Scores are based on:
        1) squared distances between estimated vertex embeddings
           and mesh vertex embeddings;
        2) geodesic distances between vertices of a mesh
    """

    def __init__(self, cfg: CfgNode):
        """
        Initialize embedding loss from config

        Args:
            cfg (CfgNode): configuration options
        """
        # Gaussian sigmas used to convert squared embedding distances and
        # geodesic distances into (log-)probabilities
        self.embdist_gauss_sigma = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDING_DIST_GAUSS_SIGMA
        self.geodist_gauss_sigma = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.GEODESIC_DIST_GAUSS_SIGMA

    def __call__(
        self,
        proposals_with_gt: List[Instances],
        densepose_predictor_outputs: Any,
        packed_annotations: PackedCseAnnotations,
        interpolator: BilinearInterpolationHelper,
        embedder: nn.Module,
    ) -> Dict[str, torch.Tensor]:
        """
        Produces losses for estimated embeddings given annotated vertices.
        Embeddings for all the vertices of a mesh are computed by the embedder.
        Embeddings for observed pixels are estimated by a predictor.
        Losses are computed as cross-entropy for unnormalized scores given
        ground truth vertex IDs.
        1) squared distances between estimated vertex embeddings
           and mesh vertex embeddings;
        2) geodesic distances between vertices of a mesh

        Args:
            proposals_with_gt (list of Instances): detections with associated
                ground truth data; each item corresponds to instances detected
                on 1 image; the number of items corresponds to the number of
                images in a batch
            densepose_predictor_outputs: an object of a dataclass that contains predictor
                outputs with estimated values; assumed to have the following attributes:
                * embedding - embedding estimates, tensor of shape [N, D, S, S], where
                  N = number of instances (= sum N_i, where N_i is the number of
                  instances on image i)
                  D = embedding space dimensionality (MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE)
                  S = output size (width and height)
            packed_annotations (PackedCseAnnotations): contains various data useful
                for loss computation, each data is packed into a single tensor
            interpolator (BilinearInterpolationHelper): bilinear interpolation helper
            embedder (nn.Module): module that computes vertex embeddings for different meshes
        Return:
            dict(str -> tensor): losses keyed by mesh name
            (the dict is keyed by mesh names, not mesh ids - the previous
            annotation `Dict[int, ...]` did not match the actual keys)
        """
        losses = {}
        for mesh_id_tensor in packed_annotations.vertex_mesh_ids_gt.unique():
            mesh_id = mesh_id_tensor.item()
            mesh_name = MeshCatalog.get_mesh_name(mesh_id)
            # valid points are those that fall into estimated bbox
            # and correspond to the current mesh
            j_valid = interpolator.j_valid * (  # pyre-ignore[16]
                packed_annotations.vertex_mesh_ids_gt == mesh_id
            )
            if not torch.any(j_valid):
                continue
            # extract estimated embeddings for valid points
            # -> tensor [J, D]
            vertex_embeddings_i = normalize_embeddings(
                interpolator.extract_at_points(
                    densepose_predictor_outputs.embedding,
                    slice_fine_segm=slice(None),
                    w_ylo_xlo=interpolator.w_ylo_xlo[:, None],  # pyre-ignore[16]
                    w_ylo_xhi=interpolator.w_ylo_xhi[:, None],  # pyre-ignore[16]
                    w_yhi_xlo=interpolator.w_yhi_xlo[:, None],  # pyre-ignore[16]
                    w_yhi_xhi=interpolator.w_yhi_xhi[:, None],  # pyre-ignore[16]
                )[j_valid, :]
            )
            # extract vertex ids for valid points
            # -> tensor [J]
            vertex_indices_i = packed_annotations.vertex_ids_gt[j_valid]
            # embeddings for all mesh vertices
            # -> tensor [K, D]
            mesh_vertex_embeddings = embedder(mesh_name)
            # softmax values of geodesic distances for GT mesh vertices:
            # a soft target distribution that favors vertices geodesically
            # close to the annotated vertex
            # -> tensor [J, K]
            mesh = create_mesh(mesh_name, mesh_vertex_embeddings.device)
            geodist_softmax_values = F.softmax(
                mesh.geodists[vertex_indices_i] / (-self.geodist_gauss_sigma), dim=1
            )
            # logsoftmax values for valid points
            # -> tensor [J, K]
            embdist_logsoftmax_values = F.log_softmax(
                squared_euclidean_distance_matrix(vertex_embeddings_i, mesh_vertex_embeddings)
                / (-self.embdist_gauss_sigma),
                dim=1,
            )
            # cross-entropy between the soft geodesic target and the
            # embedding-based prediction, averaged over points
            losses[mesh_name] = (-geodist_softmax_values * embdist_logsoftmax_values).sum(1).mean()

        # ensure every mesh known to the embedder contributes to the graph,
        # so DistributedDataParallel reductions stay consistent across GPUs
        for mesh_name in embedder.mesh_names:
            if mesh_name not in losses:
                losses[mesh_name] = self.fake_value(
                    densepose_predictor_outputs, embedder, mesh_name
                )
        return losses

    def fake_values(self, densepose_predictor_outputs: Any, embedder: nn.Module):
        """
        Zero-valued losses for all meshes, used when a batch carries no
        suitable ground truth; keeps the computation graph shape stable.
        """
        losses = {}
        for mesh_name in embedder.mesh_names:
            losses[mesh_name] = self.fake_value(densepose_predictor_outputs, embedder, mesh_name)
        return losses

    def fake_value(self, densepose_predictor_outputs: Any, embedder: nn.Module, mesh_name: str):
        """
        Zero-valued loss that still touches both the predictor outputs and the
        embedder, so gradients flow (as zeros) through both modules.
        """
        return densepose_predictor_outputs.embedding.sum() * 0 + embedder(mesh_name).sum() * 0
Leffa/3rdparty/densepose/modeling/losses/utils.py ADDED
@@ -0,0 +1,443 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from abc import ABC, abstractmethod
4
+ from dataclasses import dataclass
5
+ from typing import Any, Dict, List, Optional, Tuple
6
+ import torch
7
+ from torch.nn import functional as F
8
+
9
+ from detectron2.structures import BoxMode, Instances
10
+
11
+ from densepose import DensePoseDataRelative
12
+
13
+ LossDict = Dict[str, torch.Tensor]
14
+
15
+
16
+ def _linear_interpolation_utilities(v_norm, v0_src, size_src, v0_dst, size_dst, size_z):
17
+ """
18
+ Computes utility values for linear interpolation at points v.
19
+ The points are given as normalized offsets in the source interval
20
+ (v0_src, v0_src + size_src), more precisely:
21
+ v = v0_src + v_norm * size_src / 256.0
22
+ The computed utilities include lower points v_lo, upper points v_hi,
23
+ interpolation weights v_w and flags j_valid indicating whether the
24
+ points falls into the destination interval (v0_dst, v0_dst + size_dst).
25
+
26
+ Args:
27
+ v_norm (:obj: `torch.Tensor`): tensor of size N containing
28
+ normalized point offsets
29
+ v0_src (:obj: `torch.Tensor`): tensor of size N containing
30
+ left bounds of source intervals for normalized points
31
+ size_src (:obj: `torch.Tensor`): tensor of size N containing
32
+ source interval sizes for normalized points
33
+ v0_dst (:obj: `torch.Tensor`): tensor of size N containing
34
+ left bounds of destination intervals
35
+ size_dst (:obj: `torch.Tensor`): tensor of size N containing
36
+ destination interval sizes
37
+ size_z (int): interval size for data to be interpolated
38
+
39
+ Returns:
40
+ v_lo (:obj: `torch.Tensor`): int tensor of size N containing
41
+ indices of lower values used for interpolation, all values are
42
+ integers from [0, size_z - 1]
43
+ v_hi (:obj: `torch.Tensor`): int tensor of size N containing
44
+ indices of upper values used for interpolation, all values are
45
+ integers from [0, size_z - 1]
46
+ v_w (:obj: `torch.Tensor`): float tensor of size N containing
47
+ interpolation weights
48
+ j_valid (:obj: `torch.Tensor`): uint8 tensor of size N containing
49
+ 0 for points outside the estimation interval
50
+ (v0_est, v0_est + size_est) and 1 otherwise
51
+ """
52
+ v = v0_src + v_norm * size_src / 256.0
53
+ j_valid = (v - v0_dst >= 0) * (v - v0_dst < size_dst)
54
+ v_grid = (v - v0_dst) * size_z / size_dst
55
+ v_lo = v_grid.floor().long().clamp(min=0, max=size_z - 1)
56
+ v_hi = (v_lo + 1).clamp(max=size_z - 1)
57
+ v_grid = torch.min(v_hi.float(), v_grid)
58
+ v_w = v_grid - v_lo.float()
59
+ return v_lo, v_hi, v_w, j_valid
60
+
61
+
62
+ class BilinearInterpolationHelper:
63
+ """
64
+ Args:
65
+ packed_annotations: object that contains packed annotations
66
+ j_valid (:obj: `torch.Tensor`): uint8 tensor of size M containing
67
+ 0 for points to be discarded and 1 for points to be selected
68
+ y_lo (:obj: `torch.Tensor`): int tensor of indices of upper values
69
+ in z_est for each point
70
+ y_hi (:obj: `torch.Tensor`): int tensor of indices of lower values
71
+ in z_est for each point
72
+ x_lo (:obj: `torch.Tensor`): int tensor of indices of left values
73
+ in z_est for each point
74
+ x_hi (:obj: `torch.Tensor`): int tensor of indices of right values
75
+ in z_est for each point
76
+ w_ylo_xlo (:obj: `torch.Tensor`): float tensor of size M;
77
+ contains upper-left value weight for each point
78
+ w_ylo_xhi (:obj: `torch.Tensor`): float tensor of size M;
79
+ contains upper-right value weight for each point
80
+ w_yhi_xlo (:obj: `torch.Tensor`): float tensor of size M;
81
+ contains lower-left value weight for each point
82
+ w_yhi_xhi (:obj: `torch.Tensor`): float tensor of size M;
83
+ contains lower-right value weight for each point
84
+ """
85
+
86
    def __init__(
        self,
        packed_annotations: Any,
        j_valid: torch.Tensor,
        y_lo: torch.Tensor,
        y_hi: torch.Tensor,
        x_lo: torch.Tensor,
        x_hi: torch.Tensor,
        w_ylo_xlo: torch.Tensor,
        w_ylo_xhi: torch.Tensor,
        w_yhi_xlo: torch.Tensor,
        w_yhi_xhi: torch.Tensor,
    ):
        # Store every constructor argument as an attribute of the same name;
        # iterating over locals() avoids ten repetitive `self.x = x` lines
        # (see the class docstring for the meaning of each argument).
        for k, v in locals().items():
            if k != "self":
                setattr(self, k, v)
102
+
103
    @staticmethod
    def from_matches(
        packed_annotations: Any, densepose_outputs_size_hw: Tuple[int, int]
    ) -> "BilinearInterpolationHelper":
        """
        Args:
            packed_annotations: annotations packed into tensors, the following
                attributes are required:
                 - bbox_xywh_gt
                 - bbox_xywh_est
                 - x_gt
                 - y_gt
                 - point_bbox_with_dp_indices
                 - point_bbox_indices
            densepose_outputs_size_hw (tuple [int, int]): resolution of
                DensePose predictor outputs (H, W)
        Return:
            An instance of `BilinearInterpolationHelper` used to perform
            interpolation for the given annotation points and output resolution
        """

        zh, zw = densepose_outputs_size_hw
        # per-point GT and estimated boxes (XYWH), gathered via point indices
        x0_gt, y0_gt, w_gt, h_gt = packed_annotations.bbox_xywh_gt[
            packed_annotations.point_bbox_with_dp_indices
        ].unbind(dim=1)
        x0_est, y0_est, w_est, h_est = packed_annotations.bbox_xywh_est[
            packed_annotations.point_bbox_with_dp_indices
        ].unbind(dim=1)
        # interpolation indices/weights along each axis independently
        x_lo, x_hi, x_w, jx_valid = _linear_interpolation_utilities(
            packed_annotations.x_gt, x0_gt, w_gt, x0_est, w_est, zw
        )
        y_lo, y_hi, y_w, jy_valid = _linear_interpolation_utilities(
            packed_annotations.y_gt, y0_gt, h_gt, y0_est, h_est, zh
        )
        # a point is valid only if it is valid along both axes
        j_valid = jx_valid * jy_valid

        # standard bilinear interpolation weights for the 4 neighboring cells
        w_ylo_xlo = (1.0 - x_w) * (1.0 - y_w)
        w_ylo_xhi = x_w * (1.0 - y_w)
        w_yhi_xlo = (1.0 - x_w) * y_w
        w_yhi_xhi = x_w * y_w

        return BilinearInterpolationHelper(
            packed_annotations,
            j_valid,
            y_lo,
            y_hi,
            x_lo,
            x_hi,
            w_ylo_xlo,  # pyre-ignore[6]
            w_ylo_xhi,
            # pyre-fixme[6]: Expected `Tensor` for 9th param but got `float`.
            w_yhi_xlo,
            w_yhi_xhi,
        )
157
+
158
+ def extract_at_points(
159
+ self,
160
+ z_est,
161
+ slice_fine_segm=None,
162
+ w_ylo_xlo=None,
163
+ w_ylo_xhi=None,
164
+ w_yhi_xlo=None,
165
+ w_yhi_xhi=None,
166
+ ):
167
+ """
168
+ Extract ground truth values z_gt for valid point indices and estimated
169
+ values z_est using bilinear interpolation over top-left (y_lo, x_lo),
170
+ top-right (y_lo, x_hi), bottom-left (y_hi, x_lo) and bottom-right
171
+ (y_hi, x_hi) values in z_est with corresponding weights:
172
+ w_ylo_xlo, w_ylo_xhi, w_yhi_xlo and w_yhi_xhi.
173
+ Use slice_fine_segm to slice dim=1 in z_est
174
+ """
175
+ slice_fine_segm = (
176
+ self.packed_annotations.fine_segm_labels_gt
177
+ if slice_fine_segm is None
178
+ else slice_fine_segm
179
+ )
180
+ w_ylo_xlo = self.w_ylo_xlo if w_ylo_xlo is None else w_ylo_xlo
181
+ w_ylo_xhi = self.w_ylo_xhi if w_ylo_xhi is None else w_ylo_xhi
182
+ w_yhi_xlo = self.w_yhi_xlo if w_yhi_xlo is None else w_yhi_xlo
183
+ w_yhi_xhi = self.w_yhi_xhi if w_yhi_xhi is None else w_yhi_xhi
184
+
185
+ index_bbox = self.packed_annotations.point_bbox_indices
186
+ z_est_sampled = (
187
+ z_est[index_bbox, slice_fine_segm, self.y_lo, self.x_lo] * w_ylo_xlo
188
+ + z_est[index_bbox, slice_fine_segm, self.y_lo, self.x_hi] * w_ylo_xhi
189
+ + z_est[index_bbox, slice_fine_segm, self.y_hi, self.x_lo] * w_yhi_xlo
190
+ + z_est[index_bbox, slice_fine_segm, self.y_hi, self.x_hi] * w_yhi_xhi
191
+ )
192
+ return z_est_sampled
193
+
194
+
195
def resample_data(
    z, bbox_xywh_src, bbox_xywh_dst, wout, hout, mode: str = "nearest", padding_mode: str = "zeros"
):
    """
    Resample per-box data from source boxes onto destination boxes.

    Args:
        z (:obj: `torch.Tensor`): tensor of size (N,C,H,W) with data to be
            resampled
        bbox_xywh_src (:obj: `torch.Tensor`): tensor of size (N,4) containing
            source bounding boxes in format XYWH
        bbox_xywh_dst (:obj: `torch.Tensor`): tensor of size (N,4) containing
            destination bounding boxes in format XYWH
        wout (int): output width
        hout (int): output height
        mode (str): interpolation mode passed to `F.grid_sample`
        padding_mode (str): out-of-box padding passed to `F.grid_sample`
    Return:
        zresampled (:obj: `torch.Tensor`): tensor of size (N, C, Hout, Wout)
            with resampled values of z
    """
    n_boxes = bbox_xywh_src.size(0)
    assert n_boxes == bbox_xywh_dst.size(0), (
        "The number of "
        "source ROIs for resampling ({}) should be equal to the number "
        "of destination ROIs ({})".format(bbox_xywh_src.size(0), bbox_xywh_dst.size(0))
    )
    x0s, y0s, ws, hs = bbox_xywh_src.unbind(dim=1)
    x0d, y0d, wd, hd = bbox_xywh_dst.unbind(dim=1)
    # destination box corners expressed in the [-1, 1] normalized frame of the
    # source box (the coordinate system `grid_sample` expects)
    x0dst_norm = 2 * (x0d - x0s) / ws - 1
    y0dst_norm = 2 * (y0d - y0s) / hs - 1
    x1dst_norm = 2 * (x0d + wd - x0s) / ws - 1
    y1dst_norm = 2 * (y0d + hd - y0s) / hs - 1
    # fractional positions [0, 1) along each output axis
    frac_w = torch.arange(wout, device=z.device, dtype=torch.float) / wout
    frac_h = torch.arange(hout, device=z.device, dtype=torch.float) / hout
    # broadcast per-box linear ramps, then expand to the full (N, Hout, Wout) grid
    grid_x = (
        x0dst_norm[:, None, None]
        + frac_w[None, None, :] * (x1dst_norm - x0dst_norm)[:, None, None]
    ).expand(n_boxes, hout, wout)
    grid_y = (
        y0dst_norm[:, None, None]
        + frac_h[None, :, None] * (y1dst_norm - y0dst_norm)[:, None, None]
    ).expand(n_boxes, hout, wout)
    grid = torch.stack((grid_x, grid_y), dim=3)
    # resample Z from (N, C, H, W) into (N, C, Hout, Wout)
    return F.grid_sample(z, grid, mode=mode, padding_mode=padding_mode, align_corners=True)
236
+
237
+
238
class AnnotationsAccumulator(ABC):
    """
    Interface for accumulators that gather annotation data image by image
    and pack the result into dense tensors.
    """

    @abstractmethod
    def accumulate(self, instances_one_image: Instances):
        """
        Add annotation data for the instances detected on a single image.

        Args:
            instances_one_image (Instances): instances data to accumulate
        """
        ...

    @abstractmethod
    def pack(self) -> Any:
        """
        Combine everything accumulated so far into packed tensors.
        """
        ...
260
+
261
+
262
@dataclass
class PackedChartBasedAnnotations:
    """
    Packed annotations for chart-based model training. The following attributes
    are defined:
     - fine_segm_labels_gt (tensor [K] of `int64`): GT fine segmentation point labels
     - x_gt (tensor [K] of `float32`): GT normalized X point coordinates
     - y_gt (tensor [K] of `float32`): GT normalized Y point coordinates
     - u_gt (tensor [K] of `float32`): GT point U values
     - v_gt (tensor [K] of `float32`): GT point V values
     - coarse_segm_gt (tensor [N, S, S] of `float32`): GT segmentation for bounding
       boxes; `None` when not every accumulated instance carried a segmentation
       annotation (see `ChartBasedAnnotationsAccumulator.pack`)
     - bbox_xywh_gt (tensor [N, 4] of `float32`): selected GT bounding boxes in
       XYWH format
     - bbox_xywh_est (tensor [N, 4] of `float32`): selected matching estimated
       bounding boxes in XYWH format
     - point_bbox_with_dp_indices (tensor [K] of `int64`): indices of bounding boxes
       with DensePose annotations that correspond to the point data
     - point_bbox_indices (tensor [K] of `int64`): indices of bounding boxes
       (not necessarily the selected ones with DensePose data) that correspond
       to the point data
     - bbox_indices (tensor [N] of `int64`): global indices of selected bounding
       boxes with DensePose annotations; these indices could be used to access
       features that are computed for all bounding boxes, not only the ones with
       DensePose annotations.
    Here K is the total number of points and N is the total number of instances
    with DensePose annotations.
    """

    fine_segm_labels_gt: torch.Tensor
    x_gt: torch.Tensor
    y_gt: torch.Tensor
    u_gt: torch.Tensor
    v_gt: torch.Tensor
    coarse_segm_gt: Optional[torch.Tensor]
    bbox_xywh_gt: torch.Tensor
    bbox_xywh_est: torch.Tensor
    point_bbox_with_dp_indices: torch.Tensor
    point_bbox_indices: torch.Tensor
    bbox_indices: torch.Tensor
301
+
302
+
303
class ChartBasedAnnotationsAccumulator(AnnotationsAccumulator):
    """
    Accumulates annotations by batches that correspond to objects detected on
    individual images. Can pack them together into single tensors.
    """

    def __init__(self):
        # per-point GT data; each entry is one instance's tensor, concatenated
        # along dim 0 by pack()
        self.i_gt = []
        self.x_gt = []
        self.y_gt = []
        self.u_gt = []
        self.v_gt = []
        # per-instance coarse segmentation (only for instances that have one)
        self.s_gt = []
        self.bbox_xywh_gt = []
        self.bbox_xywh_est = []
        # per-point index of the instance among instances WITH DensePose data
        self.point_bbox_with_dp_indices = []
        # per-point global index of the instance among ALL matched instances
        self.point_bbox_indices = []
        # global indices of the instances with DensePose data
        self.bbox_indices = []
        # running counters for the two index spaces above
        self.nxt_bbox_with_dp_index = 0
        self.nxt_bbox_index = 0

    def accumulate(self, instances_one_image: Instances):
        """
        Accumulate instances data for one image

        Args:
            instances_one_image (Instances): instances data to accumulate
        """
        boxes_xywh_est = BoxMode.convert(
            instances_one_image.proposal_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
        )
        boxes_xywh_gt = BoxMode.convert(
            instances_one_image.gt_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
        )
        n_matches = len(boxes_xywh_gt)
        assert n_matches == len(
            boxes_xywh_est
        ), f"Got {len(boxes_xywh_est)} proposal boxes and {len(boxes_xywh_gt)} GT boxes"
        if not n_matches:
            # no detection - GT matches
            return
        if (
            not hasattr(instances_one_image, "gt_densepose")
            or instances_one_image.gt_densepose is None
        ):
            # no densepose GT for the detections, just increase the bbox index
            self.nxt_bbox_index += n_matches
            return
        for box_xywh_est, box_xywh_gt, dp_gt in zip(
            boxes_xywh_est, boxes_xywh_gt, instances_one_image.gt_densepose
        ):
            if (dp_gt is not None) and (len(dp_gt.x) > 0):
                # pyre-fixme[6]: For 1st argument expected `Tensor` but got `float`.
                # pyre-fixme[6]: For 2nd argument expected `Tensor` but got `float`.
                self._do_accumulate(box_xywh_gt, box_xywh_est, dp_gt)
            # bbox index advances for EVERY matched box, with or without DP data,
            # so point_bbox_indices stays aligned with the global box ordering
            self.nxt_bbox_index += 1

    def _do_accumulate(
        self, box_xywh_gt: torch.Tensor, box_xywh_est: torch.Tensor, dp_gt: DensePoseDataRelative
    ):
        """
        Accumulate instances data for one image, given that the data is not empty

        Args:
            box_xywh_gt (tensor): GT bounding box
            box_xywh_est (tensor): estimated bounding box
            dp_gt (DensePoseDataRelative): GT densepose data
        """
        self.i_gt.append(dp_gt.i)
        self.x_gt.append(dp_gt.x)
        self.y_gt.append(dp_gt.y)
        self.u_gt.append(dp_gt.u)
        self.v_gt.append(dp_gt.v)
        if hasattr(dp_gt, "segm"):
            self.s_gt.append(dp_gt.segm.unsqueeze(0))
        self.bbox_xywh_gt.append(box_xywh_gt.view(-1, 4))
        self.bbox_xywh_est.append(box_xywh_est.view(-1, 4))
        # replicate the instance indices once per annotated point
        self.point_bbox_with_dp_indices.append(
            torch.full_like(dp_gt.i, self.nxt_bbox_with_dp_index)
        )
        self.point_bbox_indices.append(torch.full_like(dp_gt.i, self.nxt_bbox_index))
        self.bbox_indices.append(self.nxt_bbox_index)
        self.nxt_bbox_with_dp_index += 1

    def pack(self) -> Optional[PackedChartBasedAnnotations]:
        """
        Pack data into tensors
        """
        if not len(self.i_gt):
            # TODO:
            # returning proper empty annotations would require
            # creating empty tensors of appropriate shape and
            # type on an appropriate device;
            # we return None so far to indicate empty annotations
            return None
        return PackedChartBasedAnnotations(
            fine_segm_labels_gt=torch.cat(self.i_gt, 0).long(),
            x_gt=torch.cat(self.x_gt, 0),
            y_gt=torch.cat(self.y_gt, 0),
            u_gt=torch.cat(self.u_gt, 0),
            v_gt=torch.cat(self.v_gt, 0),
            # ignore segmentation annotations, if not all the instances contain those
            coarse_segm_gt=torch.cat(self.s_gt, 0)
            if len(self.s_gt) == len(self.bbox_xywh_gt)
            else None,
            bbox_xywh_gt=torch.cat(self.bbox_xywh_gt, 0),
            bbox_xywh_est=torch.cat(self.bbox_xywh_est, 0),
            point_bbox_with_dp_indices=torch.cat(self.point_bbox_with_dp_indices, 0).long(),
            point_bbox_indices=torch.cat(self.point_bbox_indices, 0).long(),
            bbox_indices=torch.as_tensor(
                self.bbox_indices, dtype=torch.long, device=self.x_gt[0].device
            ).long(),
        )
416
+
417
+
418
def extract_packed_annotations_from_matches(
    proposals_with_targets: List[Instances], accumulator: AnnotationsAccumulator
) -> Any:
    """
    Feed every per-image batch of matched proposals/targets to the accumulator
    and return the packed result (as produced by `accumulator.pack()`).
    """
    for per_image_proposals_targets in proposals_with_targets:
        accumulator.accumulate(per_image_proposals_targets)
    return accumulator.pack()
424
+
425
+
426
def sample_random_indices(
    n_indices: int, n_samples: int, device: Optional[torch.device] = None
) -> Optional[torch.Tensor]:
    """
    Samples `n_samples` random indices from range `[0..n_indices - 1]`.
    If `n_indices` is smaller than or equal to `n_samples` (or `n_samples`
    is non-positive), returns `None`, meaning that all indices are selected.

    Args:
        n_indices (int): total number of indices
        n_samples (int): number of indices to sample
        device (torch.device): the desired device of returned tensor
    Return:
        Tensor of selected vertex indices, or `None`, if all vertices are selected
    """
    select_all = n_samples <= 0 or n_indices <= n_samples
    if select_all:
        return None
    permutation = torch.randperm(n_indices, device=device)
    return permutation[:n_samples]
Leffa/3rdparty/densepose/modeling/predictors/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from .chart import DensePoseChartPredictor
4
+ from .chart_confidence import DensePoseChartConfidencePredictorMixin
5
+ from .chart_with_confidence import DensePoseChartWithConfidencePredictor
6
+ from .cse import DensePoseEmbeddingPredictor
7
+ from .cse_confidence import DensePoseEmbeddingConfidencePredictorMixin
8
+ from .cse_with_confidence import DensePoseEmbeddingWithConfidencePredictor
9
+ from .registry import DENSEPOSE_PREDICTOR_REGISTRY
Leffa/3rdparty/densepose/modeling/predictors/chart.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ import torch
4
+ from torch import nn
5
+
6
+ from detectron2.config import CfgNode
7
+ from detectron2.layers import ConvTranspose2d, interpolate
8
+
9
+ from ...structures import DensePoseChartPredictorOutput
10
+ from ..utils import initialize_module_params
11
+ from .registry import DENSEPOSE_PREDICTOR_REGISTRY
12
+
13
+
14
+ @DENSEPOSE_PREDICTOR_REGISTRY.register()
15
+ class DensePoseChartPredictor(nn.Module):
16
+ """
17
+ Predictor (last layers of a DensePose model) that takes DensePose head outputs as an input
18
+ and produces 4 tensors which represent DensePose results for predefined body parts
19
+ (patches / charts):
20
+ * coarse segmentation, a tensor of shape [N, K, Hout, Wout]
21
+ * fine segmentation, a tensor of shape [N, C, Hout, Wout]
22
+ * U coordinates, a tensor of shape [N, C, Hout, Wout]
23
+ * V coordinates, a tensor of shape [N, C, Hout, Wout]
24
+ where
25
+ - N is the number of instances
26
+ - K is the number of coarse segmentation channels (
27
+ 2 = foreground / background,
28
+ 15 = one of 14 body parts / background)
29
+ - C is the number of fine segmentation channels (
30
+ 24 fine body parts / background)
31
+ - Hout and Wout are height and width of predictions
32
+ """
33
+
34
+ def __init__(self, cfg: CfgNode, input_channels: int):
35
+ """
36
+ Initialize predictor using configuration options
37
+
38
+ Args:
39
+ cfg (CfgNode): configuration options
40
+ input_channels (int): input tensor size along the channel dimension
41
+ """
42
+ super().__init__()
43
+ dim_in = input_channels
44
+ n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
45
+ dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1
46
+ kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
47
+ # coarse segmentation
48
+ self.ann_index_lowres = ConvTranspose2d(
49
+ dim_in, n_segm_chan, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
50
+ )
51
+ # fine segmentation
52
+ self.index_uv_lowres = ConvTranspose2d(
53
+ dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
54
+ )
55
+ # U
56
+ self.u_lowres = ConvTranspose2d(
57
+ dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
58
+ )
59
+ # V
60
+ self.v_lowres = ConvTranspose2d(
61
+ dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
62
+ )
63
+ self.scale_factor = cfg.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE
64
+ initialize_module_params(self)
65
+
66
+ def interp2d(self, tensor_nchw: torch.Tensor):
67
+ """
68
+ Bilinear interpolation method to be used for upscaling
69
+
70
+ Args:
71
+ tensor_nchw (tensor): tensor of shape (N, C, H, W)
72
+ Return:
73
+ tensor of shape (N, C, Hout, Wout), where Hout and Wout are computed
74
+ by applying the scale factor to H and W
75
+ """
76
+ return interpolate(
77
+ tensor_nchw, scale_factor=self.scale_factor, mode="bilinear", align_corners=False
78
+ )
79
+
80
+ def forward(self, head_outputs: torch.Tensor):
81
+ """
82
+ Perform forward step on DensePose head outputs
83
+
84
+ Args:
85
+ head_outputs (tensor): DensePose head outputs, tensor of shape [N, D, H, W]
86
+ Return:
87
+ An instance of DensePoseChartPredictorOutput
88
+ """
89
+ return DensePoseChartPredictorOutput(
90
+ coarse_segm=self.interp2d(self.ann_index_lowres(head_outputs)),
91
+ fine_segm=self.interp2d(self.index_uv_lowres(head_outputs)),
92
+ u=self.interp2d(self.u_lowres(head_outputs)),
93
+ v=self.interp2d(self.v_lowres(head_outputs)),
94
+ )
Leffa/3rdparty/densepose/modeling/predictors/chart_confidence.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from typing import Any
4
+ import torch
5
+ from torch.nn import functional as F
6
+
7
+ from detectron2.config import CfgNode
8
+ from detectron2.layers import ConvTranspose2d
9
+
10
+ from ...structures import decorate_predictor_output_class_with_confidences
11
+ from ..confidence import DensePoseConfidenceModelConfig, DensePoseUVConfidenceType
12
+ from ..utils import initialize_module_params
13
+
14
+
15
class DensePoseChartConfidencePredictorMixin:
    """
    Predictor contains the last layers of a DensePose model that take DensePose head
    outputs as an input and produce model outputs. Confidence predictor mixin is used
    to generate confidences for segmentation and UV tensors estimated by some
    base predictor. Several assumptions need to hold for the base predictor:
    1) the `forward` method must return SIUV tuple as the first result (
        S = coarse segmentation, I = fine segmentation, U and V are intrinsic
        chart coordinates)
    2) `interp2d` method must be defined to perform bilinear interpolation;
        the same method is typically used for SIUV and confidences
    Confidence predictor mixin provides confidence estimates, as described in:
    N. Neverova et al., Correlated Uncertainty for Learning Dense Correspondences
        from Noisy Labels, NeurIPS 2019
    A. Sanakoyeu et al., Transferring Dense Pose to Proximal Animal Classes, CVPR 2020
    """

    def __init__(self, cfg: CfgNode, input_channels: int):
        """
        Initialize confidence predictor using configuration options.

        Args:
            cfg (CfgNode): configuration options
            input_channels (int): number of input channels
        """
        # we rely on base predictor to call nn.Module.__init__
        super().__init__(cfg, input_channels)  # pyre-ignore[19]
        self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg)
        self._initialize_confidence_estimation_layers(cfg, input_channels)
        self._registry = {}
        initialize_module_params(self)  # pyre-ignore[6]

    def _initialize_confidence_estimation_layers(self, cfg: CfgNode, dim_in: int):
        """
        Initialize confidence estimation layers based on configuration options

        Args:
            cfg (CfgNode): configuration options
            dim_in (int): number of input channels
        """
        dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1
        kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
        if self.confidence_model_cfg.uv_confidence.enabled:
            if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO:
                self.sigma_2_lowres = ConvTranspose2d(  # pyre-ignore[16]
                    dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
                )
            elif (
                self.confidence_model_cfg.uv_confidence.type
                == DensePoseUVConfidenceType.INDEP_ANISO
            ):
                self.sigma_2_lowres = ConvTranspose2d(
                    dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
                )
                self.kappa_u_lowres = ConvTranspose2d(  # pyre-ignore[16]
                    dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
                )
                self.kappa_v_lowres = ConvTranspose2d(  # pyre-ignore[16]
                    dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
                )
            else:
                # BUGFIX: the original message interpolated
                # `self.confidence_model_cfg.confidence_model_type`, an attribute
                # the config object is not shown to define (it is accessed as
                # `.uv_confidence` / `.segm_confidence` everywhere else), so this
                # branch would have raised AttributeError instead of the intended
                # ValueError. Report the actual unknown type value instead.
                raise ValueError(
                    f"Unknown confidence model type: "
                    f"{self.confidence_model_cfg.uv_confidence.type}"
                )
        if self.confidence_model_cfg.segm_confidence.enabled:
            self.fine_segm_confidence_lowres = ConvTranspose2d(  # pyre-ignore[16]
                dim_in, 1, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
            )
            self.coarse_segm_confidence_lowres = ConvTranspose2d(  # pyre-ignore[16]
                dim_in, 1, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
            )

    def forward(self, head_outputs: torch.Tensor):
        """
        Perform forward operation on head outputs used as inputs for the predictor.
        Calls forward method from the base predictor and uses its outputs to compute
        confidences.

        Args:
            head_outputs (Tensor): head outputs used as predictor inputs
        Return:
            An instance of outputs with confidences,
            see `decorate_predictor_output_class_with_confidences`
        """
        # assuming base class returns SIUV estimates in its first result
        base_predictor_outputs = super().forward(head_outputs)  # pyre-ignore[16]

        # create output instance by extending base predictor outputs:
        output = self._create_output_instance(base_predictor_outputs)

        if self.confidence_model_cfg.uv_confidence.enabled:
            if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO:
                # assuming base class defines interp2d method for bilinear interpolation
                output.sigma_2 = self.interp2d(self.sigma_2_lowres(head_outputs))  # pyre-ignore[16]
            elif (
                self.confidence_model_cfg.uv_confidence.type
                == DensePoseUVConfidenceType.INDEP_ANISO
            ):
                # assuming base class defines interp2d method for bilinear interpolation
                output.sigma_2 = self.interp2d(self.sigma_2_lowres(head_outputs))
                output.kappa_u = self.interp2d(self.kappa_u_lowres(head_outputs))  # pyre-ignore[16]
                output.kappa_v = self.interp2d(self.kappa_v_lowres(head_outputs))  # pyre-ignore[16]
            else:
                # BUGFIX: same nonexistent-attribute issue as in
                # `_initialize_confidence_estimation_layers` (see comment there).
                raise ValueError(
                    f"Unknown confidence model type: "
                    f"{self.confidence_model_cfg.uv_confidence.type}"
                )
        if self.confidence_model_cfg.segm_confidence.enabled:
            # base predictor outputs are assumed to have `fine_segm` and `coarse_segm` attributes
            # base predictor is assumed to define `interp2d` method for bilinear interpolation
            output.fine_segm_confidence = (
                F.softplus(
                    self.interp2d(self.fine_segm_confidence_lowres(head_outputs))  # pyre-ignore[16]
                )
                + self.confidence_model_cfg.segm_confidence.epsilon
            )
            output.fine_segm = base_predictor_outputs.fine_segm * torch.repeat_interleave(
                output.fine_segm_confidence, base_predictor_outputs.fine_segm.shape[1], dim=1
            )
            output.coarse_segm_confidence = (
                F.softplus(
                    self.interp2d(
                        self.coarse_segm_confidence_lowres(head_outputs)  # pyre-ignore[16]
                    )
                )
                + self.confidence_model_cfg.segm_confidence.epsilon
            )
            output.coarse_segm = base_predictor_outputs.coarse_segm * torch.repeat_interleave(
                output.coarse_segm_confidence, base_predictor_outputs.coarse_segm.shape[1], dim=1
            )

        return output

    def _create_output_instance(self, base_predictor_outputs: Any):
        """
        Create an instance of predictor outputs by copying the outputs from the
        base predictor and initializing confidence

        Args:
            base_predictor_outputs: an instance of base predictor outputs
                (the outputs type is assumed to be a dataclass)
        Return:
            An instance of outputs with confidences
        """
        PredictorOutput = decorate_predictor_output_class_with_confidences(
            type(base_predictor_outputs)  # pyre-ignore[6]
        )
        # base_predictor_outputs is assumed to be a dataclass
        # reassign all the fields from base_predictor_outputs (no deep copy!), add new fields
        output = PredictorOutput(
            **base_predictor_outputs.__dict__,
            coarse_segm_confidence=None,
            fine_segm_confidence=None,
            sigma_1=None,
            sigma_2=None,
            kappa_u=None,
            kappa_v=None,
        )
        return output
Leffa/3rdparty/densepose/modeling/predictors/chart_with_confidence.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from . import DensePoseChartConfidencePredictorMixin, DensePoseChartPredictor
4
+ from .registry import DENSEPOSE_PREDICTOR_REGISTRY
5
+
6
+
7
@DENSEPOSE_PREDICTOR_REGISTRY.register()
class DensePoseChartWithConfidencePredictor(
    DensePoseChartConfidencePredictorMixin, DensePoseChartPredictor
):
    """
    Predictor that combines chart and chart confidence estimation.

    The MRO matters: the confidence mixin must precede the base chart predictor
    so that its `__init__`/`forward` wrap the base implementations via `super()`.
    All behavior is inherited; no additional members are defined.
    """

    pass
Leffa/3rdparty/densepose/modeling/predictors/cse.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2
+
3
+ import torch
4
+ from torch import nn
5
+
6
+ from detectron2.config import CfgNode
7
+ from detectron2.layers import ConvTranspose2d, interpolate
8
+
9
+ from ...structures import DensePoseEmbeddingPredictorOutput
10
+ from ..utils import initialize_module_params
11
+ from .registry import DENSEPOSE_PREDICTOR_REGISTRY
12
+
13
+
14
@DENSEPOSE_PREDICTOR_REGISTRY.register()
class DensePoseEmbeddingPredictor(nn.Module):
    """
    Last layers of a DensePose model that take DensePose head outputs as an input
    and produce model outputs for continuous surface embeddings (CSE):
    an embedding tensor and a coarse segmentation tensor, both upscaled
    by the configured scale factor.
    """

    def __init__(self, cfg: CfgNode, input_channels: int):
        """
        Initialize predictor using configuration options

        Args:
            cfg (CfgNode): configuration options
            input_channels (int): input tensor size along the channel dimension
        """
        super().__init__()
        n_channels_in = input_channels
        n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
        embed_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE
        kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL

        def deconv(n_channels_out: int) -> ConvTranspose2d:
            # both heads share the same 2x-upsampling transposed convolution
            return ConvTranspose2d(
                n_channels_in,
                n_channels_out,
                kernel_size,
                stride=2,
                padding=int(kernel_size / 2 - 1),
            )

        self.coarse_segm_lowres = deconv(n_segm_chan)  # coarse segmentation
        self.embed_lowres = deconv(embed_size)  # CSE embedding
        self.scale_factor = cfg.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE
        initialize_module_params(self)

    def interp2d(self, tensor_nchw: torch.Tensor):
        """
        Bilinear interpolation method to be used for upscaling

        Args:
            tensor_nchw (tensor): tensor of shape (N, C, H, W)
        Return:
            tensor of shape (N, C, Hout, Wout), where Hout and Wout are computed
                by applying the scale factor to H and W
        """
        return interpolate(
            tensor_nchw, scale_factor=self.scale_factor, mode="bilinear", align_corners=False
        )

    def forward(self, head_outputs):
        """
        Perform forward step on DensePose head outputs

        Args:
            head_outputs (tensor): DensePose head outputs, tensor of shape [N, D, H, W]
        Return:
            An instance of DensePoseEmbeddingPredictorOutput
        """
        embedding = self.interp2d(self.embed_lowres(head_outputs))
        coarse_segm = self.interp2d(self.coarse_segm_lowres(head_outputs))
        return DensePoseEmbeddingPredictorOutput(embedding=embedding, coarse_segm=coarse_segm)
Leffa/3rdparty/densepose/modeling/predictors/cse_confidence.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from typing import Any
4
+ import torch
5
+ from torch.nn import functional as F
6
+
7
+ from detectron2.config import CfgNode
8
+ from detectron2.layers import ConvTranspose2d
9
+
10
+ from densepose.modeling.confidence import DensePoseConfidenceModelConfig
11
+ from densepose.modeling.utils import initialize_module_params
12
+ from densepose.structures import decorate_cse_predictor_output_class_with_confidences
13
+
14
+
15
class DensePoseEmbeddingConfidencePredictorMixin:
    """
    Predictor contains the last layers of a DensePose model that take DensePose head
    outputs as an input and produce model outputs. Confidence predictor mixin is used
    to generate confidences for coarse segmentation estimated by some
    base predictor. Several assumptions need to hold for the base predictor:
    1) the `forward` method must return CSE DensePose head outputs,
        tensor of shape [N, D, H, W]
    2) `interp2d` method must be defined to perform bilinear interpolation;
        the same method is typically used for masks and confidences
    Confidence predictor mixin provides confidence estimates, as described in:
    N. Neverova et al., Correlated Uncertainty for Learning Dense Correspondences
        from Noisy Labels, NeurIPS 2019
    A. Sanakoyeu et al., Transferring Dense Pose to Proximal Animal Classes, CVPR 2020
    """

    def __init__(self, cfg: CfgNode, input_channels: int):
        """
        Initialize confidence predictor using configuration options.

        Args:
            cfg (CfgNode): configuration options
            input_channels (int): number of input channels
        """
        # the base predictor is responsible for invoking nn.Module.__init__
        super().__init__(cfg, input_channels)  # pyre-ignore[19]
        self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg)
        self._initialize_confidence_estimation_layers(cfg, input_channels)
        self._registry = {}
        initialize_module_params(self)  # pyre-ignore[6]

    def _initialize_confidence_estimation_layers(self, cfg: CfgNode, dim_in: int):
        """
        Initialize confidence estimation layers based on configuration options

        Args:
            cfg (CfgNode): configuration options
            dim_in (int): number of input channels
        """
        kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
        if self.confidence_model_cfg.segm_confidence.enabled:
            self.coarse_segm_confidence_lowres = ConvTranspose2d(  # pyre-ignore[16]
                dim_in, 1, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
            )

    def forward(self, head_outputs: torch.Tensor):
        """
        Perform forward operation on head outputs used as inputs for the predictor.
        Calls forward method from the base predictor and uses its outputs to compute
        confidences.

        Args:
            head_outputs (Tensor): head outputs used as predictor inputs
        Return:
            An instance of outputs with confidences,
            see `decorate_cse_predictor_output_class_with_confidences`
        """
        # the base class is assumed to produce the CSE estimates
        base_outputs = super().forward(head_outputs)  # pyre-ignore[16]

        # extend base predictor outputs with confidence fields
        output = self._create_output_instance(base_outputs)

        if self.confidence_model_cfg.segm_confidence.enabled:
            # base predictor outputs are assumed to have a `coarse_segm` attribute;
            # the base predictor is assumed to define `interp2d` for bilinear upscaling
            epsilon = self.confidence_model_cfg.segm_confidence.epsilon
            confidence_lowres = self.coarse_segm_confidence_lowres(head_outputs)  # pyre-ignore[16]
            output.coarse_segm_confidence = (
                F.softplus(self.interp2d(confidence_lowres)) + epsilon  # pyre-ignore[16]
            )
            n_segm_chan = base_outputs.coarse_segm.shape[1]
            output.coarse_segm = base_outputs.coarse_segm * torch.repeat_interleave(
                output.coarse_segm_confidence, n_segm_chan, dim=1
            )

        return output

    def _create_output_instance(self, base_predictor_outputs: Any):
        """
        Create an instance of predictor outputs by copying the outputs from the
        base predictor and initializing confidence

        Args:
            base_predictor_outputs: an instance of base predictor outputs
                (the outputs type is assumed to be a dataclass)
        Return:
            An instance of outputs with confidences
        """
        PredictorOutput = decorate_cse_predictor_output_class_with_confidences(
            type(base_predictor_outputs)  # pyre-ignore[6]
        )
        # base_predictor_outputs is assumed to be a dataclass;
        # reassign all of its fields (no deep copy!) and add the new field
        return PredictorOutput(
            **base_predictor_outputs.__dict__,
            coarse_segm_confidence=None,
        )
Leffa/3rdparty/densepose/modeling/predictors/cse_with_confidence.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from . import DensePoseEmbeddingConfidencePredictorMixin, DensePoseEmbeddingPredictor
4
+ from .registry import DENSEPOSE_PREDICTOR_REGISTRY
5
+
6
+
7
@DENSEPOSE_PREDICTOR_REGISTRY.register()
class DensePoseEmbeddingWithConfidencePredictor(
    DensePoseEmbeddingConfidencePredictorMixin, DensePoseEmbeddingPredictor
):
    """
    CSE predictor with confidence estimation: combines the base CSE embedding
    predictor with the confidence mixin via cooperative multiple inheritance
    (the mixin wraps the base predictor's forward).
    """

    pass
Leffa/3rdparty/densepose/modeling/predictors/registry.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from detectron2.utils.registry import Registry
4
+
5
# Registry of DensePose predictor modules; implementations register themselves
# via @DENSEPOSE_PREDICTOR_REGISTRY.register() and are looked up by class name.
DENSEPOSE_PREDICTOR_REGISTRY = Registry("DENSEPOSE_PREDICTOR")
Leffa/3rdparty/densepose/modeling/roi_heads/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from .v1convx import DensePoseV1ConvXHead
4
+ from .deeplab import DensePoseDeepLabHead
5
+ from .registry import ROI_DENSEPOSE_HEAD_REGISTRY
6
+ from .roi_head import Decoder, DensePoseROIHeads
Leffa/3rdparty/densepose/modeling/roi_heads/deeplab.py ADDED
@@ -0,0 +1,263 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ import fvcore.nn.weight_init as weight_init
4
+ import torch
5
+ from torch import nn
6
+ from torch.nn import functional as F
7
+
8
+ from detectron2.config import CfgNode
9
+ from detectron2.layers import Conv2d
10
+
11
+ from .registry import ROI_DENSEPOSE_HEAD_REGISTRY
12
+
13
+
14
@ROI_DENSEPOSE_HEAD_REGISTRY.register()
class DensePoseDeepLabHead(nn.Module):
    """
    DensePose head using the DeepLabV3 model from
    "Rethinking Atrous Convolution for Semantic Image Segmentation"
    <https://arxiv.org/abs/1706.05587>: an ASPP module, an optional non-local
    block, followed by a stack of same-padding convolutions with ReLU.
    """

    def __init__(self, cfg: CfgNode, input_channels: int):
        """
        Initialize the DeepLab DensePose head.

        Args:
            cfg (CfgNode): configuration options
            input_channels (int): number of input channels
        """
        super(DensePoseDeepLabHead, self).__init__()
        # fmt: off
        hidden_dim           = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM
        kernel_size          = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL
        norm                 = cfg.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NORM
        self.n_stacked_convs = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS
        self.use_nonlocal    = cfg.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NONLOCAL_ON
        # fmt: on
        pad_size = kernel_size // 2
        n_channels = input_channels

        # Assigning a module to an attribute already registers it as a submodule;
        # the explicit add_module("ASPP"/"NLBlock", ...) calls were redundant.
        self.ASPP = ASPP(input_channels, [6, 12, 56], n_channels)  # 6, 12, 56

        if self.use_nonlocal:
            self.NLBlock = NONLocalBlock2D(input_channels, bn_layer=True)
        # weight_init.c2_msra_fill(self.ASPP)

        for i in range(self.n_stacked_convs):
            norm_module = nn.GroupNorm(32, hidden_dim) if norm == "GN" else None
            layer = Conv2d(
                n_channels,
                hidden_dim,
                kernel_size,
                stride=1,
                padding=pad_size,
                bias=not norm,  # bias is folded into the norm layer when one is used
                norm=norm_module,
            )
            weight_init.c2_msra_fill(layer)
            n_channels = hidden_dim
            self.add_module(self._get_layer_name(i), layer)
        self.n_out_channels = hidden_dim
        # initialize_module_params(self)

    def forward(self, features):
        """
        Apply the head to ROI features.

        Args:
            features (tensor): input ROI features, (N, C, H, W)
        Return:
            A tensor of head outputs, (N, n_out_channels, H, W)
        """
        x = self.ASPP(features)
        if self.use_nonlocal:
            x = self.NLBlock(x)
        output = x
        for i in range(self.n_stacked_convs):
            x = getattr(self, self._get_layer_name(i))(x)
            x = F.relu(x)
            output = x
        return output

    def _get_layer_name(self, i: int):
        # Layer names are 1-based: "body_conv_fcn1", "body_conv_fcn2", ...
        return "body_conv_fcn{}".format(i + 1)
76
+
77
+
78
+ # Copied from
79
+ # https://github.com/pytorch/vision/blob/master/torchvision/models/segmentation/deeplabv3.py
80
+ # See https://arxiv.org/pdf/1706.05587.pdf for details
81
class ASPPConv(nn.Sequential):
    """One atrous branch of ASPP: 3x3 dilated conv -> GroupNorm -> ReLU.

    Padding equals the dilation rate, so spatial size is preserved.
    """

    def __init__(self, in_channels, out_channels, dilation):
        super(ASPPConv, self).__init__(
            nn.Conv2d(
                in_channels, out_channels, 3, padding=dilation, dilation=dilation, bias=False
            ),
            nn.GroupNorm(32, out_channels),
            nn.ReLU(),
        )
91
+
92
+
93
class ASPPPooling(nn.Sequential):
    """Global-context branch of ASPP: pool to 1x1, project, then upsample back."""

    def __init__(self, in_channels, out_channels):
        layers = [
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(in_channels, out_channels, 1, bias=False),
            nn.GroupNorm(32, out_channels),
            nn.ReLU(),
        ]
        super(ASPPPooling, self).__init__(*layers)

    def forward(self, x):
        # Remember the input resolution so pooled features can be broadcast back.
        spatial_size = x.shape[-2:]
        pooled = super(ASPPPooling, self).forward(x)
        return F.interpolate(pooled, size=spatial_size, mode="bilinear", align_corners=False)
106
+
107
+
108
class ASPP(nn.Module):
    """
    Atrous Spatial Pyramid Pooling (https://arxiv.org/pdf/1706.05587.pdf):
    a 1x1 projection, three dilated 3x3 branches and one global pooling
    branch, concatenated and projected back to `out_channels`.
    """

    def __init__(self, in_channels, atrous_rates, out_channels):
        super(ASPP, self).__init__()
        branches = []
        # 1x1 projection branch.
        branches.append(
            nn.Sequential(
                nn.Conv2d(in_channels, out_channels, 1, bias=False),
                nn.GroupNorm(32, out_channels),
                nn.ReLU(),
            )
        )

        # Three atrous branches, one per dilation rate.
        rate1, rate2, rate3 = tuple(atrous_rates)
        branches.append(ASPPConv(in_channels, out_channels, rate1))
        branches.append(ASPPConv(in_channels, out_channels, rate2))
        branches.append(ASPPConv(in_channels, out_channels, rate3))
        branches.append(ASPPPooling(in_channels, out_channels))

        self.convs = nn.ModuleList(branches)

        # Fuse the five concatenated branch outputs back to out_channels.
        self.project = nn.Sequential(
            nn.Conv2d(5 * out_channels, out_channels, 1, bias=False),
            # nn.BatchNorm2d(out_channels),
            nn.ReLU()
            # nn.Dropout(0.5)
        )

    def forward(self, x):
        branch_outputs = [conv(x) for conv in self.convs]
        return self.project(torch.cat(branch_outputs, dim=1))
141
+
142
+
143
+ # copied from
144
+ # https://github.com/AlexHex7/Non-local_pytorch/blob/master/lib/non_local_embedded_gaussian.py
145
+ # See https://arxiv.org/abs/1711.07971 for details
146
+ class _NonLocalBlockND(nn.Module):
147
+ def __init__(
148
+ self, in_channels, inter_channels=None, dimension=3, sub_sample=True, bn_layer=True
149
+ ):
150
+ super(_NonLocalBlockND, self).__init__()
151
+
152
+ assert dimension in [1, 2, 3]
153
+
154
+ self.dimension = dimension
155
+ self.sub_sample = sub_sample
156
+
157
+ self.in_channels = in_channels
158
+ self.inter_channels = inter_channels
159
+
160
+ if self.inter_channels is None:
161
+ self.inter_channels = in_channels // 2
162
+ if self.inter_channels == 0:
163
+ self.inter_channels = 1
164
+
165
+ if dimension == 3:
166
+ conv_nd = nn.Conv3d
167
+ max_pool_layer = nn.MaxPool3d(kernel_size=(1, 2, 2))
168
+ bn = nn.GroupNorm # (32, hidden_dim) #nn.BatchNorm3d
169
+ elif dimension == 2:
170
+ conv_nd = nn.Conv2d
171
+ max_pool_layer = nn.MaxPool2d(kernel_size=(2, 2))
172
+ bn = nn.GroupNorm # (32, hidden_dim)nn.BatchNorm2d
173
+ else:
174
+ conv_nd = nn.Conv1d
175
+ max_pool_layer = nn.MaxPool1d(kernel_size=2)
176
+ bn = nn.GroupNorm # (32, hidden_dim)nn.BatchNorm1d
177
+
178
+ self.g = conv_nd(
179
+ in_channels=self.in_channels,
180
+ out_channels=self.inter_channels,
181
+ kernel_size=1,
182
+ stride=1,
183
+ padding=0,
184
+ )
185
+
186
+ if bn_layer:
187
+ self.W = nn.Sequential(
188
+ conv_nd(
189
+ in_channels=self.inter_channels,
190
+ out_channels=self.in_channels,
191
+ kernel_size=1,
192
+ stride=1,
193
+ padding=0,
194
+ ),
195
+ bn(32, self.in_channels),
196
+ )
197
+ nn.init.constant_(self.W[1].weight, 0)
198
+ nn.init.constant_(self.W[1].bias, 0)
199
+ else:
200
+ self.W = conv_nd(
201
+ in_channels=self.inter_channels,
202
+ out_channels=self.in_channels,
203
+ kernel_size=1,
204
+ stride=1,
205
+ padding=0,
206
+ )
207
+ nn.init.constant_(self.W.weight, 0)
208
+ nn.init.constant_(self.W.bias, 0)
209
+
210
+ self.theta = conv_nd(
211
+ in_channels=self.in_channels,
212
+ out_channels=self.inter_channels,
213
+ kernel_size=1,
214
+ stride=1,
215
+ padding=0,
216
+ )
217
+ self.phi = conv_nd(
218
+ in_channels=self.in_channels,
219
+ out_channels=self.inter_channels,
220
+ kernel_size=1,
221
+ stride=1,
222
+ padding=0,
223
+ )
224
+
225
+ if sub_sample:
226
+ self.g = nn.Sequential(self.g, max_pool_layer)
227
+ self.phi = nn.Sequential(self.phi, max_pool_layer)
228
+
229
+ def forward(self, x):
230
+ """
231
+ :param x: (b, c, t, h, w)
232
+ :return:
233
+ """
234
+
235
+ batch_size = x.size(0)
236
+
237
+ g_x = self.g(x).view(batch_size, self.inter_channels, -1)
238
+ g_x = g_x.permute(0, 2, 1)
239
+
240
+ theta_x = self.theta(x).view(batch_size, self.inter_channels, -1)
241
+ theta_x = theta_x.permute(0, 2, 1)
242
+ phi_x = self.phi(x).view(batch_size, self.inter_channels, -1)
243
+ f = torch.matmul(theta_x, phi_x)
244
+ f_div_C = F.softmax(f, dim=-1)
245
+
246
+ y = torch.matmul(f_div_C, g_x)
247
+ y = y.permute(0, 2, 1).contiguous()
248
+ y = y.view(batch_size, self.inter_channels, *x.size()[2:])
249
+ W_y = self.W(y)
250
+ z = W_y + x
251
+
252
+ return z
253
+
254
+
255
class NONLocalBlock2D(_NonLocalBlockND):
    """Non-local block specialized for 2D feature maps of shape (b, c, h, w)."""

    def __init__(self, in_channels, inter_channels=None, sub_sample=True, bn_layer=True):
        super(NONLocalBlock2D, self).__init__(
            in_channels,
            inter_channels=inter_channels,
            dimension=2,
            sub_sample=sub_sample,
            bn_layer=bn_layer,
        )
Leffa/3rdparty/densepose/modeling/roi_heads/registry.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from detectron2.utils.registry import Registry
4
+
5
# Registry of DensePose head modules (e.g. DensePoseV1ConvXHead, DensePoseDeepLabHead);
# implementations register themselves via @ROI_DENSEPOSE_HEAD_REGISTRY.register().
ROI_DENSEPOSE_HEAD_REGISTRY = Registry("ROI_DENSEPOSE_HEAD")
Leffa/3rdparty/densepose/modeling/roi_heads/roi_head.py ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ import numpy as np
4
+ from typing import Dict, List, Optional
5
+ import fvcore.nn.weight_init as weight_init
6
+ import torch
7
+ import torch.nn as nn
8
+ from torch.nn import functional as F
9
+
10
+ from detectron2.layers import Conv2d, ShapeSpec, get_norm
11
+ from detectron2.modeling import ROI_HEADS_REGISTRY, StandardROIHeads
12
+ from detectron2.modeling.poolers import ROIPooler
13
+ from detectron2.modeling.roi_heads import select_foreground_proposals
14
+ from detectron2.structures import ImageList, Instances
15
+
16
+ from .. import (
17
+ build_densepose_data_filter,
18
+ build_densepose_embedder,
19
+ build_densepose_head,
20
+ build_densepose_losses,
21
+ build_densepose_predictor,
22
+ densepose_inference,
23
+ )
24
+
25
+
26
class Decoder(nn.Module):
    """
    A semantic segmentation head described in detail in the Panoptic Feature
    Pyramid Networks paper (https://arxiv.org/abs/1901.02446). It takes FPN
    features as input and merges information from all levels of the FPN into
    a single output.
    """

    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec], in_features):
        super(Decoder, self).__init__()

        # fmt: off
        self.in_features   = in_features
        feature_strides    = {k: v.stride for k, v in input_shape.items()}
        feature_channels   = {k: v.channels for k, v in input_shape.items()}
        num_classes        = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES
        conv_dims          = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS
        self.common_stride = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE
        norm               = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM
        # fmt: on

        self.scale_heads = []
        for in_feature in self.in_features:
            # Each level gets enough conv(+2x upsample) stages to reach the
            # common stride (at least one conv even if strides already match).
            head_ops = []
            head_length = max(
                1, int(np.log2(feature_strides[in_feature]) - np.log2(self.common_stride))
            )
            for k in range(head_length):
                conv = Conv2d(
                    feature_channels[in_feature] if k == 0 else conv_dims,
                    conv_dims,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    bias=not norm,
                    norm=get_norm(norm, conv_dims),
                    activation=F.relu,
                )
                weight_init.c2_msra_fill(conv)
                head_ops.append(conv)
                if feature_strides[in_feature] != self.common_stride:
                    head_ops.append(
                        nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False)
                    )
            self.scale_heads.append(nn.Sequential(*head_ops))
            # Register the head under the feature name for stable state-dict keys.
            self.add_module(in_feature, self.scale_heads[-1])
        self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0)
        weight_init.c2_msra_fill(self.predictor)

    def forward(self, features: List[torch.Tensor]):
        # Sum the per-level head outputs, then predict per-pixel classes.
        merged = self.scale_heads[0](features[0])
        for i in range(1, len(self.in_features)):
            merged = merged + self.scale_heads[i](features[i])
        return self.predictor(merged)
82
+
83
+
84
@ROI_HEADS_REGISTRY.register()
class DensePoseROIHeads(StandardROIHeads):
    """
    A Standard ROIHeads which contains an addition of DensePose head.
    """

    def __init__(self, cfg, input_shape):
        super().__init__(cfg, input_shape)
        self._init_densepose_head(cfg, input_shape)

    def _init_densepose_head(self, cfg, input_shape):
        # fmt: off
        self.densepose_on = cfg.MODEL.DENSEPOSE_ON
        if not self.densepose_on:
            return
        self.densepose_data_filter = build_densepose_data_filter(cfg)
        dp_pooler_resolution     = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION
        dp_pooler_sampling_ratio = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO
        dp_pooler_type           = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE
        self.use_decoder         = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON
        # fmt: on
        if self.use_decoder:
            # With a decoder, pooling happens on a single merged feature map
            # at the stride of the first input level.
            dp_pooler_scales = (1.0 / input_shape[self.in_features[0]].stride,)
        else:
            dp_pooler_scales = tuple(1.0 / input_shape[k].stride for k in self.in_features)
        # NOTE(review): assumes all input levels share the same channel count — confirm.
        in_channels = [input_shape[f].channels for f in self.in_features][0]

        if self.use_decoder:
            self.decoder = Decoder(cfg, input_shape, self.in_features)

        self.densepose_pooler = ROIPooler(
            output_size=dp_pooler_resolution,
            scales=dp_pooler_scales,
            sampling_ratio=dp_pooler_sampling_ratio,
            pooler_type=dp_pooler_type,
        )
        self.densepose_head = build_densepose_head(cfg, in_channels)
        self.densepose_predictor = build_densepose_predictor(
            cfg, self.densepose_head.n_out_channels
        )
        self.densepose_losses = build_densepose_losses(cfg)
        self.embedder = build_densepose_embedder(cfg)

    def _forward_densepose(self, features: Dict[str, torch.Tensor], instances: List[Instances]):
        """
        Forward logic of the densepose prediction branch.

        Args:
            features (dict[str, Tensor]): input data as a mapping from feature
                map name to tensor. Axis 0 represents the number of images `N` in
                the input data; axes 1-3 are channels, height, and width, which may
                vary between feature maps (e.g., if a feature pyramid is used).
            instances (list[Instances]): length `N` list of `Instances`. The i-th
                `Instances` contains instances for the i-th input image,
                In training, they can be the proposals.
                In inference, they can be the predicted boxes.

        Returns:
            In training, a dict of losses.
            In inference, update `instances` with new fields "densepose" and return it.
        """
        if not self.densepose_on:
            return {} if self.training else instances

        level_features = [features[f] for f in self.in_features]
        if self.training:
            proposals, _ = select_foreground_proposals(instances, self.num_classes)
            level_features, proposals = self.densepose_data_filter(level_features, proposals)
            if len(proposals) > 0:
                proposal_boxes = [x.proposal_boxes for x in proposals]

                if self.use_decoder:
                    level_features = [self.decoder(level_features)]

                pooled = self.densepose_pooler(level_features, proposal_boxes)
                head_outputs = self.densepose_head(pooled)
                predictor_outputs = self.densepose_predictor(head_outputs)
                return self.densepose_losses(
                    proposals, predictor_outputs, embedder=self.embedder
                )
            # NOTE(review): with no filtered proposals this falls through and
            # returns None (original behavior preserved).
        else:
            pred_boxes = [x.pred_boxes for x in instances]

            if self.use_decoder:
                level_features = [self.decoder(level_features)]

            pooled = self.densepose_pooler(level_features, pred_boxes)
            if len(pooled) > 0:
                head_outputs = self.densepose_head(pooled)
                predictor_outputs = self.densepose_predictor(head_outputs)
            else:
                predictor_outputs = None

            densepose_inference(predictor_outputs, instances)
            return instances

    def forward(
        self,
        images: ImageList,
        features: Dict[str, torch.Tensor],
        proposals: List[Instances],
        targets: Optional[List[Instances]] = None,
    ):
        instances, losses = super().forward(images, features, proposals, targets)
        del targets, images

        if self.training:
            losses.update(self._forward_densepose(features, instances))
        return instances, losses

    def forward_with_given_boxes(
        self, features: Dict[str, torch.Tensor], instances: List[Instances]
    ):
        """
        Use the given boxes in `instances` to produce other (non-box) per-ROI outputs.

        This is useful for downstream tasks where a box is known, but need to obtain
        other attributes (outputs of other heads).
        Test-time augmentation also uses this.

        Args:
            features: same as in `forward()`
            instances (list[Instances]): instances to predict other outputs. Expect the keys
                "pred_boxes" and "pred_classes" to exist.

        Returns:
            instances (list[Instances]):
                the same `Instances` objects, with extra
                fields such as `pred_masks` or `pred_keypoints`.
        """

        instances = super().forward_with_given_boxes(features, instances)
        instances = self._forward_densepose(features, instances)
        return instances
Leffa/3rdparty/densepose/modeling/roi_heads/v1convx.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ import torch
4
+ from torch import nn
5
+ from torch.nn import functional as F
6
+
7
+ from detectron2.config import CfgNode
8
+ from detectron2.layers import Conv2d
9
+
10
+ from ..utils import initialize_module_params
11
+ from .registry import ROI_DENSEPOSE_HEAD_REGISTRY
12
+
13
+
14
@ROI_DENSEPOSE_HEAD_REGISTRY.register()
class DensePoseV1ConvXHead(nn.Module):
    """
    Fully convolutional DensePose head: a stack of same-padding convolutions,
    each followed by ReLU.
    """

    def __init__(self, cfg: CfgNode, input_channels: int):
        """
        Initialize DensePose fully convolutional head

        Args:
            cfg (CfgNode): configuration options
            input_channels (int): number of input channels
        """
        super(DensePoseV1ConvXHead, self).__init__()
        # fmt: off
        hidden_dim           = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM
        kernel_size          = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL
        self.n_stacked_convs = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS
        # fmt: on
        pad_size = kernel_size // 2  # "same" padding for odd kernel sizes
        n_channels = input_channels
        for i in range(self.n_stacked_convs):
            conv = Conv2d(n_channels, hidden_dim, kernel_size, stride=1, padding=pad_size)
            self.add_module(self._get_layer_name(i), conv)
            n_channels = hidden_dim
        self.n_out_channels = n_channels
        initialize_module_params(self)

    def forward(self, features: torch.Tensor):
        """
        Apply DensePose fully convolutional head to the input features

        Args:
            features (tensor): input features
        Result:
            A tensor of DensePose head outputs
        """
        output = features
        for i in range(self.n_stacked_convs):
            output = F.relu(getattr(self, self._get_layer_name(i))(output))
        return output

    def _get_layer_name(self, i: int):
        # Layer names are 1-based: "body_conv_fcn1", "body_conv_fcn2", ...
        return "body_conv_fcn{}".format(i + 1)
Leffa/3rdparty/densepose/modeling/test_time_augmentation.py ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ import copy
3
+ import numpy as np
4
+ import torch
5
+ from fvcore.transforms import HFlipTransform, TransformList
6
+ from torch.nn import functional as F
7
+
8
+ from detectron2.data.transforms import RandomRotation, RotationTransform, apply_transform_gens
9
+ from detectron2.modeling.postprocessing import detector_postprocess
10
+ from detectron2.modeling.test_time_augmentation import DatasetMapperTTA, GeneralizedRCNNWithTTA
11
+
12
+ from ..converters import HFlipConverter
13
+
14
+
15
class DensePoseDatasetMapperTTA(DatasetMapperTTA):
    """
    Test-time-augmentation mapper that extends the standard flip/resize
    augmentations with a configurable set of rotations
    (cfg.TEST.AUG.ROTATION_ANGLES).
    """

    def __init__(self, cfg):
        super().__init__(cfg=cfg)
        self.angles = cfg.TEST.AUG.ROTATION_ANGLES

    def __call__(self, dataset_dict):
        ret = super().__call__(dataset_dict=dataset_dict)
        hwc_image = dataset_dict["image"].permute(1, 2, 0).numpy()
        for angle in self.angles:
            rot_gen = RandomRotation(angle=angle, expand=True)
            rotated_image, tfms = apply_transform_gens([rot_gen], np.copy(hwc_image))
            chw_tensor = torch.from_numpy(np.ascontiguousarray(rotated_image.transpose(2, 0, 1)))
            entry = copy.deepcopy(dataset_dict)
            # In DatasetMapperTTA, there is a pre_tfm transform (resize or no-op) that is
            # added at the beginning of each TransformList. That's '.transforms[0]'.
            entry["transforms"] = TransformList(
                [ret[-1]["transforms"].transforms[0]] + tfms.transforms
            )
            entry["image"] = chw_tensor
            ret.append(entry)
        return ret
36
+
37
+
38
class DensePoseGeneralizedRCNNWithTTA(GeneralizedRCNNWithTTA):
    def __init__(self, cfg, model, transform_data, tta_mapper=None, batch_size=1):
        """
        Args:
            cfg (CfgNode):
            model (GeneralizedRCNN): a GeneralizedRCNN to apply TTA on.
            transform_data (DensePoseTransformData): contains symmetry label
                transforms used for horizontal flip
            tta_mapper (callable): takes a dataset dict and returns a list of
                augmented versions of the dataset dict. Defaults to
                `DatasetMapperTTA(cfg)`.
            batch_size (int): batch the augmented images into this batch size for inference.
        """
        self._transform_data = transform_data.to(model.device)
        super().__init__(cfg=cfg, model=model, tta_mapper=tta_mapper, batch_size=batch_size)

    # the implementation follows closely the one from detectron2/modeling
    def _inference_one_image(self, input):
        """
        Args:
            input (dict): one dataset dict with "image" field being a CHW tensor

        Returns:
            dict: one output dict
        """
        orig_shape = (input["height"], input["width"])
        # For some reason, resize with uint8 slightly increases box AP but decreases densepose AP
        input["image"] = input["image"].to(torch.uint8)
        augmented_inputs, tfms = self._get_augmented_inputs(input)
        # Stage 1: detect boxes from all augmented versions with the ROI heads off.
        with self._turn_off_roi_heads(["mask_on", "keypoint_on", "densepose_on"]):
            all_boxes, all_scores, all_classes = self._get_augmented_boxes(augmented_inputs, tfms)
        merged_instances = self._merge_detections(all_boxes, all_scores, all_classes, orig_shape)

        if self.cfg.MODEL.MASK_ON or self.cfg.MODEL.DENSEPOSE_ON:
            # Stage 2: re-run the heads on the merged boxes, per augmentation.
            augmented_instances = self._rescale_detected_boxes(
                augmented_inputs, merged_instances, tfms
            )
            outputs = self._batch_inference(augmented_inputs, augmented_instances)
            # Free memory before averaging predictions.
            del augmented_inputs, augmented_instances
            if self.cfg.MODEL.MASK_ON:
                merged_instances.pred_masks = self._reduce_pred_masks(outputs, tfms)
            if self.cfg.MODEL.DENSEPOSE_ON:
                merged_instances.pred_densepose = self._reduce_pred_densepose(outputs, tfms)
            merged_instances = detector_postprocess(merged_instances, *orig_shape)
            return {"instances": merged_instances}
        else:
            return {"instances": merged_instances}

    def _get_augmented_boxes(self, augmented_inputs, tfms):
        # Heavily based on detectron2/modeling/test_time_augmentation.py
        # Only difference is that RotationTransform is excluded from bbox computation
        # 1: forward with all augmented images
        outputs = self._batch_inference(augmented_inputs)
        # 2: union the results
        all_boxes = []
        all_scores = []
        all_classes = []
        for output, tfm in zip(outputs, tfms):
            # Need to inverse the transforms on boxes, to obtain results on original image
            if not any(isinstance(t, RotationTransform) for t in tfm.transforms):
                # Some transforms can't compute bbox correctly
                pred_boxes = output.pred_boxes.tensor
                original_pred_boxes = tfm.inverse().apply_box(pred_boxes.cpu().numpy())
                all_boxes.append(torch.from_numpy(original_pred_boxes).to(pred_boxes.device))
                all_scores.extend(output.scores)
                all_classes.extend(output.pred_classes)
        all_boxes = torch.cat(all_boxes, dim=0)
        return all_boxes, all_scores, all_classes

    def _reduce_pred_densepose(self, outputs, tfms):
        # Should apply inverse transforms on densepose preds.
        # We assume only rotation, resize & flip are used. pred_masks is a scale-invariant
        # representation, so we handle the other ones specially
        for idx, (output, tfm) in enumerate(zip(outputs, tfms)):
            for t in tfm.transforms:
                for attr in ["coarse_segm", "fine_segm", "u", "v"]:
                    setattr(
                        output.pred_densepose,
                        attr,
                        _inverse_rotation(
                            getattr(output.pred_densepose, attr), output.pred_boxes.tensor, t
                        ),
                    )
            if any(isinstance(t, HFlipTransform) for t in tfm.transforms):
                output.pred_densepose = HFlipConverter.convert(
                    output.pred_densepose, self._transform_data
                )
            self._incremental_avg_dp(outputs[0].pred_densepose, output.pred_densepose, idx)
        return outputs[0].pred_densepose

    # incrementally computed average: u_(n + 1) = u_n + (x_(n+1) - u_n) / (n + 1).
    def _incremental_avg_dp(self, avg, new_el, idx):
        for attr in ["coarse_segm", "fine_segm", "u", "v"]:
            setattr(avg, attr, (getattr(avg, attr) * idx + getattr(new_el, attr)) / (idx + 1))
            if idx:
                # Deletion of the > 0 index intermediary values to prevent GPU OOM
                setattr(new_el, attr, None)
        return avg
143
+
144
+
145
+ def _inverse_rotation(densepose_attrs, boxes, transform):
146
+ # resample outputs to image size and rotate back the densepose preds
147
+ # on the rotated images to the space of the original image
148
+ if len(boxes) == 0 or not isinstance(transform, RotationTransform):
149
+ return densepose_attrs
150
+ boxes = boxes.int().cpu().numpy()
151
+ wh_boxes = boxes[:, 2:] - boxes[:, :2] # bboxes in the rotated space
152
+ inv_boxes = rotate_box_inverse(transform, boxes).astype(int) # bboxes in original image
153
+ wh_diff = (inv_boxes[:, 2:] - inv_boxes[:, :2] - wh_boxes) // 2 # diff between new/old bboxes
154
+ rotation_matrix = torch.tensor([transform.rm_image]).to(device=densepose_attrs.device).float()
155
+ rotation_matrix[:, :, -1] = 0
156
+ # To apply grid_sample for rotation, we need to have enough space to fit the original and
157
+ # rotated bboxes. l_bds and r_bds are the left/right bounds that will be used to
158
+ # crop the difference once the rotation is done
159
+ l_bds = np.maximum(0, -wh_diff)
160
+ for i in range(len(densepose_attrs)):
161
+ if min(wh_boxes[i]) <= 0:
162
+ continue
163
+ densepose_attr = densepose_attrs[[i]].clone()
164
+ # 1. Interpolate densepose attribute to size of the rotated bbox
165
+ densepose_attr = F.interpolate(densepose_attr, wh_boxes[i].tolist()[::-1], mode="bilinear")
166
+ # 2. Pad the interpolated attribute so it has room for the original + rotated bbox
167
+ densepose_attr = F.pad(densepose_attr, tuple(np.repeat(np.maximum(0, wh_diff[i]), 2)))
168
+ # 3. Compute rotation grid and transform
169
+ grid = F.affine_grid(rotation_matrix, size=densepose_attr.shape)
170
+ densepose_attr = F.grid_sample(densepose_attr, grid)
171
+ # 4. Compute right bounds and crop the densepose_attr to the size of the original bbox
172
+ r_bds = densepose_attr.shape[2:][::-1] - l_bds[i]
173
+ densepose_attr = densepose_attr[:, :, l_bds[i][1] : r_bds[1], l_bds[i][0] : r_bds[0]]
174
+ if min(densepose_attr.shape) > 0:
175
+ # Interpolate back to the original size of the densepose attribute
176
+ densepose_attr = F.interpolate(
177
+ densepose_attr, densepose_attrs.shape[-2:], mode="bilinear"
178
+ )
179
+ # Adding a very small probability to the background class to fill padded zones
180
+ densepose_attr[:, 0] += 1e-10
181
+ densepose_attrs[i] = densepose_attr
182
+ return densepose_attrs
183
+
184
+
185
def rotate_box_inverse(rot_tfm, rotated_box):
    """
    Invert a rotation on axis-aligned bounding boxes, restoring their
    original size.

    rotated_box is an N x 4 array of [x0, y0, x1, y1] boxes. Rotating a box
    enlarges it (the new box must enclose the tilted original), so naively
    inverse-rotating an already-rotated box enlarges it a second time; this
    function also shrinks the result back to the original extent.
    """
    # 1. Inverse-rotate the boxes (result is larger than the originals).
    invrot_box = rot_tfm.inverse().apply_box(rotated_box)
    h, w = rotated_box[:, 3] - rotated_box[:, 1], rotated_box[:, 2] - rotated_box[:, 0]
    ih, iw = invrot_box[:, 3] - invrot_box[:, 1], invrot_box[:, 2] - invrot_box[:, 0]
    assert 2 * rot_tfm.abs_sin**2 != 1, "45 degrees angle can't be inverted"
    # 2. Invert the size computation performed by the rotation transform
    # to recover the pre-rotation height/width.
    denom = 1 - 2 * rot_tfm.abs_sin**2
    orig_h = (h * rot_tfm.abs_cos - w * rot_tfm.abs_sin) / denom
    orig_w = (w * rot_tfm.abs_cos - h * rot_tfm.abs_sin) / denom
    # 3. Shrink the inverse-rotated boxes symmetrically to their original size.
    invrot_box[:, 0] += (iw - orig_w) / 2
    invrot_box[:, 1] += (ih - orig_h) / 2
    invrot_box[:, 2] -= (iw - orig_w) / 2
    invrot_box[:, 3] -= (ih - orig_h) / 2

    return invrot_box
Leffa/3rdparty/densepose/modeling/utils.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from torch import nn
4
+
5
+
6
def initialize_module_params(module: nn.Module) -> None:
    """
    Initialize all parameters of a module in place: parameters whose name
    contains "bias" are zeroed, those containing "weight" get Kaiming-normal
    initialization (fan_out, ReLU); all other parameters are left untouched.
    """
    for name, param in module.named_parameters():
        if "bias" in name:
            nn.init.constant_(param, 0)
        elif "weight" in name:
            nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")
Leffa/3rdparty/densepose/utils/__init__.py ADDED
File without changes