b3h-young123 committed on
Commit
5431609
·
verified ·
1 Parent(s): a8c92ae

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Leffa/densepose/__init__.py +20 -0
  2. Leffa/densepose/config.py +277 -0
  3. Leffa/densepose/converters/__init__.py +15 -0
  4. Leffa/densepose/converters/base.py +93 -0
  5. Leffa/densepose/converters/builtin.py +31 -0
  6. Leffa/densepose/converters/chart_output_hflip.py +71 -0
  7. Leffa/densepose/converters/chart_output_to_chart_result.py +188 -0
  8. Leffa/densepose/converters/hflip.py +34 -0
  9. Leffa/densepose/converters/segm_to_mask.py +150 -0
  10. Leffa/densepose/converters/to_chart_result.py +70 -0
  11. Leffa/densepose/converters/to_mask.py +49 -0
  12. Leffa/densepose/engine/__init__.py +3 -0
  13. Leffa/densepose/engine/trainer.py +258 -0
  14. Leffa/densepose/modeling/__init__.py +13 -0
  15. Leffa/densepose/modeling/build.py +87 -0
  16. Leffa/densepose/modeling/confidence.py +73 -0
  17. Leffa/densepose/modeling/densepose_checkpoint.py +35 -0
  18. Leffa/densepose/modeling/filter.py +94 -0
  19. Leffa/densepose/modeling/hrfpn.py +182 -0
  20. Leffa/densepose/modeling/hrnet.py +474 -0
  21. Leffa/densepose/modeling/inference.py +44 -0
  22. Leffa/densepose/modeling/losses/__init__.py +14 -0
  23. Leffa/densepose/modeling/losses/chart.py +291 -0
  24. Leffa/densepose/modeling/losses/embed_utils.py +137 -0
  25. Leffa/densepose/modeling/losses/mask_or_segm.py +77 -0
  26. Leffa/densepose/modeling/predictors/__init__.py +9 -0
  27. Leffa/densepose/modeling/predictors/chart.py +94 -0
  28. Leffa/densepose/modeling/predictors/chart_confidence.py +174 -0
  29. Leffa/densepose/modeling/predictors/chart_with_confidence.py +15 -0
  30. Leffa/densepose/modeling/predictors/cse.py +70 -0
  31. Leffa/densepose/modeling/predictors/cse_confidence.py +115 -0
  32. Leffa/densepose/modeling/predictors/cse_with_confidence.py +15 -0
  33. Leffa/densepose/modeling/predictors/registry.py +5 -0
  34. Leffa/densepose/modeling/roi_heads/__init__.py +6 -0
  35. Leffa/densepose/modeling/roi_heads/deeplab.py +263 -0
  36. Leffa/densepose/modeling/roi_heads/registry.py +5 -0
  37. Leffa/densepose/modeling/roi_heads/roi_head.py +218 -0
  38. Leffa/densepose/modeling/roi_heads/v1convx.py +64 -0
  39. Leffa/densepose/modeling/test_time_augmentation.py +207 -0
  40. Leffa/densepose/modeling/utils.py +11 -0
  41. Leffa/densepose/utils/__init__.py +0 -0
  42. Leffa/densepose/utils/dbhelper.py +147 -0
  43. Leffa/densepose/utils/logger.py +13 -0
  44. Leffa/densepose/utils/transform.py +15 -0
  45. Leffa/leffa_utils/densepose_for_mask.py +170 -0
  46. Leffa/leffa_utils/densepose_predictor.py +77 -0
  47. Leffa/leffa_utils/garment_agnostic_mask_predictor.py +415 -0
  48. Leffa/leffa_utils/utils.py +379 -0
  49. Leffa/preprocess/humanparsing/mhp_extension/detectron2/docker/Dockerfile +49 -0
  50. Leffa/preprocess/humanparsing/mhp_extension/detectron2/docker/Dockerfile-circleci +17 -0
Leffa/densepose/__init__.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ from .data.datasets import builtin # just to register data
3
+ from .converters import builtin as builtin_converters # register converters
4
+ from .config import (
5
+ add_densepose_config,
6
+ add_densepose_head_config,
7
+ add_hrnet_config,
8
+ add_dataset_category_config,
9
+ add_bootstrap_config,
10
+ load_bootstrap_config,
11
+ )
12
+ from .structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData
13
+ from .evaluation import DensePoseCOCOEvaluator
14
+ from .modeling.roi_heads import DensePoseROIHeads
15
+ from .modeling.test_time_augmentation import (
16
+ DensePoseGeneralizedRCNNWithTTA,
17
+ DensePoseDatasetMapperTTA,
18
+ )
19
+ from .utils.transform import load_from_cfg
20
+ from .modeling.hrfpn import build_hrfpn_backbone
Leffa/densepose/config.py ADDED
@@ -0,0 +1,277 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright (c) Facebook, Inc. and its affiliates.
3
+ # pyre-ignore-all-errors
4
+
5
+ from detectron2.config import CfgNode as CN
6
+
7
+
8
def add_dataset_category_config(cfg: CN) -> None:
    """
    Add config for additional category-related dataset options:
    category whitelisting, category mapping and class-to-mesh-name mapping.
    """
    # All three nodes allow arbitrary new keys, since the concrete
    # categories / meshes are dataset specific.
    cfg.DATASETS.CATEGORY_MAPS = CN(new_allowed=True)
    cfg.DATASETS.WHITELISTED_CATEGORIES = CN(new_allowed=True)
    # class to mesh mapping
    cfg.DATASETS.CLASS_TO_MESH_NAME_MAPPING = CN(new_allowed=True)
19
+
20
+
21
def add_evaluation_config(cfg: CN) -> None:
    """
    Add DensePose evaluation options under ``cfg.DENSEPOSE_EVALUATION``.
    """
    cfg.DENSEPOSE_EVALUATION = CN()
    # Evaluator type, possible values:
    #   "iou" - evaluator for models that produce iou data
    #   "cse" - evaluator for models that produce cse data
    cfg.DENSEPOSE_EVALUATION.TYPE = "iou"
    # Storage for DensePose results, possible values:
    #   "none" - no explicit storage, all the results are kept in the
    #            predictions dictionary (memory intensive; historically
    #            the default storage type)
    #   "ram"  - per-process RAM storage, reduced to a single process
    #            storage on later stages; less memory intensive
    #   "file" - per-process file-based storage; the least memory
    #            intensive, but may create bottlenecks on file system
    #            accesses
    cfg.DENSEPOSE_EVALUATION.STORAGE = "none"
    # Minimum threshold for IOU values: the lower its value is,
    # the more matches are produced (and the higher the AP score).
    cfg.DENSEPOSE_EVALUATION.MIN_IOU_THRESHOLD = 0.5
    # Non-distributed inference is slower (at inference time) but can avoid RAM OOM.
    cfg.DENSEPOSE_EVALUATION.DISTRIBUTED_INFERENCE = True
    # Evaluate mesh alignment based on vertex embeddings; only makes sense in CSE context.
    cfg.DENSEPOSE_EVALUATION.EVALUATE_MESH_ALIGNMENT = False
    # Meshes to compute mesh alignment for.
    cfg.DENSEPOSE_EVALUATION.MESH_ALIGNMENT_MESH_NAMES = []
48
+
49
+
50
def add_bootstrap_config(cfg: CN) -> None:
    """
    Add top-level options for dataset bootstrapping: the list of bootstrap
    datasets and the model used to produce the sampled annotations.
    """
    cfg.BOOTSTRAP_DATASETS = []
    cfg.BOOTSTRAP_MODEL = CN()
    cfg.BOOTSTRAP_MODEL.WEIGHTS = ""
    cfg.BOOTSTRAP_MODEL.DEVICE = "cuda"
57
+
58
+
59
def get_bootstrap_dataset_config() -> CN:
    """
    Build the default configuration node for a single bootstrap dataset.

    Return:
        CN: a fresh node describing the dataset, its image loader,
        inference batching, data sampler and filter.
    """
    node = CN()
    node.DATASET = ""
    # ratio used to mix data loaders
    node.RATIO = 0.1

    # image loader
    node.IMAGE_LOADER = CN(new_allowed=True)
    node.IMAGE_LOADER.TYPE = ""
    node.IMAGE_LOADER.BATCH_SIZE = 4
    node.IMAGE_LOADER.NUM_WORKERS = 4
    node.IMAGE_LOADER.CATEGORIES = []
    node.IMAGE_LOADER.MAX_COUNT_PER_CATEGORY = 1_000_000
    node.IMAGE_LOADER.CATEGORY_TO_CLASS_MAPPING = CN(new_allowed=True)

    # inference
    node.INFERENCE = CN()
    # batch size for model inputs
    node.INFERENCE.INPUT_BATCH_SIZE = 4
    # batch size to group model outputs
    node.INFERENCE.OUTPUT_BATCH_SIZE = 2

    # sampled data
    node.DATA_SAMPLER = CN(new_allowed=True)
    node.DATA_SAMPLER.TYPE = ""
    node.DATA_SAMPLER.USE_GROUND_TRUTH_CATEGORIES = False

    # filter
    node.FILTER = CN(new_allowed=True)
    node.FILTER.TYPE = ""
    return node
86
+
87
+
88
def load_bootstrap_config(cfg: CN) -> None:
    """
    Normalize bootstrap dataset entries in place.

    Bootstrap datasets are given as a list of `dict` that are not automatically
    converted into CfgNode. This method processes all bootstrap dataset entries
    and ensures that they are in CfgNode format and comply with the specification
    (defaults come from :func:`get_bootstrap_dataset_config`).
    """
    if not cfg.BOOTSTRAP_DATASETS:
        return

    def _to_cfg_node(dataset_cfg) -> CN:
        # Start from the defaults and overlay the user-provided entry.
        node = get_bootstrap_dataset_config().clone()
        node.merge_from_other_cfg(CN(dataset_cfg))
        return node

    cfg.BOOTSTRAP_DATASETS = [_to_cfg_node(entry) for entry in cfg.BOOTSTRAP_DATASETS]
103
+
104
+
105
def add_densepose_head_cse_config(cfg: CN) -> None:
    """
    Add configuration options for Continuous Surface Embeddings (CSE)
    under ``cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE``.

    Args:
        cfg (CN): config node to extend in place
    """
    _C = cfg
    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE = CN()
    # Dimensionality D of the embedding space
    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE = 16
    # Embedder specifications for various mesh IDs
    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDERS = CN(new_allowed=True)
    # normalization coefficient for embedding distances
    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDING_DIST_GAUSS_SIGMA = 0.01
    # normalization coefficient for geodesic distances
    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.GEODESIC_DIST_GAUSS_SIGMA = 0.01
    # embedding loss weight
    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_LOSS_WEIGHT = 0.6
    # embedding loss name, currently the following options are supported:
    # - EmbeddingLoss: cross-entropy on vertex labels
    # - SoftEmbeddingLoss: cross-entropy on vertex label combined with
    #   Gaussian penalty on distance between vertices
    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_LOSS_NAME = "EmbeddingLoss"
    # optimizer hyperparameters (per-parameter-group LR multipliers)
    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.FEATURES_LR_FACTOR = 1.0
    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDING_LR_FACTOR = 1.0
    # Shape to shape cycle consistency loss parameters:
    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS = CN({"ENABLED": False})
    # shape to shape cycle consistency loss weight
    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.WEIGHT = 0.025
    # norm type used for loss computation
    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.NORM_P = 2
    # normalization term for embedding similarity matrices
    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.TEMPERATURE = 0.05
    # maximum number of vertices to include into shape to shape cycle loss
    # if negative or zero, all vertices are considered
    # if positive, random subset of vertices of given size is considered
    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.MAX_NUM_VERTICES = 4936
    # Pixel to shape cycle consistency loss parameters:
    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS = CN({"ENABLED": False})
    # pixel to shape cycle consistency loss weight
    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.WEIGHT = 0.0001
    # norm type used for loss computation
    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.NORM_P = 2
    # map images to all meshes and back (if false, use only gt meshes from the batch)
    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.USE_ALL_MESHES_NOT_GT_ONLY = False
    # Randomly select at most this number of pixels from every instance
    # if negative or zero, all vertices are considered
    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.NUM_PIXELS_TO_SAMPLE = 100
    # normalization factor for pixel to pixel distances (higher value = smoother distribution)
    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.PIXEL_SIGMA = 5.0
    # temperatures for the pixel-to-vertex / vertex-to-pixel terms
    # (presumably softmax temperatures — see the loss implementation)
    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.TEMPERATURE_PIXEL_TO_VERTEX = 0.05
    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.TEMPERATURE_VERTEX_TO_PIXEL = 0.05
156
+
157
+
158
def add_densepose_head_config(cfg: CN) -> None:
    """
    Add config for the DensePose head under ``cfg.MODEL.ROI_DENSEPOSE_HEAD``,
    including decoder, DeepLab, confidence and CSE sub-options.

    Args:
        cfg (CN): config node to extend in place
    """
    _C = cfg

    _C.MODEL.DENSEPOSE_ON = True

    _C.MODEL.ROI_DENSEPOSE_HEAD = CN()
    _C.MODEL.ROI_DENSEPOSE_HEAD.NAME = ""
    _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS = 8
    # Number of parts used for point labels
    _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES = 24
    _C.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL = 4
    _C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM = 512
    _C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL = 3
    _C.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE = 2
    _C.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE = 112
    _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE = "ROIAlignV2"
    _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION = 28
    _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO = 2
    _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS = 2  # 15 or 2
    # Overlap threshold for an RoI to be considered foreground (if >= FG_IOU_THRESHOLD)
    _C.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD = 0.7
    # Loss weights for annotation masks.(14 Parts)
    _C.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS = 5.0
    # Loss weights for surface parts. (24 Parts)
    _C.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS = 1.0
    # Loss weights for UV regression.
    _C.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS = 0.01
    # Coarse segmentation is trained using instance segmentation task data
    _C.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS = False
    # For Decoder
    _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON = True
    _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES = 256
    _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS = 256
    _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM = ""
    _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE = 4
    # For DeepLab head
    _C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB = CN()
    _C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NORM = "GN"
    _C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NONLOCAL_ON = 0
    # Predictor class name, must be registered in DENSEPOSE_PREDICTOR_REGISTRY
    # Some registered predictors:
    # "DensePoseChartPredictor": predicts segmentation and UV coordinates for predefined charts
    # "DensePoseChartWithConfidencePredictor": predicts segmentation, UV coordinates
    #     and associated confidences for predefined charts (default)
    # "DensePoseEmbeddingWithConfidencePredictor": predicts segmentation, embeddings
    #     and associated confidences for CSE
    _C.MODEL.ROI_DENSEPOSE_HEAD.PREDICTOR_NAME = "DensePoseChartWithConfidencePredictor"
    # Loss class name, must be registered in DENSEPOSE_LOSS_REGISTRY
    # Some registered losses:
    # "DensePoseChartLoss": loss for chart-based models that estimate
    #     segmentation and UV coordinates
    # "DensePoseChartWithConfidenceLoss": loss for chart-based models that estimate
    #     segmentation, UV coordinates and the corresponding confidences (default)
    _C.MODEL.ROI_DENSEPOSE_HEAD.LOSS_NAME = "DensePoseChartWithConfidenceLoss"
    # Confidences
    # Enable learning UV confidences (variances) along with the actual values
    _C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE = CN({"ENABLED": False})
    # UV confidence lower bound
    _C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.EPSILON = 0.01
    # Enable learning segmentation confidences (variances) along with the actual values
    _C.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE = CN({"ENABLED": False})
    # Segmentation confidence lower bound
    _C.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE.EPSILON = 0.01
    # Statistical model type for confidence learning, possible values:
    # - "iid_iso": statistically independent identically distributed residuals
    #    with isotropic covariance
    # - "indep_aniso": statistically independent residuals with anisotropic
    #    covariances
    _C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.TYPE = "iid_iso"
    # List of angles for rotation in data augmentation during training
    _C.INPUT.ROTATION_ANGLES = [0]
    _C.TEST.AUG.ROTATION_ANGLES = ()  # Rotation TTA

    # CSE options live in their own helper to keep this function readable.
    add_densepose_head_cse_config(cfg)
235
+
236
+
237
def add_hrnet_config(cfg: CN) -> None:
    """
    Add config for the HRNet backbone (HigherHRNet w32 defaults) and its
    HRFPN neck.
    """
    # For HigherHRNet w32
    cfg.MODEL.HRNET = CN()
    cfg.MODEL.HRNET.STEM_INPLANES = 64

    cfg.MODEL.HRNET.STAGE2 = CN()
    cfg.MODEL.HRNET.STAGE2.NUM_MODULES = 1
    cfg.MODEL.HRNET.STAGE2.NUM_BRANCHES = 2
    cfg.MODEL.HRNET.STAGE2.BLOCK = "BASIC"
    cfg.MODEL.HRNET.STAGE2.NUM_BLOCKS = [4, 4]
    cfg.MODEL.HRNET.STAGE2.NUM_CHANNELS = [32, 64]
    cfg.MODEL.HRNET.STAGE2.FUSE_METHOD = "SUM"

    cfg.MODEL.HRNET.STAGE3 = CN()
    cfg.MODEL.HRNET.STAGE3.NUM_MODULES = 4
    cfg.MODEL.HRNET.STAGE3.NUM_BRANCHES = 3
    cfg.MODEL.HRNET.STAGE3.BLOCK = "BASIC"
    cfg.MODEL.HRNET.STAGE3.NUM_BLOCKS = [4, 4, 4]
    cfg.MODEL.HRNET.STAGE3.NUM_CHANNELS = [32, 64, 128]
    cfg.MODEL.HRNET.STAGE3.FUSE_METHOD = "SUM"

    cfg.MODEL.HRNET.STAGE4 = CN()
    cfg.MODEL.HRNET.STAGE4.NUM_MODULES = 3
    cfg.MODEL.HRNET.STAGE4.NUM_BRANCHES = 4
    cfg.MODEL.HRNET.STAGE4.BLOCK = "BASIC"
    cfg.MODEL.HRNET.STAGE4.NUM_BLOCKS = [4, 4, 4, 4]
    cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS = [32, 64, 128, 256]
    cfg.MODEL.HRNET.STAGE4.FUSE_METHOD = "SUM"

    cfg.MODEL.HRNET.HRFPN = CN()
    cfg.MODEL.HRNET.HRFPN.OUT_CHANNELS = 256
270
+
271
+
272
def add_densepose_config(cfg: CN) -> None:
    """
    Add all DensePose-specific options to ``cfg``: head, HRNet backbone,
    bootstrapping, dataset categories and evaluation.

    Args:
        cfg (CN): config node to extend in place
    """
    add_densepose_head_config(cfg)
    add_hrnet_config(cfg)
    add_bootstrap_config(cfg)
    add_dataset_category_config(cfg)
    add_evaluation_config(cfg)
Leffa/densepose/converters/__init__.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from .hflip import HFlipConverter
4
+ from .to_mask import ToMaskConverter
5
+ from .to_chart_result import ToChartResultConverter, ToChartResultConverterWithConfidences
6
+ from .segm_to_mask import (
7
+ predictor_output_with_fine_and_coarse_segm_to_mask,
8
+ predictor_output_with_coarse_segm_to_mask,
9
+ resample_fine_and_coarse_segm_to_bbox,
10
+ )
11
+ from .chart_output_to_chart_result import (
12
+ densepose_chart_predictor_output_to_result,
13
+ densepose_chart_predictor_output_to_result_with_confidences,
14
+ )
15
+ from .chart_output_hflip import densepose_chart_predictor_output_hflip
Leffa/densepose/converters/base.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from typing import Any, Tuple, Type
4
+ import torch
5
+
6
+
7
class BaseConverter:
    """
    Base class for type-driven converters.

    A converter maps instances of registered source types to a particular
    destination type. Each source type registers its own converter, and a
    registration is valid for all descendants of that type: lookups walk
    the base-class chain and cache the result for the queried type.
    """

    @classmethod
    def register(cls, from_type: Type, converter: Any = None):
        """
        Register ``converter`` for ``from_type``.

        Can be called directly, or used as a decorator (when ``converter``
        is None, the returned wrapper performs the registration).

        Args:
            from_type (type): type to register the converter for;
                all instances of this type will use the same converter
            converter (callable): converter to be registered for the given
                type; if None, this method is assumed to be a decorator for the converter
        """
        if converter is not None:
            cls._do_register(from_type, converter)

        # NOTE: a wrapper is returned even in the direct-call form above,
        # matching the historical behavior of this API.
        def wrapper(converter: Any) -> Any:
            cls._do_register(from_type, converter)
            return converter

        return wrapper

    @classmethod
    def _do_register(cls, from_type: Type, converter: Any):
        # ``registry`` is supplied by each concrete subclass.
        cls.registry[from_type] = converter  # pyre-ignore[16]

    @classmethod
    def _lookup_converter(cls, from_type: Type) -> Any:
        """
        Recursively look up a converter for ``from_type``. When a converter
        is found for some base class, it is also registered for this class
        to save on further lookups.

        Args:
            from_type: type for which to find a converter
        Return:
            callable or None - registered converter or None
            if no suitable entry was found in the registry
        """
        if from_type in cls.registry:  # pyre-ignore[16]
            return cls.registry[from_type]
        for parent in from_type.__bases__:
            found = cls._lookup_converter(parent)
            if found is None:
                continue
            # Cache the inherited converter for the queried type.
            cls._do_register(from_type, found)
            return found
        return None

    @classmethod
    def convert(cls, instance: Any, *args, **kwargs):
        """
        Convert ``instance`` to the destination type using the converter
        registered for its type (or the nearest registered base type).

        Args:
            instance: source instance to convert to the destination type
        Return:
            An instance of the destination type obtained from the source instance
        Raises:
            KeyError: if no suitable converter was found
        """
        instance_type = type(instance)
        converter = cls._lookup_converter(instance_type)
        if converter is not None:
            return converter(instance, *args, **kwargs)
        # ``dst_type`` is supplied by each concrete subclass.
        output_type_str = "itself" if cls.dst_type is None else cls.dst_type  # pyre-ignore[16]
        raise KeyError(f"Could not find converter from {instance_type} to {output_type_str}")
85
+
86
+
87
# A bounding box as four plain Python ints.
IntTupleBox = Tuple[int, int, int, int]


def make_int_box(box: torch.Tensor) -> IntTupleBox:
    """
    Convert a 4-element box tensor to a tuple of plain Python ints
    (fractional coordinates are truncated via ``Tensor.long``).
    """
    x0, y0, x1, y1 = box.long().tolist()
    return (x0, y0, x1, y1)
Leffa/densepose/converters/builtin.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from ..structures import DensePoseChartPredictorOutput, DensePoseEmbeddingPredictorOutput
4
+ from . import (
5
+ HFlipConverter,
6
+ ToChartResultConverter,
7
+ ToChartResultConverterWithConfidences,
8
+ ToMaskConverter,
9
+ densepose_chart_predictor_output_hflip,
10
+ densepose_chart_predictor_output_to_result,
11
+ densepose_chart_predictor_output_to_result_with_confidences,
12
+ predictor_output_with_coarse_segm_to_mask,
13
+ predictor_output_with_fine_and_coarse_segm_to_mask,
14
+ )
15
+
16
# Wire up the default converters: associate each predictor output type with
# the callables that turn it into masks, chart results or flipped outputs.

# Mask conversion: chart-based outputs go through the fine+coarse
# segmentation path, embedding (CSE) outputs through the coarse-only path.
ToMaskConverter.register(
    DensePoseChartPredictorOutput, predictor_output_with_fine_and_coarse_segm_to_mask
)
ToMaskConverter.register(
    DensePoseEmbeddingPredictorOutput, predictor_output_with_coarse_segm_to_mask
)

# Chart result conversion, without and with confidence estimates.
ToChartResultConverter.register(
    DensePoseChartPredictorOutput, densepose_chart_predictor_output_to_result
)

ToChartResultConverterWithConfidences.register(
    DensePoseChartPredictorOutput, densepose_chart_predictor_output_to_result_with_confidences
)

# Horizontal-flip transform for chart-based outputs.
HFlipConverter.register(DensePoseChartPredictorOutput, densepose_chart_predictor_output_hflip)
Leffa/densepose/converters/chart_output_hflip.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ from dataclasses import fields
3
+ import torch
4
+
5
+ from densepose.structures import DensePoseChartPredictorOutput, DensePoseTransformData
6
+
7
+
8
def densepose_chart_predictor_output_hflip(
    densepose_predictor_output: DensePoseChartPredictorOutput,
    transform_data: DensePoseTransformData,
) -> DensePoseChartPredictorOutput:
    """
    Change to take into account a Horizontal flip: spatially flip every
    tensor field along the width axis, then remap part labels and UV values
    to their left/right symmetric counterparts using ``transform_data``.

    NOTE(review): the input object is mutated in place before a new
    instance is constructed from its fields.
    """
    if len(densepose_predictor_output) > 0:

        PredictorOutput = type(densepose_predictor_output)
        output_dict = {}

        # Spatial flip: reverse the width dimension (dim 3, assuming NCHW —
        # consistent with the [N, C, H, W] usage elsewhere in this module).
        for field in fields(densepose_predictor_output):
            field_value = getattr(densepose_predictor_output, field.name)
            # flip tensors
            if isinstance(field_value, torch.Tensor):
                setattr(densepose_predictor_output, field.name, torch.flip(field_value, [3]))

        # Semantic flip: swap symmetric part channels and mirror UV values.
        densepose_predictor_output = _flip_iuv_semantics_tensor(
            densepose_predictor_output, transform_data
        )
        densepose_predictor_output = _flip_segm_semantics_tensor(
            densepose_predictor_output, transform_data
        )

        # Rebuild a fresh predictor output from the (mutated) fields.
        for field in fields(densepose_predictor_output):
            output_dict[field.name] = getattr(densepose_predictor_output, field.name)

        return PredictorOutput(**output_dict)
    else:
        # Empty output (no detections): nothing to flip.
        return densepose_predictor_output
39
+
40
+
41
def _flip_iuv_semantics_tensor(
    densepose_predictor_output: DensePoseChartPredictorOutput,
    dp_transform_data: DensePoseTransformData,
) -> DensePoseChartPredictorOutput:
    """
    Remap I (part label) and UV channels to their horizontally symmetric
    counterparts. Mutates ``densepose_predictor_output`` in place and
    returns it.
    """
    point_label_symmetries = dp_transform_data.point_label_symmetries
    uv_symmetries = dp_transform_data.uv_symmetries

    N, C, H, W = densepose_predictor_output.u.shape
    # Quantize U/V in [0, 1] to 256 integer bins to index the symmetry
    # lookup tables; channel 0 is background, so only channels 1: are used.
    u_loc = (densepose_predictor_output.u[:, 1:, :, :].clamp(0, 1) * 255).long()
    v_loc = (densepose_predictor_output.v[:, 1:, :, :].clamp(0, 1) * 255).long()
    # Per-part channel index broadcast to the full [N, C-1, H, W] grid.
    Iindex = torch.arange(C - 1, device=densepose_predictor_output.u.device)[
        None, :, None, None
    ].expand(N, C - 1, H, W)
    # Look up the mirrored U/V value for each (part, v, u) triple.
    densepose_predictor_output.u[:, 1:, :, :] = uv_symmetries["U_transforms"][Iindex, v_loc, u_loc]
    densepose_predictor_output.v[:, 1:, :, :] = uv_symmetries["V_transforms"][Iindex, v_loc, u_loc]

    # Permute channels so each left/right part swaps with its symmetric one.
    for el in ["fine_segm", "u", "v"]:
        densepose_predictor_output.__dict__[el] = densepose_predictor_output.__dict__[el][
            :, point_label_symmetries, :, :
        ]
    return densepose_predictor_output
62
+
63
+
64
def _flip_segm_semantics_tensor(
    densepose_predictor_output: DensePoseChartPredictorOutput, dp_transform_data
):
    """
    Swap symmetric coarse-segmentation channels after a horizontal flip.

    When the coarse segmentation has more than 2 channels, they are permuted
    according to ``dp_transform_data.mask_label_symmetries`` (in place);
    otherwise the output is returned unchanged.
    """
    coarse_segm = densepose_predictor_output.coarse_segm
    if coarse_segm.shape[1] > 2:
        symmetries = dp_transform_data.mask_label_symmetries
        densepose_predictor_output.coarse_segm = coarse_segm[:, symmetries, :, :]
    return densepose_predictor_output
Leffa/densepose/converters/chart_output_to_chart_result.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from typing import Dict
4
+ import torch
5
+ from torch.nn import functional as F
6
+
7
+ from detectron2.structures.boxes import Boxes, BoxMode
8
+
9
+ from ..structures import (
10
+ DensePoseChartPredictorOutput,
11
+ DensePoseChartResult,
12
+ DensePoseChartResultWithConfidences,
13
+ )
14
+ from . import resample_fine_and_coarse_segm_to_bbox
15
+ from .base import IntTupleBox, make_int_box
16
+
17
+
18
def resample_uv_tensors_to_bbox(
    u: torch.Tensor,
    v: torch.Tensor,
    labels: torch.Tensor,
    box_xywh_abs: IntTupleBox,
) -> torch.Tensor:
    """
    Resamples U and V coordinate estimates for the given bounding box

    Args:
        u (tensor [1, C, H, W] of float): U coordinates
        v (tensor [1, C, H, W] of float): V coordinates
        labels (tensor [H, W] of long): labels obtained by resampling segmentation
            outputs for the given bounding box
        box_xywh_abs (tuple of 4 int): bounding box that corresponds to predictor outputs
    Return:
        Resampled U and V coordinates - a tensor [2, H, W] of float
    """
    _, _, w, h = box_xywh_abs
    # Guard against degenerate (zero-sized) boxes.
    w, h = max(int(w), 1), max(int(h), 1)
    u_bbox = F.interpolate(u, (h, w), mode="bilinear", align_corners=False)
    v_bbox = F.interpolate(v, (h, w), mode="bilinear", align_corners=False)
    uv = torch.zeros([2, h, w], dtype=torch.float32, device=u.device)
    # Channel 0 is background; copy each part channel only where the label
    # map assigns that part.
    for part_id in range(1, u_bbox.size(1)):
        part_mask = labels == part_id
        uv[0][part_mask] = u_bbox[0, part_id][part_mask]
        uv[1][part_mask] = v_bbox[0, part_id][part_mask]
    return uv
+
47
+
48
def resample_uv_to_bbox(
    predictor_output: DensePoseChartPredictorOutput,
    labels: torch.Tensor,
    box_xywh_abs: IntTupleBox,
) -> torch.Tensor:
    """
    Resamples U and V coordinate estimates for the given bounding box.

    Thin wrapper around :func:`resample_uv_tensors_to_bbox` that extracts
    the ``u`` and ``v`` tensors from the predictor output.

    Args:
        predictor_output (DensePoseChartPredictorOutput): DensePose predictor
            output to be resampled
        labels (tensor [H, W] of long): labels obtained by resampling segmentation
            outputs for the given bounding box
        box_xywh_abs (tuple of 4 int): bounding box that corresponds to predictor outputs
    Return:
        Resampled U and V coordinates - a tensor [2, H, W] of float
    """
    return resample_uv_tensors_to_bbox(
        predictor_output.u, predictor_output.v, labels, box_xywh_abs
    )
+ )
71
+
72
+
73
def densepose_chart_predictor_output_to_result(
    predictor_output: DensePoseChartPredictorOutput, boxes: Boxes
) -> DensePoseChartResult:
    """
    Convert a single DensePose chart predictor output to a chart result.

    Args:
        predictor_output (DensePoseChartPredictorOutput): DensePose predictor
            output to be converted to results, must contain only 1 output
        boxes (Boxes): bounding box that corresponds to the predictor output,
            must contain only 1 bounding box
    Return:
        DensePose chart-based result (DensePoseChartResult)
    """
    assert len(predictor_output) == 1 and len(boxes) == 1, (
        f"Predictor output to result conversion can operate only single outputs"
        f", got {len(predictor_output)} predictor outputs and {len(boxes)} boxes"
    )

    # Work with integer XYWH coordinates of the single box.
    boxes_xywh_abs = BoxMode.convert(boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    box_xywh = make_int_box(boxes_xywh_abs[0])

    labels = resample_fine_and_coarse_segm_to_bbox(predictor_output, box_xywh).squeeze(0)
    uv = resample_uv_to_bbox(predictor_output, labels, box_xywh)
    return DensePoseChartResult(labels=labels, uv=uv)
99
+
100
+
101
def resample_confidences_to_bbox(
    predictor_output: DensePoseChartPredictorOutput,
    labels: torch.Tensor,
    box_xywh_abs: IntTupleBox,
) -> Dict[str, torch.Tensor]:
    """
    Resamples confidences for the given bounding box

    Args:
        predictor_output (DensePoseChartPredictorOutput): DensePose predictor
            output to be resampled
        labels (tensor [H, W] of long): labels obtained by resampling segmentation
            outputs for the given bounding box
        box_xywh_abs (tuple of 4 int): bounding box that corresponds to predictor outputs
    Return:
        Resampled confidences - a dict of [H, W] tensors of float (entries for
        confidences absent from the predictor output stay None)
    """
    _, _, w, h = box_xywh_abs
    # Guard against degenerate (zero-sized) boxes.
    w = max(int(w), 1)
    h = max(int(h), 1)

    confidence_names = [
        "sigma_1",
        "sigma_2",
        "kappa_u",
        "kappa_v",
        "fine_segm_confidence",
        "coarse_segm_confidence",
    ]
    confidence_results = {key: None for key in confidence_names}
    present_names = [
        key for key in confidence_names if getattr(predictor_output, key) is not None
    ]
    num_channels = predictor_output.u.size(1)
    confidence_base = torch.zeros([h, w], dtype=torch.float32, device=predictor_output.u.device)

    for key in present_names:
        resampled_confidence = F.interpolate(
            getattr(predictor_output, key),
            (h, w),
            mode="bilinear",
            align_corners=False,
        )
        if resampled_confidence.size(1) == num_channels:
            # Part-based confidence: assemble per-pixel values from the
            # channel matching each pixel's part label.
            result = confidence_base.clone()
            for part_id in range(1, num_channels):
                part_mask = labels == part_id
                result[part_mask] = resampled_confidence[0, part_id][part_mask]
        else:
            # Confidence is not part-based, fill the data with the first
            # channel (targeted for segmentation confidences that have
            # only 1 channel).
            result = resampled_confidence[0, 0]

        confidence_results[key] = result

    return confidence_results  # pyre-ignore[7]
+ return confidence_results # pyre-ignore[7]
160
+
161
+
162
def densepose_chart_predictor_output_to_result_with_confidences(
    predictor_output: DensePoseChartPredictorOutput, boxes: Boxes
) -> DensePoseChartResultWithConfidences:
    """
    Convert a single DensePose chart predictor output (with confidences)
    to a chart result with confidences.

    Args:
        predictor_output (DensePoseChartPredictorOutput): DensePose predictor
            output with confidences to be converted to results, must contain
            only 1 output
        boxes (Boxes): bounding box that corresponds to the predictor output,
            must contain only 1 bounding box
    Return:
        DensePose chart-based result with confidences
        (DensePoseChartResultWithConfidences)
    """
    assert len(predictor_output) == 1 and len(boxes) == 1, (
        f"Predictor output to result conversion can operate only single outputs"
        f", got {len(predictor_output)} predictor outputs and {len(boxes)} boxes"
    )

    # Work with integer XYWH coordinates of the single box.
    boxes_xywh_abs = BoxMode.convert(boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    box_xywh = make_int_box(boxes_xywh_abs[0])

    labels = resample_fine_and_coarse_segm_to_bbox(predictor_output, box_xywh).squeeze(0)
    uv = resample_uv_to_bbox(predictor_output, labels, box_xywh)
    confidences = resample_confidences_to_bbox(predictor_output, labels, box_xywh)
    return DensePoseChartResultWithConfidences(labels=labels, uv=uv, **confidences)
Leffa/densepose/converters/hflip.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from typing import Any
4
+
5
+ from .base import BaseConverter
6
+
7
+
8
class HFlipConverter(BaseConverter):
    """
    Applies horizontal flips to DensePose predictor outputs.
    Each DensePose predictor output type has to register its own
    conversion strategy (see `BaseConverter`).
    """

    registry = {}
    dst_type = None

    @classmethod
    # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
    #  inconsistently.
    def convert(cls, predictor_outputs: Any, transform_data: Any, *args, **kwargs):
        """
        Performs a horizontal flip on DensePose predictor outputs.
        Lookup is recursive over base classes, so derived output types
        need no explicit registration.

        Args:
            predictor_outputs: DensePose predictor output to be flipped
            transform_data: any auxiliary data needed to perform the flip
        Return:
            An instance of the same type as predictor_outputs
        """
        return super().convert(predictor_outputs, transform_data, *args, **kwargs)
Leffa/densepose/converters/segm_to_mask.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from typing import Any
4
+ import torch
5
+ from torch.nn import functional as F
6
+
7
+ from detectron2.structures import BitMasks, Boxes, BoxMode
8
+
9
+ from .base import IntTupleBox, make_int_box
10
+ from .to_mask import ImageSizeType
11
+
12
+
13
def resample_coarse_segm_tensor_to_bbox(coarse_segm: torch.Tensor, box_xywh_abs: IntTupleBox):
    """
    Resample a coarse segmentation tensor to the given bounding box
    and derive a label for each pixel of the box.

    Args:
        coarse_segm: float tensor of shape [1, K, Hout, Wout]
        box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
            corner coordinates, width (W) and height (H)
    Return:
        Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
    """
    _, _, w, h = box_xywh_abs
    # guard against degenerate boxes: interpolation needs a size of at least 1
    target_hw = (max(int(h), 1), max(int(w), 1))
    scores = F.interpolate(coarse_segm, target_hw, mode="bilinear", align_corners=False)
    return scores.argmax(dim=1)
30
+
31
+
32
def resample_fine_and_coarse_segm_tensors_to_bbox(
    fine_segm: torch.Tensor, coarse_segm: torch.Tensor, box_xywh_abs: IntTupleBox
):
    """
    Resample fine and coarse segmentation tensors to the given bounding box
    and derive a label for each pixel of the box.

    Args:
        fine_segm: float tensor of shape [1, C, Hout, Wout]
        coarse_segm: float tensor of shape [1, K, Hout, Wout]
        box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
            corner coordinates, width (W) and height (H)
    Return:
        Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
    """
    _, _, w, h = box_xywh_abs
    # clamp to 1x1 to keep interpolation valid for degenerate boxes
    target_hw = (max(int(h), 1), max(int(w), 1))
    # foreground mask from the coarse segmentation (label 0 is background)
    coarse_labels = F.interpolate(
        coarse_segm, target_hw, mode="bilinear", align_corners=False
    ).argmax(dim=1)
    fg_mask = (coarse_labels > 0).long()
    # fine part labels, zeroed wherever the coarse segmentation says background
    fine_labels = F.interpolate(
        fine_segm, target_hw, mode="bilinear", align_corners=False
    ).argmax(dim=1)
    return fine_labels * fg_mask
63
+
64
+
65
def resample_fine_and_coarse_segm_to_bbox(predictor_output: Any, box_xywh_abs: IntTupleBox):
    """
    Resample the fine and coarse segmentation outputs of a predictor to the
    given bounding box and derive per-pixel labels.

    Args:
        predictor_output: DensePose predictor output holding `fine_segm`
            and `coarse_segm` tensors
        box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
            corner coordinates, width (W) and height (H)
    Return:
        Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
    """
    return resample_fine_and_coarse_segm_tensors_to_bbox(
        predictor_output.fine_segm, predictor_output.coarse_segm, box_xywh_abs
    )
83
+
84
+
85
def predictor_output_with_coarse_segm_to_mask(
    predictor_output: Any, boxes: Boxes, image_size_hw: ImageSizeType
) -> BitMasks:
    """
    Convert predictor output with coarse segmentation to a mask.
    Assumes that predictor output has the following attributes:
     - coarse_segm (tensor of size [N, D, H, W]): coarse segmentation
       unnormalized scores for N instances; D is the number of coarse
       segmentation labels, H and W is the resolution of the estimate

    Args:
        predictor_output: DensePose predictor output to be converted to mask
        boxes (Boxes): bounding boxes that correspond to the DensePose
            predictor outputs
        image_size_hw (tuple [int, int]): image height Himg and width Wimg
    Return:
        BitMasks that contain a bool tensor of size [N, Himg, Wimg] with
        a mask of the size of the image for each instance
    """
    H, W = image_size_hw
    boxes_xywh_abs = BoxMode.convert(
        boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
    )
    masks = torch.zeros(
        (len(boxes_xywh_abs), H, W), dtype=torch.bool, device=boxes.tensor.device
    )
    for i, box_raw in enumerate(boxes_xywh_abs):
        box_xywh = make_int_box(box_raw)
        x, y, w, h = box_xywh
        # per-instance coarse labels resampled to the box; nonzero = foreground
        box_mask = resample_coarse_segm_tensor_to_bbox(
            predictor_output[i].coarse_segm, box_xywh
        )
        masks[i, y : y + h, x : x + w] = box_mask
    return BitMasks(masks)
116
+
117
+
118
def predictor_output_with_fine_and_coarse_segm_to_mask(
    predictor_output: Any, boxes: Boxes, image_size_hw: ImageSizeType
) -> BitMasks:
    """
    Convert predictor output with coarse and fine segmentation to a mask.
    Assumes that predictor output has the following attributes:
     - coarse_segm (tensor of size [N, D, H, W]): coarse segmentation
       unnormalized scores for N instances; D is the number of coarse
       segmentation labels, H and W is the resolution of the estimate
     - fine_segm (tensor of size [N, C, H, W]): fine segmentation
       unnormalized scores for N instances; C is the number of fine
       segmentation labels, H and W is the resolution of the estimate

    Args:
        predictor_output: DensePose predictor output to be converted to mask
        boxes (Boxes): bounding boxes that correspond to the DensePose
            predictor outputs
        image_size_hw (tuple [int, int]): image height Himg and width Wimg
    Return:
        BitMasks that contain a bool tensor of size [N, Himg, Wimg] with
        a mask of the size of the image for each instance
    """
    H, W = image_size_hw
    boxes_xywh_abs = BoxMode.convert(
        boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
    )
    masks = torch.zeros(
        (len(boxes_xywh_abs), H, W), dtype=torch.bool, device=boxes.tensor.device
    )
    for i, box_raw in enumerate(boxes_xywh_abs):
        box_xywh = make_int_box(box_raw)
        x, y, w, h = box_xywh
        # combined fine+coarse labels; label > 0 marks foreground pixels
        labels_i = resample_fine_and_coarse_segm_to_bbox(predictor_output[i], box_xywh)
        masks[i, y : y + h, x : x + w] = labels_i > 0
    return BitMasks(masks)
Leffa/densepose/converters/to_chart_result.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from typing import Any
4
+
5
+ from detectron2.structures import Boxes
6
+
7
+ from ..structures import DensePoseChartResult, DensePoseChartResultWithConfidences
8
+ from .base import BaseConverter
9
+
10
+
11
class ToChartResultConverter(BaseConverter):
    """
    Converts various DensePose predictor outputs to chart-based DensePose
    results. Each predictor output type has to register its own conversion
    strategy (see `BaseConverter`).
    """

    registry = {}
    dst_type = DensePoseChartResult

    @classmethod
    # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
    #  inconsistently.
    def convert(cls, predictor_outputs: Any, boxes: Boxes, *args, **kwargs) -> DensePoseChartResult:
        """
        Convert DensePose predictor outputs to a DensePose chart result using
        a registered converter. Lookup is recursive over base classes, so
        derived output types need no explicit registration.

        Args:
            predictor_outputs: DensePose predictor output to be converted
            boxes (Boxes): bounding boxes that correspond to the DensePose
                predictor outputs
        Return:
            An instance of DensePoseChartResult. If no suitable converter
            was found, raises KeyError
        """
        return super().convert(predictor_outputs, boxes, *args, **kwargs)
38
+
39
+
40
class ToChartResultConverterWithConfidences(BaseConverter):
    """
    Converts various DensePose predictor outputs (with confidence channels)
    to chart-based DensePose results. Each predictor output type has to
    register its own conversion strategy (see `BaseConverter`).
    """

    registry = {}
    dst_type = DensePoseChartResultWithConfidences

    @classmethod
    # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
    #  inconsistently.
    def convert(
        cls, predictor_outputs: Any, boxes: Boxes, *args, **kwargs
    ) -> DensePoseChartResultWithConfidences:
        """
        Convert DensePose predictor outputs to a DensePose chart result with
        confidences using a registered converter. Lookup is recursive over
        base classes, so derived output types need no explicit registration.

        Args:
            predictor_outputs: DensePose predictor output with confidences
                to be converted
            boxes (Boxes): bounding boxes that correspond to the DensePose
                predictor outputs
        Return:
            An instance of DensePoseChartResultWithConfidences. If no
            suitable converter was found, raises KeyError
        """
        return super().convert(predictor_outputs, boxes, *args, **kwargs)
Leffa/densepose/converters/to_mask.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from typing import Any, Tuple
4
+
5
+ from detectron2.structures import BitMasks, Boxes
6
+
7
+ from .base import BaseConverter
8
+
9
+ ImageSizeType = Tuple[int, int]
10
+
11
+
12
class ToMaskConverter(BaseConverter):
    """
    Converts various DensePose predictor outputs to bit masks
    (see `BitMasks`). Each predictor output type has to register its own
    conversion strategy (see `BaseConverter`).
    """

    registry = {}
    dst_type = BitMasks

    @classmethod
    # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
    #  inconsistently.
    def convert(
        cls,
        densepose_predictor_outputs: Any,
        boxes: Boxes,
        image_size_hw: ImageSizeType,
        *args,
        **kwargs
    ) -> BitMasks:
        """
        Convert DensePose predictor outputs to BitMasks using a registered
        converter. Lookup is recursive over base classes, so derived output
        types need no explicit registration.

        Args:
            densepose_predictor_outputs: DensePose predictor output to be
                converted to BitMasks
            boxes (Boxes): bounding boxes that correspond to the DensePose
                predictor outputs
            image_size_hw (tuple [int, int]): image height and width
        Return:
            An instance of `BitMasks`. If no suitable converter was found,
            raises KeyError
        """
        return super().convert(
            densepose_predictor_outputs, boxes, image_size_hw, *args, **kwargs
        )
Leffa/densepose/engine/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from .trainer import Trainer
Leffa/densepose/engine/trainer.py ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2
+
3
+ import logging
4
+ import os
5
+ from collections import OrderedDict
6
+ from typing import List, Optional, Union
7
+ import torch
8
+ from torch import nn
9
+
10
+ from detectron2.checkpoint import DetectionCheckpointer
11
+ from detectron2.config import CfgNode
12
+ from detectron2.engine import DefaultTrainer
13
+ from detectron2.evaluation import (
14
+ DatasetEvaluator,
15
+ DatasetEvaluators,
16
+ inference_on_dataset,
17
+ print_csv_format,
18
+ )
19
+ from detectron2.solver.build import get_default_optimizer_params, maybe_add_gradient_clipping
20
+ from detectron2.utils import comm
21
+ from detectron2.utils.events import EventWriter, get_event_storage
22
+
23
+ from densepose import DensePoseDatasetMapperTTA, DensePoseGeneralizedRCNNWithTTA, load_from_cfg
24
+ from densepose.data import (
25
+ DatasetMapper,
26
+ build_combined_loader,
27
+ build_detection_test_loader,
28
+ build_detection_train_loader,
29
+ build_inference_based_loaders,
30
+ has_inference_based_loaders,
31
+ )
32
+ from densepose.evaluation.d2_evaluator_adapter import Detectron2COCOEvaluatorAdapter
33
+ from densepose.evaluation.evaluator import DensePoseCOCOEvaluator, build_densepose_evaluator_storage
34
+ from densepose.modeling.cse import Embedder
35
+
36
+
37
class SampleCountingLoader:
    """
    Wraps a data loader and, for every batch it yields, records the number
    of instances per dataset in the event storage under "batch/<dataset>".
    """

    def __init__(self, loader):
        self.loader = loader

    def __iter__(self):
        inner = iter(self.loader)
        storage = get_event_storage()
        while True:
            try:
                batch = next(inner)
            except StopIteration:
                return
            # count instances contributed by each dataset in this batch
            counts = {}
            for data in batch:
                dataset_name = data["dataset"]
                counts[dataset_name] = counts.get(dataset_name, 0) + len(data["instances"])
            for dataset_name, num_inst in counts.items():
                storage.put_scalar(f"batch/{dataset_name}", num_inst)
            yield batch
+
60
+
61
class SampleCountMetricPrinter(EventWriter):
    """
    Event writer that logs the rolling average (window of 20) of all
    "batch/*" sample-count metrics accumulated in the event storage.
    """

    def __init__(self):
        self.logger = logging.getLogger(__name__)

    def write(self):
        storage = get_event_storage()
        batch_stats_strs = [
            f"{key} {buf.avg(20)}"
            for key, buf in storage.histories().items()
            if key.startswith("batch/")
        ]
        self.logger.info(", ".join(batch_stats_strs))
+
73
+
74
class Trainer(DefaultTrainer):
    """
    DensePose trainer: extends detectron2's `DefaultTrainer` with
    DensePose-specific evaluators, data loaders (including
    inference-based bootstrap loaders), optimizer parameter groups for
    CSE embedders, and test-time augmentation.
    """

    @classmethod
    def extract_embedder_from_model(cls, model: nn.Module) -> Optional[Embedder]:
        """Return the CSE embedder from the model's ROI heads, if present."""
        # unwrap DDP to reach the underlying model
        if isinstance(model, nn.parallel.DistributedDataParallel):
            model = model.module
        if hasattr(model, "roi_heads") and hasattr(model.roi_heads, "embedder"):
            return model.roi_heads.embedder
        return None

    # TODO: the only reason to copy the base class code here is to pass the embedder from
    # the model to the evaluator; that should be refactored to avoid unnecessary copy-pasting
    @classmethod
    def test(
        cls,
        cfg: CfgNode,
        model: nn.Module,
        evaluators: Optional[Union[DatasetEvaluator, List[DatasetEvaluator]]] = None,
    ):
        """
        Args:
            cfg (CfgNode):
            model (nn.Module):
            evaluators (DatasetEvaluator, list[DatasetEvaluator] or None): if None, will call
                :meth:`build_evaluator`. Otherwise, must have the same length as
                ``cfg.DATASETS.TEST``.

        Returns:
            dict: a dict of result metrics
        """
        logger = logging.getLogger(__name__)
        if isinstance(evaluators, DatasetEvaluator):
            evaluators = [evaluators]
        if evaluators is not None:
            assert len(cfg.DATASETS.TEST) == len(evaluators), "{} != {}".format(
                len(cfg.DATASETS.TEST), len(evaluators)
            )

        results = OrderedDict()
        for idx, dataset_name in enumerate(cfg.DATASETS.TEST):
            data_loader = cls.build_test_loader(cfg, dataset_name)
            # When evaluators are passed in as arguments,
            # implicitly assume that evaluators can be created before data_loader.
            if evaluators is not None:
                evaluator = evaluators[idx]
            else:
                try:
                    embedder = cls.extract_embedder_from_model(model)
                    evaluator = cls.build_evaluator(cfg, dataset_name, embedder=embedder)
                except NotImplementedError:
                    # NOTE: Logger.warn is a deprecated alias; use warning()
                    logger.warning(
                        "No evaluator found. Use `DefaultTrainer.test(evaluators=)`, "
                        "or implement its `build_evaluator` method."
                    )
                    results[dataset_name] = {}
                    continue
            if cfg.DENSEPOSE_EVALUATION.DISTRIBUTED_INFERENCE or comm.is_main_process():
                results_i = inference_on_dataset(model, data_loader, evaluator)
            else:
                results_i = {}
            results[dataset_name] = results_i
            if comm.is_main_process():
                assert isinstance(
                    results_i, dict
                ), "Evaluator must return a dict on the main process. Got {} instead.".format(
                    results_i
                )
                logger.info("Evaluation results for {} in csv format:".format(dataset_name))
                print_csv_format(results_i)

        if len(results) == 1:
            results = list(results.values())[0]
        return results

    @classmethod
    def build_evaluator(
        cls,
        cfg: CfgNode,
        dataset_name: str,
        output_folder: Optional[str] = None,
        embedder: Optional[Embedder] = None,
    ) -> DatasetEvaluators:
        """
        Build COCO bbox and (optionally) DensePose evaluators for a dataset.
        """
        if output_folder is None:
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
        evaluators = []
        distributed = cfg.DENSEPOSE_EVALUATION.DISTRIBUTED_INFERENCE
        # Note: we currently use COCO evaluator for both COCO and LVIS datasets
        # to have compatible metrics. LVIS bbox evaluator could also be used
        # with an adapter to properly handle filtered / mapped categories
        # evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type
        # if evaluator_type == "coco":
        #     evaluators.append(COCOEvaluator(dataset_name, output_dir=output_folder))
        # elif evaluator_type == "lvis":
        #     evaluators.append(LVISEvaluator(dataset_name, output_dir=output_folder))
        evaluators.append(
            Detectron2COCOEvaluatorAdapter(
                dataset_name, output_dir=output_folder, distributed=distributed
            )
        )
        if cfg.MODEL.DENSEPOSE_ON:
            storage = build_densepose_evaluator_storage(cfg, output_folder)
            evaluators.append(
                DensePoseCOCOEvaluator(
                    dataset_name,
                    distributed,
                    output_folder,
                    evaluator_type=cfg.DENSEPOSE_EVALUATION.TYPE,
                    min_iou_threshold=cfg.DENSEPOSE_EVALUATION.MIN_IOU_THRESHOLD,
                    storage=storage,
                    embedder=embedder,
                    should_evaluate_mesh_alignment=cfg.DENSEPOSE_EVALUATION.EVALUATE_MESH_ALIGNMENT,
                    mesh_alignment_mesh_names=cfg.DENSEPOSE_EVALUATION.MESH_ALIGNMENT_MESH_NAMES,
                )
            )
        return DatasetEvaluators(evaluators)

    @classmethod
    def build_optimizer(cls, cfg: CfgNode, model: nn.Module):
        """
        Build an SGD optimizer with dedicated LR factors for the CSE
        embedder's `features` and `embeddings` parameter groups.
        """
        params = get_default_optimizer_params(
            model,
            base_lr=cfg.SOLVER.BASE_LR,
            weight_decay_norm=cfg.SOLVER.WEIGHT_DECAY_NORM,
            bias_lr_factor=cfg.SOLVER.BIAS_LR_FACTOR,
            weight_decay_bias=cfg.SOLVER.WEIGHT_DECAY_BIAS,
            overrides={
                "features": {
                    "lr": cfg.SOLVER.BASE_LR * cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.FEATURES_LR_FACTOR,
                },
                "embeddings": {
                    "lr": cfg.SOLVER.BASE_LR * cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDING_LR_FACTOR,
                },
            },
        )
        optimizer = torch.optim.SGD(
            params,
            cfg.SOLVER.BASE_LR,
            momentum=cfg.SOLVER.MOMENTUM,
            nesterov=cfg.SOLVER.NESTEROV,
            weight_decay=cfg.SOLVER.WEIGHT_DECAY,
        )
        # pyre-fixme[6]: For 2nd param expected `Type[Optimizer]` but got `SGD`.
        return maybe_add_gradient_clipping(cfg, optimizer)

    @classmethod
    def build_test_loader(cls, cfg: CfgNode, dataset_name):
        return build_detection_test_loader(cfg, dataset_name, mapper=DatasetMapper(cfg, False))

    @classmethod
    def build_train_loader(cls, cfg: CfgNode):
        """
        Build the train loader; when inference-based bootstrap loaders are
        configured, combine them with the standard loader and wrap the
        result in a `SampleCountingLoader` for per-dataset statistics.
        """
        data_loader = build_detection_train_loader(cfg, mapper=DatasetMapper(cfg, True))
        if not has_inference_based_loaders(cfg):
            return data_loader
        model = cls.build_model(cfg)
        model.to(cfg.BOOTSTRAP_MODEL.DEVICE)
        DetectionCheckpointer(model).resume_or_load(cfg.BOOTSTRAP_MODEL.WEIGHTS, resume=False)
        inference_based_loaders, ratios = build_inference_based_loaders(cfg, model)
        loaders = [data_loader] + inference_based_loaders
        ratios = [1.0] + ratios
        combined_data_loader = build_combined_loader(cfg, loaders, ratios)
        sample_counting_loader = SampleCountingLoader(combined_data_loader)
        return sample_counting_loader

    def build_writers(self):
        writers = super().build_writers()
        writers.append(SampleCountMetricPrinter())
        return writers

    @classmethod
    def test_with_TTA(cls, cfg: CfgNode, model):
        """Evaluate the model with test-time augmentation (R-CNN models only)."""
        logger = logging.getLogger("detectron2.trainer")
        # In the end of training, run an evaluation with TTA
        # Only support some R-CNN models.
        logger.info("Running inference with test-time augmentation ...")
        transform_data = load_from_cfg(cfg)
        model = DensePoseGeneralizedRCNNWithTTA(
            cfg, model, transform_data, DensePoseDatasetMapperTTA(cfg)
        )
        evaluators = [
            cls.build_evaluator(
                cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA")
            )
            for name in cfg.DATASETS.TEST
        ]
        res = cls.test(cfg, model, evaluators)  # pyre-ignore[6]
        res = OrderedDict({k + "_TTA": v for k, v in res.items()})
        return res
Leffa/densepose/modeling/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from .confidence import DensePoseConfidenceModelConfig, DensePoseUVConfidenceType
4
+ from .filter import DensePoseDataFilter
5
+ from .inference import densepose_inference
6
+ from .utils import initialize_module_params
7
+ from .build import (
8
+ build_densepose_data_filter,
9
+ build_densepose_embedder,
10
+ build_densepose_head,
11
+ build_densepose_losses,
12
+ build_densepose_predictor,
13
+ )
Leffa/densepose/modeling/build.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from typing import Optional
4
+ from torch import nn
5
+
6
+ from detectron2.config import CfgNode
7
+
8
+ from .cse.embedder import Embedder
9
+ from .filter import DensePoseDataFilter
10
+
11
+
12
def build_densepose_predictor(cfg: CfgNode, input_channels: int):
    """
    Create an instance of DensePose predictor based on configuration options.

    Args:
        cfg (CfgNode): configuration options
        input_channels (int): input tensor size along the channel dimension
    Return:
        An instance of DensePose predictor
    """
    # imported lazily to avoid a circular dependency at module load time
    from .predictors import DENSEPOSE_PREDICTOR_REGISTRY

    predictor_cls = DENSEPOSE_PREDICTOR_REGISTRY.get(
        cfg.MODEL.ROI_DENSEPOSE_HEAD.PREDICTOR_NAME
    )
    return predictor_cls(cfg, input_channels)
26
+
27
+
28
def build_densepose_data_filter(cfg: CfgNode):
    """
    Build a DensePose data filter which selects data suitable for training.

    Args:
        cfg (CfgNode): configuration options

    Return:
        Callable: list(Tensor), list(Instances) -> list(Tensor), list(Instances)
        An instance of DensePose filter, which takes feature tensors and proposals
        as an input and returns filtered features and proposals
    """
    return DensePoseDataFilter(cfg)
42
+
43
+
44
def build_densepose_head(cfg: CfgNode, input_channels: int):
    """
    Build a DensePose head based on configuration options.

    Args:
        cfg (CfgNode): configuration options
        input_channels (int): input tensor size along the channel dimension
    Return:
        An instance of DensePose head
    """
    # imported lazily to avoid a circular dependency at module load time
    from .roi_heads.registry import ROI_DENSEPOSE_HEAD_REGISTRY

    head_cls = ROI_DENSEPOSE_HEAD_REGISTRY.get(cfg.MODEL.ROI_DENSEPOSE_HEAD.NAME)
    return head_cls(cfg, input_channels)
58
+
59
+
60
def build_densepose_losses(cfg: CfgNode):
    """
    Build a DensePose loss based on configuration options.

    Args:
        cfg (CfgNode): configuration options
    Return:
        An instance of DensePose loss
    """
    # imported lazily to avoid a circular dependency at module load time
    from .losses import DENSEPOSE_LOSS_REGISTRY

    loss_cls = DENSEPOSE_LOSS_REGISTRY.get(cfg.MODEL.ROI_DENSEPOSE_HEAD.LOSS_NAME)
    return loss_cls(cfg)
73
+
74
+
75
def build_densepose_embedder(cfg: CfgNode) -> Optional[nn.Module]:
    """
    Build the embedder used to map mesh vertices into an embedding space.
    The embedder contains one sub-embedder per mesh ID.

    Args:
        cfg (CfgNode): configuration options
    Return:
        Embedding module, or None if no embedders are configured
    """
    if not cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDERS:
        return None
    return Embedder(cfg)
Leffa/densepose/modeling/confidence.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from dataclasses import dataclass
4
+ from enum import Enum
5
+
6
+ from detectron2.config import CfgNode
7
+
8
+
9
class DensePoseUVConfidenceType(Enum):
    """
    Statistical model type for confidence learning. Possible values:
     - "iid_iso": statistically independent identically distributed residuals
         with anisotropic covariance
     - "indep_aniso": statistically independent residuals with anisotropic
         covariances
    For details, see:
    N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning
    Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019
    """

    IID_ISO = "iid_iso"
    INDEP_ANISO = "indep_aniso"
25
+
26
+
27
@dataclass
class DensePoseUVConfidenceConfig:
    """
    Configuration options for confidence estimation on UV data
    """

    # whether UV confidence estimation is enabled
    enabled: bool = False
    # lower bound on UV confidences
    epsilon: float = 0.01
    # statistical model used for the UV confidences (see DensePoseUVConfidenceType)
    type: DensePoseUVConfidenceType = DensePoseUVConfidenceType.IID_ISO
37
+
38
+
39
@dataclass
class DensePoseSegmConfidenceConfig:
    """
    Configuration options for confidence estimation on segmentation data.
    """

    # whether segmentation confidence estimation is enabled
    enabled: bool = False
    # lower bound on segmentation confidence values
    epsilon: float = 0.01
48
+
49
+
50
@dataclass
class DensePoseConfidenceModelConfig:
    """
    Aggregate configuration options for DensePose confidence models.
    """

    # confidence estimation settings for U and V values
    uv_confidence: DensePoseUVConfidenceConfig
    # confidence estimation settings for segmentation
    segm_confidence: DensePoseSegmConfidenceConfig

    @staticmethod
    def from_cfg(cfg: CfgNode) -> "DensePoseConfidenceModelConfig":
        """Build the confidence model config from detectron2 config options."""
        uv_node = cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE
        segm_node = cfg.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE
        return DensePoseConfidenceModelConfig(
            uv_confidence=DensePoseUVConfidenceConfig(
                enabled=uv_node.ENABLED,
                epsilon=uv_node.EPSILON,
                type=DensePoseUVConfidenceType(uv_node.TYPE),
            ),
            segm_confidence=DensePoseSegmConfidenceConfig(
                enabled=segm_node.ENABLED,
                epsilon=segm_node.EPSILON,
            ),
        )
Leffa/densepose/modeling/densepose_checkpoint.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ from collections import OrderedDict
3
+
4
+ from detectron2.checkpoint import DetectionCheckpointer
5
+
6
+
7
+ def _rename_HRNet_weights(weights):
8
+ # We detect and rename HRNet weights for DensePose. 1956 and 1716 are values that are
9
+ # common to all HRNet pretrained weights, and should be enough to accurately identify them
10
+ if (
11
+ len(weights["model"].keys()) == 1956
12
+ and len([k for k in weights["model"].keys() if k.startswith("stage")]) == 1716
13
+ ):
14
+ hrnet_weights = OrderedDict()
15
+ for k in weights["model"].keys():
16
+ hrnet_weights["backbone.bottom_up." + str(k)] = weights["model"][k]
17
+ return {"model": hrnet_weights}
18
+ else:
19
+ return weights
20
+
21
+
22
class DensePoseCheckpointer(DetectionCheckpointer):
    """
    Same as :class:`DetectionCheckpointer`, but additionally recognizes
    HRNet pretrained weights and renames them for DensePose use.
    """

    def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables):
        super().__init__(model, save_dir, save_to_disk=save_to_disk, **checkpointables)

    def _load_file(self, filename: str) -> object:
        """
        Load a checkpoint file, renaming HRNet weights when detected.
        """
        return _rename_HRNet_weights(super()._load_file(filename))
Leffa/densepose/modeling/filter.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from typing import List
4
+ import torch
5
+
6
+ from detectron2.config import CfgNode
7
+ from detectron2.structures import Instances
8
+ from detectron2.structures.boxes import matched_pairwise_iou
9
+
10
+
11
class DensePoseDataFilter:
    """
    Selects the proposals that are suitable for DensePose training:
    proposals must sufficiently overlap their GT boxes and must carry
    DensePose (or, optionally, mask) annotations.
    """

    def __init__(self, cfg: CfgNode):
        # minimum IoU between a proposal and its GT box for the proposal
        # to be kept for DensePose training
        self.iou_threshold = cfg.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD
        # when True, coarse segmentation is trained by mask annotations,
        # so proposals that only carry masks are also kept
        self.keep_masks = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS

    @torch.no_grad()
    def __call__(self, features: List[torch.Tensor], proposals_with_targets: List[Instances]):
        """
        Filters proposals with targets to keep only the ones relevant for
        DensePose training

        Args:
            features (list[Tensor]): input data as a list of features,
                each feature is a tensor. Axis 0 represents the number of
                images `N` in the input data; axes 1-3 are channels,
                height, and width, which may vary between features
                (e.g., if a feature pyramid is used).
            proposals_with_targets (list[Instances]): length `N` list of
                `Instances`. The i-th `Instances` contains instances
                (proposals, GT) for the i-th input image,
        Returns:
            list[Tensor]: filtered features (currently returned unchanged)
            list[Instances]: filtered proposals
        """
        proposals_filtered = []
        # TODO: the commented out code was supposed to correctly deal with situations
        # where no valid DensePose GT is available for certain images. The corresponding
        # image features were sliced and proposals were filtered. This led to performance
        # deterioration, both in terms of runtime and in terms of evaluation results.
        #
        # feature_mask = torch.ones(
        #    len(proposals_with_targets),
        #    dtype=torch.bool,
        #    device=features[0].device if len(features) > 0 else torch.device("cpu"),
        # )
        for i, proposals_per_image in enumerate(proposals_with_targets):
            # skip images with neither DensePose GT nor (if enabled) mask GT
            if not proposals_per_image.has("gt_densepose") and (
                not proposals_per_image.has("gt_masks") or not self.keep_masks
            ):
                # feature_mask[i] = 0
                continue
            gt_boxes = proposals_per_image.gt_boxes
            est_boxes = proposals_per_image.proposal_boxes
            # apply match threshold for densepose head
            iou = matched_pairwise_iou(gt_boxes, est_boxes)
            iou_select = iou > self.iou_threshold
            proposals_per_image = proposals_per_image[iou_select]  # pyre-ignore[6]

            N_gt_boxes = len(proposals_per_image.gt_boxes)
            assert N_gt_boxes == len(proposals_per_image.proposal_boxes), (
                f"The number of GT boxes {N_gt_boxes} is different from the "
                f"number of proposal boxes {len(proposals_per_image.proposal_boxes)}"
            )
            # filter out any target without suitable annotation
            if self.keep_masks:
                gt_masks = (
                    proposals_per_image.gt_masks
                    if hasattr(proposals_per_image, "gt_masks")
                    else [None] * N_gt_boxes
                )
            else:
                gt_masks = [None] * N_gt_boxes
            gt_densepose = (
                proposals_per_image.gt_densepose
                if hasattr(proposals_per_image, "gt_densepose")
                else [None] * N_gt_boxes
            )
            assert len(gt_masks) == N_gt_boxes
            assert len(gt_densepose) == N_gt_boxes
            # keep only proposals with at least one kind of annotation
            selected_indices = [
                i
                for i, (dp_target, mask_target) in enumerate(zip(gt_densepose, gt_masks))
                if (dp_target is not None) or (mask_target is not None)
            ]
            # if not len(selected_indices):
            #     feature_mask[i] = 0
            #     continue
            if len(selected_indices) != N_gt_boxes:
                proposals_per_image = proposals_per_image[selected_indices]  # pyre-ignore[6]
            assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.proposal_boxes)
            proposals_filtered.append(proposals_per_image)
        # features_filtered = [feature[feature_mask] for feature in features]
        # return features_filtered, proposals_filtered
        return features, proposals_filtered
Leffa/densepose/modeling/hrfpn.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ """
3
+ MIT License
4
+ Copyright (c) 2019 Microsoft
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+ The above copyright notice and this permission notice shall be included in all
12
+ copies or substantial portions of the Software.
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ SOFTWARE.
20
+ """
21
+
22
+ import torch
23
+ import torch.nn as nn
24
+ import torch.nn.functional as F
25
+
26
+ from detectron2.layers import ShapeSpec
27
+ from detectron2.modeling.backbone import BACKBONE_REGISTRY
28
+ from detectron2.modeling.backbone.backbone import Backbone
29
+
30
+ from .hrnet import build_pose_hrnet_backbone
31
+
32
+
33
class HRFPN(Backbone):
    """HRFPN (High Resolution Feature Pyramids)

    Transforms outputs of HRNet backbone so they are suitable for the ROI_heads
    arXiv: https://arxiv.org/abs/1904.04514
    Adapted from https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/necks/hrfpn.py

    Args:
        bottom_up: (list) output of HRNet
        in_features (list): names of the input features (output of HRNet)
        in_channels (list): number of channels for each branch
        out_channels (int): output channels of feature pyramids
        n_out_features (int): number of output stages
        pooling (str): pooling for generating feature pyramids (from {MAX, AVG})
        share_conv (bool): Have one conv per output, or share one with all the outputs
    """

    def __init__(
        self,
        bottom_up,
        in_features,
        n_out_features,
        in_channels,
        out_channels,
        pooling="AVG",
        share_conv=False,
    ):
        super(HRFPN, self).__init__()
        assert isinstance(in_channels, list)
        self.bottom_up = bottom_up
        self.in_features = in_features
        self.n_out_features = n_out_features
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_ins = len(in_channels)
        self.share_conv = share_conv

        # Output 3x3 convs: either one conv applied to every pyramid level,
        # or one conv per level.
        if self.share_conv:
            self.fpn_conv = nn.Conv2d(
                in_channels=out_channels, out_channels=out_channels, kernel_size=3, padding=1
            )
        else:
            self.fpn_conv = nn.ModuleList()
            for _ in range(self.n_out_features):
                self.fpn_conv.append(
                    nn.Conv2d(
                        in_channels=out_channels,
                        out_channels=out_channels,
                        kernel_size=3,
                        padding=1,
                    )
                )

        # Custom change: Replaces a simple bilinear interpolation.
        # Branch i of HRNet is 2**i times smaller than branch 0; the transposed
        # conv (stride 2**i) learns the upsampling to the common resolution.
        self.interp_conv = nn.ModuleList()
        for i in range(len(self.in_features)):
            self.interp_conv.append(
                nn.Sequential(
                    nn.ConvTranspose2d(
                        in_channels=in_channels[i],
                        out_channels=in_channels[i],
                        kernel_size=4,
                        stride=2**i,
                        padding=0,
                        output_padding=0,
                        bias=False,
                    ),
                    nn.BatchNorm2d(in_channels[i], momentum=0.1),
                    nn.ReLU(inplace=True),
                )
            )

        # Custom change: Replaces a couple (reduction conv + pooling) by one conv.
        # A kernel/stride of 2**i both reduces channels and downsamples to the
        # stride of pyramid level i.
        self.reduction_pooling_conv = nn.ModuleList()
        for i in range(self.n_out_features):
            self.reduction_pooling_conv.append(
                nn.Sequential(
                    nn.Conv2d(sum(in_channels), out_channels, kernel_size=2**i, stride=2**i),
                    nn.BatchNorm2d(out_channels, momentum=0.1),
                    nn.ReLU(inplace=True),
                )
            )

        if pooling == "MAX":
            self.pooling = F.max_pool2d
        else:
            self.pooling = F.avg_pool2d

        # Detectron2 Backbone metadata (names, channels, strides per output).
        self._out_features = []
        self._out_feature_channels = {}
        self._out_feature_strides = {}

        for i in range(self.n_out_features):
            self._out_features.append("p%d" % (i + 1))
            self._out_feature_channels.update({self._out_features[-1]: self.out_channels})
            self._out_feature_strides.update({self._out_features[-1]: 2 ** (i + 2)})

    # default init_weights for conv(msra) and norm in ConvModule
    def init_weights(self):
        """MSRA (Kaiming) init for all convolutions, zero-init their biases.

        `self.modules()` also traverses the `bottom_up` HRNet backbone, whose
        convolutions are created with `bias=False`; the `None` bias must be
        skipped or `nn.init.constant_` would raise.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, a=1)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, inputs):
        bottom_up_features = self.bottom_up(inputs)
        assert len(bottom_up_features) == len(self.in_features)
        inputs = [bottom_up_features[f] for f in self.in_features]

        # Upsample every branch to (approximately) the highest resolution.
        outs = []
        for i in range(len(inputs)):
            outs.append(self.interp_conv[i](inputs[i]))
        # Transposed convs can overshoot by a few pixels; crop all branches to
        # the smallest spatial size before concatenation.
        shape_2 = min(o.shape[2] for o in outs)
        shape_3 = min(o.shape[3] for o in outs)
        out = torch.cat([o[:, :, :shape_2, :shape_3] for o in outs], dim=1)
        outs = []
        for i in range(self.n_out_features):
            outs.append(self.reduction_pooling_conv[i](out))
        for i in range(len(outs)):  # Make shapes consistent
            outs[-1 - i] = outs[-1 - i][
                :, :, : outs[-1].shape[2] * 2**i, : outs[-1].shape[3] * 2**i
            ]
        outputs = []
        for i in range(len(outs)):
            if self.share_conv:
                outputs.append(self.fpn_conv(outs[i]))
            else:
                outputs.append(self.fpn_conv[i](outs[i]))

        assert len(self._out_features) == len(outputs)
        return dict(zip(self._out_features, outputs))
162
+
163
+
164
@BACKBONE_REGISTRY.register()
def build_hrfpn_backbone(cfg, input_shape: ShapeSpec) -> HRFPN:
    """Build an HRNet backbone and wrap it with an HRFPN neck.

    The HRFPN inputs are the outputs of HRNet stage 4 (one per branch); the
    number of pyramid outputs matches ROI_HEADS.IN_FEATURES.
    """
    stage4 = cfg.MODEL.HRNET.STAGE4
    branch_names = ["p%d" % (i + 1) for i in range(stage4.NUM_BRANCHES)]
    hrnet = build_pose_hrnet_backbone(cfg, input_shape)
    return HRFPN(
        hrnet,
        branch_names,
        len(cfg.MODEL.ROI_HEADS.IN_FEATURES),
        stage4.NUM_CHANNELS,
        cfg.MODEL.HRNET.HRFPN.OUT_CHANNELS,
        pooling="AVG",
        share_conv=False,
    )
Leffa/densepose/modeling/hrnet.py ADDED
@@ -0,0 +1,474 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ # ------------------------------------------------------------------------------
3
+ # Copyright (c) Microsoft
4
+ # Licensed under the MIT License.
5
+ # Written by Bin Xiao (leoxiaobin@gmail.com)
6
+ # Modified by Bowen Cheng (bcheng9@illinois.edu)
7
+ # Adapted from https://github.com/HRNet/Higher-HRNet-Human-Pose-Estimation/blob/master/lib/models/pose_higher_hrnet.py # noqa
8
+ # ------------------------------------------------------------------------------
9
+
10
+ from __future__ import absolute_import, division, print_function
11
+ import logging
12
+ import torch.nn as nn
13
+
14
+ from detectron2.layers import ShapeSpec
15
+ from detectron2.modeling.backbone import BACKBONE_REGISTRY
16
+ from detectron2.modeling.backbone.backbone import Backbone
17
+
18
+ BN_MOMENTUM = 0.1
19
+ logger = logging.getLogger(__name__)
20
+
21
+ __all__ = ["build_pose_hrnet_backbone", "PoseHigherResolutionNet"]
22
+
23
+
24
def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 convolution with padding 1."""
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )
27
+
28
+
29
class BasicBlock(nn.Module):
    """ResNet basic residual block: two 3x3 convs with BN, ReLU and a skip
    connection (optionally projected by `downsample`)."""

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # Project the shortcut when resolution/channels change.
        shortcut = x if self.downsample is None else self.downsample(x)
        y = self.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))
        return self.relu(y + shortcut)
59
+
60
+
61
class Bottleneck(nn.Module):
    """ResNet bottleneck residual block: 1x1 reduce -> 3x3 -> 1x1 expand
    (x4 channels), with BN, ReLU and an optionally projected skip."""

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # Project the shortcut when resolution/channels change.
        shortcut = x if self.downsample is None else self.downsample(x)
        y = self.relu(self.bn1(self.conv1(x)))
        y = self.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))
        return self.relu(y + shortcut)
97
+
98
+
99
class HighResolutionModule(nn.Module):
    """HighResolutionModule
    Building block of the PoseHigherResolutionNet (see lower)
    arXiv: https://arxiv.org/abs/1908.10357
    Args:
        num_branches (int): number of branches of the module
        blocks (str): type of block of the module
        num_blocks (int): number of blocks of the module
        num_inchannels (int): number of input channels of the module
        num_channels (list): number of channels of each branch
        multi_scale_output (bool): only used by the last module of PoseHigherResolutionNet
    """

    def __init__(
        self,
        num_branches,
        blocks,
        num_blocks,
        num_inchannels,
        num_channels,
        multi_scale_output=True,
    ):
        super(HighResolutionModule, self).__init__()
        # Fail fast on inconsistent per-branch configuration lengths.
        self._check_branches(num_branches, blocks, num_blocks, num_inchannels, num_channels)

        self.num_inchannels = num_inchannels
        self.num_branches = num_branches

        self.multi_scale_output = multi_scale_output

        # Per-branch residual stacks, then cross-resolution fusion layers.
        self.branches = self._make_branches(num_branches, blocks, num_blocks, num_channels)
        self.fuse_layers = self._make_fuse_layers()
        self.relu = nn.ReLU(True)

    def _check_branches(self, num_branches, blocks, num_blocks, num_inchannels, num_channels):
        # Every per-branch list must have exactly `num_branches` entries.
        if num_branches != len(num_blocks):
            error_msg = "NUM_BRANCHES({}) <> NUM_BLOCKS({})".format(num_branches, len(num_blocks))
            logger.error(error_msg)
            raise ValueError(error_msg)

        if num_branches != len(num_channels):
            error_msg = "NUM_BRANCHES({}) <> NUM_CHANNELS({})".format(
                num_branches, len(num_channels)
            )
            logger.error(error_msg)
            raise ValueError(error_msg)

        if num_branches != len(num_inchannels):
            error_msg = "NUM_BRANCHES({}) <> NUM_INCHANNELS({})".format(
                num_branches, len(num_inchannels)
            )
            logger.error(error_msg)
            raise ValueError(error_msg)

    def _make_one_branch(self, branch_index, block, num_blocks, num_channels, stride=1):
        # The residual shortcut needs a 1x1 projection whenever the block
        # changes resolution (stride != 1) or channel count.
        downsample = None
        if (
            stride != 1
            or self.num_inchannels[branch_index] != num_channels[branch_index] * block.expansion
        ):
            downsample = nn.Sequential(
                nn.Conv2d(
                    self.num_inchannels[branch_index],
                    num_channels[branch_index] * block.expansion,
                    kernel_size=1,
                    stride=stride,
                    bias=False,
                ),
                nn.BatchNorm2d(num_channels[branch_index] * block.expansion, momentum=BN_MOMENTUM),
            )

        layers = []
        layers.append(
            block(self.num_inchannels[branch_index], num_channels[branch_index], stride, downsample)
        )
        # NOTE: mutates self.num_inchannels in place so subsequent blocks and
        # the fuse layers see the post-expansion channel count.
        self.num_inchannels[branch_index] = num_channels[branch_index] * block.expansion
        for _ in range(1, num_blocks[branch_index]):
            layers.append(block(self.num_inchannels[branch_index], num_channels[branch_index]))

        return nn.Sequential(*layers)

    def _make_branches(self, num_branches, block, num_blocks, num_channels):
        branches = []

        for i in range(num_branches):
            branches.append(self._make_one_branch(i, block, num_blocks, num_channels))

        return nn.ModuleList(branches)

    def _make_fuse_layers(self):
        # Single-branch modules have nothing to fuse.
        if self.num_branches == 1:
            return None

        num_branches = self.num_branches
        num_inchannels = self.num_inchannels
        fuse_layers = []
        # fuse_layers[i][j] maps branch j's output to branch i's resolution
        # and channel count. When multi_scale_output is False only the
        # highest-resolution output (i == 0) is produced.
        for i in range(num_branches if self.multi_scale_output else 1):
            fuse_layer = []
            for j in range(num_branches):
                if j > i:
                    # Lower-resolution source: 1x1 conv + nearest upsample.
                    fuse_layer.append(
                        nn.Sequential(
                            nn.Conv2d(num_inchannels[j], num_inchannels[i], 1, 1, 0, bias=False),
                            nn.BatchNorm2d(num_inchannels[i]),
                            nn.Upsample(scale_factor=2 ** (j - i), mode="nearest"),
                        )
                    )
                elif j == i:
                    # Same branch: identity (handled in forward).
                    fuse_layer.append(None)
                else:
                    # Higher-resolution source: chain of strided 3x3 convs;
                    # only the last one changes the channel count, and it has
                    # no ReLU (activation is applied after summation).
                    conv3x3s = []
                    for k in range(i - j):
                        if k == i - j - 1:
                            num_outchannels_conv3x3 = num_inchannels[i]
                            conv3x3s.append(
                                nn.Sequential(
                                    nn.Conv2d(
                                        num_inchannels[j],
                                        num_outchannels_conv3x3,
                                        3,
                                        2,
                                        1,
                                        bias=False,
                                    ),
                                    nn.BatchNorm2d(num_outchannels_conv3x3),
                                )
                            )
                        else:
                            num_outchannels_conv3x3 = num_inchannels[j]
                            conv3x3s.append(
                                nn.Sequential(
                                    nn.Conv2d(
                                        num_inchannels[j],
                                        num_outchannels_conv3x3,
                                        3,
                                        2,
                                        1,
                                        bias=False,
                                    ),
                                    nn.BatchNorm2d(num_outchannels_conv3x3),
                                    nn.ReLU(True),
                                )
                            )
                    fuse_layer.append(nn.Sequential(*conv3x3s))
            fuse_layers.append(nn.ModuleList(fuse_layer))

        return nn.ModuleList(fuse_layers)

    def get_num_inchannels(self):
        # Post-expansion channel counts (see _make_one_branch).
        return self.num_inchannels

    def forward(self, x):
        # x: list of tensors, one per branch.
        if self.num_branches == 1:
            return [self.branches[0](x[0])]

        for i in range(self.num_branches):
            x[i] = self.branches[i](x[i])

        x_fuse = []

        # Sum contributions from every branch at each output resolution.
        for i in range(len(self.fuse_layers)):
            y = x[0] if i == 0 else self.fuse_layers[i][0](x[0])
            for j in range(1, self.num_branches):
                if i == j:
                    y = y + x[j]
                else:
                    # Crop to y's spatial size; strided convs/upsampling can
                    # differ by a pixel when input sizes are odd.
                    z = self.fuse_layers[i][j](x[j])[:, :, : y.shape[2], : y.shape[3]]
                    y = y + z
            x_fuse.append(self.relu(y))

        return x_fuse
270
+
271
+
272
# Maps the BLOCK name from the stage config to the residual block class.
blocks_dict = {"BASIC": BasicBlock, "BOTTLENECK": Bottleneck}
273
+
274
+
275
class PoseHigherResolutionNet(Backbone):
    """PoseHigherResolutionNet
    Composed of several HighResolutionModule tied together with ConvNets
    Adapted from the GitHub version to fit with HRFPN and the Detectron2 infrastructure
    arXiv: https://arxiv.org/abs/1908.10357
    """

    def __init__(self, cfg, **kwargs):
        self.inplanes = cfg.MODEL.HRNET.STEM_INPLANES
        super(PoseHigherResolutionNet, self).__init__()

        # stem net: two stride-2 3x3 convs (overall stride 4), then 4
        # bottleneck blocks producing 256 channels.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self._make_layer(Bottleneck, 64, 4)

        # Stages 2-4: each stage adds one lower-resolution branch via a
        # transition layer, then runs its HighResolutionModules.
        self.stage2_cfg = cfg.MODEL.HRNET.STAGE2
        num_channels = self.stage2_cfg.NUM_CHANNELS
        block = blocks_dict[self.stage2_cfg.BLOCK]
        num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
        self.transition1 = self._make_transition_layer([256], num_channels)
        self.stage2, pre_stage_channels = self._make_stage(self.stage2_cfg, num_channels)

        self.stage3_cfg = cfg.MODEL.HRNET.STAGE3
        num_channels = self.stage3_cfg.NUM_CHANNELS
        block = blocks_dict[self.stage3_cfg.BLOCK]
        num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
        self.transition2 = self._make_transition_layer(pre_stage_channels, num_channels)
        self.stage3, pre_stage_channels = self._make_stage(self.stage3_cfg, num_channels)

        self.stage4_cfg = cfg.MODEL.HRNET.STAGE4
        num_channels = self.stage4_cfg.NUM_CHANNELS
        block = blocks_dict[self.stage4_cfg.BLOCK]
        num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
        self.transition3 = self._make_transition_layer(pre_stage_channels, num_channels)
        self.stage4, pre_stage_channels = self._make_stage(
            self.stage4_cfg, num_channels, multi_scale_output=True
        )

        # Detectron2 Backbone metadata: one output per stage-4 branch.
        self._out_features = []
        self._out_feature_channels = {}
        self._out_feature_strides = {}

        for i in range(cfg.MODEL.HRNET.STAGE4.NUM_BRANCHES):
            self._out_features.append("p%d" % (i + 1))
            self._out_feature_channels.update(
                {self._out_features[-1]: cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS[i]}
            )
            self._out_feature_strides.update({self._out_features[-1]: 1})

    def _get_deconv_cfg(self, deconv_kernel):
        # Maps a deconv kernel size to (kernel, padding, output_padding).
        # NOTE(review): only kernel sizes 4/3/2 are handled; any other value
        # leaves `padding` unbound and raises UnboundLocalError — confirm
        # callers never pass other sizes.
        if deconv_kernel == 4:
            padding = 1
            output_padding = 0
        elif deconv_kernel == 3:
            padding = 1
            output_padding = 1
        elif deconv_kernel == 2:
            padding = 0
            output_padding = 0

        return deconv_kernel, padding, output_padding

    def _make_transition_layer(self, num_channels_pre_layer, num_channels_cur_layer):
        # Builds per-branch adapters between consecutive stages:
        # * existing branch with a channel change -> 3x3 conv+BN+ReLU
        # * existing branch, same channels -> None (identity in forward)
        # * new (lower-resolution) branch -> chain of strided 3x3 convs
        num_branches_cur = len(num_channels_cur_layer)
        num_branches_pre = len(num_channels_pre_layer)

        transition_layers = []
        for i in range(num_branches_cur):
            if i < num_branches_pre:
                if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
                    transition_layers.append(
                        nn.Sequential(
                            nn.Conv2d(
                                num_channels_pre_layer[i],
                                num_channels_cur_layer[i],
                                3,
                                1,
                                1,
                                bias=False,
                            ),
                            nn.BatchNorm2d(num_channels_cur_layer[i]),
                            nn.ReLU(inplace=True),
                        )
                    )
                else:
                    transition_layers.append(None)
            else:
                conv3x3s = []
                for j in range(i + 1 - num_branches_pre):
                    # New branches start from the previous stage's last
                    # (lowest-resolution) output; only the final conv in the
                    # chain changes the channel count.
                    inchannels = num_channels_pre_layer[-1]
                    outchannels = (
                        num_channels_cur_layer[i] if j == i - num_branches_pre else inchannels
                    )
                    conv3x3s.append(
                        nn.Sequential(
                            nn.Conv2d(inchannels, outchannels, 3, 2, 1, bias=False),
                            nn.BatchNorm2d(outchannels),
                            nn.ReLU(inplace=True),
                        )
                    )
                transition_layers.append(nn.Sequential(*conv3x3s))

        return nn.ModuleList(transition_layers)

    def _make_layer(self, block, planes, blocks, stride=1):
        # Standard ResNet layer builder; updates self.inplanes as it goes.
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(
                    self.inplanes,
                    planes * block.expansion,
                    kernel_size=1,
                    stride=stride,
                    bias=False,
                ),
                nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def _make_stage(self, layer_config, num_inchannels, multi_scale_output=True):
        # Returns (sequence of HighResolutionModules, output channel counts).
        num_modules = layer_config["NUM_MODULES"]
        num_branches = layer_config["NUM_BRANCHES"]
        num_blocks = layer_config["NUM_BLOCKS"]
        num_channels = layer_config["NUM_CHANNELS"]
        block = blocks_dict[layer_config["BLOCK"]]

        modules = []
        for i in range(num_modules):
            # multi_scale_output is only used last module
            if not multi_scale_output and i == num_modules - 1:
                reset_multi_scale_output = False
            else:
                reset_multi_scale_output = True

            modules.append(
                HighResolutionModule(
                    num_branches,
                    block,
                    num_blocks,
                    num_inchannels,
                    num_channels,
                    reset_multi_scale_output,
                )
            )
            # Channel counts may change inside a module (block expansion).
            num_inchannels = modules[-1].get_num_inchannels()

        return nn.Sequential(*modules), num_inchannels

    def forward(self, x):
        # Stem: stride-4 downsampling followed by bottleneck stack.
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.layer1(x)

        x_list = []
        for i in range(self.stage2_cfg.NUM_BRANCHES):
            if self.transition1[i] is not None:
                x_list.append(self.transition1[i](x))
            else:
                x_list.append(x)
        y_list = self.stage2(x_list)

        x_list = []
        # NOTE: non-identity transitions read y_list[-1] (the last branch),
        # matching the upstream HigherHRNet implementation.
        for i in range(self.stage3_cfg.NUM_BRANCHES):
            if self.transition2[i] is not None:
                x_list.append(self.transition2[i](y_list[-1]))
            else:
                x_list.append(y_list[i])
        y_list = self.stage3(x_list)

        x_list = []
        for i in range(self.stage4_cfg.NUM_BRANCHES):
            if self.transition3[i] is not None:
                x_list.append(self.transition3[i](y_list[-1]))
            else:
                x_list.append(y_list[i])
        y_list = self.stage4(x_list)

        assert len(self._out_features) == len(y_list)
        return dict(zip(self._out_features, y_list))  # final_outputs
469
+
470
+
471
@BACKBONE_REGISTRY.register()
def build_pose_hrnet_backbone(cfg, input_shape: ShapeSpec):
    """Instantiate a PoseHigherResolutionNet backbone from the config.

    `input_shape` is accepted for registry-interface compatibility; the
    network is configured entirely from `cfg.MODEL.HRNET`.
    """
    return PoseHigherResolutionNet(cfg)
Leffa/densepose/modeling/inference.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ from dataclasses import fields
3
+ from typing import Any, List
4
+ import torch
5
+
6
+ from detectron2.structures import Instances
7
+
8
+
9
def densepose_inference(densepose_predictor_output: Any, detections: List[Instances]) -> None:
    """
    Splits DensePose predictor outputs into chunks, each chunk corresponds to
    detections on one image. Predictor output chunks are stored in `pred_densepose`
    attribute of the corresponding `Instances` object.

    Args:
        densepose_predictor_output: a dataclass instance (can be of different types,
            depending on predictor used for inference). Each field can be `None`
            (if the corresponding output was not inferred) or a tensor of size
            [N, ...], where N = N_1 + N_2 + .. + N_k is a total number of
            detections on all images, N_1 is the number of detections on image 1,
            N_2 is the number of detections on image 2, etc.
        detections: a list of objects of type `Instance`, k-th object corresponds
            to detections on k-th image.
    """
    if densepose_predictor_output is None:
        # Nothing was inferred: leave every detection without `pred_densepose`.
        return

    output_type = type(densepose_predictor_output)
    start = 0
    for instances in detections:
        count = len(instances)
        end = start + count
        # Slice tensor fields to this image's detections; copy other fields
        # through unchanged (we assume a dataclass predictor output).
        sliced = {}
        for fld in fields(densepose_predictor_output):
            value = getattr(densepose_predictor_output, fld.name)
            sliced[fld.name] = value[start:end] if isinstance(value, torch.Tensor) else value
        instances.pred_densepose = output_type(**sliced)
        start = end
Leffa/densepose/modeling/losses/__init__.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from .chart import DensePoseChartLoss
4
+ from .chart_with_confidences import DensePoseChartWithConfidenceLoss
5
+ from .cse import DensePoseCseLoss
6
+ from .registry import DENSEPOSE_LOSS_REGISTRY
7
+
8
+
9
+ __all__ = [
10
+ "DensePoseChartLoss",
11
+ "DensePoseChartWithConfidenceLoss",
12
+ "DensePoseCseLoss",
13
+ "DENSEPOSE_LOSS_REGISTRY",
14
+ ]
Leffa/densepose/modeling/losses/chart.py ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from typing import Any, List
4
+ import torch
5
+ from torch.nn import functional as F
6
+
7
+ from detectron2.config import CfgNode
8
+ from detectron2.structures import Instances
9
+
10
+ from .mask_or_segm import MaskOrSegmentationLoss
11
+ from .registry import DENSEPOSE_LOSS_REGISTRY
12
+ from .utils import (
13
+ BilinearInterpolationHelper,
14
+ ChartBasedAnnotationsAccumulator,
15
+ LossDict,
16
+ extract_packed_annotations_from_matches,
17
+ )
18
+
19
+
20
+ @DENSEPOSE_LOSS_REGISTRY.register()
21
+ class DensePoseChartLoss:
22
+ """
23
+ DensePose loss for chart-based training. A mesh is split into charts,
24
+ each chart is given a label (I) and parametrized by 2 coordinates referred to
25
+ as U and V. Ground truth consists of a number of points annotated with
26
+ I, U and V values and coarse segmentation S defined for all pixels of the
27
+ object bounding box. In some cases (see `COARSE_SEGM_TRAINED_BY_MASKS`),
28
+ semantic segmentation annotations can be used as ground truth inputs as well.
29
+
30
+ Estimated values are tensors:
31
+ * U coordinates, tensor of shape [N, C, S, S]
32
+ * V coordinates, tensor of shape [N, C, S, S]
33
+ * fine segmentation estimates, tensor of shape [N, C, S, S] with raw unnormalized
34
+ scores for each fine segmentation label at each location
35
+ * coarse segmentation estimates, tensor of shape [N, D, S, S] with raw unnormalized
36
+ scores for each coarse segmentation label at each location
37
+ where N is the number of detections, C is the number of fine segmentation
38
+ labels, S is the estimate size ( = width = height) and D is the number of
39
+ coarse segmentation channels.
40
+
41
+ The losses are:
42
+ * regression (smooth L1) loss for U and V coordinates
43
+ * cross entropy loss for fine (I) and coarse (S) segmentations
44
+ Each loss has an associated weight
45
+ """
46
+
47
+ def __init__(self, cfg: CfgNode):
48
+ """
49
+ Initialize chart-based loss from configuration options
50
+
51
+ Args:
52
+ cfg (CfgNode): configuration options
53
+ """
54
+ # fmt: off
55
+ self.heatmap_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE
56
+ self.w_points = cfg.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS
57
+ self.w_part = cfg.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS
58
+ self.w_segm = cfg.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS
59
+ self.n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
60
+ # fmt: on
61
+ self.segm_trained_by_masks = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS
62
+ self.segm_loss = MaskOrSegmentationLoss(cfg)
63
+
64
+ def __call__(
65
+ self, proposals_with_gt: List[Instances], densepose_predictor_outputs: Any, **kwargs
66
+ ) -> LossDict:
67
+ """
68
+ Produce chart-based DensePose losses
69
+
70
+ Args:
71
+ proposals_with_gt (list of Instances): detections with associated ground truth data
72
+ densepose_predictor_outputs: an object of a dataclass that contains predictor outputs
73
+ with estimated values; assumed to have the following attributes:
74
+ * coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S]
75
+ * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
76
+ * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
77
+ * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
78
+ where N is the number of detections, C is the number of fine segmentation
79
+ labels, S is the estimate size ( = width = height) and D is the number of
80
+ coarse segmentation channels.
81
+
82
+ Return:
83
+ dict: str -> tensor: dict of losses with the following entries:
84
+ * `loss_densepose_U`: smooth L1 loss for U coordinate estimates
85
+ * `loss_densepose_V`: smooth L1 loss for V coordinate estimates
86
+ * `loss_densepose_I`: cross entropy for raw unnormalized scores for fine
87
+ segmentation estimates given ground truth labels;
88
+ * `loss_densepose_S`: cross entropy for raw unnormalized scores for coarse
89
+ segmentation estimates given ground truth labels;
90
+ """
91
+ # densepose outputs are computed for all images and all bounding boxes;
92
+ # i.e. if a batch has 4 images with (3, 1, 2, 1) proposals respectively,
93
+ # the outputs will have size(0) == 3+1+2+1 == 7
94
+
95
+ if not len(proposals_with_gt):
96
+ return self.produce_fake_densepose_losses(densepose_predictor_outputs)
97
+
98
+ accumulator = ChartBasedAnnotationsAccumulator()
99
+ packed_annotations = extract_packed_annotations_from_matches(proposals_with_gt, accumulator)
100
+
101
+ # NOTE: we need to keep the same computation graph on all the GPUs to
102
+ # perform reduction properly. Hence even if we have no data on one
103
+ # of the GPUs, we still need to generate the computation graph.
104
+ # Add fake (zero) loss in the form Tensor.sum() * 0
105
+ if packed_annotations is None:
106
+ return self.produce_fake_densepose_losses(densepose_predictor_outputs)
107
+
108
+ h, w = densepose_predictor_outputs.u.shape[2:]
109
+ interpolator = BilinearInterpolationHelper.from_matches(
110
+ packed_annotations,
111
+ (h, w),
112
+ )
113
+
114
+ j_valid_fg = interpolator.j_valid * ( # pyre-ignore[16]
115
+ packed_annotations.fine_segm_labels_gt > 0
116
+ )
117
+ # pyre-fixme[6]: For 1st param expected `Tensor` but got `int`.
118
+ if not torch.any(j_valid_fg):
119
+ return self.produce_fake_densepose_losses(densepose_predictor_outputs)
120
+
121
+ losses_uv = self.produce_densepose_losses_uv(
122
+ proposals_with_gt,
123
+ densepose_predictor_outputs,
124
+ packed_annotations,
125
+ interpolator,
126
+ j_valid_fg, # pyre-ignore[6]
127
+ )
128
+
129
+ losses_segm = self.produce_densepose_losses_segm(
130
+ proposals_with_gt,
131
+ densepose_predictor_outputs,
132
+ packed_annotations,
133
+ interpolator,
134
+ j_valid_fg, # pyre-ignore[6]
135
+ )
136
+
137
+ return {**losses_uv, **losses_segm}
138
+
139
def produce_fake_densepose_losses(self, densepose_predictor_outputs: Any) -> LossDict:
    """
    Build zero-valued ("fake") losses for all DensePose terms.

    Used when no suitable ground truth was found in a batch. Each value is
    zero but still carries a valid autograd graph (built as ``Tensor.sum() * 0``
    in the sub-methods), so `DistributedDataParallel` sees identical graphs
    on all GPUs and can perform gradient reduction properly.

    Args:
        densepose_predictor_outputs: DensePose predictor outputs, a dataclass
            instance assumed to have the following attributes:
            * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
            * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
            * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
    Return:
        dict: str -> tensor: dict of losses with the following entries:
            * `loss_densepose_U`: has value 0
            * `loss_densepose_V`: has value 0
            * `loss_densepose_I`: has value 0
            * `loss_densepose_S`: has value 0
    """
    # merge the UV and segmentation zero-loss dictionaries
    fake_losses = dict(self.produce_fake_densepose_losses_uv(densepose_predictor_outputs))
    fake_losses.update(self.produce_fake_densepose_losses_segm(densepose_predictor_outputs))
    return fake_losses
163
+
164
def produce_fake_densepose_losses_uv(self, densepose_predictor_outputs: Any) -> LossDict:
    """
    Zero-valued ("fake") losses for U/V coordinates.

    Used when no suitable ground truth data was found in a batch. Each loss
    is ``Tensor.sum() * 0``: numerically zero, but connected to the predictor
    outputs so that `DistributedDataParallel` has similar computation graphs
    on all GPUs and can perform reduction properly.

    Args:
        densepose_predictor_outputs: DensePose predictor outputs, a dataclass
            instance assumed to have the following attributes:
            * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
            * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
    Return:
        dict: str -> tensor: dict of losses with the following entries:
            * `loss_densepose_U`: has value 0
            * `loss_densepose_V`: has value 0
    """
    zero_u = densepose_predictor_outputs.u.sum() * 0
    zero_v = densepose_predictor_outputs.v.sum() * 0
    return {"loss_densepose_U": zero_u, "loss_densepose_V": zero_v}
186
+
187
def produce_fake_densepose_losses_segm(self, densepose_predictor_outputs: Any) -> LossDict:
    """
    Zero-valued ("fake") losses for fine / coarse segmentation.

    Used when no suitable ground truth data was found in a batch. The values
    are zero but keep a valid computation graph (``Tensor.sum() * 0``), so
    that `DistributedDataParallel` has similar graphs on all GPUs and can
    perform reduction properly. The coarse term is delegated to
    ``self.segm_loss.fake_value``.

    Args:
        densepose_predictor_outputs: DensePose predictor outputs, a dataclass
            instance assumed to have the following attributes:
            * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
            * coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S]
    Return:
        dict: str -> tensor: dict of losses with the following entries:
            * `loss_densepose_I`: has value 0
            * `loss_densepose_S`: has value 0
    """
    return {
        "loss_densepose_I": densepose_predictor_outputs.fine_segm.sum() * 0,
        "loss_densepose_S": self.segm_loss.fake_value(densepose_predictor_outputs),
    }
210
+
211
def produce_densepose_losses_uv(
    self,
    proposals_with_gt: List[Instances],
    densepose_predictor_outputs: Any,
    packed_annotations: Any,
    interpolator: BilinearInterpolationHelper,
    j_valid_fg: torch.Tensor,
) -> LossDict:
    """
    Compute losses for U/V coordinates: smooth L1 loss between
    estimated coordinates (bilinearly sampled at annotated points)
    and the ground truth, restricted to valid foreground points.

    Args:
        proposals_with_gt (list of Instances): detections with associated ground truth data
        densepose_predictor_outputs: DensePose predictor outputs, a dataclass
            instance assumed to have the following attributes:
            * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
            * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
        packed_annotations: packed GT annotations with `u_gt` / `v_gt` point data
        interpolator: helper that samples estimates at annotated point locations
        j_valid_fg: boolean mask selecting valid foreground points
    Return:
        dict: str -> tensor: dict of losses with the following entries:
            * `loss_densepose_U`: smooth L1 loss for U coordinate estimates
            * `loss_densepose_V`: smooth L1 loss for V coordinate estimates
    """
    # sample estimates at annotated point locations, then keep foreground points only
    u_est_all = interpolator.extract_at_points(densepose_predictor_outputs.u)
    v_est_all = interpolator.extract_at_points(densepose_predictor_outputs.v)
    loss_u = F.smooth_l1_loss(
        u_est_all[j_valid_fg], packed_annotations.u_gt[j_valid_fg], reduction="sum"
    )
    loss_v = F.smooth_l1_loss(
        v_est_all[j_valid_fg], packed_annotations.v_gt[j_valid_fg], reduction="sum"
    )
    return {
        "loss_densepose_U": loss_u * self.w_points,
        "loss_densepose_V": loss_v * self.w_points,
    }
242
+
243
def produce_densepose_losses_segm(
    self,
    proposals_with_gt: List[Instances],
    densepose_predictor_outputs: Any,
    packed_annotations: Any,
    interpolator: BilinearInterpolationHelper,
    j_valid_fg: torch.Tensor,
) -> LossDict:
    """
    Losses for fine / coarse segmentation: cross-entropy
    for segmentation unnormalized scores given ground truth labels at
    annotated points for fine segmentation and dense mask annotations
    for coarse segmentation.

    Args:
        proposals_with_gt (list of Instances): detections with associated ground truth data
        densepose_predictor_outputs: DensePose predictor outputs, an object
            of a dataclass that is assumed to have the following attributes:
         * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
         * coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S]
        packed_annotations: packed GT annotations with per-point fine
            segmentation labels (`fine_segm_labels_gt`)
        interpolator: helper that bilinearly samples estimates at annotated points
        j_valid_fg: boolean mask of valid foreground points (unused here; kept
            for signature symmetry with `produce_densepose_losses_uv`)
    Return:
        dict: str -> tensor: dict of losses with the following entries:
         * `loss_densepose_I`: cross entropy for raw unnormalized scores for fine
           segmentation estimates given ground truth labels
         * `loss_densepose_S`: cross entropy for raw unnormalized scores for coarse
           segmentation estimates given ground truth labels;
           may be included if coarse segmentation is only trained
           using DensePose ground truth; if additional supervision through
           instance segmentation data is performed (`segm_trained_by_masks` is True),
           this loss is handled by `produce_mask_losses` instead
    """
    # GT labels at points that fall inside their boxes (interpolator.j_valid)
    fine_segm_gt = packed_annotations.fine_segm_labels_gt[
        interpolator.j_valid  # pyre-ignore[16]
    ]
    # slice(None) keeps all C channel scores per point; the bilinear weights
    # get a trailing singleton dim so they broadcast over the channel axis
    fine_segm_est = interpolator.extract_at_points(
        densepose_predictor_outputs.fine_segm,
        slice_fine_segm=slice(None),
        w_ylo_xlo=interpolator.w_ylo_xlo[:, None],  # pyre-ignore[16]
        w_ylo_xhi=interpolator.w_ylo_xhi[:, None],  # pyre-ignore[16]
        w_yhi_xlo=interpolator.w_yhi_xlo[:, None],  # pyre-ignore[16]
        w_yhi_xhi=interpolator.w_yhi_xhi[:, None],  # pyre-ignore[16]
    )[interpolator.j_valid, :]
    return {
        "loss_densepose_I": F.cross_entropy(fine_segm_est, fine_segm_gt.long()) * self.w_part,
        "loss_densepose_S": self.segm_loss(
            proposals_with_gt, densepose_predictor_outputs, packed_annotations
        )
        * self.w_segm,
    }
Leffa/densepose/modeling/losses/embed_utils.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any, Optional
5
+ import torch
6
+
7
+ from detectron2.structures import BoxMode, Instances
8
+
9
+ from .utils import AnnotationsAccumulator
10
+
11
+
12
@dataclass
class PackedCseAnnotations:
    """
    Ground truth annotations for CSE loss computation, packed (concatenated
    across all detections in a batch) into single tensors by
    `CseAnnotationsAccumulator.pack`.
    """

    # normalized X coordinates of annotated points
    x_gt: torch.Tensor
    # normalized Y coordinates of annotated points
    y_gt: torch.Tensor
    # coarse segmentation, stacked [n_boxes, S, S]; None when not all
    # instances carry segmentation annotations
    coarse_segm_gt: Optional[torch.Tensor]
    # per-point mesh identifier
    vertex_mesh_ids_gt: torch.Tensor
    # per-point mesh vertex indices
    vertex_ids_gt: torch.Tensor
    # GT boxes in XYWH format, one row per box
    bbox_xywh_gt: torch.Tensor
    # estimated (proposal) boxes in XYWH format, one row per box
    bbox_xywh_est: torch.Tensor
    # per-point index of its box, counted among boxes with DensePose data
    point_bbox_with_dp_indices: torch.Tensor
    # per-point index of its box, counted among all boxes
    point_bbox_indices: torch.Tensor
    # indices of boxes that carry DensePose data
    bbox_indices: torch.Tensor
24
+
25
+
26
class CseAnnotationsAccumulator(AnnotationsAccumulator):
    """
    Accumulates annotations by batches that correspond to objects detected on
    individual images. Can pack them together into single tensors.
    """

    def __init__(self):
        # per-point data, accumulated as lists and concatenated in pack()
        self.x_gt = []
        self.y_gt = []
        self.s_gt = []
        self.vertex_mesh_ids_gt = []
        self.vertex_ids_gt = []
        # per-box data ([1, 4] tensors per entry)
        self.bbox_xywh_gt = []
        self.bbox_xywh_est = []
        # per-point indices of the owning box (two numbering schemes, see below)
        self.point_bbox_with_dp_indices = []
        self.point_bbox_indices = []
        # indices (among all boxes) of boxes that carry DensePose data
        self.bbox_indices = []
        # running counters: boxes with DensePose data vs. all boxes
        self.nxt_bbox_with_dp_index = 0
        self.nxt_bbox_index = 0

    def accumulate(self, instances_one_image: Instances):
        """
        Accumulate instances data for one image

        Args:
            instances_one_image (Instances): instances data to accumulate
        """
        # convert both box sets to XYWH once, up front
        boxes_xywh_est = BoxMode.convert(
            instances_one_image.proposal_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
        )
        boxes_xywh_gt = BoxMode.convert(
            instances_one_image.gt_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
        )
        n_matches = len(boxes_xywh_gt)
        assert n_matches == len(
            boxes_xywh_est
        ), f"Got {len(boxes_xywh_est)} proposal boxes and {len(boxes_xywh_gt)} GT boxes"
        if not n_matches:
            # no detection - GT matches
            return
        if (
            not hasattr(instances_one_image, "gt_densepose")
            or instances_one_image.gt_densepose is None
        ):
            # no densepose GT for the detections, just increase the bbox index
            self.nxt_bbox_index += n_matches
            return
        # accumulate only boxes whose DensePose GT has at least one point;
        # the overall bbox counter advances for every box regardless
        for box_xywh_est, box_xywh_gt, dp_gt in zip(
            boxes_xywh_est, boxes_xywh_gt, instances_one_image.gt_densepose
        ):
            if (dp_gt is not None) and (len(dp_gt.x) > 0):
                # pyre-fixme[6]: For 1st argument expected `Tensor` but got `float`.
                # pyre-fixme[6]: For 2nd argument expected `Tensor` but got `float`.
                self._do_accumulate(box_xywh_gt, box_xywh_est, dp_gt)
            self.nxt_bbox_index += 1

    def _do_accumulate(self, box_xywh_gt: torch.Tensor, box_xywh_est: torch.Tensor, dp_gt: Any):
        """
        Accumulate instances data for one image, given that the data is not empty

        Args:
            box_xywh_gt (tensor): GT bounding box
            box_xywh_est (tensor): estimated bounding box
            dp_gt: GT densepose data with the following attributes:
             - x: normalized X coordinates
             - y: normalized Y coordinates
             - segm: tensor of size [S, S] with coarse segmentation (optional)
             - vertex_ids: mesh vertex indices for the annotated points
             - mesh_id: mesh identifier
        """
        self.x_gt.append(dp_gt.x)
        self.y_gt.append(dp_gt.y)
        # segm is optional; pack() drops it unless every box provided one
        if hasattr(dp_gt, "segm"):
            self.s_gt.append(dp_gt.segm.unsqueeze(0))
        self.vertex_ids_gt.append(dp_gt.vertex_ids)
        self.vertex_mesh_ids_gt.append(torch.full_like(dp_gt.vertex_ids, dp_gt.mesh_id))
        self.bbox_xywh_gt.append(box_xywh_gt.view(-1, 4))
        self.bbox_xywh_est.append(box_xywh_est.view(-1, 4))
        self.point_bbox_with_dp_indices.append(
            torch.full_like(dp_gt.vertex_ids, self.nxt_bbox_with_dp_index)
        )
        self.point_bbox_indices.append(torch.full_like(dp_gt.vertex_ids, self.nxt_bbox_index))
        self.bbox_indices.append(self.nxt_bbox_index)
        self.nxt_bbox_with_dp_index += 1

    def pack(self) -> Optional[PackedCseAnnotations]:
        """
        Pack data into tensors

        Return:
            PackedCseAnnotations with all accumulated data concatenated, or
            None if nothing was accumulated
        """
        if not len(self.x_gt):
            # TODO:
            # returning proper empty annotations would require
            # creating empty tensors of appropriate shape and
            # type on an appropriate device;
            # we return None so far to indicate empty annotations
            return None
        return PackedCseAnnotations(
            x_gt=torch.cat(self.x_gt, 0),
            y_gt=torch.cat(self.y_gt, 0),
            vertex_mesh_ids_gt=torch.cat(self.vertex_mesh_ids_gt, 0),
            vertex_ids_gt=torch.cat(self.vertex_ids_gt, 0),
            # ignore segmentation annotations, if not all the instances contain those
            coarse_segm_gt=torch.cat(self.s_gt, 0)
            if len(self.s_gt) == len(self.bbox_xywh_gt)
            else None,
            bbox_xywh_gt=torch.cat(self.bbox_xywh_gt, 0),
            bbox_xywh_est=torch.cat(self.bbox_xywh_est, 0),
            point_bbox_with_dp_indices=torch.cat(self.point_bbox_with_dp_indices, 0),
            point_bbox_indices=torch.cat(self.point_bbox_indices, 0),
            bbox_indices=torch.as_tensor(
                self.bbox_indices, dtype=torch.long, device=self.x_gt[0].device
            ),
        )
Leffa/densepose/modeling/losses/mask_or_segm.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2
+
3
+ from typing import Any, List
4
+
5
+ import torch
6
+
7
+ from detectron2.config import CfgNode
8
+ from detectron2.structures import Instances
9
+
10
+ from .mask import MaskLoss
11
+ from .segm import SegmentationLoss
12
+
13
+
14
class MaskOrSegmentationLoss:
    """
    Cross-entropy loss on raw unnormalized coarse segmentation scores.
    The ground truth source is chosen by the config value
    MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS: either instance
    mask annotations (MaskLoss) or coarse segmentation annotations
    (SegmentationLoss).
    """

    def __init__(self, cfg: CfgNode):
        """
        Initialize segmentation loss from configuration options

        Args:
            cfg (CfgNode): configuration options
        """
        self.segm_trained_by_masks = (
            cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS
        )
        # the mask-based loss is only needed when masks supervise coarse segm
        if self.segm_trained_by_masks:
            self.mask_loss = MaskLoss()
        self.segm_loss = SegmentationLoss(cfg)

    def __call__(
        self,
        proposals_with_gt: List[Instances],
        densepose_predictor_outputs: Any,
        packed_annotations: Any,
    ) -> torch.Tensor:
        """
        Compute the segmentation loss, dispatching to the mask-based or the
        coarse-segmentation-based implementation according to the config.

        Args:
            proposals_with_gt (list of Instances): detections with associated ground truth data
            densepose_predictor_outputs: predictor outputs dataclass, assumed
                to have a `coarse_segm` attribute of shape [N, D, S, S]
            packed_annotations: packed annotations for efficient loss computation
                (only used by the coarse-segmentation-based loss)
        Return:
            tensor: cross-entropy loss value
        """
        use_masks = self.segm_trained_by_masks
        return (
            self.mask_loss(proposals_with_gt, densepose_predictor_outputs)
            if use_masks
            else self.segm_loss(
                proposals_with_gt, densepose_predictor_outputs, packed_annotations
            )
        )

    def fake_value(self, densepose_predictor_outputs: Any) -> torch.Tensor:
        """
        Zero-valued ("fake") segmentation loss, used when no suitable ground
        truth was found in a batch. Keeps the computation graph intact so
        `DistributedDataParallel` can reduce gradients consistently.

        Args:
            densepose_predictor_outputs: predictor outputs dataclass, assumed
                to have a `coarse_segm` attribute
        Return:
            Zero value loss with proper computation graph
        """
        graph_anchor = densepose_predictor_outputs.coarse_segm.sum()
        return graph_anchor * 0
Leffa/densepose/modeling/predictors/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from .chart import DensePoseChartPredictor
4
+ from .chart_confidence import DensePoseChartConfidencePredictorMixin
5
+ from .chart_with_confidence import DensePoseChartWithConfidencePredictor
6
+ from .cse import DensePoseEmbeddingPredictor
7
+ from .cse_confidence import DensePoseEmbeddingConfidencePredictorMixin
8
+ from .cse_with_confidence import DensePoseEmbeddingWithConfidencePredictor
9
+ from .registry import DENSEPOSE_PREDICTOR_REGISTRY
Leffa/densepose/modeling/predictors/chart.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ import torch
4
+ from torch import nn
5
+
6
+ from detectron2.config import CfgNode
7
+ from detectron2.layers import ConvTranspose2d, interpolate
8
+
9
+ from ...structures import DensePoseChartPredictorOutput
10
+ from ..utils import initialize_module_params
11
+ from .registry import DENSEPOSE_PREDICTOR_REGISTRY
12
+
13
+
14
+ @DENSEPOSE_PREDICTOR_REGISTRY.register()
15
+ class DensePoseChartPredictor(nn.Module):
16
+ """
17
+ Predictor (last layers of a DensePose model) that takes DensePose head outputs as an input
18
+ and produces 4 tensors which represent DensePose results for predefined body parts
19
+ (patches / charts):
20
+ * coarse segmentation, a tensor of shape [N, K, Hout, Wout]
21
+ * fine segmentation, a tensor of shape [N, C, Hout, Wout]
22
+ * U coordinates, a tensor of shape [N, C, Hout, Wout]
23
+ * V coordinates, a tensor of shape [N, C, Hout, Wout]
24
+ where
25
+ - N is the number of instances
26
+ - K is the number of coarse segmentation channels (
27
+ 2 = foreground / background,
28
+ 15 = one of 14 body parts / background)
29
+ - C is the number of fine segmentation channels (
30
+ 24 fine body parts / background)
31
+ - Hout and Wout are height and width of predictions
32
+ """
33
+
34
+ def __init__(self, cfg: CfgNode, input_channels: int):
35
+ """
36
+ Initialize predictor using configuration options
37
+
38
+ Args:
39
+ cfg (CfgNode): configuration options
40
+ input_channels (int): input tensor size along the channel dimension
41
+ """
42
+ super().__init__()
43
+ dim_in = input_channels
44
+ n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
45
+ dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1
46
+ kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
47
+ # coarse segmentation
48
+ self.ann_index_lowres = ConvTranspose2d(
49
+ dim_in, n_segm_chan, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
50
+ )
51
+ # fine segmentation
52
+ self.index_uv_lowres = ConvTranspose2d(
53
+ dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
54
+ )
55
+ # U
56
+ self.u_lowres = ConvTranspose2d(
57
+ dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
58
+ )
59
+ # V
60
+ self.v_lowres = ConvTranspose2d(
61
+ dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
62
+ )
63
+ self.scale_factor = cfg.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE
64
+ initialize_module_params(self)
65
+
66
+ def interp2d(self, tensor_nchw: torch.Tensor):
67
+ """
68
+ Bilinear interpolation method to be used for upscaling
69
+
70
+ Args:
71
+ tensor_nchw (tensor): tensor of shape (N, C, H, W)
72
+ Return:
73
+ tensor of shape (N, C, Hout, Wout), where Hout and Wout are computed
74
+ by applying the scale factor to H and W
75
+ """
76
+ return interpolate(
77
+ tensor_nchw, scale_factor=self.scale_factor, mode="bilinear", align_corners=False
78
+ )
79
+
80
+ def forward(self, head_outputs: torch.Tensor):
81
+ """
82
+ Perform forward step on DensePose head outputs
83
+
84
+ Args:
85
+ head_outputs (tensor): DensePose head outputs, tensor of shape [N, D, H, W]
86
+ Return:
87
+ An instance of DensePoseChartPredictorOutput
88
+ """
89
+ return DensePoseChartPredictorOutput(
90
+ coarse_segm=self.interp2d(self.ann_index_lowres(head_outputs)),
91
+ fine_segm=self.interp2d(self.index_uv_lowres(head_outputs)),
92
+ u=self.interp2d(self.u_lowres(head_outputs)),
93
+ v=self.interp2d(self.v_lowres(head_outputs)),
94
+ )
Leffa/densepose/modeling/predictors/chart_confidence.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from typing import Any
4
+ import torch
5
+ from torch.nn import functional as F
6
+
7
+ from detectron2.config import CfgNode
8
+ from detectron2.layers import ConvTranspose2d
9
+
10
+ from ...structures import decorate_predictor_output_class_with_confidences
11
+ from ..confidence import DensePoseConfidenceModelConfig, DensePoseUVConfidenceType
12
+ from ..utils import initialize_module_params
13
+
14
+
15
class DensePoseChartConfidencePredictorMixin:
    """
    Predictor contains the last layers of a DensePose model that take DensePose head
    outputs as an input and produce model outputs. Confidence predictor mixin is used
    to generate confidences for segmentation and UV tensors estimated by some
    base predictor. Several assumptions need to hold for the base predictor:
    1) the `forward` method must return SIUV tuple as the first result (
        S = coarse segmentation, I = fine segmentation, U and V are intrinsic
        chart coordinates)
    2) `interp2d` method must be defined to perform bilinear interpolation;
        the same method is typically used for SIUV and confidences
    Confidence predictor mixin provides confidence estimates, as described in:
        N. Neverova et al., Correlated Uncertainty for Learning Dense Correspondences
            from Noisy Labels, NeurIPS 2019
        A. Sanakoyeu et al., Transferring Dense Pose to Proximal Animal Classes, CVPR 2020
    """

    def __init__(self, cfg: CfgNode, input_channels: int):
        """
        Initialize confidence predictor using configuration options.

        Args:
            cfg (CfgNode): configuration options
            input_channels (int): number of input channels
        """
        # we rely on base predictor to call nn.Module.__init__
        super().__init__(cfg, input_channels)  # pyre-ignore[19]
        self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg)
        self._initialize_confidence_estimation_layers(cfg, input_channels)
        self._registry = {}
        initialize_module_params(self)  # pyre-ignore[6]

    def _initialize_confidence_estimation_layers(self, cfg: CfgNode, dim_in: int):
        """
        Initialize confidence estimation layers based on configuration options

        Args:
            cfg (CfgNode): configuration options
            dim_in (int): number of input channels
        Raises:
            ValueError: if UV confidence is enabled with an unsupported type
        """
        dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1
        kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
        if self.confidence_model_cfg.uv_confidence.enabled:
            if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO:
                self.sigma_2_lowres = ConvTranspose2d(  # pyre-ignore[16]
                    dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
                )
            elif (
                self.confidence_model_cfg.uv_confidence.type
                == DensePoseUVConfidenceType.INDEP_ANISO
            ):
                self.sigma_2_lowres = ConvTranspose2d(
                    dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
                )
                self.kappa_u_lowres = ConvTranspose2d(  # pyre-ignore[16]
                    dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
                )
                self.kappa_v_lowres = ConvTranspose2d(  # pyre-ignore[16]
                    dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
                )
            else:
                # NOTE(fix): the original message referenced
                # `confidence_model_cfg.confidence_model_type`, an attribute that
                # does not exist on the config, so this error path itself raised
                # AttributeError; report the actual unsupported UV confidence type
                raise ValueError(
                    f"Unknown confidence model type: "
                    f"{self.confidence_model_cfg.uv_confidence.type}"
                )
        if self.confidence_model_cfg.segm_confidence.enabled:
            self.fine_segm_confidence_lowres = ConvTranspose2d(  # pyre-ignore[16]
                dim_in, 1, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
            )
            self.coarse_segm_confidence_lowres = ConvTranspose2d(  # pyre-ignore[16]
                dim_in, 1, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
            )

    def forward(self, head_outputs: torch.Tensor):
        """
        Perform forward operation on head outputs used as inputs for the predictor.
        Calls forward method from the base predictor and uses its outputs to compute
        confidences.

        Args:
            head_outputs (Tensor): head outputs used as predictor inputs
        Return:
            An instance of outputs with confidences,
            see `decorate_predictor_output_class_with_confidences`
        Raises:
            ValueError: if UV confidence is enabled with an unsupported type
        """
        # assuming base class returns SIUV estimates in its first result
        base_predictor_outputs = super().forward(head_outputs)  # pyre-ignore[16]

        # create output instance by extending base predictor outputs:
        output = self._create_output_instance(base_predictor_outputs)

        if self.confidence_model_cfg.uv_confidence.enabled:
            if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO:
                # assuming base class defines interp2d method for bilinear interpolation
                output.sigma_2 = self.interp2d(self.sigma_2_lowres(head_outputs))  # pyre-ignore[16]
            elif (
                self.confidence_model_cfg.uv_confidence.type
                == DensePoseUVConfidenceType.INDEP_ANISO
            ):
                # assuming base class defines interp2d method for bilinear interpolation
                output.sigma_2 = self.interp2d(self.sigma_2_lowres(head_outputs))
                output.kappa_u = self.interp2d(self.kappa_u_lowres(head_outputs))  # pyre-ignore[16]
                output.kappa_v = self.interp2d(self.kappa_v_lowres(head_outputs))  # pyre-ignore[16]
            else:
                # NOTE(fix): see _initialize_confidence_estimation_layers — report
                # the actual unsupported UV confidence type instead of accessing a
                # nonexistent config attribute
                raise ValueError(
                    f"Unknown confidence model type: "
                    f"{self.confidence_model_cfg.uv_confidence.type}"
                )
        if self.confidence_model_cfg.segm_confidence.enabled:
            # base predictor outputs are assumed to have `fine_segm` and `coarse_segm` attributes
            # base predictor is assumed to define `interp2d` method for bilinear interpolation
            output.fine_segm_confidence = (
                F.softplus(
                    self.interp2d(self.fine_segm_confidence_lowres(head_outputs))  # pyre-ignore[16]
                )
                + self.confidence_model_cfg.segm_confidence.epsilon
            )
            output.fine_segm = base_predictor_outputs.fine_segm * torch.repeat_interleave(
                output.fine_segm_confidence, base_predictor_outputs.fine_segm.shape[1], dim=1
            )
            output.coarse_segm_confidence = (
                F.softplus(
                    self.interp2d(
                        self.coarse_segm_confidence_lowres(head_outputs)  # pyre-ignore[16]
                    )
                )
                + self.confidence_model_cfg.segm_confidence.epsilon
            )
            output.coarse_segm = base_predictor_outputs.coarse_segm * torch.repeat_interleave(
                output.coarse_segm_confidence, base_predictor_outputs.coarse_segm.shape[1], dim=1
            )

        return output

    def _create_output_instance(self, base_predictor_outputs: Any):
        """
        Create an instance of predictor outputs by copying the outputs from the
        base predictor and initializing confidence

        Args:
            base_predictor_outputs: an instance of base predictor outputs
                (the outputs type is assumed to be a dataclass)
        Return:
            An instance of outputs with confidences
        """
        PredictorOutput = decorate_predictor_output_class_with_confidences(
            type(base_predictor_outputs)  # pyre-ignore[6]
        )
        # base_predictor_outputs is assumed to be a dataclass
        # reassign all the fields from base_predictor_outputs (no deep copy!), add new fields
        output = PredictorOutput(
            **base_predictor_outputs.__dict__,
            coarse_segm_confidence=None,
            fine_segm_confidence=None,
            sigma_1=None,
            sigma_2=None,
            kappa_u=None,
            kappa_v=None,
        )
        return output
Leffa/densepose/modeling/predictors/chart_with_confidence.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from . import DensePoseChartConfidencePredictorMixin, DensePoseChartPredictor
4
+ from .registry import DENSEPOSE_PREDICTOR_REGISTRY
5
+
6
+
7
@DENSEPOSE_PREDICTOR_REGISTRY.register()
class DensePoseChartWithConfidencePredictor(
    DensePoseChartConfidencePredictorMixin, DensePoseChartPredictor
):
    """
    Chart predictor augmented with confidence estimation: combines
    `DensePoseChartPredictor` (SIUV outputs) with
    `DensePoseChartConfidencePredictorMixin` (confidence channels).
    All behavior comes from the two bases; no extra members are needed.
    """
Leffa/densepose/modeling/predictors/cse.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2
+
3
+ import torch
4
+ from torch import nn
5
+
6
+ from detectron2.config import CfgNode
7
+ from detectron2.layers import ConvTranspose2d, interpolate
8
+
9
+ from ...structures import DensePoseEmbeddingPredictorOutput
10
+ from ..utils import initialize_module_params
11
+ from .registry import DENSEPOSE_PREDICTOR_REGISTRY
12
+
13
+
14
+ @DENSEPOSE_PREDICTOR_REGISTRY.register()
15
+ class DensePoseEmbeddingPredictor(nn.Module):
16
+ """
17
+ Last layers of a DensePose model that take DensePose head outputs as an input
18
+ and produce model outputs for continuous surface embeddings (CSE).
19
+ """
20
+
21
+ def __init__(self, cfg: CfgNode, input_channels: int):
22
+ """
23
+ Initialize predictor using configuration options
24
+
25
+ Args:
26
+ cfg (CfgNode): configuration options
27
+ input_channels (int): input tensor size along the channel dimension
28
+ """
29
+ super().__init__()
30
+ dim_in = input_channels
31
+ n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
32
+ embed_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE
33
+ kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
34
+ # coarse segmentation
35
+ self.coarse_segm_lowres = ConvTranspose2d(
36
+ dim_in, n_segm_chan, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
37
+ )
38
+ # embedding
39
+ self.embed_lowres = ConvTranspose2d(
40
+ dim_in, embed_size, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
41
+ )
42
+ self.scale_factor = cfg.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE
43
+ initialize_module_params(self)
44
+
45
+ def interp2d(self, tensor_nchw: torch.Tensor):
46
+ """
47
+ Bilinear interpolation method to be used for upscaling
48
+
49
+ Args:
50
+ tensor_nchw (tensor): tensor of shape (N, C, H, W)
51
+ Return:
52
+ tensor of shape (N, C, Hout, Wout), where Hout and Wout are computed
53
+ by applying the scale factor to H and W
54
+ """
55
+ return interpolate(
56
+ tensor_nchw, scale_factor=self.scale_factor, mode="bilinear", align_corners=False
57
+ )
58
+
59
+ def forward(self, head_outputs):
60
+ """
61
+ Perform forward step on DensePose head outputs
62
+
63
+ Args:
64
+ head_outputs (tensor): DensePose head outputs, tensor of shape [N, D, H, W]
65
+ """
66
+ embed_lowres = self.embed_lowres(head_outputs)
67
+ coarse_segm_lowres = self.coarse_segm_lowres(head_outputs)
68
+ embed = self.interp2d(embed_lowres)
69
+ coarse_segm = self.interp2d(coarse_segm_lowres)
70
+ return DensePoseEmbeddingPredictorOutput(embedding=embed, coarse_segm=coarse_segm)
Leffa/densepose/modeling/predictors/cse_confidence.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from typing import Any
4
+ import torch
5
+ from torch.nn import functional as F
6
+
7
+ from detectron2.config import CfgNode
8
+ from detectron2.layers import ConvTranspose2d
9
+
10
+ from densepose.modeling.confidence import DensePoseConfidenceModelConfig
11
+ from densepose.modeling.utils import initialize_module_params
12
+ from densepose.structures import decorate_cse_predictor_output_class_with_confidences
13
+
14
+
15
class DensePoseEmbeddingConfidencePredictorMixin:
    """
    Mixin that adds coarse segmentation confidence estimation on top of a CSE
    base predictor (the last layers of a DensePose model mapping head outputs
    to model outputs).

    The base predictor must satisfy two assumptions:
    1) its `forward` accepts CSE DensePose head outputs, a tensor of shape
       [N, D, H, W];
    2) it defines an `interp2d` method performing bilinear interpolation
       (the same method is typically used for masks and confidences).

    Confidence estimates follow:
    N. Neverova et al., Correlated Uncertainty for Learning Dense Correspondences
    from Noisy Labels, NeurIPS 2019
    A. Sanakoyeu et al., Transferring Dense Pose to Proximal Animal Classes, CVPR 2020
    """

    def __init__(self, cfg: CfgNode, input_channels: int):
        """
        Initialize confidence predictor using configuration options.

        Args:
            cfg (CfgNode): configuration options
            input_channels (int): number of input channels
        """
        # nn.Module.__init__ is expected to be invoked by the base predictor
        # further down the MRO
        super().__init__(cfg, input_channels)  # pyre-ignore[19]
        self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg)
        self._initialize_confidence_estimation_layers(cfg, input_channels)
        self._registry = {}
        initialize_module_params(self)  # pyre-ignore[6]

    def _initialize_confidence_estimation_layers(self, cfg: CfgNode, dim_in: int):
        """
        Create the confidence estimation layers requested by the configuration.

        Args:
            cfg (CfgNode): configuration options
            dim_in (int): number of input channels
        """
        deconv_kernel = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
        if self.confidence_model_cfg.segm_confidence.enabled:
            # single-channel low-resolution confidence map for coarse segmentation
            self.coarse_segm_confidence_lowres = ConvTranspose2d(  # pyre-ignore[16]
                dim_in, 1, deconv_kernel, stride=2, padding=int(deconv_kernel / 2 - 1)
            )

    def forward(self, head_outputs: torch.Tensor):
        """
        Run the base predictor on `head_outputs` and augment its outputs with
        confidence estimates.

        Args:
            head_outputs (Tensor): head outputs used as predictor inputs
        Return:
            An instance of outputs with confidences,
            see `decorate_cse_predictor_output_class_with_confidences`
        """
        base_outputs = super().forward(head_outputs)  # pyre-ignore[16]

        # extend the base predictor outputs with a confidence field
        output = self._create_output_instance(base_outputs)

        if self.confidence_model_cfg.segm_confidence.enabled:
            # `interp2d` (defined by the base predictor) upscales the low-res
            # map; softplus keeps confidences positive, epsilon bounds them
            # away from zero
            lowres_confidence = self.coarse_segm_confidence_lowres(head_outputs)  # pyre-ignore[16]
            output.coarse_segm_confidence = (
                F.softplus(self.interp2d(lowres_confidence))  # pyre-ignore[16]
                + self.confidence_model_cfg.segm_confidence.epsilon
            )
            # scale every coarse segmentation channel by the confidence map
            # (base predictor outputs are assumed to carry `coarse_segm`)
            n_segm_channels = base_outputs.coarse_segm.shape[1]
            output.coarse_segm = base_outputs.coarse_segm * torch.repeat_interleave(
                output.coarse_segm_confidence, n_segm_channels, dim=1
            )

        return output

    def _create_output_instance(self, base_predictor_outputs: Any):
        """
        Build an output object that carries all fields of the base predictor
        outputs plus an (initially unset) confidence field.

        Args:
            base_predictor_outputs: an instance of base predictor outputs
                (the outputs type is assumed to be a dataclass)
        Return:
            An instance of outputs with confidences
        """
        output_class = decorate_cse_predictor_output_class_with_confidences(
            type(base_predictor_outputs)  # pyre-ignore[6]
        )
        # shallow-reassign all fields from the base outputs (no deep copy!)
        # and add the new confidence field
        return output_class(**base_predictor_outputs.__dict__, coarse_segm_confidence=None)
Leffa/densepose/modeling/predictors/cse_with_confidence.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from . import DensePoseEmbeddingConfidencePredictorMixin, DensePoseEmbeddingPredictor
4
+ from .registry import DENSEPOSE_PREDICTOR_REGISTRY
5
+
6
+
7
@DENSEPOSE_PREDICTOR_REGISTRY.register()
class DensePoseEmbeddingWithConfidencePredictor(
    DensePoseEmbeddingConfidencePredictorMixin, DensePoseEmbeddingPredictor
):
    """
    Predictor that combines CSE and CSE confidence estimation.

    All behavior comes from the two bases: the mixin (first in MRO) wraps the
    embedding predictor's outputs with confidence estimates.
    """
Leffa/densepose/modeling/predictors/registry.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from detectron2.utils.registry import Registry
4
+
5
# Registry of DensePose predictor classes (the last layers that map DensePose
# head outputs to model outputs); implementations self-register via
# @DENSEPOSE_PREDICTOR_REGISTRY.register().
DENSEPOSE_PREDICTOR_REGISTRY = Registry("DENSEPOSE_PREDICTOR")
Leffa/densepose/modeling/roi_heads/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from .v1convx import DensePoseV1ConvXHead
4
+ from .deeplab import DensePoseDeepLabHead
5
+ from .registry import ROI_DENSEPOSE_HEAD_REGISTRY
6
+ from .roi_head import Decoder, DensePoseROIHeads
Leffa/densepose/modeling/roi_heads/deeplab.py ADDED
@@ -0,0 +1,263 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ import fvcore.nn.weight_init as weight_init
4
+ import torch
5
+ from torch import nn
6
+ from torch.nn import functional as F
7
+
8
+ from detectron2.config import CfgNode
9
+ from detectron2.layers import Conv2d
10
+
11
+ from .registry import ROI_DENSEPOSE_HEAD_REGISTRY
12
+
13
+
14
@ROI_DENSEPOSE_HEAD_REGISTRY.register()
class DensePoseDeepLabHead(nn.Module):
    """
    DensePose head using the DeepLabV3 design from
    "Rethinking Atrous Convolution for Semantic Image Segmentation"
    <https://arxiv.org/abs/1706.05587>: an ASPP module, an optional non-local
    block, followed by a stack of 3x3 convolutions with ReLU.
    """

    def __init__(self, cfg: CfgNode, input_channels: int):
        """
        Initialize the DeepLab DensePose head.

        Args:
            cfg (CfgNode): configuration options
            input_channels (int): number of input channels
        """
        super().__init__()
        hidden_dim = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM
        kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL
        norm = cfg.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NORM
        self.n_stacked_convs = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS
        self.use_nonlocal = cfg.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NONLOCAL_ON

        # ASPP keeps the channel count; atrous rates 6/12/56
        self.ASPP = ASPP(input_channels, [6, 12, 56], input_channels)
        self.add_module("ASPP", self.ASPP)

        if self.use_nonlocal:
            self.NLBlock = NONLocalBlock2D(input_channels, bn_layer=True)
            self.add_module("NLBlock", self.NLBlock)

        # stack of same-padding convolutions, optionally group-normalized
        n_channels = input_channels
        for idx in range(self.n_stacked_convs):
            conv = Conv2d(
                n_channels,
                hidden_dim,
                kernel_size,
                stride=1,
                padding=kernel_size // 2,
                bias=not norm,
                norm=nn.GroupNorm(32, hidden_dim) if norm == "GN" else None,
            )
            weight_init.c2_msra_fill(conv)
            self.add_module(self._get_layer_name(idx), conv)
            n_channels = hidden_dim
        self.n_out_channels = hidden_dim

    def forward(self, features):
        """
        Apply ASPP, the optional non-local block, and the conv stack.

        Args:
            features (Tensor): input feature tensor
        Return:
            Tensor of head outputs with `n_out_channels` channels
        """
        x = self.ASPP(features)
        if self.use_nonlocal:
            x = self.NLBlock(x)
        for idx in range(self.n_stacked_convs):
            x = F.relu(getattr(self, self._get_layer_name(idx))(x))
        return x

    def _get_layer_name(self, i: int):
        """Name of the i-th stacked conv submodule (1-based suffix)."""
        return f"body_conv_fcn{i + 1}"
76
+
77
+
78
+ # Copied from
79
+ # https://github.com/pytorch/vision/blob/master/torchvision/models/segmentation/deeplabv3.py
80
+ # See https://arxiv.org/pdf/1706.05587.pdf for details
81
class ASPPConv(nn.Sequential):
    """One atrous branch of ASPP: 3x3 dilated conv -> GroupNorm(32) -> ReLU."""

    def __init__(self, in_channels, out_channels, dilation):
        super().__init__(
            nn.Conv2d(
                in_channels, out_channels, 3, padding=dilation, dilation=dilation, bias=False
            ),
            nn.GroupNorm(32, out_channels),
            nn.ReLU(),
        )
91
+
92
+
93
+ class ASPPPooling(nn.Sequential):
94
+ def __init__(self, in_channels, out_channels):
95
+ super(ASPPPooling, self).__init__(
96
+ nn.AdaptiveAvgPool2d(1),
97
+ nn.Conv2d(in_channels, out_channels, 1, bias=False),
98
+ nn.GroupNorm(32, out_channels),
99
+ nn.ReLU(),
100
+ )
101
+
102
+ def forward(self, x):
103
+ size = x.shape[-2:]
104
+ x = super(ASPPPooling, self).forward(x)
105
+ return F.interpolate(x, size=size, mode="bilinear", align_corners=False)
106
+
107
+
108
class ASPP(nn.Module):
    """
    Atrous Spatial Pyramid Pooling (DeepLabV3, https://arxiv.org/abs/1706.05587):
    a 1x1 conv branch, one atrous conv branch per rate, and an image pooling
    branch, concatenated and projected back to `out_channels`.

    Generalized over the original implementation: `atrous_rates` may contain any
    number of rates (the original hard-coded exactly three and a `5 * out_channels`
    projection input); behavior is unchanged for the standard three-rate case.
    """

    def __init__(self, in_channels, atrous_rates, out_channels):
        """
        Args:
            in_channels (int): number of input channels
            atrous_rates (iterable[int]): dilation rate for each atrous branch
            out_channels (int): number of channels of each branch and the output
        """
        super().__init__()
        # 1x1 projection branch
        branches = [
            nn.Sequential(
                nn.Conv2d(in_channels, out_channels, 1, bias=False),
                nn.GroupNorm(32, out_channels),
                nn.ReLU(),
            )
        ]
        # one atrous conv branch per requested rate
        branches.extend(ASPPConv(in_channels, out_channels, rate) for rate in atrous_rates)
        # image-level pooling branch
        branches.append(ASPPPooling(in_channels, out_channels))
        self.convs = nn.ModuleList(branches)

        # fuse the concatenated branches back to `out_channels`
        # (no norm/dropout here, matching the original)
        self.project = nn.Sequential(
            nn.Conv2d(len(branches) * out_channels, out_channels, 1, bias=False),
            nn.ReLU(),
        )

    def forward(self, x):
        """Run all branches on `x`, concatenate along channels, and project."""
        merged = torch.cat([branch(x) for branch in self.convs], dim=1)
        return self.project(merged)
141
+
142
+
143
+ # copied from
144
+ # https://github.com/AlexHex7/Non-local_pytorch/blob/master/lib/non_local_embedded_gaussian.py
145
+ # See https://arxiv.org/abs/1711.07971 for details
146
class _NonLocalBlockND(nn.Module):
    """
    Embedded-Gaussian non-local block for 1D/2D/3D inputs
    (Wang et al., "Non-local Neural Networks", https://arxiv.org/abs/1711.07971).

    Computes y = softmax(theta(x) @ phi(x)^T) @ g(x), projects y back to
    `in_channels` through W, and returns the residual z = W(y) + x.
    """

    def __init__(
        self, in_channels, inter_channels=None, dimension=3, sub_sample=True, bn_layer=True
    ):
        """
        Args:
            in_channels (int): number of channels of the input tensor
            inter_channels (int or None): bottleneck width for g/theta/phi;
                defaults to in_channels // 2 (at least 1)
            dimension (int): 1, 2 or 3; selects Conv1d/2d/3d and the pooling op
            sub_sample (bool): if True, max-pool g and phi to reduce the cost
                of the attention matrix
            bn_layer (bool): if True, follow the W projection with a norm layer
        """
        super(_NonLocalBlockND, self).__init__()

        assert dimension in [1, 2, 3]

        self.dimension = dimension
        self.sub_sample = sub_sample

        self.in_channels = in_channels
        self.inter_channels = inter_channels

        if self.inter_channels is None:
            self.inter_channels = in_channels // 2
            if self.inter_channels == 0:
                self.inter_channels = 1

        if dimension == 3:
            conv_nd = nn.Conv3d
            max_pool_layer = nn.MaxPool3d(kernel_size=(1, 2, 2))
            bn = nn.GroupNorm  # used in place of nn.BatchNorm3d
        elif dimension == 2:
            conv_nd = nn.Conv2d
            max_pool_layer = nn.MaxPool2d(kernel_size=(2, 2))
            bn = nn.GroupNorm  # used in place of nn.BatchNorm2d
        else:
            conv_nd = nn.Conv1d
            max_pool_layer = nn.MaxPool1d(kernel_size=2)
            bn = nn.GroupNorm  # used in place of nn.BatchNorm1d

        # g: "value" projection (1x1 conv to the bottleneck width)
        self.g = conv_nd(
            in_channels=self.in_channels,
            out_channels=self.inter_channels,
            kernel_size=1,
            stride=1,
            padding=0,
        )

        if bn_layer:
            # NOTE(review): GroupNorm(32, in_channels) requires in_channels to
            # be divisible by 32 -- confirm for the channel counts used by callers.
            self.W = nn.Sequential(
                conv_nd(
                    in_channels=self.inter_channels,
                    out_channels=self.in_channels,
                    kernel_size=1,
                    stride=1,
                    padding=0,
                ),
                bn(32, self.in_channels),
            )
            # zero-init the norm so the block initially acts as identity (z = x)
            nn.init.constant_(self.W[1].weight, 0)
            nn.init.constant_(self.W[1].bias, 0)
        else:
            self.W = conv_nd(
                in_channels=self.inter_channels,
                out_channels=self.in_channels,
                kernel_size=1,
                stride=1,
                padding=0,
            )
            # zero-init the projection so the block initially acts as identity
            nn.init.constant_(self.W.weight, 0)
            nn.init.constant_(self.W.bias, 0)

        # theta / phi: "query" / "key" projections for the embedded-Gaussian affinity
        self.theta = conv_nd(
            in_channels=self.in_channels,
            out_channels=self.inter_channels,
            kernel_size=1,
            stride=1,
            padding=0,
        )
        self.phi = conv_nd(
            in_channels=self.in_channels,
            out_channels=self.inter_channels,
            kernel_size=1,
            stride=1,
            padding=0,
        )

        if sub_sample:
            # pool values and keys; queries keep full resolution
            self.g = nn.Sequential(self.g, max_pool_layer)
            self.phi = nn.Sequential(self.phi, max_pool_layer)

    def forward(self, x):
        """
        Args:
            x (Tensor): input of shape (b, c, ...) matching `dimension`
                (e.g. (b, c, t, h, w) for dimension=3)
        Return:
            Tensor of the same shape as `x`
        """

        batch_size = x.size(0)

        # flatten spatial dims; after permute, rows index positions
        g_x = self.g(x).view(batch_size, self.inter_channels, -1)
        g_x = g_x.permute(0, 2, 1)

        theta_x = self.theta(x).view(batch_size, self.inter_channels, -1)
        theta_x = theta_x.permute(0, 2, 1)
        phi_x = self.phi(x).view(batch_size, self.inter_channels, -1)
        # pairwise affinity between positions, normalized along the key axis
        f = torch.matmul(theta_x, phi_x)
        f_div_C = F.softmax(f, dim=-1)

        # aggregate values, restore the (b, c, ...) layout and project back
        y = torch.matmul(f_div_C, g_x)
        y = y.permute(0, 2, 1).contiguous()
        y = y.view(batch_size, self.inter_channels, *x.size()[2:])
        W_y = self.W(y)
        # residual connection
        z = W_y + x

        return z
253
+
254
+
255
class NONLocalBlock2D(_NonLocalBlockND):
    """2D specialization of the non-local block (inputs of shape (b, c, h, w))."""

    def __init__(self, in_channels, inter_channels=None, sub_sample=True, bn_layer=True):
        super().__init__(
            in_channels,
            inter_channels=inter_channels,
            dimension=2,
            sub_sample=sub_sample,
            bn_layer=bn_layer,
        )
Leffa/densepose/modeling/roi_heads/registry.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from detectron2.utils.registry import Registry
4
+
5
# Registry of DensePose head classes; implementations self-register via
# @ROI_DENSEPOSE_HEAD_REGISTRY.register().
ROI_DENSEPOSE_HEAD_REGISTRY = Registry("ROI_DENSEPOSE_HEAD")
Leffa/densepose/modeling/roi_heads/roi_head.py ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ import numpy as np
4
+ from typing import Dict, List, Optional
5
+ import fvcore.nn.weight_init as weight_init
6
+ import torch
7
+ import torch.nn as nn
8
+ from torch.nn import functional as F
9
+
10
+ from detectron2.layers import Conv2d, ShapeSpec, get_norm
11
+ from detectron2.modeling import ROI_HEADS_REGISTRY, StandardROIHeads
12
+ from detectron2.modeling.poolers import ROIPooler
13
+ from detectron2.modeling.roi_heads import select_foreground_proposals
14
+ from detectron2.structures import ImageList, Instances
15
+
16
+ from .. import (
17
+ build_densepose_data_filter,
18
+ build_densepose_embedder,
19
+ build_densepose_head,
20
+ build_densepose_losses,
21
+ build_densepose_predictor,
22
+ densepose_inference,
23
+ )
24
+
25
+
26
class Decoder(nn.Module):
    """
    A semantic segmentation head described in detail in the Panoptic Feature Pyramid Networks paper
    (https://arxiv.org/abs/1901.02446). It takes FPN features as input and merges information from
    all levels of the FPN into single output.
    """

    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec], in_features):
        """
        Args:
            cfg: configuration options (reads MODEL.ROI_DENSEPOSE_HEAD.DECODER_*)
            input_shape (dict[str, ShapeSpec]): shapes of the input feature maps
            in_features (list[str]): names of the FPN levels to merge
        """
        super(Decoder, self).__init__()

        # fmt: off
        self.in_features = in_features
        feature_strides = {k: v.stride for k, v in input_shape.items()}
        feature_channels = {k: v.channels for k, v in input_shape.items()}
        num_classes = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES
        conv_dims = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS
        self.common_stride = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE
        norm = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM
        # fmt: on

        self.scale_heads = []
        for in_feature in self.in_features:
            head_ops = []
            # number of conv(+2x upsample) stages needed to bring this level
            # from its stride down to common_stride (at least one conv)
            head_length = max(
                1, int(np.log2(feature_strides[in_feature]) - np.log2(self.common_stride))
            )
            for k in range(head_length):
                conv = Conv2d(
                    feature_channels[in_feature] if k == 0 else conv_dims,
                    conv_dims,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    bias=not norm,
                    norm=get_norm(norm, conv_dims),
                    activation=F.relu,
                )
                weight_init.c2_msra_fill(conv)
                head_ops.append(conv)
                if feature_strides[in_feature] != self.common_stride:
                    head_ops.append(
                        nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False)
                    )
            self.scale_heads.append(nn.Sequential(*head_ops))
            # a plain Python list does not register submodules -- register each
            # scale head explicitly under its feature name
            self.add_module(in_feature, self.scale_heads[-1])
        self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0)
        weight_init.c2_msra_fill(self.predictor)

    def forward(self, features: List[torch.Tensor]):
        """
        Sum the per-level scale-head outputs (all at common_stride resolution)
        and apply the 1x1 predictor.

        Args:
            features (list[Tensor]): feature maps ordered as `self.in_features`;
                assumed non-empty (otherwise `x` would be unbound)
        """
        for i, _ in enumerate(self.in_features):
            if i == 0:
                x = self.scale_heads[i](features[i])
            else:
                x = x + self.scale_heads[i](features[i])
        x = self.predictor(x)
        return x
82
+
83
+
84
@ROI_HEADS_REGISTRY.register()
class DensePoseROIHeads(StandardROIHeads):
    """
    A Standard ROIHeads which contains an addition of DensePose head.
    """

    def __init__(self, cfg, input_shape):
        super().__init__(cfg, input_shape)
        self._init_densepose_head(cfg, input_shape)

    def _init_densepose_head(self, cfg, input_shape):
        """
        Build the DensePose branch (data filter, optional decoder, pooler, head,
        predictor, losses, embedder) from config; no-op when DENSEPOSE_ON is False.
        """
        # fmt: off
        self.densepose_on = cfg.MODEL.DENSEPOSE_ON
        if not self.densepose_on:
            return
        self.densepose_data_filter = build_densepose_data_filter(cfg)
        dp_pooler_resolution = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION
        dp_pooler_sampling_ratio = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO
        dp_pooler_type = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE
        self.use_decoder = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON
        # fmt: on
        if self.use_decoder:
            # the decoder merges all FPN levels into a single map, so the pooler
            # operates at the scale of the first input feature only
            dp_pooler_scales = (1.0 / input_shape[self.in_features[0]].stride,)
        else:
            dp_pooler_scales = tuple(1.0 / input_shape[k].stride for k in self.in_features)
        # channel count taken from the first input feature
        in_channels = [input_shape[f].channels for f in self.in_features][0]

        if self.use_decoder:
            self.decoder = Decoder(cfg, input_shape, self.in_features)

        self.densepose_pooler = ROIPooler(
            output_size=dp_pooler_resolution,
            scales=dp_pooler_scales,
            sampling_ratio=dp_pooler_sampling_ratio,
            pooler_type=dp_pooler_type,
        )
        self.densepose_head = build_densepose_head(cfg, in_channels)
        self.densepose_predictor = build_densepose_predictor(
            cfg, self.densepose_head.n_out_channels
        )
        self.densepose_losses = build_densepose_losses(cfg)
        self.embedder = build_densepose_embedder(cfg)

    def _forward_densepose(self, features: Dict[str, torch.Tensor], instances: List[Instances]):
        """
        Forward logic of the densepose prediction branch.

        Args:
            features (dict[str, Tensor]): input data as a mapping from feature
                map name to tensor. Axis 0 represents the number of images `N` in
                the input data; axes 1-3 are channels, height, and width, which may
                vary between feature maps (e.g., if a feature pyramid is used).
            instances (list[Instances]): length `N` list of `Instances`. The i-th
                `Instances` contains instances for the i-th input image,
                In training, they can be the proposals.
                In inference, they can be the predicted boxes.

        Returns:
            In training, a dict of losses.
            In inference, update `instances` with new fields "densepose" and return it.
        """
        if not self.densepose_on:
            return {} if self.training else instances

        features_list = [features[f] for f in self.in_features]
        if self.training:
            proposals, _ = select_foreground_proposals(instances, self.num_classes)
            # keep only proposals (and matching features) with DensePose annotations
            features_list, proposals = self.densepose_data_filter(features_list, proposals)
            # NOTE(review): if no proposals survive filtering, this branch falls
            # through and implicitly returns None -- confirm callers tolerate that.
            if len(proposals) > 0:
                proposal_boxes = [x.proposal_boxes for x in proposals]

                if self.use_decoder:
                    features_list = [self.decoder(features_list)]

                features_dp = self.densepose_pooler(features_list, proposal_boxes)
                densepose_head_outputs = self.densepose_head(features_dp)
                densepose_predictor_outputs = self.densepose_predictor(densepose_head_outputs)
                densepose_loss_dict = self.densepose_losses(
                    proposals, densepose_predictor_outputs, embedder=self.embedder
                )
                return densepose_loss_dict
        else:
            pred_boxes = [x.pred_boxes for x in instances]

            if self.use_decoder:
                features_list = [self.decoder(features_list)]

            features_dp = self.densepose_pooler(features_list, pred_boxes)
            if len(features_dp) > 0:
                densepose_head_outputs = self.densepose_head(features_dp)
                densepose_predictor_outputs = self.densepose_predictor(densepose_head_outputs)
            else:
                # no detections -- attach empty densepose results
                densepose_predictor_outputs = None

            densepose_inference(densepose_predictor_outputs, instances)
            return instances

    def forward(
        self,
        images: ImageList,
        features: Dict[str, torch.Tensor],
        proposals: List[Instances],
        targets: Optional[List[Instances]] = None,
    ):
        instances, losses = super().forward(images, features, proposals, targets)
        del targets, images

        # densepose losses are added only during training; at inference the
        # densepose branch presumably runs via forward_with_given_boxes -- see below
        if self.training:
            losses.update(self._forward_densepose(features, instances))
        return instances, losses

    def forward_with_given_boxes(
        self, features: Dict[str, torch.Tensor], instances: List[Instances]
    ):
        """
        Use the given boxes in `instances` to produce other (non-box) per-ROI outputs.

        This is useful for downstream tasks where a box is known, but need to obtain
        other attributes (outputs of other heads).
        Test-time augmentation also uses this.

        Args:
            features: same as in `forward()`
            instances (list[Instances]): instances to predict other outputs. Expect the keys
                "pred_boxes" and "pred_classes" to exist.

        Returns:
            instances (list[Instances]):
                the same `Instances` objects, with extra
                fields such as `pred_masks` or `pred_keypoints`.
        """

        instances = super().forward_with_given_boxes(features, instances)
        instances = self._forward_densepose(features, instances)
        return instances
Leffa/densepose/modeling/roi_heads/v1convx.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ import torch
4
+ from torch import nn
5
+ from torch.nn import functional as F
6
+
7
+ from detectron2.config import CfgNode
8
+ from detectron2.layers import Conv2d
9
+
10
+ from ..utils import initialize_module_params
11
+ from .registry import ROI_DENSEPOSE_HEAD_REGISTRY
12
+
13
+
14
@ROI_DENSEPOSE_HEAD_REGISTRY.register()
class DensePoseV1ConvXHead(nn.Module):
    """
    Fully convolutional DensePose head: a stack of same-padding convolutions,
    each followed by ReLU.
    """

    def __init__(self, cfg: CfgNode, input_channels: int):
        """
        Initialize DensePose fully convolutional head.

        Args:
            cfg (CfgNode): configuration options
            input_channels (int): number of input channels
        """
        super().__init__()
        hidden_dim = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM
        kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL
        self.n_stacked_convs = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS

        n_channels = input_channels
        for idx in range(self.n_stacked_convs):
            conv = Conv2d(n_channels, hidden_dim, kernel_size, stride=1, padding=kernel_size // 2)
            self.add_module(self._get_layer_name(idx), conv)
            n_channels = hidden_dim
        self.n_out_channels = n_channels
        initialize_module_params(self)

    def forward(self, features: torch.Tensor):
        """
        Apply the stacked convolutions (each followed by ReLU) to the input.

        Args:
            features (Tensor): input features
        Result:
            A tensor of DensePose head outputs
        """
        x = features
        for idx in range(self.n_stacked_convs):
            x = F.relu(getattr(self, self._get_layer_name(idx))(x))
        return x

    def _get_layer_name(self, i: int):
        """Name of the i-th stacked conv submodule (1-based suffix)."""
        return f"body_conv_fcn{i + 1}"
Leffa/densepose/modeling/test_time_augmentation.py ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ import copy
3
+ import numpy as np
4
+ import torch
5
+ from fvcore.transforms import HFlipTransform, TransformList
6
+ from torch.nn import functional as F
7
+
8
+ from detectron2.data.transforms import RandomRotation, RotationTransform, apply_transform_gens
9
+ from detectron2.modeling.postprocessing import detector_postprocess
10
+ from detectron2.modeling.test_time_augmentation import DatasetMapperTTA, GeneralizedRCNNWithTTA
11
+
12
+ from ..converters import HFlipConverter
13
+
14
+
15
class DensePoseDatasetMapperTTA(DatasetMapperTTA):
    """
    TTA dataset mapper that extends the standard augmentations with a set of
    rotations read from TEST.AUG.ROTATION_ANGLES.
    """

    def __init__(self, cfg):
        super().__init__(cfg=cfg)
        self.angles = cfg.TEST.AUG.ROTATION_ANGLES

    def __call__(self, dataset_dict):
        """Return the base TTA variants plus one rotated variant per angle."""
        ret = super().__call__(dataset_dict=dataset_dict)
        numpy_image = dataset_dict["image"].permute(1, 2, 0).numpy()
        for angle in self.angles:
            new_image, tfms = apply_transform_gens(
                [RandomRotation(angle=angle, expand=True)], np.copy(numpy_image)
            )
            entry = copy.deepcopy(dataset_dict)
            # In DatasetMapperTTA, there is a pre_tfm transform (resize or no-op)
            # that is added at the beginning of each TransformList; reuse the one
            # from the most recent entry ('.transforms[0]').
            entry["transforms"] = TransformList(
                [ret[-1]["transforms"].transforms[0]] + tfms.transforms
            )
            entry["image"] = torch.from_numpy(
                np.ascontiguousarray(new_image.transpose(2, 0, 1))
            )
            ret.append(entry)
        return ret
36
+
37
+
38
class DensePoseGeneralizedRCNNWithTTA(GeneralizedRCNNWithTTA):
    """
    Test-time augmentation for DensePose R-CNN models: detects boxes on all
    augmented inputs, merges them, re-runs the heads on the merged boxes, maps
    DensePose outputs back to the original image frame and averages them.
    """

    def __init__(self, cfg, model, transform_data, tta_mapper=None, batch_size=1):
        """
        Args:
            cfg (CfgNode):
            model (GeneralizedRCNN): a GeneralizedRCNN to apply TTA on.
            transform_data (DensePoseTransformData): contains symmetry label
                transforms used for horizontal flip
            tta_mapper (callable): takes a dataset dict and returns a list of
                augmented versions of the dataset dict. Defaults to
                `DatasetMapperTTA(cfg)`.
            batch_size (int): batch the augmented images into this batch size for inference.
        """
        self._transform_data = transform_data.to(model.device)
        super().__init__(cfg=cfg, model=model, tta_mapper=tta_mapper, batch_size=batch_size)

    # the implementation follows closely the one from detectron2/modeling
    def _inference_one_image(self, input):
        """
        Args:
            input (dict): one dataset dict with "image" field being a CHW tensor

        Returns:
            dict: one output dict
        """
        orig_shape = (input["height"], input["width"])
        # For some reason, resize with uint8 slightly increases box AP but decreases densepose AP
        input["image"] = input["image"].to(torch.uint8)
        augmented_inputs, tfms = self._get_augmented_inputs(input)
        # Detect boxes from all augmented versions
        with self._turn_off_roi_heads(["mask_on", "keypoint_on", "densepose_on"]):
            # temporarily disable roi heads
            all_boxes, all_scores, all_classes = self._get_augmented_boxes(augmented_inputs, tfms)
        merged_instances = self._merge_detections(all_boxes, all_scores, all_classes, orig_shape)

        if self.cfg.MODEL.MASK_ON or self.cfg.MODEL.DENSEPOSE_ON:
            # Use the detected boxes to obtain new fields
            augmented_instances = self._rescale_detected_boxes(
                augmented_inputs, merged_instances, tfms
            )
            # run forward on the detected boxes
            outputs = self._batch_inference(augmented_inputs, augmented_instances)
            # Delete now useless variables to avoid being out of memory
            del augmented_inputs, augmented_instances
            # average the predictions
            if self.cfg.MODEL.MASK_ON:
                merged_instances.pred_masks = self._reduce_pred_masks(outputs, tfms)
            if self.cfg.MODEL.DENSEPOSE_ON:
                merged_instances.pred_densepose = self._reduce_pred_densepose(outputs, tfms)
            # postprocess
            merged_instances = detector_postprocess(merged_instances, *orig_shape)
            return {"instances": merged_instances}
        else:
            return {"instances": merged_instances}

    def _get_augmented_boxes(self, augmented_inputs, tfms):
        """
        Detect boxes on every augmented input and map them back to the original
        image frame; augmentations containing a rotation are skipped for boxes.
        """
        # Heavily based on detectron2/modeling/test_time_augmentation.py
        # Only difference is that RotationTransform is excluded from bbox computation
        # 1: forward with all augmented images
        outputs = self._batch_inference(augmented_inputs)
        # 2: union the results
        all_boxes = []
        all_scores = []
        all_classes = []
        for output, tfm in zip(outputs, tfms):
            # Need to inverse the transforms on boxes, to obtain results on original image
            if not any(isinstance(t, RotationTransform) for t in tfm.transforms):
                # Some transforms can't compute bbox correctly
                pred_boxes = output.pred_boxes.tensor
                original_pred_boxes = tfm.inverse().apply_box(pred_boxes.cpu().numpy())
                all_boxes.append(torch.from_numpy(original_pred_boxes).to(pred_boxes.device))
                all_scores.extend(output.scores)
                all_classes.extend(output.pred_classes)
        all_boxes = torch.cat(all_boxes, dim=0)
        return all_boxes, all_scores, all_classes

    def _reduce_pred_densepose(self, outputs, tfms):
        """
        Map each augmented DensePose prediction back to the original frame
        (undoing rotation and horizontal flip) and average them incrementally
        into outputs[0].pred_densepose.
        """
        # Should apply inverse transforms on densepose preds.
        # We assume only rotation, resize & flip are used. pred_masks is a scale-invariant
        # representation, so we handle the other ones specially
        for idx, (output, tfm) in enumerate(zip(outputs, tfms)):
            for t in tfm.transforms:
                for attr in ["coarse_segm", "fine_segm", "u", "v"]:
                    setattr(
                        output.pred_densepose,
                        attr,
                        _inverse_rotation(
                            getattr(output.pred_densepose, attr), output.pred_boxes.tensor, t
                        ),
                    )
            if any(isinstance(t, HFlipTransform) for t in tfm.transforms):
                # flip back using the symmetry label transform data
                output.pred_densepose = HFlipConverter.convert(
                    output.pred_densepose, self._transform_data
                )
            self._incremental_avg_dp(outputs[0].pred_densepose, output.pred_densepose, idx)
        return outputs[0].pred_densepose

    # incrementally computed average: u_(n + 1) = u_n + (x_(n+1) - u_n) / (n + 1).
    def _incremental_avg_dp(self, avg, new_el, idx):
        """Fold new_el into the running average `avg` (idx is 0-based count so far)."""
        for attr in ["coarse_segm", "fine_segm", "u", "v"]:
            setattr(avg, attr, (getattr(avg, attr) * idx + getattr(new_el, attr)) / (idx + 1))
            if idx:
                # Deletion of the > 0 index intermediary values to prevent GPU OOM
                setattr(new_el, attr, None)
        return avg
143
+
144
+
145
def _inverse_rotation(densepose_attrs, boxes, transform):
    """Resample densepose predictions made on a rotated image back into the
    space of the original image.

    No-op when there are no boxes or *transform* is not a RotationTransform.
    """
    if len(boxes) == 0 or not isinstance(transform, RotationTransform):
        return densepose_attrs
    boxes = boxes.int().cpu().numpy()
    # Box sizes in the rotated space vs. in the original image.
    rot_wh = boxes[:, 2:] - boxes[:, :2]
    orig_boxes = rotate_box_inverse(transform, boxes).astype(int)
    half_diff = (orig_boxes[:, 2:] - orig_boxes[:, :2] - rot_wh) // 2
    # Rotation matrix for grid_sample; the translation column is zeroed out.
    rot_mat = torch.tensor([transform.rm_image]).to(device=densepose_attrs.device).float()
    rot_mat[:, :, -1] = 0
    # To apply grid_sample we need room for both the original and the rotated
    # bbox. left_bounds / right_bounds crop the difference after rotation.
    left_bounds = np.maximum(0, -half_diff)
    for i in range(len(densepose_attrs)):
        if min(rot_wh[i]) <= 0:
            continue
        patch = densepose_attrs[[i]].clone()
        # 1. Interpolate the attribute to the size of the rotated bbox.
        patch = F.interpolate(patch, rot_wh[i].tolist()[::-1], mode="bilinear")
        # 2. Pad so there is room for the original + rotated bbox.
        patch = F.pad(patch, tuple(np.repeat(np.maximum(0, half_diff[i]), 2)))
        # 3. Rotate through an affine sampling grid.
        grid = F.affine_grid(rot_mat, size=patch.shape)
        patch = F.grid_sample(patch, grid)
        # 4. Crop back down to the size of the original bbox.
        right_bounds = patch.shape[2:][::-1] - left_bounds[i]
        patch = patch[
            :, :, left_bounds[i][1] : right_bounds[1], left_bounds[i][0] : right_bounds[0]
        ]
        if min(patch.shape) > 0:
            # Interpolate back to the stored attribute resolution.
            patch = F.interpolate(patch, densepose_attrs.shape[-2:], mode="bilinear")
            # A tiny background-class bias fills zero-padded regions.
            patch[:, 0] += 1e-10
            densepose_attrs[i] = patch
    return densepose_attrs
183
+
184
+
185
def rotate_box_inverse(rot_tfm, rotated_box):
    """Invert a rotation on N x 4 ``[x0, y0, x1, y1]`` boxes and shrink them
    back to their original size.

    A bbox grows when rotated (it must enclose the tilted box), so a plain
    inverse rotation yields a box larger than the original; this undoes both
    the rotation and the growth.
    """
    # 1. Inverse-rotate the boxes (still bigger than the originals).
    invrot_box = rot_tfm.inverse().apply_box(rotated_box)
    h = rotated_box[:, 3] - rotated_box[:, 1]
    w = rotated_box[:, 2] - rotated_box[:, 0]
    ih = invrot_box[:, 3] - invrot_box[:, 1]
    iw = invrot_box[:, 2] - invrot_box[:, 0]
    denom = 1 - 2 * rot_tfm.abs_sin**2
    assert denom != 0, "45 degrees angle can't be inverted"
    # 2. Invert the size computation done by the rotation transform to
    #    recover the original height/width.
    orig_h = (h * rot_tfm.abs_cos - w * rot_tfm.abs_sin) / denom
    orig_w = (w * rot_tfm.abs_cos - h * rot_tfm.abs_sin) / denom
    # 3. Shrink the inverse-rotated boxes symmetrically to the original size.
    shrink_w = (iw - orig_w) / 2
    shrink_h = (ih - orig_h) / 2
    invrot_box[:, 0] += shrink_w
    invrot_box[:, 1] += shrink_h
    invrot_box[:, 2] -= shrink_w
    invrot_box[:, 3] -= shrink_h
    return invrot_box
Leffa/densepose/modeling/utils.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from torch import nn
4
+
5
+
6
def initialize_module_params(module: nn.Module) -> None:
    """Initialize all parameters of *module* in place.

    Parameters whose name contains "bias" are zeroed; parameters whose name
    contains "weight" get Kaiming-normal init (fan-out mode, ReLU
    nonlinearity). Any other parameter is left untouched.
    """
    for name, param in module.named_parameters():
        if "bias" in name:
            nn.init.constant_(param, 0)
            continue
        if "weight" in name:
            nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")
Leffa/densepose/utils/__init__.py ADDED
File without changes
Leffa/densepose/utils/dbhelper.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ from typing import Any, Dict, Optional, Tuple
3
+
4
+
5
class EntrySelector:
    """Base class for entry selectors."""

    @staticmethod
    def from_string(spec: str) -> "EntrySelector":
        """Build a selector from its textual spec; "*" accepts everything."""
        return AllEntrySelector() if spec == "*" else FieldEntrySelector(spec)
15
+
16
+
17
class AllEntrySelector(EntrySelector):
    """Selector that accepts every entry unconditionally."""

    SPECIFIER = "*"

    def __call__(self, entry):
        # Accept regardless of the entry's contents.
        return True
26
+
27
+
28
class FieldEntrySelector(EntrySelector):
    """
    Selector that accepts only entries that match provided field
    specifier(s). Only a limited set of specifiers is supported for now:
        <specifiers>::=<specifier>[<comma><specifiers>]
        <specifier>::=<field_name>[<type_delim><type>]<equal><value_or_range>
        <field_name> is a valid identifier
        <type> ::= "int" | "str"
        <equal> ::= "="
        <comma> ::= ","
        <type_delim> ::= ":"
        <value_or_range> ::= <value> | <range>
        <range> ::= <value><range_delim><value>
        <range_delim> ::= "-"
        <value> is a string without spaces and special symbols
        (e.g. <comma>, <equal>, <type_delim>, <range_delim>)
    """

    _SPEC_DELIM = ","
    _TYPE_DELIM = ":"
    _RANGE_DELIM = "-"
    _EQUAL = "="
    _ERROR_PREFIX = "Invalid field selector specifier"

    class _FieldEntryValuePredicate:
        """Strict equality check on a single entry field."""

        def __init__(self, name: str, typespec: Optional[str], value: str):
            import builtins

            self.name = name
            # Resolve "int"/"str" to the builtin type; default to str.
            self.type = str if typespec is None else getattr(builtins, typespec)
            self.value = value

        def __call__(self, entry):
            return entry[self.name] == self.type(self.value)

    class _FieldEntryRangePredicate:
        """Inclusive range check on a single entry field."""

        def __init__(self, name: str, typespec: Optional[str], vmin: str, vmax: str):
            import builtins

            self.name = name
            self.type = str if typespec is None else getattr(builtins, typespec)
            self.vmin = vmin
            self.vmax = vmax

        def __call__(self, entry):
            return self.type(self.vmin) <= entry[self.name] <= self.type(self.vmax)

    def __init__(self, spec: str):
        self._predicates = self._parse_specifier_into_predicates(spec)

    def __call__(self, entry: Dict[str, Any]):
        # An entry is accepted only if every predicate accepts it.
        return all(predicate(entry) for predicate in self._predicates)

    def _parse_specifier_into_predicates(self, spec: str):
        """Parse a comma-separated specifier string into predicate objects."""
        predicates = []
        for subspec in spec.split(self._SPEC_DELIM):
            eq_idx = subspec.find(self._EQUAL)
            if eq_idx == 0:
                self._parse_error(f'"{subspec}", field name is empty!')
            if eq_idx < 0:
                self._parse_error(f'"{subspec}", should have format ' "<field>=<value_or_range>!")
            field_name, field_type = self._parse_field_name_type(subspec[:eq_idx])
            payload = subspec[eq_idx + 1 :]
            if self._is_range_spec(payload):
                vmin, vmax = self._get_range_spec(payload)
                predicate = FieldEntrySelector._FieldEntryRangePredicate(
                    field_name, field_type, vmin, vmax
                )
            else:
                predicate = FieldEntrySelector._FieldEntryValuePredicate(
                    field_name, field_type, payload
                )
            predicates.append(predicate)
        return predicates

    def _parse_field_name_type(self, field_name_with_type: str) -> Tuple[str, Optional[str]]:
        """Split "name:type" into (name, type); type is None when absent."""
        delim_idx = field_name_with_type.find(self._TYPE_DELIM)
        if delim_idx == 0:
            self._parse_error(f'"{field_name_with_type}", field name is empty!')
        if delim_idx < 0:
            return field_name_with_type, None
        return (
            field_name_with_type[:delim_idx],
            field_name_with_type[delim_idx + 1 :],
        )

    def _is_range_spec(self, field_value_or_range):
        # A range has the delimiter strictly inside the string (not first char).
        return field_value_or_range.find(self._RANGE_DELIM) > 0

    def _get_range_spec(self, field_value_or_range):
        """Split "vmin-vmax" into its two bounds; error when not a range."""
        if not self._is_range_spec(field_value_or_range):
            self._parse_error('"field_value_or_range", range of values expected!')
        delim_idx = field_value_or_range.find(self._RANGE_DELIM)
        return (
            field_value_or_range[:delim_idx],
            field_value_or_range[delim_idx + 1 :],
        )

    def _parse_error(self, msg):
        raise ValueError(f"{self._ERROR_PREFIX}: {msg}")
Leffa/densepose/utils/logger.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ import logging
3
+
4
+
5
def verbosity_to_level(verbosity) -> int:
    """Map a verbosity count to a logging level.

    None / 0 (or anything unrecognized) -> WARNING, 1 -> INFO, >= 2 -> DEBUG.
    """
    if verbosity is not None:
        if verbosity >= 2:
            return logging.DEBUG
        if verbosity == 1:
            return logging.INFO
    return logging.WARNING
Leffa/densepose/utils/transform.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ from detectron2.data import MetadataCatalog
3
+ from detectron2.utils.file_io import PathManager
4
+
5
+ from densepose import DensePoseTransformData
6
+
7
+
8
def load_for_dataset(dataset_name):
    """Load the DensePose transform data registered for *dataset_name*.

    The path comes from the dataset's metadata (densepose_transform_src)
    and is resolved to a local file before loading.
    """
    src = MetadataCatalog.get(dataset_name).densepose_transform_src
    local_fpath = PathManager.get_local_path(src)
    return DensePoseTransformData.load(local_fpath)
12
+
13
+
14
def load_from_cfg(cfg):
    """Load DensePose transform data for the first test dataset in *cfg*."""
    return load_for_dataset(cfg.DATASETS.TEST[0])
Leffa/leffa_utils/densepose_for_mask.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import glob
2
+ import os
3
+ import shutil
4
+ import time
5
+ from random import randint
6
+
7
+ import cv2
8
+ import numpy as np
9
+ import torch
10
+ from densepose import add_densepose_config
11
+ from densepose.vis.base import CompoundVisualizer
12
+ from densepose.vis.densepose_results import DensePoseResultsFineSegmentationVisualizer
13
+ from densepose.vis.extractor import CompoundExtractor, create_extractor
14
+ from detectron2.config import get_cfg
15
+ from detectron2.data.detection_utils import read_image
16
+ from detectron2.engine.defaults import DefaultPredictor
17
+ from PIL import Image
18
+
19
+
20
class DensePose:
    """
    DensePose used in this project is from Detectron2 (https://github.com/facebookresearch/detectron2).
    These codes are modified from https://github.com/facebookresearch/detectron2/tree/main/projects/DensePose.
    The checkpoint is downloaded from https://github.com/facebookresearch/detectron2/blob/main/projects/DensePose/doc/DENSEPOSE_IUV.md#ModelZoo.

    We use the model R_50_FPN_s1x with id 165712039, but other models should also work.
    The config file is downloaded from https://github.com/facebookresearch/detectron2/tree/main/projects/DensePose/configs.
    Noted that the config file should match the model checkpoint and Base-DensePose-RCNN-FPN.yaml is also needed.
    """

    def __init__(self, model_path="./checkpoints/densepose_", device="cuda"):
        self.device = device
        # Config + weights are expected inside the checkpoint directory.
        self.config_path = os.path.join(model_path, "densepose_rcnn_R_50_FPN_s1x.yaml")
        self.model_path = os.path.join(model_path, "model_final_162be9.pkl")
        # Only the fine-segmentation visualizer is used to rasterize labels.
        self.visualizations = ["dp_segm"]
        self.VISUALIZERS = {"dp_segm": DensePoseResultsFineSegmentationVisualizer}
        self.min_score = 0.8

        self.cfg = self.setup_config()
        self.predictor = DefaultPredictor(self.cfg)
        self.predictor.model.to(self.device)

    def setup_config(self):
        """Build a frozen detectron2 config with DensePose extensions applied."""
        opts = ["MODEL.ROI_HEADS.SCORE_THRESH_TEST", str(self.min_score)]
        cfg = get_cfg()
        add_densepose_config(cfg)
        cfg.merge_from_file(self.config_path)
        cfg.merge_from_list(opts)
        cfg.MODEL.WEIGHTS = self.model_path
        cfg.freeze()
        return cfg

    @staticmethod
    def _get_input_file_list(input_spec: str):
        """Expand a directory, single file, or glob pattern into file paths."""
        if os.path.isdir(input_spec):
            return [
                os.path.join(input_spec, fname)
                for fname in os.listdir(input_spec)
                if os.path.isfile(os.path.join(input_spec, fname))
            ]
        if os.path.isfile(input_spec):
            return [input_spec]
        return glob.glob(input_spec)

    def create_context(self, cfg, output_path):
        """Assemble the compound (extractor, visualizer) pair used per image."""
        visualizers = []
        extractors = []
        for vis_spec in self.visualizations:
            texture_atlas = texture_atlases_dict = None
            vis = self.VISUALIZERS[vis_spec](
                cfg=cfg,
                texture_atlas=texture_atlas,
                texture_atlases_dict=texture_atlases_dict,
                alpha=1.0,
            )
            visualizers.append(vis)
            extractors.append(create_extractor(vis))
        return {
            "extractor": CompoundExtractor(extractors),
            "visualizer": CompoundVisualizer(visualizers),
            "out_fname": output_path,
            "entry_idx": 0,
        }

    def execute_on_outputs(self, context, entry, outputs):
        """Rasterize the fine-segmentation labels of the first detection into
        a full-size grayscale image and save it to context["out_fname"]."""
        extractor = context["extractor"]

        data = extractor(outputs)

        H, W, _ = entry["image"].shape
        canvas = np.zeros((H, W), dtype=np.uint8)

        # Only the first detection's labels are pasted back into the canvas.
        data, box = data[0]
        x, y, w, h = [int(_) for _ in box[0].cpu().numpy()]
        labels = data[0].labels[None].cpu().numpy()[0]
        canvas[y : y + h, x : x + w] = labels
        Image.fromarray(canvas).save(context["out_fname"])

    def __call__(self, image_or_path, resize=512) -> Image.Image:
        """
        :param image_or_path: Path of the input image.
        :param resize: Resize the input image if its max size is larger than this value.
        :return: Dense pose image.
        """
        # Temporary workspace; file names de-duplicated via timestamp + rand.
        tmp_path = f"./densepose_/tmp/"
        if not os.path.exists(tmp_path):
            os.makedirs(tmp_path)

        image_path = os.path.join(
            tmp_path, f"{int(time.time())}-{self.device}-{randint(0, 100000)}.png"
        )
        if isinstance(image_or_path, str):
            assert image_or_path.split(".")[-1] in [
                "jpg",
                "png",
            ], "Only support jpg and png images."
            shutil.copy(image_or_path, image_path)
        elif isinstance(image_or_path, Image.Image):
            image_or_path.save(image_path)
        else:
            shutil.rmtree(tmp_path)
            raise TypeError("image_path must be str or PIL.Image.Image")

        output_path = image_path.replace(".png", "_dense.png").replace(
            ".jpg", "_dense.png"
        )
        w, h = Image.open(image_path).size

        file_list = self._get_input_file_list(image_path)
        assert len(file_list), "No input images found!"
        context = self.create_context(self.cfg, output_path)
        for file_name in file_list:
            img = read_image(file_name, format="BGR")  # predictor expects BGR image.
            # Downscale so the longest side is at most `resize`.
            if (longest := max(img.shape)) > resize:
                scale = resize / longest
                img = cv2.resize(
                    img, (int(img.shape[1] * scale), int(img.shape[0] * scale))
                )

            with torch.no_grad():
                outputs = self.predictor(img)["instances"]
            try:
                self.execute_on_outputs(
                    context, {"file_name": file_name, "image": img}, outputs
                )
            except Exception:
                # Best effort (kept from original): write an empty 1x1 mask
                # when extraction fails — presumably no person was detected.
                Image.new("L", (1, 1)).save(output_path)

        dense_gray = Image.open(output_path).convert("L")
        dense_gray = dense_gray.resize((w, h), Image.NEAREST)
        # Clean up the temporary input/output files.
        os.remove(image_path)
        os.remove(output_path)

        return dense_gray
168
+
169
if __name__ == "__main__":
    # No standalone behavior; this module is used via the DensePose class.
    pass
Leffa/leffa_utils/densepose_predictor.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+ from densepose import add_densepose_config
4
+ from densepose.vis.densepose_results import (
5
+ DensePoseResultsFineSegmentationVisualizer as Visualizer,
6
+ )
7
+ from densepose.vis.extractor import DensePoseResultExtractor
8
+ from detectron2.config import get_cfg
9
+ from detectron2.engine import DefaultPredictor
10
+
11
+
12
class DensePosePredictor(object):
    """DensePose inference helper built on detectron2's DefaultPredictor.

    Provides IUV-map rendering (``predict_iuv``) and fine-segmentation
    visualization (``predict_seg``) for a BGR image array.
    """

    def __init__(self,
                 config_path="./ckpts/densepose/densepose_rcnn_R_50_FPN_s1x.yaml",
                 weights_path="./ckpts/densepose/model_final_162be9.pkl"
                 ):
        cfg = get_cfg()
        add_densepose_config(cfg)
        cfg.merge_from_file(
            config_path)  # Use the path to the config file from densepose
        cfg.MODEL.WEIGHTS = weights_path  # Use the path to the pre-trained model weights
        cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # Adjust as needed
        self.predictor = DefaultPredictor(cfg)
        self.extractor = DensePoseResultExtractor()
        self.visualizer = Visualizer()

    def predict(self, image):
        """Run DensePose on *image* (BGR ndarray or a file path) and return
        the extracted (results, boxes) pair."""
        if isinstance(image, str):
            # Bug fix: cv2 was previously imported only inside the
            # `__main__` guard of this module, so passing a path here
            # raised NameError. Import locally instead.
            import cv2

            image = cv2.imread(image)
        with torch.no_grad():
            outputs = self.predictor(image)["instances"]
        outputs = self.extractor(outputs)
        return outputs

    def predict_iuv(self, image):
        """Return an IUV image with the same shape/dtype as *image*.

        The I (part label) channel and the UV channels (normalized to
        0-255) of the first detection are pasted into a black canvas at
        the detection's bounding box.
        """
        outputs = self.predict(image)

        img_i = outputs[0][0].labels[None, ...]
        img_uv = outputs[0][0].uv
        # Normalize UV to [0, 255] for visualization.
        img_uv = (img_uv - img_uv.min()) / (img_uv.max() - img_uv.min())
        img_uv *= 255
        img_iuv = torch.cat([img_i, img_uv], dim=0)
        img_iuv = img_iuv.permute(1, 2, 0)
        img_iuv = img_iuv.cpu().numpy()

        # Paste the per-box IUV patch back into a full-size canvas.
        position = [int(x) for x in outputs[1][0].cpu().numpy().tolist()]
        x1, y1, w, h = position
        x2 = x1 + w
        y2 = y1 + h
        image_iuv = np.zeros(image.shape, dtype=image.dtype)
        image_iuv[y1:y2, x1:x2, :] = img_iuv
        image_iuv = image_iuv[:, :, [0, 2, 1]]  # swap the last two channels

        return image_iuv

    def predict_seg(self, image):
        """Return the fine-segmentation visualization drawn on a black canvas
        with the same shape/dtype as *image*."""
        outputs = self.predict(image)

        image_seg = np.zeros(image.shape, dtype=image.dtype)
        self.visualizer.visualize(image_seg, outputs)

        return image_seg
64
+
65
+
66
if __name__ == "__main__":
    # CLI: render IUV and segmentation images next to the input file.
    import sys

    import cv2

    src_path = sys.argv[1]
    src_image = cv2.imread(src_path)
    predictor = DensePosePredictor()
    iuv_image = predictor.predict_iuv(src_image)
    seg_image = predictor.predict_seg(src_image)
    stem = ".".join(src_path.split(".")[:-1])
    cv2.imwrite(stem + "_iuv.jpg", iuv_image)
    cv2.imwrite(stem + "_seg.jpg", seg_image)
Leffa/leffa_utils/garment_agnostic_mask_predictor.py ADDED
@@ -0,0 +1,415 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Union
3
+
4
+ import cv2
5
+ import numpy as np
6
+ import torch
7
+ from diffusers.image_processor import VaeImageProcessor
8
+ from PIL import Image
9
+ from SCHP import SCHP # type: ignore
10
+
11
+ from leffa_utils.densepose_for_mask import DensePose # type: ignore
12
+
13
# DensePose fine-segmentation label indices grouped by body part.
DENSE_INDEX_MAP = {
    "background": [0],
    "torso": [1, 2],
    "right hand": [3],
    "left hand": [4],
    "right foot": [5],
    "left foot": [6],
    "right thigh": [7, 9],
    "left thigh": [8, 10],
    "right leg": [11, 13],
    "left leg": [12, 14],
    "left big arm": [15, 17],
    "right big arm": [16, 18],
    "left forearm": [19, 21],
    "right forearm": [20, 22],
    "face": [23, 24],
    "thighs": [7, 8, 9, 10],
    "legs": [11, 12, 13, 14],
    "hands": [3, 4],
    "feet": [5, 6],
    "big arms": [15, 16, 17, 18],
    "forearms": [19, 20, 21, 22],
}

# SCHP label ids for the ATR parsing model.
ATR_MAPPING = {
    "Background": 0,
    "Hat": 1,
    "Hair": 2,
    "Sunglasses": 3,
    "Upper-clothes": 4,
    "Skirt": 5,
    "Pants": 6,
    "Dress": 7,
    "Belt": 8,
    "Left-shoe": 9,
    "Right-shoe": 10,
    "Face": 11,
    "Left-leg": 12,
    "Right-leg": 13,
    "Left-arm": 14,
    "Right-arm": 15,
    "Bag": 16,
    "Scarf": 17,
}

# SCHP label ids for the LIP parsing model.
LIP_MAPPING = {
    "Background": 0,
    "Hat": 1,
    "Hair": 2,
    "Glove": 3,
    "Sunglasses": 4,
    "Upper-clothes": 5,
    "Dress": 6,
    "Coat": 7,
    "Socks": 8,
    "Pants": 9,
    "Jumpsuits": 10,
    "Scarf": 11,
    "Skirt": 12,
    "Face": 13,
    "Left-arm": 14,
    "Right-arm": 15,
    "Left-leg": 16,
    "Right-leg": 17,
    "Left-shoe": 18,
    "Right-shoe": 19,
}

# Body parts that must never be masked, per garment category.
PROTECT_BODY_PARTS = {
    "upper": ["Left-leg", "Right-leg"],
    "lower": ["Right-arm", "Left-arm", "Face"],
    "overall": [],
    "inner": ["Left-leg", "Right-leg"],
    "outer": ["Left-leg", "Right-leg"],
}

# Clothing labels (per parser) that must never be masked, per category.
PROTECT_CLOTH_PARTS = {
    "upper": {"ATR": ["Skirt", "Pants"], "LIP": ["Skirt", "Pants"]},
    "lower": {"ATR": ["Upper-clothes"], "LIP": ["Upper-clothes", "Coat"]},
    "overall": {"ATR": [], "LIP": []},
    "inner": {
        "ATR": ["Dress", "Coat", "Skirt", "Pants"],
        "LIP": ["Dress", "Coat", "Skirt", "Pants", "Jumpsuits"],
    },
    "outer": {
        "ATR": ["Dress", "Pants", "Skirt"],
        "LIP": ["Upper-clothes", "Dress", "Pants", "Skirt", "Jumpsuits"],
    },
}

# Clothing labels that should definitely be masked, per category.
MASK_CLOTH_PARTS = {
    "upper": ["Upper-clothes", "Coat", "Dress", "Jumpsuits"],
    "lower": ["Pants", "Skirt", "Dress", "Jumpsuits"],
    "overall": ["Upper-clothes", "Dress", "Pants", "Skirt", "Coat", "Jumpsuits"],
    "inner": ["Upper-clothes"],
    "outer": ["Coat"],
}

# DensePose regions that should be masked, per category.
MASK_DENSE_PARTS = {
    "upper": ["torso", "big arms", "forearms"],
    "lower": ["thighs", "legs"],
    "overall": ["torso", "thighs", "legs", "big arms", "forearms"],
    "inner": ["torso"],
    "outer": ["torso", "big arms", "forearms"],
}

# SCHP labels always protected regardless of category.
schp_public_protect_parts = [
    "Hat",
    "Hair",
    "Sunglasses",
    "Left-shoe",
    "Right-shoe",
    "Bag",
    "Glove",
    "Scarf",
]

# SCHP labels protected per category.
schp_protect_parts = {
    "upper": ["Left-leg", "Right-leg", "Skirt", "Pants", "Jumpsuits"],
    "lower": ["Left-arm", "Right-arm", "Upper-clothes", "Coat"],
    "overall": [],
    "inner": ["Left-leg", "Right-leg", "Skirt", "Pants", "Jumpsuits", "Coat"],
    "outer": ["Left-leg", "Right-leg", "Skirt", "Pants", "Jumpsuits", "Upper-clothes"],
}

# SCHP labels masked per category. (NOTE(review): "socks" is lowercase here
# and thus never matches the capitalized parser labels — kept as-is.)
schp_mask_parts = {
    "upper": ["Upper-clothes", "Dress", "Coat", "Jumpsuits"],
    "lower": ["Pants", "Skirt", "Dress", "Jumpsuits", "socks"],
    "overall": [
        "Upper-clothes",
        "Dress",
        "Pants",
        "Skirt",
        "Coat",
        "Jumpsuits",
        "socks",
    ],
    "inner": ["Upper-clothes"],
    "outer": ["Coat"],
}

# DensePose regions masked per category (duplicates MASK_DENSE_PARTS).
dense_mask_parts = {
    "upper": ["torso", "big arms", "forearms"],
    "lower": ["thighs", "legs"],
    "overall": ["torso", "thighs", "legs", "big arms", "forearms"],
    "inner": ["torso"],
    "outer": ["torso", "big arms", "forearms"],
}
+
161
+
162
+ def vis_mask(image, mask):
163
+ image = np.array(image).astype(np.uint8)
164
+ mask = np.array(mask).astype(np.uint8)
165
+ mask[mask > 127] = 255
166
+ mask[mask <= 127] = 0
167
+ mask = np.expand_dims(mask, axis=-1)
168
+ mask = np.repeat(mask, 3, axis=-1)
169
+ mask = mask / 255
170
+ return Image.fromarray((image * (1 - mask)).astype(np.uint8))
171
+
172
+
173
def part_mask_of(part: Union[str, list], parse: np.ndarray, mapping: dict):
    """Binary mask of the pixels in *parse* whose label ids belong to *part*.

    *mapping* maps part names to a label id or a list of label ids; unknown
    part names are ignored.
    """
    part_names = [part] if isinstance(part, str) else part
    mask = np.zeros_like(parse)
    for name in part_names:
        if name not in mapping:
            continue
        ids = mapping[name]
        if not isinstance(ids, list):
            ids = [ids]
        for label in ids:
            mask += parse == label
    return mask
+
187
+
188
def hull_mask(mask_area: np.ndarray):
    """Union of the filled convex hulls of all external contours in
    *mask_area* (values > 127 are treated as foreground)."""
    _, binarized = cv2.threshold(mask_area, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(
        binarized, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    out = np.zeros_like(mask_area)
    for contour in contours:
        hull = cv2.convexHull(contour)
        out = out | cv2.fillPoly(np.zeros_like(mask_area), [hull], 255)
    return out
+
199
+
200
+ class AutoMasker:
201
+ def __init__(
202
+ self,
203
+ densepose_path: str = "./ckpts/densepose",
204
+ schp_path: str = "./ckpts/schp",
205
+ device="cuda",
206
+ ):
207
+ np.random.seed(0)
208
+ torch.manual_seed(0)
209
+ torch.cuda.manual_seed(0)
210
+
211
+ self.densepose_processor = DensePose(densepose_path, device)
212
+ self.schp_processor_atr = SCHP(
213
+ ckpt_path=os.path.join(schp_path, "exp-schp-201908301523-atr.pth"),
214
+ device=device,
215
+ )
216
+ self.schp_processor_lip = SCHP(
217
+ ckpt_path=os.path.join(schp_path, "exp-schp-201908261155-lip.pth"),
218
+ device=device,
219
+ )
220
+
221
+ self.mask_processor = VaeImageProcessor(
222
+ vae_scale_factor=8,
223
+ do_normalize=False,
224
+ do_binarize=True,
225
+ do_convert_grayscale=True,
226
+ )
227
+
228
+ def process_densepose(self, image_or_path):
229
+ return self.densepose_processor(image_or_path, resize=1024)
230
+
231
+ def process_schp_lip(self, image_or_path):
232
+ return self.schp_processor_lip(image_or_path)
233
+
234
+ def process_schp_atr(self, image_or_path):
235
+ return self.schp_processor_atr(image_or_path)
236
+
237
+ def preprocess_image(self, image_or_path):
238
+ return {
239
+ "densepose": self.densepose_processor(image_or_path, resize=1024),
240
+ "schp_atr": self.schp_processor_atr(image_or_path),
241
+ "schp_lip": self.schp_processor_lip(image_or_path),
242
+ }
243
+
244
+ @staticmethod
245
+ def cloth_agnostic_mask(
246
+ densepose_mask: Image.Image,
247
+ schp_lip_mask: Image.Image,
248
+ schp_atr_mask: Image.Image,
249
+ part: str = "overall",
250
+ **kwargs,
251
+ ):
252
+ assert part in [
253
+ "upper",
254
+ "lower",
255
+ "overall",
256
+ "inner",
257
+ "outer",
258
+ ], f"part should be one of ['upper', 'lower', 'overall', 'inner', 'outer'], but got {part}"
259
+ w, h = densepose_mask.size
260
+
261
+ dilate_kernel = max(w, h) // 250
262
+ dilate_kernel = dilate_kernel if dilate_kernel % 2 == 1 else dilate_kernel + 1
263
+ dilate_kernel = np.ones((dilate_kernel, dilate_kernel), np.uint8)
264
+
265
+ kernal_size = max(w, h) // 25
266
+ kernal_size = kernal_size if kernal_size % 2 == 1 else kernal_size + 1
267
+
268
+ densepose_mask = np.array(densepose_mask)
269
+ schp_lip_mask = np.array(schp_lip_mask)
270
+ schp_atr_mask = np.array(schp_atr_mask)
271
+
272
+ # Strong Protect Area (Hands, Face, Accessory, Feet)
273
+ hands_protect_area = part_mask_of(
274
+ ["hands", "feet"], densepose_mask, DENSE_INDEX_MAP
275
+ )
276
+ hands_protect_area = cv2.dilate(hands_protect_area, dilate_kernel, iterations=1)
277
+ hands_protect_area = hands_protect_area & (
278
+ part_mask_of(
279
+ ["Left-arm", "Right-arm", "Left-leg", "Right-leg"],
280
+ schp_atr_mask,
281
+ ATR_MAPPING,
282
+ )
283
+ | part_mask_of(
284
+ ["Left-arm", "Right-arm", "Left-leg", "Right-leg"],
285
+ schp_lip_mask,
286
+ LIP_MAPPING,
287
+ )
288
+ )
289
+ face_protect_area = part_mask_of("Face", schp_lip_mask, LIP_MAPPING)
290
+
291
+ strong_protect_area = hands_protect_area | face_protect_area
292
+
293
+ # Weak Protect Area (Hair, Irrelevant Clothes, Body Parts)
294
+ body_protect_area = part_mask_of(
295
+ PROTECT_BODY_PARTS[part], schp_lip_mask, LIP_MAPPING
296
+ ) | part_mask_of(PROTECT_BODY_PARTS[part], schp_atr_mask, ATR_MAPPING)
297
+ hair_protect_area = part_mask_of(
298
+ ["Hair"], schp_lip_mask, LIP_MAPPING
299
+ ) | part_mask_of(["Hair"], schp_atr_mask, ATR_MAPPING)
300
+ cloth_protect_area = part_mask_of(
301
+ PROTECT_CLOTH_PARTS[part]["LIP"], schp_lip_mask, LIP_MAPPING
302
+ ) | part_mask_of(PROTECT_CLOTH_PARTS[part]["ATR"], schp_atr_mask, ATR_MAPPING)
303
+ accessory_protect_area = part_mask_of(
304
+ (
305
+ accessory_parts := [
306
+ "Hat",
307
+ "Glove",
308
+ "Sunglasses",
309
+ "Bag",
310
+ "Left-shoe",
311
+ "Right-shoe",
312
+ "Scarf",
313
+ "Socks",
314
+ ]
315
+ ),
316
+ schp_lip_mask,
317
+ LIP_MAPPING,
318
+ ) | part_mask_of(accessory_parts, schp_atr_mask, ATR_MAPPING)
319
+ weak_protect_area = (
320
+ body_protect_area
321
+ | cloth_protect_area
322
+ | hair_protect_area
323
+ | strong_protect_area
324
+ | accessory_protect_area
325
+ )
326
+
327
+ # Mask Area
328
+ strong_mask_area = part_mask_of(
329
+ MASK_CLOTH_PARTS[part], schp_lip_mask, LIP_MAPPING
330
+ ) | part_mask_of(MASK_CLOTH_PARTS[part], schp_atr_mask, ATR_MAPPING)
331
+ background_area = part_mask_of(
332
+ ["Background"], schp_lip_mask, LIP_MAPPING
333
+ ) & part_mask_of(["Background"], schp_atr_mask, ATR_MAPPING)
334
+ mask_dense_area = part_mask_of(
335
+ MASK_DENSE_PARTS[part], densepose_mask, DENSE_INDEX_MAP
336
+ )
337
+ mask_dense_area = cv2.resize(
338
+ mask_dense_area.astype(np.uint8),
339
+ None,
340
+ fx=0.25,
341
+ fy=0.25,
342
+ interpolation=cv2.INTER_NEAREST,
343
+ )
344
+ mask_dense_area = cv2.dilate(mask_dense_area, dilate_kernel, iterations=2)
345
+ mask_dense_area = cv2.resize(
346
+ mask_dense_area.astype(np.uint8),
347
+ None,
348
+ fx=4,
349
+ fy=4,
350
+ interpolation=cv2.INTER_NEAREST,
351
+ )
352
+
353
+ mask_area = (
354
+ np.ones_like(densepose_mask) & (~weak_protect_area) & (~background_area)
355
+ ) | mask_dense_area
356
+
357
+ mask_area = (
358
+ hull_mask(mask_area * 255) // 255
359
+ ) # Convex Hull to expand the mask area
360
+ mask_area = mask_area & (~weak_protect_area)
361
+ mask_area = cv2.GaussianBlur(mask_area * 255, (kernal_size, kernal_size), 0)
362
+ mask_area[mask_area < 25] = 0
363
+ mask_area[mask_area >= 25] = 1
364
+ mask_area = (mask_area | strong_mask_area) & (~strong_protect_area)
365
+ mask_area = cv2.dilate(mask_area, dilate_kernel, iterations=1)
366
+
367
+ return Image.fromarray(mask_area * 255)
368
+
369
+ def __call__(
370
+ self,
371
+ image: Union[str, Image.Image],
372
+ mask_type: str = "upper",
373
+ ):
374
+ assert mask_type in [
375
+ "upper",
376
+ "lower",
377
+ "overall",
378
+ "inner",
379
+ "outer",
380
+ ], f"mask_type should be one of ['upper', 'lower', 'overall', 'inner', 'outer'], but got {mask_type}"
381
+ preprocess_results = self.preprocess_image(image)
382
+ mask = self.cloth_agnostic_mask(
383
+ preprocess_results["densepose"],
384
+ preprocess_results["schp_lip"],
385
+ preprocess_results["schp_atr"],
386
+ part=mask_type,
387
+ )
388
+ return {
389
+ "mask": mask,
390
+ "densepose": preprocess_results["densepose"],
391
+ "schp_lip": preprocess_results["schp_lip"],
392
+ "schp_atr": preprocess_results["schp_atr"],
393
+ }
394
+
395
+
396
if __name__ == "__main__":
    # CLI: write the garment-agnostic mask next to the input image.
    import os
    import sys

    from PIL import Image

    masker = AutoMasker()

    src_path = sys.argv[1]
    src_image = Image.open(src_path).convert("RGB")
    result = masker(
        src_image,
        "upper",
        # "lower",
    )
    mask_image = result["mask"]
    # result["densepose"]  # densepose I map, range 0~24
    # result["schp_lip"] / result["schp_atr"]  # SCHP parsings
    mask_image.save(".".join(src_path.split(".")[:-1]) + "_mask.jpg")
Leffa/leffa_utils/utils.py ADDED
@@ -0,0 +1,379 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import cv2
3
+ import torch
4
+ import numpy as np
5
+ from numpy.linalg import lstsq
6
+ from PIL import Image, ImageDraw
7
+
8
+
9
def resize_and_center(image, target_width, target_height):
    """Fit *image* inside (target_width, target_height) and center it.

    The image is scaled uniformly (aspect ratio preserved) so it fits
    entirely within the target box, then pasted centered onto a white
    RGB canvas of exactly the target size.

    Returns:
        PIL.Image.Image of shape (target_height, target_width, 3).
    """
    arr = np.array(image)

    # Normalize to 3-channel RGB before resizing.
    if arr.shape[-1] == 4:
        arr = cv2.cvtColor(arr, cv2.COLOR_RGBA2RGB)
    elif len(arr.shape) == 2 or arr.shape[-1] == 1:
        arr = cv2.cvtColor(arr, cv2.COLOR_GRAY2RGB)

    src_h, src_w = arr.shape[:2]

    # Single uniform scale factor so the whole image fits in the box.
    scale = min(target_height / src_h, target_width / src_w)
    dst_h = int(src_h * scale)
    dst_w = int(src_w * scale)

    resized = cv2.resize(arr, (dst_w, dst_h), interpolation=cv2.INTER_CUBIC)

    # White canvas; paste the resized image centered.
    canvas = np.full((target_height, target_width, 3), 255, dtype=np.uint8)
    top = (target_height - dst_h) // 2
    left = (target_width - dst_w) // 2
    canvas[top:top + dst_h, left:left + dst_w] = resized

    return Image.fromarray(canvas)
35
+
36
+
37
def list_dir(folder_path):
    """Return every file path under *folder_path* (recursive), sorted."""
    collected = [
        os.path.join(root, name)
        for root, _, names in os.walk(folder_path)
        for name in names
    ]
    return sorted(collected)
46
+
47
+
48
# ATR-style human-parsing label indices (as produced by SCHP with the ATR
# label set).  The mask builders below select body/garment regions by these
# names instead of hard-coded magic numbers.
label_map = {
    "background": 0,
    "hat": 1,
    "hair": 2,
    "sunglasses": 3,
    "upper_clothes": 4,
    "skirt": 5,
    "pants": 6,
    "dress": 7,
    "belt": 8,
    "left_shoe": 9,
    "right_shoe": 10,
    "head": 11,
    "left_leg": 12,
    "right_leg": 13,
    "left_arm": 14,
    "right_arm": 15,
    "bag": 16,
    "scarf": 17,
    "neck": 18,
}
69
+
70
+
71
def extend_arm_mask(wrist, elbow, scale):
    """Extrapolate the wrist point along the elbow->wrist direction.

    Computes ``elbow + scale * (wrist - elbow)``; a scale > 1 extends the
    forearm segment past the original wrist (used to over-draw arm masks).
    Works on scalars or numpy point arrays.
    """
    return elbow + scale * (wrist - elbow)
74
+
75
+
76
def hole_fill(img):
    """Fill enclosed holes in a binary (0/255) uint8 mask.

    Flood-fills the exterior background from the top-left corner, inverts
    the result (leaving only the interior holes), and ORs that with the
    original mask so holes become foreground.
    """
    # Zero out a 1-pixel border so the corner seed can reach all of the
    # exterior background in one flood fill.
    img = np.pad(img[1:-1, 1:-1], pad_width=1,
                 mode='constant', constant_values=0)
    original = img.copy()
    # floodFill requires a mask 2 pixels larger than the image.
    flood_mask = np.zeros((img.shape[0] + 2, img.shape[1] + 2), dtype=np.uint8)

    cv2.floodFill(img, flood_mask, (0, 0), 255)  # paint exterior background
    holes = cv2.bitwise_not(img)                 # only interior holes remain
    return cv2.bitwise_or(original, holes)
86
+
87
+
88
def refine_mask(mask):
    """Keep only the largest connected contour of a binary mask.

    Finds all contours of *mask*, draws the one with the largest absolute
    area filled with 255 onto a black canvas, and returns that canvas
    (uint8, same shape as *mask*).  Returns an all-zero mask when no
    contour is found.

    Fixes: the original bound a local variable named ``refine_mask``,
    shadowing the function itself; renamed to ``refined``.
    """
    contours, _ = cv2.findContours(mask.astype(np.uint8),
                                   cv2.RETR_CCOMP, cv2.CHAIN_APPROX_TC89_L1)
    refined = np.zeros_like(mask).astype(np.uint8)
    if contours:
        # Oriented area is signed (negative for holes with RETR_CCOMP);
        # compare magnitudes, keep the first largest as the original did.
        areas = [abs(cv2.contourArea(c, True)) for c in contours]
        largest = areas.index(max(areas))
        cv2.drawContours(refined, contours, largest, color=255, thickness=-1)

    return refined
101
+
102
+
103
def get_agnostic_mask_hd(model_parse, keypoint, category, size=(384, 512)):
    """Build a cloth-agnostic inpainting mask (HD variant).

    Args:
        model_parse: PIL image of ATR-style parsing labels (see label_map).
        keypoint: dict with "pose_keypoints_2d" — flat list of 2D pose
            points; indices 2-7 are used as shoulders/elbows/wrists.
            NOTE(review): assumes OpenPose COCO-order keypoints scaled to a
            512-high canvas — confirm against the pose extractor.
        category: one of 'dresses', 'upper_body', 'lower_body'.
        size: (width, height) of the output mask.

    Returns:
        PIL 'L' image where 255 marks the region to inpaint.
    """
    model_type = "hd"
    ##############################
    width, height = size
    im_parse = model_parse.resize((width, height), Image.NEAREST)
    parse_array = np.array(im_parse)

    # Arm stroke width in pixels at 512px height; scaled below.
    if model_type == 'hd':
        arm_width = 60
    elif model_type == 'dc':
        arm_width = 45
    else:
        raise ValueError("model_type must be \'hd\' or \'dc\'!")

    # Head region: hat + sunglasses + head labels.
    parse_head = (parse_array == 1).astype(np.float32) + \
        (parse_array == 3).astype(np.float32) + \
        (parse_array == 11).astype(np.float32)

    # Regions that must never be inpainted (shoes, accessories).
    parser_mask_fixed = (parse_array == label_map["left_shoe"]).astype(np.float32) + \
        (parse_array == label_map["right_shoe"]).astype(np.float32) + \
        (parse_array == label_map["hat"]).astype(np.float32) + \
        (parse_array == label_map["sunglasses"]).astype(np.float32) + \
        (parse_array == label_map["bag"]).astype(np.float32)

    # Regions allowed to change; starts with the background.
    parser_mask_changeable = (
        parse_array == label_map["background"]).astype(np.float32)

    arms_left = (parse_array == 14).astype(np.float32)
    arms_right = (parse_array == 15).astype(np.float32)

    # Per-category garment selection (labels per label_map above).
    if category == 'dresses':
        parse_mask = (parse_array == 7).astype(np.float32) + \
            (parse_array == 4).astype(np.float32) + \
            (parse_array == 5).astype(np.float32) + \
            (parse_array == 6).astype(np.float32)

        parser_mask_changeable += np.logical_and(
            parse_array, np.logical_not(parser_mask_fixed))

    elif category == 'upper_body':
        parse_mask = (parse_array == 4).astype(np.float32) + \
            (parse_array == 7).astype(np.float32)
        # Lower garments are kept fixed when editing the upper body.
        parser_mask_fixed_lower_cloth = (parse_array == label_map["skirt"]).astype(np.float32) + \
            (parse_array == label_map["pants"]).astype(np.float32)
        parser_mask_fixed += parser_mask_fixed_lower_cloth
        parser_mask_changeable += np.logical_and(
            parse_array, np.logical_not(parser_mask_fixed))
    elif category == 'lower_body':
        parse_mask = (parse_array == 6).astype(np.float32) + \
            (parse_array == 12).astype(np.float32) + \
            (parse_array == 13).astype(np.float32) + \
            (parse_array == 5).astype(np.float32)
        # Upper clothes and arms stay fixed when editing the lower body.
        parser_mask_fixed += (parse_array == label_map["upper_clothes"]).astype(np.float32) + \
            (parse_array == 14).astype(np.float32) + \
            (parse_array == 15).astype(np.float32)
        parser_mask_changeable += np.logical_and(
            parse_array, np.logical_not(parser_mask_fixed))
    else:
        raise NotImplementedError

    # Load pose points (flat list -> (N, 2) array).
    pose_data = keypoint["pose_keypoints_2d"]
    pose_data = np.array(pose_data)
    pose_data = pose_data.reshape((-1, 2))

    # Draw thick arm strokes from shoulder->elbow->wrist so the whole arm
    # area is covered even where parsing misses it.
    im_arms_left = Image.new('L', (width, height))
    im_arms_right = Image.new('L', (width, height))
    arms_draw_left = ImageDraw.Draw(im_arms_left)
    arms_draw_right = ImageDraw.Draw(im_arms_right)
    if category == 'dresses' or category == 'upper_body':
        # Keypoints are assumed to live on a 512-high canvas; rescale.
        shoulder_right = np.multiply(tuple(pose_data[2][:2]), height / 512.0)
        shoulder_left = np.multiply(tuple(pose_data[5][:2]), height / 512.0)
        elbow_right = np.multiply(tuple(pose_data[3][:2]), height / 512.0)
        elbow_left = np.multiply(tuple(pose_data[6][:2]), height / 512.0)
        wrist_right = np.multiply(tuple(pose_data[4][:2]), height / 512.0)
        wrist_left = np.multiply(tuple(pose_data[7][:2]), height / 512.0)
        ARM_LINE_WIDTH = int(arm_width / 512 * height)
        # Bounding boxes for the rounded shoulder caps (drawn as arcs).
        size_left = [shoulder_left[0] - ARM_LINE_WIDTH // 2, shoulder_left[1] - ARM_LINE_WIDTH //
                     2, shoulder_left[0] + ARM_LINE_WIDTH // 2, shoulder_left[1] + ARM_LINE_WIDTH // 2]
        size_right = [shoulder_right[0] - ARM_LINE_WIDTH // 2, shoulder_right[1] - ARM_LINE_WIDTH // 2, shoulder_right[0] + ARM_LINE_WIDTH // 2,
                      shoulder_right[1] + ARM_LINE_WIDTH // 2]

        # Coordinates <= 1 are treated as "keypoint missing"; fall back to
        # the parsed arm region instead of drawing.
        if wrist_right[0] <= 1. and wrist_right[1] <= 1.:
            im_arms_right = arms_right
        else:
            # Extend past the wrist so the hand/cuff area is covered too.
            wrist_right = extend_arm_mask(wrist_right, elbow_right, 1.2)
            arms_draw_right.line(np.concatenate((shoulder_right, elbow_right, wrist_right)).astype(
                np.uint16).tolist(), 'white', ARM_LINE_WIDTH, 'curve')
            arms_draw_right.arc(size_right, 0, 360,
                                'white', ARM_LINE_WIDTH // 2)

        if wrist_left[0] <= 1. and wrist_left[1] <= 1.:
            im_arms_left = arms_left
        else:
            wrist_left = extend_arm_mask(wrist_left, elbow_left, 1.2)
            arms_draw_left.line(np.concatenate((wrist_left, elbow_left, shoulder_left)).astype(
                np.uint16).tolist(), 'white', ARM_LINE_WIDTH, 'curve')
            arms_draw_left.arc(size_left, 0, 360, 'white', ARM_LINE_WIDTH // 2)

        # Hands = parsed arm pixels NOT covered by the drawn strokes;
        # keep them fixed (not inpainted).
        hands_left = np.logical_and(np.logical_not(im_arms_left), arms_left)
        hands_right = np.logical_and(np.logical_not(im_arms_right), arms_right)
        parser_mask_fixed += hands_left + hands_right

    # Slightly shrink the fixed region before protecting the head.
    parser_mask_fixed = cv2.erode(parser_mask_fixed, np.ones(
        (5, 5), np.uint16), iterations=1)

    parser_mask_fixed = np.logical_or(parser_mask_fixed, parse_head)
    # Grow the garment mask generously so the whole item is covered.
    parse_mask = cv2.dilate(parse_mask, np.ones(
        (10, 10), np.uint16), iterations=5)
    if category == 'dresses' or category == 'upper_body':
        # Include the neck (minus the head) and the drawn arms.
        neck_mask = (parse_array == 18).astype(np.float32)
        neck_mask = cv2.dilate(neck_mask, np.ones(
            (5, 5), np.uint16), iterations=1)
        neck_mask = np.logical_and(neck_mask, np.logical_not(parse_head))
        parse_mask = np.logical_or(parse_mask, neck_mask)
        arm_mask = cv2.dilate(np.logical_or(im_arms_left, im_arms_right).astype(
            'float32'), np.ones((5, 5), np.uint16), iterations=4)
        parse_mask += np.logical_or(parse_mask, arm_mask)

    # Keep only the part of the grown mask that is allowed to change.
    parse_mask = np.logical_and(
        parser_mask_changeable, np.logical_not(parse_mask))

    # Invert: everything not preserved becomes the inpainting region.
    parse_mask_total = np.logical_or(parse_mask, parser_mask_fixed)
    inpaint_mask = 1 - parse_mask_total
    img = np.where(inpaint_mask, 255, 0)
    # Fill holes and keep only the largest connected component.
    dst = hole_fill(img.astype(np.uint8))
    dst = refine_mask(dst)
    inpaint_mask = dst / 255 * 1
    mask = Image.fromarray(inpaint_mask.astype(np.uint8) * 255)

    return mask
235
+
236
+
237
def get_agnostic_mask_dc(model_parse, keypoint, category, size=(384, 512)):
    """Build a cloth-agnostic inpainting mask (DressCode variant).

    Args:
        model_parse: PIL image of ATR-style parsing labels (see label_map);
            unlike the HD variant it is NOT resized here, so it must already
            match *size*.
        keypoint: dict with "pose_keypoints_2d" — flat 2D pose points;
            indices 2-7 are shoulders/elbows/wrists.
            NOTE(review): assumes keypoints on a 512-high canvas — confirm.
        category: one of 'dresses', 'upper_body', 'lower_body'.
        size: (width, height) of the output mask.

    Returns:
        PIL 'L' image where 255 marks the region to inpaint.
    """
    parse_array = np.array(model_parse)
    pose_data = keypoint["pose_keypoints_2d"]
    pose_data = np.array(pose_data)
    pose_data = pose_data.reshape((-1, 2))

    # Whole-person silhouette (any non-background label).
    parse_shape = (parse_array > 0).astype(np.float32)

    # Head region including hair, sunglasses and neck.
    parse_head = (parse_array == 1).astype(np.float32) + \
        (parse_array == 2).astype(np.float32) + \
        (parse_array == 3).astype(np.float32) + \
        (parse_array == 11).astype(np.float32) + \
        (parse_array == 18).astype(np.float32)

    # Regions that must never be inpainted (hair, shoes, accessories).
    parser_mask_fixed = (parse_array == label_map["hair"]).astype(np.float32) + \
        (parse_array == label_map["left_shoe"]).astype(np.float32) + \
        (parse_array == label_map["right_shoe"]).astype(np.float32) + \
        (parse_array == label_map["hat"]).astype(np.float32) + \
        (parse_array == label_map["sunglasses"]).astype(np.float32) + \
        (parse_array == label_map["scarf"]).astype(np.float32) + \
        (parse_array == label_map["bag"]).astype(np.float32)

    # Regions allowed to change; starts with the background.
    parser_mask_changeable = (
        parse_array == label_map["background"]).astype(np.float32)

    arms = (parse_array == 14).astype(np.float32) + \
        (parse_array == 15).astype(np.float32)

    # Per-category garment selection (labels per label_map above).
    # NOTE(review): label_cat is assigned in each branch but never used
    # afterwards — dead store, candidate for removal.
    if category == 'dresses':
        label_cat = 7
        parse_mask = (parse_array == 7).astype(np.float32) + \
            (parse_array == 12).astype(np.float32) + \
            (parse_array == 13).astype(np.float32)
        parser_mask_changeable += np.logical_and(
            parse_array, np.logical_not(parser_mask_fixed))

    elif category == 'upper_body':
        label_cat = 4
        parse_mask = (parse_array == 4).astype(np.float32)

        # Lower garments stay fixed when editing the upper body.
        parser_mask_fixed += (parse_array == label_map["skirt"]).astype(np.float32) + \
            (parse_array == label_map["pants"]).astype(np.float32)

        parser_mask_changeable += np.logical_and(
            parse_array, np.logical_not(parser_mask_fixed))
    elif category == 'lower_body':
        label_cat = 6
        parse_mask = (parse_array == 6).astype(np.float32) + \
            (parse_array == 12).astype(np.float32) + \
            (parse_array == 13).astype(np.float32)

        # Upper clothes and arms stay fixed when editing the lower body.
        parser_mask_fixed += (parse_array == label_map["upper_clothes"]).astype(np.float32) + \
            (parse_array == 14).astype(np.float32) + \
            (parse_array == 15).astype(np.float32)
        parser_mask_changeable += np.logical_and(
            parse_array, np.logical_not(parser_mask_fixed))

    parse_head = torch.from_numpy(parse_head)  # [0,1]
    parse_mask = torch.from_numpy(parse_mask)  # [0,1]
    parser_mask_fixed = torch.from_numpy(parser_mask_fixed)
    parser_mask_changeable = torch.from_numpy(parser_mask_changeable)

    # dilation
    # NOTE(review): parse_without_cloth is computed but never used below —
    # dead code, candidate for removal.  The torch round-trip on parse_mask
    # (from_numpy then .cpu().numpy()) is also a no-op.
    parse_without_cloth = np.logical_and(
        parse_shape, np.logical_not(parse_mask))
    parse_mask = parse_mask.cpu().numpy()

    width = size[0]
    height = size[1]

    # Draw a thick polyline across both arms so the whole arm area is
    # covered even where parsing misses it.
    im_arms = Image.new('L', (width, height))
    arms_draw = ImageDraw.Draw(im_arms)
    if category == 'dresses' or category == 'upper_body':
        # Keypoints rescaled from the assumed 512-high canvas.
        shoulder_right = tuple(np.multiply(pose_data[2, :2], height / 512.0))
        shoulder_left = tuple(np.multiply(pose_data[5, :2], height / 512.0))
        elbow_right = tuple(np.multiply(pose_data[3, :2], height / 512.0))
        elbow_left = tuple(np.multiply(pose_data[6, :2], height / 512.0))
        wrist_right = tuple(np.multiply(pose_data[4, :2], height / 512.0))
        wrist_left = tuple(np.multiply(pose_data[7, :2], height / 512.0))
        # Coordinates <= 1 mean "keypoint missing": drop the missing
        # wrist/elbow from the polyline instead of drawing to (0, 0).
        if wrist_right[0] <= 1. and wrist_right[1] <= 1.:
            if elbow_right[0] <= 1. and elbow_right[1] <= 1.:
                arms_draw.line(
                    [wrist_left, elbow_left, shoulder_left, shoulder_right], 'white', 30, 'curve')
            else:
                arms_draw.line([wrist_left, elbow_left, shoulder_left, shoulder_right, elbow_right], 'white', 30,
                               'curve')
        elif wrist_left[0] <= 1. and wrist_left[1] <= 1.:
            if elbow_left[0] <= 1. and elbow_left[1] <= 1.:
                arms_draw.line([shoulder_left, shoulder_right,
                                elbow_right, wrist_right], 'white', 30, 'curve')
            else:
                arms_draw.line([elbow_left, shoulder_left, shoulder_right, elbow_right, wrist_right], 'white', 30,
                               'curve')
        else:
            arms_draw.line([wrist_left, elbow_left, shoulder_left, shoulder_right, elbow_right, wrist_right], 'white',
                           30, 'curve')

        # Thicken the drawn arms proportionally to the output resolution.
        if height > 512:
            im_arms = cv2.dilate(np.float32(im_arms), np.ones(
                (10, 10), np.uint16), iterations=5)
        elif height > 256:
            im_arms = cv2.dilate(np.float32(im_arms), np.ones(
                (5, 5), np.uint16), iterations=5)
        # Hands = parsed arm pixels not covered by the strokes; keep fixed.
        hands = np.logical_and(np.logical_not(im_arms), arms)
        parse_mask += im_arms
        parser_mask_fixed += hands

    # delete neck
    # Cut the head mask below the shoulder line so the neck area becomes
    # editable: fit a line y = m*x + c through the two shoulder points and
    # zero out everything below it (minus a 20px margin at 512 height).
    parse_head_2 = torch.clone(parse_head)
    if category == 'dresses' or category == 'upper_body':
        points = []
        points.append(np.multiply(pose_data[2, :2], height / 512.0))
        points.append(np.multiply(pose_data[5, :2], height / 512.0))
        x_coords, y_coords = zip(*points)
        A = np.vstack([x_coords, np.ones(len(x_coords))]).T
        m, c = lstsq(A, y_coords, rcond=None)[0]
        for i in range(parse_array.shape[1]):
            y = i * m + c
            parse_head_2[int(y - 20 * (height / 512.0)):, i] = 0

    # Keep the trimmed head fixed; mark the removed neck strip editable.
    parser_mask_fixed = np.logical_or(
        parser_mask_fixed, np.array(parse_head_2, dtype=np.uint16))
    parse_mask += np.logical_or(parse_mask, np.logical_and(np.array(parse_head, dtype=np.uint16),
                                                           np.logical_not(np.array(parse_head_2, dtype=np.uint16))))

    # Grow the garment mask; kernel scales with the output resolution.
    if height > 512:
        parse_mask = cv2.dilate(parse_mask, np.ones(
            (20, 20), np.uint16), iterations=5)
    elif height > 256:
        parse_mask = cv2.dilate(parse_mask, np.ones(
            (10, 10), np.uint16), iterations=5)
    else:
        parse_mask = cv2.dilate(parse_mask, np.ones(
            (5, 5), np.uint16), iterations=5)
    # Keep only the part of the grown mask that is allowed to change.
    parse_mask = np.logical_and(
        parser_mask_changeable, np.logical_not(parse_mask))
    parse_mask_total = np.logical_or(parse_mask, parser_mask_fixed)
    # Invert: everything not preserved becomes the inpainting region.
    inpaint_mask = 1 - parse_mask_total
    img = np.where(inpaint_mask, 255, 0)
    img = hole_fill(img.astype(np.uint8))
    inpaint_mask = img / 255 * 1
    mask = Image.fromarray(inpaint_mask.astype(np.uint8) * 255)
    return mask
Leffa/preprocess/humanparsing/mhp_extension/detectron2/docker/Dockerfile ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Build image for detectron2 with CUDA 10.1 / cuDNN 7.
# NOTE(review): this base tag and the torch==1.5+cu101 pins are legacy;
# they match detectron2 of that era and may no longer be pullable/buildable
# without updating the CUDA base image and wheel index.
FROM nvidia/cuda:10.1-cudnn7-devel

ENV DEBIAN_FRONTEND noninteractive
RUN apt-get update && apt-get install -y \
	python3-opencv ca-certificates python3-dev git wget sudo \
	cmake ninja-build protobuf-compiler libprotobuf-dev && \
  rm -rf /var/lib/apt/lists/*
RUN ln -sv /usr/bin/python3 /usr/bin/python

# create a non-root user
ARG USER_ID=1000
RUN useradd -m --no-log-init --system  --uid ${USER_ID} appuser -g sudo
RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
USER appuser
WORKDIR /home/appuser

# pip installs below use --user, so put its bin dir on PATH.
ENV PATH="/home/appuser/.local/bin:${PATH}"
# NOTE(review): the unversioned get-pip.py requires a recent Python;
# for the Python 3.6 in this base a versioned URL
# (bootstrap.pypa.io/pip/3.6/get-pip.py) may be needed — confirm.
RUN wget https://bootstrap.pypa.io/get-pip.py && \
	python3 get-pip.py --user && \
	rm get-pip.py

# install dependencies
# See https://pytorch.org/ for other options if you use a different version of CUDA
RUN pip install --user tensorboard cython
RUN pip install --user torch==1.5+cu101 torchvision==0.6+cu101 -f https://download.pytorch.org/whl/torch_stable.html
RUN pip install --user 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'

RUN pip install --user 'git+https://github.com/facebookresearch/fvcore'
# install detectron2
RUN git clone https://github.com/facebookresearch/detectron2 detectron2_repo
# set FORCE_CUDA because during `docker build` cuda is not accessible
ENV FORCE_CUDA="1"
# This will by default build detectron2 for all common cuda architectures and take a lot more time,
# because inside `docker build`, there is no way to tell which architecture will be used.
ARG TORCH_CUDA_ARCH_LIST="Kepler;Kepler+Tesla;Maxwell;Maxwell+Tegra;Pascal;Volta;Turing"
ENV TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST}"

RUN pip install --user -e detectron2_repo

# Set a fixed model cache directory.
ENV FVCORE_CACHE="/tmp"
WORKDIR /home/appuser/detectron2_repo

# run detectron2 under user "appuser":
# wget http://images.cocodataset.org/val2017/000000439715.jpg -O input.jpg
# python3 demo/demo.py  \
	#--config-file configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \
	#--input input.jpg --output outputs/ \
	#--opts MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl
Leffa/preprocess/humanparsing/mhp_extension/detectron2/docker/Dockerfile-circleci ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# CI-only image: minimal environment for running detectron2 unit tests
# on CircleCI (no detectron2 build, no non-root user).
FROM nvidia/cuda:10.1-cudnn7-devel
# This dockerfile only aims to provide an environment for unittest on CircleCI

ENV DEBIAN_FRONTEND noninteractive
RUN apt-get update && apt-get install -y \
  python3-opencv ca-certificates python3-dev git wget sudo ninja-build && \
  rm -rf /var/lib/apt/lists/*

# NOTE(review): the unversioned get-pip.py requires a recent Python; a
# versioned URL may be needed for the Python shipped in this base — confirm.
RUN wget -q https://bootstrap.pypa.io/get-pip.py && \
	python3 get-pip.py && \
	rm get-pip.py

# install dependencies
# See https://pytorch.org/ for other options if you use a different version of CUDA
RUN pip install tensorboard cython
RUN pip install torch==1.5+cu101 torchvision==0.6+cu101 -f https://download.pytorch.org/whl/torch_stable.html
RUN pip install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'