Add files using upload-large-folder tool
Browse files. This view is limited to 50 files because it contains too many changes. See the raw diff.
- Leffa/3rdparty/densepose/__init__.py +20 -0
- Leffa/3rdparty/densepose/config.py +277 -0
- Leffa/3rdparty/densepose/converters/__init__.py +15 -0
- Leffa/3rdparty/densepose/converters/base.py +93 -0
- Leffa/3rdparty/densepose/converters/builtin.py +31 -0
- Leffa/3rdparty/densepose/converters/chart_output_hflip.py +71 -0
- Leffa/3rdparty/densepose/converters/chart_output_to_chart_result.py +188 -0
- Leffa/3rdparty/densepose/converters/hflip.py +34 -0
- Leffa/3rdparty/densepose/converters/segm_to_mask.py +150 -0
- Leffa/3rdparty/densepose/converters/to_chart_result.py +70 -0
- Leffa/3rdparty/densepose/converters/to_mask.py +49 -0
- Leffa/3rdparty/densepose/engine/__init__.py +3 -0
- Leffa/3rdparty/densepose/engine/trainer.py +258 -0
- Leffa/3rdparty/densepose/modeling/__init__.py +13 -0
- Leffa/3rdparty/densepose/modeling/build.py +87 -0
- Leffa/3rdparty/densepose/modeling/confidence.py +73 -0
- Leffa/3rdparty/densepose/modeling/densepose_checkpoint.py +35 -0
- Leffa/3rdparty/densepose/modeling/filter.py +94 -0
- Leffa/3rdparty/densepose/modeling/hrfpn.py +182 -0
- Leffa/3rdparty/densepose/modeling/hrnet.py +474 -0
- Leffa/3rdparty/densepose/modeling/inference.py +44 -0
- Leffa/3rdparty/densepose/modeling/losses/__init__.py +14 -0
- Leffa/3rdparty/densepose/modeling/losses/chart.py +291 -0
- Leffa/3rdparty/densepose/modeling/losses/chart_with_confidences.py +209 -0
- Leffa/3rdparty/densepose/modeling/losses/cse.py +115 -0
- Leffa/3rdparty/densepose/modeling/losses/cycle_pix2shape.py +152 -0
- Leffa/3rdparty/densepose/modeling/losses/cycle_shape2shape.py +117 -0
- Leffa/3rdparty/densepose/modeling/losses/embed.py +119 -0
- Leffa/3rdparty/densepose/modeling/losses/embed_utils.py +137 -0
- Leffa/3rdparty/densepose/modeling/losses/mask.py +125 -0
- Leffa/3rdparty/densepose/modeling/losses/mask_or_segm.py +77 -0
- Leffa/3rdparty/densepose/modeling/losses/registry.py +5 -0
- Leffa/3rdparty/densepose/modeling/losses/soft_embed.py +133 -0
- Leffa/3rdparty/densepose/modeling/losses/utils.py +443 -0
- Leffa/3rdparty/densepose/modeling/predictors/__init__.py +9 -0
- Leffa/3rdparty/densepose/modeling/predictors/chart.py +94 -0
- Leffa/3rdparty/densepose/modeling/predictors/chart_confidence.py +174 -0
- Leffa/3rdparty/densepose/modeling/predictors/chart_with_confidence.py +15 -0
- Leffa/3rdparty/densepose/modeling/predictors/cse.py +70 -0
- Leffa/3rdparty/densepose/modeling/predictors/cse_confidence.py +115 -0
- Leffa/3rdparty/densepose/modeling/predictors/cse_with_confidence.py +15 -0
- Leffa/3rdparty/densepose/modeling/predictors/registry.py +5 -0
- Leffa/3rdparty/densepose/modeling/roi_heads/__init__.py +6 -0
- Leffa/3rdparty/densepose/modeling/roi_heads/deeplab.py +263 -0
- Leffa/3rdparty/densepose/modeling/roi_heads/registry.py +5 -0
- Leffa/3rdparty/densepose/modeling/roi_heads/roi_head.py +218 -0
- Leffa/3rdparty/densepose/modeling/roi_heads/v1convx.py +64 -0
- Leffa/3rdparty/densepose/modeling/test_time_augmentation.py +207 -0
- Leffa/3rdparty/densepose/modeling/utils.py +11 -0
- Leffa/3rdparty/densepose/utils/__init__.py +0 -0
Leffa/3rdparty/densepose/__init__.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
from .data.datasets import builtin # just to register data
|
| 3 |
+
from .converters import builtin as builtin_converters # register converters
|
| 4 |
+
from .config import (
|
| 5 |
+
add_densepose_config,
|
| 6 |
+
add_densepose_head_config,
|
| 7 |
+
add_hrnet_config,
|
| 8 |
+
add_dataset_category_config,
|
| 9 |
+
add_bootstrap_config,
|
| 10 |
+
load_bootstrap_config,
|
| 11 |
+
)
|
| 12 |
+
from .structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData
|
| 13 |
+
from .evaluation import DensePoseCOCOEvaluator
|
| 14 |
+
from .modeling.roi_heads import DensePoseROIHeads
|
| 15 |
+
from .modeling.test_time_augmentation import (
|
| 16 |
+
DensePoseGeneralizedRCNNWithTTA,
|
| 17 |
+
DensePoseDatasetMapperTTA,
|
| 18 |
+
)
|
| 19 |
+
from .utils.transform import load_from_cfg
|
| 20 |
+
from .modeling.hrfpn import build_hrfpn_backbone
|
Leffa/3rdparty/densepose/config.py
ADDED
|
@@ -0,0 +1,277 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 3 |
+
# pyre-ignore-all-errors
|
| 4 |
+
|
| 5 |
+
from detectron2.config import CfgNode as CN
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def add_dataset_category_config(cfg: CN) -> None:
    """
    Add config options for category-related dataset handling:
    category whitelisting, category mapping, and class-to-mesh-name mapping.
    """
    datasets = cfg.DATASETS
    # All three nodes accept arbitrary keys supplied by the yaml config.
    datasets.CATEGORY_MAPS = CN(new_allowed=True)
    datasets.WHITELISTED_CATEGORIES = CN(new_allowed=True)
    # class to mesh mapping
    datasets.CLASS_TO_MESH_NAME_MAPPING = CN(new_allowed=True)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def add_evaluation_config(cfg: CN) -> None:
    """
    Add config options controlling DensePose evaluation.
    """
    cfg.DENSEPOSE_EVALUATION = CN()
    node = cfg.DENSEPOSE_EVALUATION
    # evaluator type, possible values:
    #  - "iou": evaluator for models that produce iou data
    #  - "cse": evaluator for models that produce cse data
    node.TYPE = "iou"
    # storage for DensePose results, possible values:
    #  - "none": no explicit storage, all results kept in the predictions
    #    dictionary; memory intensive, historically the default
    #  - "ram": per-process RAM storage, reduced to a single-process storage
    #    at later stages; less memory intensive
    #  - "file": per-process file-based storage; the least memory intensive,
    #    but may create bottlenecks on file system accesses
    node.STORAGE = "none"
    # minimum threshold for IOU values: the lower it is, the more matches
    # are produced (and the higher the AP score)
    node.MIN_IOU_THRESHOLD = 0.5
    # Non-distributed inference is slower (at inference time) but can avoid RAM OOM
    node.DISTRIBUTED_INFERENCE = True
    # evaluate mesh alignment based on vertex embeddings; only makes sense
    # in the CSE context
    node.EVALUATE_MESH_ALIGNMENT = False
    # meshes to compute mesh alignment for
    node.MESH_ALIGNMENT_MESH_NAMES = []
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def add_bootstrap_config(cfg: CN) -> None:
    """
    Add config options for bootstrap datasets and the bootstrap model.
    """
    # List of bootstrap dataset entries (plain dicts, normalized later by
    # load_bootstrap_config).
    cfg.BOOTSTRAP_DATASETS = []
    cfg.BOOTSTRAP_MODEL = CN()
    cfg.BOOTSTRAP_MODEL.WEIGHTS = ""
    cfg.BOOTSTRAP_MODEL.DEVICE = "cuda"
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def get_bootstrap_dataset_config() -> CN:
    """
    Build and return the default config node describing a single bootstrap
    dataset entry: dataset name, mixing ratio, image loader, inference
    batching, data sampler and filter settings.
    """
    node = CN()
    node.DATASET = ""
    # ratio used to mix data loaders
    node.RATIO = 0.1

    # image loader
    node.IMAGE_LOADER = CN(new_allowed=True)
    node.IMAGE_LOADER.TYPE = ""
    node.IMAGE_LOADER.BATCH_SIZE = 4
    node.IMAGE_LOADER.NUM_WORKERS = 4
    node.IMAGE_LOADER.CATEGORIES = []
    node.IMAGE_LOADER.MAX_COUNT_PER_CATEGORY = 1_000_000
    node.IMAGE_LOADER.CATEGORY_TO_CLASS_MAPPING = CN(new_allowed=True)

    # inference
    node.INFERENCE = CN()
    # batch size for model inputs
    node.INFERENCE.INPUT_BATCH_SIZE = 4
    # batch size to group model outputs
    node.INFERENCE.OUTPUT_BATCH_SIZE = 2

    # sampled data
    node.DATA_SAMPLER = CN(new_allowed=True)
    node.DATA_SAMPLER.TYPE = ""
    node.DATA_SAMPLER.USE_GROUND_TRUTH_CATEGORIES = False

    # filter
    node.FILTER = CN(new_allowed=True)
    node.FILTER.TYPE = ""
    return node
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def load_bootstrap_config(cfg: CN) -> None:
    """
    Normalize ``cfg.BOOTSTRAP_DATASETS`` entries.

    Bootstrap datasets are given as a list of plain ``dict`` objects that are
    not automatically converted into ``CfgNode``. Each entry is merged into a
    clone of the default bootstrap-dataset config so that every entry becomes
    a fully-specified ``CfgNode`` complying with the specification.
    """
    raw_entries = cfg.BOOTSTRAP_DATASETS
    if not raw_entries:
        return

    def _to_cfgnode(entry) -> CN:
        # Start from the defaults and overlay the user-provided values.
        node = get_bootstrap_dataset_config().clone()
        node.merge_from_other_cfg(CN(entry))
        return node

    cfg.BOOTSTRAP_DATASETS = [_to_cfgnode(entry) for entry in raw_entries]
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def add_densepose_head_cse_config(cfg: CN) -> None:
    """
    Add configuration options for Continuous Surface Embeddings (CSE).
    """
    cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE = CN()
    cse = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE
    # Dimensionality D of the embedding space
    cse.EMBED_SIZE = 16
    # Embedder specifications for various mesh IDs
    cse.EMBEDDERS = CN(new_allowed=True)
    # normalization coefficient for embedding distances
    cse.EMBEDDING_DIST_GAUSS_SIGMA = 0.01
    # normalization coefficient for geodesic distances
    cse.GEODESIC_DIST_GAUSS_SIGMA = 0.01
    # embedding loss weight
    cse.EMBED_LOSS_WEIGHT = 0.6
    # embedding loss name; currently supported options:
    #  - EmbeddingLoss: cross-entropy on vertex labels
    #  - SoftEmbeddingLoss: cross-entropy on vertex labels combined with a
    #    Gaussian penalty on the distance between vertices
    cse.EMBED_LOSS_NAME = "EmbeddingLoss"
    # optimizer hyperparameters
    cse.FEATURES_LR_FACTOR = 1.0
    cse.EMBEDDING_LR_FACTOR = 1.0

    # Shape-to-shape cycle consistency loss parameters
    s2s = CN({"ENABLED": False})
    # shape to shape cycle consistency loss weight
    s2s.WEIGHT = 0.025
    # norm type used for loss computation
    s2s.NORM_P = 2
    # normalization term for embedding similarity matrices
    s2s.TEMPERATURE = 0.05
    # maximum number of vertices to include into the loss:
    # if negative or zero, all vertices are considered;
    # if positive, a random subset of the given size is considered
    s2s.MAX_NUM_VERTICES = 4936
    cse.SHAPE_TO_SHAPE_CYCLE_LOSS = s2s

    # Pixel-to-shape cycle consistency loss parameters
    p2s = CN({"ENABLED": False})
    # pixel to shape cycle consistency loss weight
    p2s.WEIGHT = 0.0001
    # norm type used for loss computation
    p2s.NORM_P = 2
    # map images to all meshes and back (if false, use only gt meshes from the batch)
    p2s.USE_ALL_MESHES_NOT_GT_ONLY = False
    # randomly select at most this many pixels per instance;
    # if negative or zero, all vertices are considered
    p2s.NUM_PIXELS_TO_SAMPLE = 100
    # normalization factor for pixel-to-pixel distances
    # (higher value = smoother distribution)
    p2s.PIXEL_SIGMA = 5.0
    p2s.TEMPERATURE_PIXEL_TO_VERTEX = 0.05
    p2s.TEMPERATURE_VERTEX_TO_PIXEL = 0.05
    cse.PIX_TO_SHAPE_CYCLE_LOSS = p2s
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
def add_densepose_head_config(cfg: CN) -> None:
    """
    Add config options for the DensePose head, plus the related
    input/test-time rotation options, and the nested CSE options.
    """
    cfg.MODEL.DENSEPOSE_ON = True

    cfg.MODEL.ROI_DENSEPOSE_HEAD = CN()
    head = cfg.MODEL.ROI_DENSEPOSE_HEAD
    head.NAME = ""
    head.NUM_STACKED_CONVS = 8
    # Number of parts used for point labels
    head.NUM_PATCHES = 24
    head.DECONV_KERNEL = 4
    head.CONV_HEAD_DIM = 512
    head.CONV_HEAD_KERNEL = 3
    head.UP_SCALE = 2
    head.HEATMAP_SIZE = 112
    head.POOLER_TYPE = "ROIAlignV2"
    head.POOLER_RESOLUTION = 28
    head.POOLER_SAMPLING_RATIO = 2
    head.NUM_COARSE_SEGM_CHANNELS = 2  # 15 or 2
    # Overlap threshold for an RoI to be considered foreground (if >= FG_IOU_THRESHOLD)
    head.FG_IOU_THRESHOLD = 0.7
    # Loss weights for annotation masks (14 parts)
    head.INDEX_WEIGHTS = 5.0
    # Loss weights for surface parts (24 parts)
    head.PART_WEIGHTS = 1.0
    # Loss weights for UV regression
    head.POINT_REGRESSION_WEIGHTS = 0.01
    # Coarse segmentation is trained using instance segmentation task data
    head.COARSE_SEGM_TRAINED_BY_MASKS = False

    # Decoder options
    head.DECODER_ON = True
    head.DECODER_NUM_CLASSES = 256
    head.DECODER_CONV_DIMS = 256
    head.DECODER_NORM = ""
    head.DECODER_COMMON_STRIDE = 4

    # DeepLab head options
    head.DEEPLAB = CN()
    head.DEEPLAB.NORM = "GN"
    head.DEEPLAB.NONLOCAL_ON = 0

    # Predictor class name, must be registered in DENSEPOSE_PREDICTOR_REGISTRY.
    # Some registered predictors:
    #  - "DensePoseChartPredictor": segmentation and UV coordinates for
    #    predefined charts
    #  - "DensePoseChartWithConfidencePredictor": segmentation, UV coordinates
    #    and associated confidences for predefined charts (default)
    #  - "DensePoseEmbeddingWithConfidencePredictor": segmentation, embeddings
    #    and associated confidences for CSE
    head.PREDICTOR_NAME = "DensePoseChartWithConfidencePredictor"
    # Loss class name, must be registered in DENSEPOSE_LOSS_REGISTRY.
    # Some registered losses:
    #  - "DensePoseChartLoss": for chart-based models estimating segmentation
    #    and UV coordinates
    #  - "DensePoseChartWithConfidenceLoss": for chart-based models estimating
    #    segmentation, UV coordinates and the corresponding confidences (default)
    head.LOSS_NAME = "DensePoseChartWithConfidenceLoss"

    # Confidences
    # Enable learning UV confidences (variances) along with the actual values
    head.UV_CONFIDENCE = CN({"ENABLED": False})
    # UV confidence lower bound
    head.UV_CONFIDENCE.EPSILON = 0.01
    # Enable learning segmentation confidences (variances) along with the actual values
    head.SEGM_CONFIDENCE = CN({"ENABLED": False})
    # Segmentation confidence lower bound
    head.SEGM_CONFIDENCE.EPSILON = 0.01
    # Statistical model type for confidence learning, possible values:
    #  - "iid_iso": statistically independent identically distributed
    #    residuals with isotropic covariance
    #  - "indep_aniso": statistically independent residuals with anisotropic
    #    covariances
    head.UV_CONFIDENCE.TYPE = "iid_iso"

    # List of angles for rotation in data augmentation during training
    cfg.INPUT.ROTATION_ANGLES = [0]
    cfg.TEST.AUG.ROTATION_ANGLES = ()  # Rotation TTA

    add_densepose_head_cse_config(cfg)
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
def add_hrnet_config(cfg: CN) -> None:
    """
    Add config options for the HRNet backbone.

    Defaults correspond to HigherHRNet w32.
    """
    cfg.MODEL.HRNET = CN()
    cfg.MODEL.HRNET.STEM_INPLANES = 64

    # (num modules, num branches, blocks per branch, channels per branch)
    stage_specs = {
        "STAGE2": (1, 2, [4, 4], [32, 64]),
        "STAGE3": (4, 3, [4, 4, 4], [32, 64, 128]),
        "STAGE4": (3, 4, [4, 4, 4, 4], [32, 64, 128, 256]),
    }
    for stage_name, (num_modules, num_branches, num_blocks, num_channels) in stage_specs.items():
        stage = CN()
        stage.NUM_MODULES = num_modules
        stage.NUM_BRANCHES = num_branches
        stage.BLOCK = "BASIC"
        stage.NUM_BLOCKS = num_blocks
        stage.NUM_CHANNELS = num_channels
        stage.FUSE_METHOD = "SUM"
        setattr(cfg.MODEL.HRNET, stage_name, stage)

    cfg.MODEL.HRNET.HRFPN = CN()
    cfg.MODEL.HRNET.HRFPN.OUT_CHANNELS = 256
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
def add_densepose_config(cfg: CN) -> None:
    """
    Add all DensePose-specific options to ``cfg``.
    """
    # Applied in the original order; add_densepose_head_config also pulls in
    # the CSE sub-options.
    for add_options in (
        add_densepose_head_config,
        add_hrnet_config,
        add_bootstrap_config,
        add_dataset_category_config,
        add_evaluation_config,
    ):
        add_options(cfg)
|
Leffa/3rdparty/densepose/converters/__init__.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from .hflip import HFlipConverter
|
| 4 |
+
from .to_mask import ToMaskConverter
|
| 5 |
+
from .to_chart_result import ToChartResultConverter, ToChartResultConverterWithConfidences
|
| 6 |
+
from .segm_to_mask import (
|
| 7 |
+
predictor_output_with_fine_and_coarse_segm_to_mask,
|
| 8 |
+
predictor_output_with_coarse_segm_to_mask,
|
| 9 |
+
resample_fine_and_coarse_segm_to_bbox,
|
| 10 |
+
)
|
| 11 |
+
from .chart_output_to_chart_result import (
|
| 12 |
+
densepose_chart_predictor_output_to_result,
|
| 13 |
+
densepose_chart_predictor_output_to_result_with_confidences,
|
| 14 |
+
)
|
| 15 |
+
from .chart_output_hflip import densepose_chart_predictor_output_hflip
|
Leffa/3rdparty/densepose/converters/base.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from typing import Any, Tuple, Type
|
| 4 |
+
import torch
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class BaseConverter:
    """
    Base class for registries of conversion functions.

    A converter turns data of some source type into a particular destination
    type. Each source type registers its converter; a registration is also
    valid for all subclasses of the registered type (resolved by a recursive
    lookup through ``__bases__``).

    Subclasses are expected to provide two class attributes:
    ``registry`` (dict mapping source types to converter callables) and
    ``dst_type`` (destination type, used only in error messages).
    """

    @classmethod
    def register(cls, from_type: Type, converter: Any = None):
        """
        Register a converter for the given source type.

        Can be called directly (``register(T, fn)``) or used as a decorator
        (``@register(T)``). In both cases a decorator function is returned.

        Args:
            from_type (type): type to register the converter for; all
                instances of this type (and its subclasses) use this converter
            converter (callable): converter to register; if None, the returned
                wrapper acts as a decorator for the converter
        """
        if converter is not None:
            cls._do_register(from_type, converter)

        def wrapper(converter: Any) -> Any:
            cls._do_register(from_type, converter)
            return converter

        return wrapper

    @classmethod
    def _do_register(cls, from_type: Type, converter: Any):
        # Store/overwrite the converter for the exact type.
        cls.registry[from_type] = converter  # pyre-ignore[16]

    @classmethod
    def _lookup_converter(cls, from_type: Type) -> Any:
        """
        Recursively look up a converter for ``from_type``.

        If a converter is found on a base class, it is cached under
        ``from_type`` itself so subsequent lookups are direct hits.

        Args:
            from_type: type for which to find a converter
        Return:
            callable or None — the registered converter, or None if no
            suitable entry exists in the registry
        """
        known = cls.registry  # pyre-ignore[16]
        if from_type in known:
            return known[from_type]
        for parent in from_type.__bases__:
            found = cls._lookup_converter(parent)
            if found is not None:
                # Cache on the exact type to skip the base-class walk next time.
                cls._do_register(from_type, found)
                return found
        return None

    @classmethod
    def convert(cls, instance: Any, *args, **kwargs):
        """
        Convert ``instance`` to the destination type using a registered
        converter. Base classes are searched recursively, so derived classes
        need no explicit registration.

        Args:
            instance: source instance to convert to the destination type
        Return:
            An instance of the destination type obtained from the source instance
            Raises KeyError, if no suitable converter found
        """
        src_type = type(instance)
        found = cls._lookup_converter(src_type)
        if found is None:
            # pyre-ignore[16]
            dst_descr = "itself" if cls.dst_type is None else cls.dst_type
            raise KeyError(f"Could not find converter from {src_type} to {dst_descr}")
        return found(instance, *args, **kwargs)
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
# Box as (x0, y0, x1, y1) integer coordinates.
IntTupleBox = Tuple[int, int, int, int]


def make_int_box(box: torch.Tensor) -> IntTupleBox:
    """
    Convert a 4-element box tensor into a tuple of 4 Python ints.

    Values are truncated toward zero via ``Tensor.long()``. Raises
    ValueError if ``box`` does not contain exactly 4 elements.
    """
    x0, y0, x1, y1 = box.long().tolist()
    return x0, y0, x1, y1
|
Leffa/3rdparty/densepose/converters/builtin.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from ..structures import DensePoseChartPredictorOutput, DensePoseEmbeddingPredictorOutput
|
| 4 |
+
from . import (
|
| 5 |
+
HFlipConverter,
|
| 6 |
+
ToChartResultConverter,
|
| 7 |
+
ToChartResultConverterWithConfidences,
|
| 8 |
+
ToMaskConverter,
|
| 9 |
+
densepose_chart_predictor_output_hflip,
|
| 10 |
+
densepose_chart_predictor_output_to_result,
|
| 11 |
+
densepose_chart_predictor_output_to_result_with_confidences,
|
| 12 |
+
predictor_output_with_coarse_segm_to_mask,
|
| 13 |
+
predictor_output_with_fine_and_coarse_segm_to_mask,
|
| 14 |
+
)
|
| 15 |
+
|
| 16 |
+
# Wire the built-in converters to the DensePose predictor output types.
# Importing this module has the side effect of populating the converter
# registries; nothing defined here is meant to be referenced directly.

# Mask conversion: chart-based outputs use both fine and coarse segmentation,
# embedding (CSE) outputs only carry coarse segmentation.
ToMaskConverter.register(
    DensePoseChartPredictorOutput, predictor_output_with_fine_and_coarse_segm_to_mask
)
ToMaskConverter.register(
    DensePoseEmbeddingPredictorOutput, predictor_output_with_coarse_segm_to_mask
)

# Chart-result conversion (without confidences).
ToChartResultConverter.register(
    DensePoseChartPredictorOutput, densepose_chart_predictor_output_to_result
)

# Chart-result conversion (with confidences).
ToChartResultConverterWithConfidences.register(
    DensePoseChartPredictorOutput, densepose_chart_predictor_output_to_result_with_confidences
)

# Horizontal-flip conversion for test-time augmentation.
HFlipConverter.register(DensePoseChartPredictorOutput, densepose_chart_predictor_output_hflip)
|
Leffa/3rdparty/densepose/converters/chart_output_hflip.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
from dataclasses import fields
|
| 3 |
+
import torch
|
| 4 |
+
|
| 5 |
+
from densepose.structures import DensePoseChartPredictorOutput, DensePoseTransformData
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def densepose_chart_predictor_output_hflip(
    densepose_predictor_output: DensePoseChartPredictorOutput,
    transform_data: DensePoseTransformData,
) -> DensePoseChartPredictorOutput:
    """
    Transform a chart predictor output to account for a horizontal image flip.

    Every tensor field is flipped along its last (width) dimension, then the
    part/UV semantics are remapped via the symmetry tables in
    ``transform_data``, and a new output object of the same dataclass type is
    built from the transformed fields.

    NOTE(review): the input object itself is mutated in place (setattr /
    helper calls write into its tensors) before the new object is
    constructed — callers should not rely on the original being unchanged.
    """
    if len(densepose_predictor_output) > 0:

        PredictorOutput = type(densepose_predictor_output)
        output_dict = {}

        # Flip every tensor field along dim 3 (width).
        for field in fields(densepose_predictor_output):
            field_value = getattr(densepose_predictor_output, field.name)
            # flip tensors
            if isinstance(field_value, torch.Tensor):
                setattr(densepose_predictor_output, field.name, torch.flip(field_value, [3]))

        # Remap I/U/V channel semantics for the mirrored image.
        densepose_predictor_output = _flip_iuv_semantics_tensor(
            densepose_predictor_output, transform_data
        )
        # Remap coarse segmentation labels (no-op for binary segmentation).
        densepose_predictor_output = _flip_segm_semantics_tensor(
            densepose_predictor_output, transform_data
        )

        # Rebuild the output as a fresh instance of the same dataclass.
        for field in fields(densepose_predictor_output):
            output_dict[field.name] = getattr(densepose_predictor_output, field.name)

        return PredictorOutput(**output_dict)
    else:
        # Empty output (no detections): nothing to flip.
        return densepose_predictor_output
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def _flip_iuv_semantics_tensor(
    densepose_predictor_output: DensePoseChartPredictorOutput,
    dp_transform_data: DensePoseTransformData,
) -> DensePoseChartPredictorOutput:
    """
    Remap part-label (I) and UV channels to describe the horizontally
    flipped image, using the symmetry tables from ``dp_transform_data``.

    NOTE(review): mutates ``densepose_predictor_output`` in place (writes into
    its ``u``/``v`` tensors and rebinds ``fine_segm``/``u``/``v``) and
    returns the same object.
    """
    point_label_symmetries = dp_transform_data.point_label_symmetries
    uv_symmetries = dp_transform_data.uv_symmetries

    N, C, H, W = densepose_predictor_output.u.shape
    # Quantize U/V into 256 bins to index the precomputed symmetry lookup
    # tables; channel 0 (background) is skipped.
    u_loc = (densepose_predictor_output.u[:, 1:, :, :].clamp(0, 1) * 255).long()
    v_loc = (densepose_predictor_output.v[:, 1:, :, :].clamp(0, 1) * 255).long()
    # Per-channel part index broadcast to (N, C-1, H, W) for the table lookup.
    Iindex = torch.arange(C - 1, device=densepose_predictor_output.u.device)[
        None, :, None, None
    ].expand(N, C - 1, H, W)
    # assumes the transform tables are indexed as [part, v_bin, u_bin] —
    # TODO confirm against DensePoseTransformData
    densepose_predictor_output.u[:, 1:, :, :] = uv_symmetries["U_transforms"][Iindex, v_loc, u_loc]
    densepose_predictor_output.v[:, 1:, :, :] = uv_symmetries["V_transforms"][Iindex, v_loc, u_loc]

    # Swap the channels of left/right symmetric parts.
    for el in ["fine_segm", "u", "v"]:
        densepose_predictor_output.__dict__[el] = densepose_predictor_output.__dict__[el][
            :, point_label_symmetries, :, :
        ]
    return densepose_predictor_output
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def _flip_segm_semantics_tensor(
    densepose_predictor_output: DensePoseChartPredictorOutput, dp_transform_data
):
    """
    Permute coarse-segmentation channels of a horizontally flipped output so
    that left/right symmetric labels are swapped. Mutates and returns the
    given predictor output.
    """
    coarse = densepose_predictor_output.coarse_segm
    # A 2-channel (foreground/background) segmentation is symmetric under a
    # horizontal flip, so only multi-part segmentations need remapping.
    if coarse.shape[1] > 2:
        densepose_predictor_output.coarse_segm = coarse[
            :, dp_transform_data.mask_label_symmetries, :, :
        ]
    return densepose_predictor_output
|
Leffa/3rdparty/densepose/converters/chart_output_to_chart_result.py
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from typing import Dict
|
| 4 |
+
import torch
|
| 5 |
+
from torch.nn import functional as F
|
| 6 |
+
|
| 7 |
+
from detectron2.structures.boxes import Boxes, BoxMode
|
| 8 |
+
|
| 9 |
+
from ..structures import (
|
| 10 |
+
DensePoseChartPredictorOutput,
|
| 11 |
+
DensePoseChartResult,
|
| 12 |
+
DensePoseChartResultWithConfidences,
|
| 13 |
+
)
|
| 14 |
+
from . import resample_fine_and_coarse_segm_to_bbox
|
| 15 |
+
from .base import IntTupleBox, make_int_box
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def resample_uv_tensors_to_bbox(
    u: torch.Tensor,
    v: torch.Tensor,
    labels: torch.Tensor,
    box_xywh_abs: IntTupleBox,
) -> torch.Tensor:
    """
    Resample U and V coordinate estimates to the given bounding box.

    Args:
        u (tensor [1, C, H, W] of float): U coordinates
        v (tensor [1, C, H, W] of float): V coordinates
        labels (tensor [H, W] of long): labels obtained by resampling segmentation
            outputs for the given bounding box
        box_xywh_abs (tuple of 4 int): bounding box that corresponds to predictor outputs
    Return:
        Resampled U and V coordinates - a tensor [2, H, W] of float
    """
    x, y, w, h = box_xywh_abs
    # Guard against degenerate boxes so interpolation always has a valid target size.
    w = max(int(w), 1)
    h = max(int(h), 1)
    u_bbox = F.interpolate(u, (h, w), mode="bilinear", align_corners=False)
    v_bbox = F.interpolate(v, (h, w), mode="bilinear", align_corners=False)
    uv = torch.zeros([2, h, w], dtype=torch.float32, device=u.device)
    # For each part, copy the corresponding channel's values into the pixels
    # labeled with that part; channel/label 0 (background) stays zero.
    for part_id in range(1, u_bbox.size(1)):
        part_mask = labels == part_id
        for channel, resampled in enumerate((u_bbox, v_bbox)):
            uv[channel][part_mask] = resampled[0, part_id][part_mask]
    return uv
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def resample_uv_to_bbox(
    predictor_output: DensePoseChartPredictorOutput,
    labels: torch.Tensor,
    box_xywh_abs: IntTupleBox,
) -> torch.Tensor:
    """
    Resample U and V coordinate estimates of a predictor output to the given
    bounding box.

    Args:
        predictor_output (DensePoseChartPredictorOutput): DensePose predictor
            output to be resampled
        labels (tensor [H, W] of long): labels obtained by resampling segmentation
            outputs for the given bounding box
        box_xywh_abs (tuple of 4 int): bounding box that corresponds to predictor outputs
    Return:
        Resampled U and V coordinates - a tensor [2, H, W] of float
    """
    # Thin wrapper: unpack the UV tensors and delegate to the tensor-level routine.
    u, v = predictor_output.u, predictor_output.v
    return resample_uv_tensors_to_bbox(u, v, labels, box_xywh_abs)
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def densepose_chart_predictor_output_to_result(
    predictor_output: DensePoseChartPredictorOutput, boxes: Boxes
) -> DensePoseChartResult:
    """
    Convert densepose chart predictor outputs to results.

    Args:
        predictor_output (DensePoseChartPredictorOutput): DensePose predictor
            output to be converted to results, must contain only 1 output
        boxes (Boxes): bounding box that corresponds to the predictor output,
            must contain only 1 bounding box
    Return:
        DensePose chart-based result (DensePoseChartResult)
    """
    assert len(predictor_output) == 1 and len(boxes) == 1, (
        f"Predictor output to result conversion can operate only single outputs"
        f", got {len(predictor_output)} predictor outputs and {len(boxes)} boxes"
    )

    # Work on integer XYWH coordinates; clone so the caller's boxes stay untouched.
    boxes_xywh_abs = BoxMode.convert(boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    box_xywh = make_int_box(boxes_xywh_abs[0])

    labels = resample_fine_and_coarse_segm_to_bbox(predictor_output, box_xywh).squeeze(0)
    uv = resample_uv_to_bbox(predictor_output, labels, box_xywh)
    return DensePoseChartResult(labels=labels, uv=uv)
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def resample_confidences_to_bbox(
    predictor_output: DensePoseChartPredictorOutput,
    labels: torch.Tensor,
    box_xywh_abs: IntTupleBox,
) -> Dict[str, torch.Tensor]:
    """
    Resamples confidences for the given bounding box.

    Args:
        predictor_output (DensePoseChartPredictorOutput): DensePose predictor
            output to be resampled
        labels (tensor [H, W] of long): labels obtained by resampling segmentation
            outputs for the given bounding box
        box_xywh_abs (tuple of 4 int): bounding box that corresponds to predictor outputs
    Return:
        Resampled confidences - a dict of [H, W] tensors of float
        (entries for confidences absent from the predictor output are None)
    """
    x, y, w, h = box_xywh_abs
    # Guard against degenerate boxes so interpolation always has a valid target size.
    w = max(int(w), 1)
    h = max(int(h), 1)

    confidence_names = [
        "sigma_1",
        "sigma_2",
        "kappa_u",
        "kappa_v",
        "fine_segm_confidence",
        "coarse_segm_confidence",
    ]
    confidence_results = {key: None for key in confidence_names}
    # Only resample confidences the predictor actually produced.
    confidence_names = [
        key for key in confidence_names if getattr(predictor_output, key) is not None
    ]
    confidence_base = torch.zeros([h, w], dtype=torch.float32, device=predictor_output.u.device)
    num_parts = predictor_output.u.size(1)

    for key in confidence_names:
        resampled_confidence = F.interpolate(
            getattr(predictor_output, key),
            (h, w),
            mode="bilinear",
            align_corners=False,
        )
        # The "is this confidence part-based?" check is loop-invariant, so it is
        # hoisted out of the per-part loop (the original re-tested it for every
        # part and cloned `confidence_base` even when the clone was discarded).
        if resampled_confidence.size(1) != num_parts:
            # confidence is not part-based, fill the data with the first channel
            # (targeted for segmentation confidences that have only 1 channel)
            result = resampled_confidence[0, 0]
        else:
            # part-based confidence: assign data from the channels that
            # correspond to the labels
            result = confidence_base.clone()
            for part_id in range(1, num_parts):
                result[labels == part_id] = resampled_confidence[0, part_id][labels == part_id]
        confidence_results[key] = result

    return confidence_results  # pyre-ignore[7]
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def densepose_chart_predictor_output_to_result_with_confidences(
    predictor_output: DensePoseChartPredictorOutput, boxes: Boxes
) -> DensePoseChartResultWithConfidences:
    """
    Convert densepose chart predictor outputs (with confidences) to results.

    Args:
        predictor_output (DensePoseChartPredictorOutput): DensePose predictor
            output with confidences to be converted to results, must contain only 1 output
        boxes (Boxes): bounding box that corresponds to the predictor output,
            must contain only 1 bounding box
    Return:
        DensePose chart-based result with confidences (DensePoseChartResultWithConfidences)
    """
    assert len(predictor_output) == 1 and len(boxes) == 1, (
        f"Predictor output to result conversion can operate only single outputs"
        f", got {len(predictor_output)} predictor outputs and {len(boxes)} boxes"
    )

    # Work on integer XYWH coordinates; clone so the caller's boxes stay untouched.
    boxes_xywh_abs = BoxMode.convert(boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    box_xywh = make_int_box(boxes_xywh_abs[0])

    labels = resample_fine_and_coarse_segm_to_bbox(predictor_output, box_xywh).squeeze(0)
    uv = resample_uv_to_bbox(predictor_output, labels, box_xywh)
    confidences = resample_confidences_to_bbox(predictor_output, labels, box_xywh)
    return DensePoseChartResultWithConfidences(labels=labels, uv=uv, **confidences)
|
Leffa/3rdparty/densepose/converters/hflip.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from typing import Any
|
| 4 |
+
|
| 5 |
+
from .base import BaseConverter
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class HFlipConverter(BaseConverter):
    """
    Applies horizontal flips to DensePose predictor outputs.
    Each DensePose predictor output type has to register its conversion strategy.
    """

    registry = {}
    dst_type = None

    @classmethod
    # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
    # inconsistently.
    def convert(cls, predictor_outputs: Any, transform_data: Any, *args, **kwargs):
        """
        Performs a horizontal flip on DensePose predictor outputs.
        Lookup is recursive over base classes, so derived classes need no
        explicit registration.

        Args:
            predictor_outputs: DensePose predictor output to be flipped
            transform_data: Anything useful for the flip
        Return:
            An instance of the same type as predictor_outputs
        """
        return super().convert(predictor_outputs, transform_data, *args, **kwargs)
|
Leffa/3rdparty/densepose/converters/segm_to_mask.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from typing import Any
|
| 4 |
+
import torch
|
| 5 |
+
from torch.nn import functional as F
|
| 6 |
+
|
| 7 |
+
from detectron2.structures import BitMasks, Boxes, BoxMode
|
| 8 |
+
|
| 9 |
+
from .base import IntTupleBox, make_int_box
|
| 10 |
+
from .to_mask import ImageSizeType
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def resample_coarse_segm_tensor_to_bbox(coarse_segm: torch.Tensor, box_xywh_abs: IntTupleBox):
    """
    Resample a coarse segmentation tensor to the given bounding box and derive
    labels for each pixel of the bounding box.

    Args:
        coarse_segm: float tensor of shape [1, K, Hout, Wout]
        box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
            corner coordinates, width (W) and height (H)
    Return:
        Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
    """
    x, y, w, h = box_xywh_abs
    # Degenerate boxes are clamped to 1x1 so interpolation stays valid.
    w, h = max(int(w), 1), max(int(h), 1)
    resized_scores = F.interpolate(coarse_segm, (h, w), mode="bilinear", align_corners=False)
    return resized_scores.argmax(dim=1)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def resample_fine_and_coarse_segm_tensors_to_bbox(
    fine_segm: torch.Tensor, coarse_segm: torch.Tensor, box_xywh_abs: IntTupleBox
):
    """
    Resample fine and coarse segmentation tensors to the given bounding box
    and derive labels for each pixel of the bounding box.

    Args:
        fine_segm: float tensor of shape [1, C, Hout, Wout]
        coarse_segm: float tensor of shape [1, K, Hout, Wout]
        box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
            corner coordinates, width (W) and height (H)
    Return:
        Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
    """
    x, y, w, h = box_xywh_abs
    # Degenerate boxes are clamped to 1x1 so interpolation stays valid.
    w = max(int(w), 1)
    h = max(int(h), 1)

    def _resize_and_label(scores: torch.Tensor) -> torch.Tensor:
        # Bilinearly resample the score maps to box size, then take per-pixel argmax.
        return F.interpolate(scores, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)

    coarse_labels = _resize_and_label(coarse_segm)
    fine_labels = _resize_and_label(fine_segm)
    # Zero out fine labels wherever the coarse segmentation predicts background.
    return fine_labels * (coarse_labels > 0).long()
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def resample_fine_and_coarse_segm_to_bbox(predictor_output: Any, box_xywh_abs: IntTupleBox):
    """
    Resample fine and coarse segmentation outputs from a predictor to the given
    bounding box and derive labels for each pixel of the bounding box.

    Args:
        predictor_output: DensePose predictor output that contains segmentation
            results to be resampled
        box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
            corner coordinates, width (W) and height (H)
    Return:
        Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
    """
    # Thin wrapper: unpack the segmentation tensors and delegate.
    fine_segm = predictor_output.fine_segm
    coarse_segm = predictor_output.coarse_segm
    return resample_fine_and_coarse_segm_tensors_to_bbox(fine_segm, coarse_segm, box_xywh_abs)
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def predictor_output_with_coarse_segm_to_mask(
    predictor_output: Any, boxes: Boxes, image_size_hw: ImageSizeType
) -> BitMasks:
    """
    Convert predictor output with coarse segmentation to a mask.
    Assumes that predictor output has the following attributes:
     - coarse_segm (tensor of size [N, D, H, W]): coarse segmentation
       unnormalized scores for N instances; D is the number of coarse
       segmentation labels, H and W is the resolution of the estimate

    Args:
        predictor_output: DensePose predictor output to be converted to mask
        boxes (Boxes): bounding boxes that correspond to the DensePose
            predictor outputs
        image_size_hw (tuple [int, int]): image height Himg and width Wimg
    Return:
        BitMasks that contain a bool tensor of size [N, Himg, Wimg] with
        a mask of the size of the image for each instance
    """
    H, W = image_size_hw
    # Clone before conversion so the caller's boxes stay untouched.
    boxes_xywh_abs = BoxMode.convert(boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    num_instances = len(boxes_xywh_abs)
    masks = torch.zeros((num_instances, H, W), dtype=torch.bool, device=boxes.tensor.device)
    for i in range(num_instances):
        box_xywh = make_int_box(boxes_xywh_abs[i])
        box_mask = resample_coarse_segm_tensor_to_bbox(predictor_output[i].coarse_segm, box_xywh)
        x, y, w, h = box_xywh
        masks[i, y : y + h, x : x + w] = box_mask
    return BitMasks(masks)
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
def predictor_output_with_fine_and_coarse_segm_to_mask(
    predictor_output: Any, boxes: Boxes, image_size_hw: ImageSizeType
) -> BitMasks:
    """
    Convert predictor output with coarse and fine segmentation to a mask.
    Assumes that predictor output has the following attributes:
     - coarse_segm (tensor of size [N, D, H, W]): coarse segmentation
       unnormalized scores for N instances; D is the number of coarse
       segmentation labels, H and W is the resolution of the estimate
     - fine_segm (tensor of size [N, C, H, W]): fine segmentation
       unnormalized scores for N instances; C is the number of fine
       segmentation labels, H and W is the resolution of the estimate

    Args:
        predictor_output: DensePose predictor output to be converted to mask
        boxes (Boxes): bounding boxes that correspond to the DensePose
            predictor outputs
        image_size_hw (tuple [int, int]): image height Himg and width Wimg
    Return:
        BitMasks that contain a bool tensor of size [N, Himg, Wimg] with
        a mask of the size of the image for each instance
    """
    H, W = image_size_hw
    # Clone before conversion so the caller's boxes stay untouched.
    boxes_xywh_abs = BoxMode.convert(boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    num_instances = len(boxes_xywh_abs)
    masks = torch.zeros((num_instances, H, W), dtype=torch.bool, device=boxes.tensor.device)
    for i in range(num_instances):
        box_xywh = make_int_box(boxes_xywh_abs[i])
        labels_i = resample_fine_and_coarse_segm_to_bbox(predictor_output[i], box_xywh)
        x, y, w, h = box_xywh
        # A pixel belongs to the instance mask iff its combined label is foreground.
        masks[i, y : y + h, x : x + w] = labels_i > 0
    return BitMasks(masks)
|
Leffa/3rdparty/densepose/converters/to_chart_result.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from typing import Any
|
| 4 |
+
|
| 5 |
+
from detectron2.structures import Boxes
|
| 6 |
+
|
| 7 |
+
from ..structures import DensePoseChartResult, DensePoseChartResultWithConfidences
|
| 8 |
+
from .base import BaseConverter
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class ToChartResultConverter(BaseConverter):
    """
    Converts various DensePose predictor outputs to DensePose chart-based results.
    Each DensePose predictor output type has to register its conversion strategy.
    """

    registry = {}
    dst_type = DensePoseChartResult

    @classmethod
    # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
    # inconsistently.
    def convert(cls, predictor_outputs: Any, boxes: Boxes, *args, **kwargs) -> DensePoseChartResult:
        """
        Convert DensePose predictor outputs to DensePoseResult using a
        registered converter. Lookup is recursive over base classes, so derived
        classes need no explicit registration.

        Args:
            predictor_outputs: DensePose predictor output to be converted
            boxes (Boxes): bounding boxes that correspond to the DensePose
                predictor outputs
        Return:
            An instance of DensePoseResult. If no suitable converter was found, raises KeyError
        """
        return super().convert(predictor_outputs, boxes, *args, **kwargs)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
class ToChartResultConverterWithConfidences(BaseConverter):
    """
    Converts various DensePose predictor outputs to DensePose chart-based
    results with confidences. Each DensePose predictor output type has to
    register its conversion strategy.
    """

    registry = {}
    dst_type = DensePoseChartResultWithConfidences

    @classmethod
    # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
    # inconsistently.
    def convert(
        cls, predictor_outputs: Any, boxes: Boxes, *args, **kwargs
    ) -> DensePoseChartResultWithConfidences:
        """
        Convert DensePose predictor outputs to DensePoseResult with confidences
        using a registered converter. Lookup is recursive over base classes, so
        derived classes need no explicit registration.

        Args:
            predictor_outputs: DensePose predictor output with confidences
                to be converted
            boxes (Boxes): bounding boxes that correspond to the DensePose
                predictor outputs
        Return:
            An instance of DensePoseResult. If no suitable converter was found, raises KeyError
        """
        return super().convert(predictor_outputs, boxes, *args, **kwargs)
|
Leffa/3rdparty/densepose/converters/to_mask.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from typing import Any, Tuple
|
| 4 |
+
|
| 5 |
+
from detectron2.structures import BitMasks, Boxes
|
| 6 |
+
|
| 7 |
+
from .base import BaseConverter
|
| 8 |
+
|
| 9 |
+
ImageSizeType = Tuple[int, int]
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class ToMaskConverter(BaseConverter):
    """
    Converts various DensePose predictor outputs to masks in bit mask format
    (see `BitMasks`). Each DensePose predictor output type has to register
    its conversion strategy.
    """

    registry = {}
    dst_type = BitMasks

    @classmethod
    # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
    # inconsistently.
    def convert(
        cls,
        densepose_predictor_outputs: Any,
        boxes: Boxes,
        image_size_hw: ImageSizeType,
        *args,
        **kwargs
    ) -> BitMasks:
        """
        Convert DensePose predictor outputs to BitMasks using a registered
        converter. Lookup is recursive over base classes, so derived classes
        need no explicit registration.

        Args:
            densepose_predictor_outputs: DensePose predictor output to be
                converted to BitMasks
            boxes (Boxes): bounding boxes that correspond to the DensePose
                predictor outputs
            image_size_hw (tuple [int, int]): image height and width
        Return:
            An instance of `BitMasks`. If no suitable converter was found, raises KeyError
        """
        return super().convert(
            densepose_predictor_outputs, boxes, image_size_hw, *args, **kwargs
        )
|
Leffa/3rdparty/densepose/engine/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from .trainer import Trainer
|
Leffa/3rdparty/densepose/engine/trainer.py
ADDED
|
@@ -0,0 +1,258 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
| 2 |
+
|
| 3 |
+
import logging
|
| 4 |
+
import os
|
| 5 |
+
from collections import OrderedDict
|
| 6 |
+
from typing import List, Optional, Union
|
| 7 |
+
import torch
|
| 8 |
+
from torch import nn
|
| 9 |
+
|
| 10 |
+
from detectron2.checkpoint import DetectionCheckpointer
|
| 11 |
+
from detectron2.config import CfgNode
|
| 12 |
+
from detectron2.engine import DefaultTrainer
|
| 13 |
+
from detectron2.evaluation import (
|
| 14 |
+
DatasetEvaluator,
|
| 15 |
+
DatasetEvaluators,
|
| 16 |
+
inference_on_dataset,
|
| 17 |
+
print_csv_format,
|
| 18 |
+
)
|
| 19 |
+
from detectron2.solver.build import get_default_optimizer_params, maybe_add_gradient_clipping
|
| 20 |
+
from detectron2.utils import comm
|
| 21 |
+
from detectron2.utils.events import EventWriter, get_event_storage
|
| 22 |
+
|
| 23 |
+
from densepose import DensePoseDatasetMapperTTA, DensePoseGeneralizedRCNNWithTTA, load_from_cfg
|
| 24 |
+
from densepose.data import (
|
| 25 |
+
DatasetMapper,
|
| 26 |
+
build_combined_loader,
|
| 27 |
+
build_detection_test_loader,
|
| 28 |
+
build_detection_train_loader,
|
| 29 |
+
build_inference_based_loaders,
|
| 30 |
+
has_inference_based_loaders,
|
| 31 |
+
)
|
| 32 |
+
from densepose.evaluation.d2_evaluator_adapter import Detectron2COCOEvaluatorAdapter
|
| 33 |
+
from densepose.evaluation.evaluator import DensePoseCOCOEvaluator, build_densepose_evaluator_storage
|
| 34 |
+
from densepose.modeling.cse import Embedder
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
class SampleCountingLoader:
    """
    Wraps a data loader and, for every batch it yields, records the number of
    instances per source dataset into the event storage (`batch/<dataset>`).
    """

    def __init__(self, loader):
        self.loader = loader

    def __iter__(self):
        storage = get_event_storage()
        for batch in self.loader:
            # Tally instances per dataset within this batch.
            counts = {}
            for data in batch:
                dataset_name = data["dataset"]
                counts[dataset_name] = counts.get(dataset_name, 0) + len(data["instances"])
            for dataset_name, num_inst in counts.items():
                storage.put_scalar(f"batch/{dataset_name}", num_inst)
            yield batch
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
class SampleCountMetricPrinter(EventWriter):
    """
    Event writer that logs rolling averages (window of 20) of the
    `batch/<dataset>` sample-count scalars recorded by SampleCountingLoader.
    """

    def __init__(self):
        self.logger = logging.getLogger(__name__)

    def write(self):
        storage = get_event_storage()
        batch_stats_strs = [
            f"{key} {buf.avg(20)}"
            for key, buf in storage.histories().items()
            if key.startswith("batch/")
        ]
        self.logger.info(", ".join(batch_stats_strs))
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
class Trainer(DefaultTrainer):
|
| 75 |
+
@classmethod
|
| 76 |
+
def extract_embedder_from_model(cls, model: nn.Module) -> Optional[Embedder]:
|
| 77 |
+
if isinstance(model, nn.parallel.DistributedDataParallel):
|
| 78 |
+
model = model.module
|
| 79 |
+
if hasattr(model, "roi_heads") and hasattr(model.roi_heads, "embedder"):
|
| 80 |
+
return model.roi_heads.embedder
|
| 81 |
+
return None
|
| 82 |
+
|
| 83 |
+
# TODO: the only reason to copy the base class code here is to pass the embedder from
|
| 84 |
+
# the model to the evaluator; that should be refactored to avoid unnecessary copy-pasting
|
| 85 |
+
@classmethod
|
| 86 |
+
def test(
|
| 87 |
+
cls,
|
| 88 |
+
cfg: CfgNode,
|
| 89 |
+
model: nn.Module,
|
| 90 |
+
evaluators: Optional[Union[DatasetEvaluator, List[DatasetEvaluator]]] = None,
|
| 91 |
+
):
|
| 92 |
+
"""
|
| 93 |
+
Args:
|
| 94 |
+
cfg (CfgNode):
|
| 95 |
+
model (nn.Module):
|
| 96 |
+
evaluators (DatasetEvaluator, list[DatasetEvaluator] or None): if None, will call
|
| 97 |
+
:meth:`build_evaluator`. Otherwise, must have the same length as
|
| 98 |
+
``cfg.DATASETS.TEST``.
|
| 99 |
+
|
| 100 |
+
Returns:
|
| 101 |
+
dict: a dict of result metrics
|
| 102 |
+
"""
|
| 103 |
+
logger = logging.getLogger(__name__)
|
| 104 |
+
if isinstance(evaluators, DatasetEvaluator):
|
| 105 |
+
evaluators = [evaluators]
|
| 106 |
+
if evaluators is not None:
|
| 107 |
+
assert len(cfg.DATASETS.TEST) == len(evaluators), "{} != {}".format(
|
| 108 |
+
len(cfg.DATASETS.TEST), len(evaluators)
|
| 109 |
+
)
|
| 110 |
+
|
| 111 |
+
results = OrderedDict()
|
| 112 |
+
for idx, dataset_name in enumerate(cfg.DATASETS.TEST):
|
| 113 |
+
data_loader = cls.build_test_loader(cfg, dataset_name)
|
| 114 |
+
# When evaluators are passed in as arguments,
|
| 115 |
+
# implicitly assume that evaluators can be created before data_loader.
|
| 116 |
+
if evaluators is not None:
|
| 117 |
+
evaluator = evaluators[idx]
|
| 118 |
+
else:
|
| 119 |
+
try:
|
| 120 |
+
embedder = cls.extract_embedder_from_model(model)
|
| 121 |
+
evaluator = cls.build_evaluator(cfg, dataset_name, embedder=embedder)
|
| 122 |
+
except NotImplementedError:
|
| 123 |
+
logger.warn(
|
| 124 |
+
"No evaluator found. Use `DefaultTrainer.test(evaluators=)`, "
|
| 125 |
+
"or implement its `build_evaluator` method."
|
| 126 |
+
)
|
| 127 |
+
results[dataset_name] = {}
|
| 128 |
+
continue
|
| 129 |
+
if cfg.DENSEPOSE_EVALUATION.DISTRIBUTED_INFERENCE or comm.is_main_process():
|
| 130 |
+
results_i = inference_on_dataset(model, data_loader, evaluator)
|
| 131 |
+
else:
|
| 132 |
+
results_i = {}
|
| 133 |
+
results[dataset_name] = results_i
|
| 134 |
+
if comm.is_main_process():
|
| 135 |
+
assert isinstance(
|
| 136 |
+
results_i, dict
|
| 137 |
+
), "Evaluator must return a dict on the main process. Got {} instead.".format(
|
| 138 |
+
results_i
|
| 139 |
+
)
|
| 140 |
+
logger.info("Evaluation results for {} in csv format:".format(dataset_name))
|
| 141 |
+
print_csv_format(results_i)
|
| 142 |
+
|
| 143 |
+
if len(results) == 1:
|
| 144 |
+
results = list(results.values())[0]
|
| 145 |
+
return results
|
| 146 |
+
|
| 147 |
+
@classmethod
|
| 148 |
+
def build_evaluator(
|
| 149 |
+
cls,
|
| 150 |
+
cfg: CfgNode,
|
| 151 |
+
dataset_name: str,
|
| 152 |
+
output_folder: Optional[str] = None,
|
| 153 |
+
embedder: Optional[Embedder] = None,
|
| 154 |
+
) -> DatasetEvaluators:
|
| 155 |
+
if output_folder is None:
|
| 156 |
+
output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
|
| 157 |
+
evaluators = []
|
| 158 |
+
distributed = cfg.DENSEPOSE_EVALUATION.DISTRIBUTED_INFERENCE
|
| 159 |
+
# Note: we currently use COCO evaluator for both COCO and LVIS datasets
|
| 160 |
+
# to have compatible metrics. LVIS bbox evaluator could also be used
|
| 161 |
+
# with an adapter to properly handle filtered / mapped categories
|
| 162 |
+
# evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type
|
| 163 |
+
# if evaluator_type == "coco":
|
| 164 |
+
# evaluators.append(COCOEvaluator(dataset_name, output_dir=output_folder))
|
| 165 |
+
# elif evaluator_type == "lvis":
|
| 166 |
+
# evaluators.append(LVISEvaluator(dataset_name, output_dir=output_folder))
|
| 167 |
+
evaluators.append(
|
| 168 |
+
Detectron2COCOEvaluatorAdapter(
|
| 169 |
+
dataset_name, output_dir=output_folder, distributed=distributed
|
| 170 |
+
)
|
| 171 |
+
)
|
| 172 |
+
if cfg.MODEL.DENSEPOSE_ON:
|
| 173 |
+
storage = build_densepose_evaluator_storage(cfg, output_folder)
|
| 174 |
+
evaluators.append(
|
| 175 |
+
DensePoseCOCOEvaluator(
|
| 176 |
+
dataset_name,
|
| 177 |
+
distributed,
|
| 178 |
+
output_folder,
|
| 179 |
+
evaluator_type=cfg.DENSEPOSE_EVALUATION.TYPE,
|
| 180 |
+
min_iou_threshold=cfg.DENSEPOSE_EVALUATION.MIN_IOU_THRESHOLD,
|
| 181 |
+
storage=storage,
|
| 182 |
+
embedder=embedder,
|
| 183 |
+
should_evaluate_mesh_alignment=cfg.DENSEPOSE_EVALUATION.EVALUATE_MESH_ALIGNMENT,
|
| 184 |
+
mesh_alignment_mesh_names=cfg.DENSEPOSE_EVALUATION.MESH_ALIGNMENT_MESH_NAMES,
|
| 185 |
+
)
|
| 186 |
+
)
|
| 187 |
+
return DatasetEvaluators(evaluators)
|
| 188 |
+
|
| 189 |
+
@classmethod
|
| 190 |
+
def build_optimizer(cls, cfg: CfgNode, model: nn.Module):
|
| 191 |
+
params = get_default_optimizer_params(
|
| 192 |
+
model,
|
| 193 |
+
base_lr=cfg.SOLVER.BASE_LR,
|
| 194 |
+
weight_decay_norm=cfg.SOLVER.WEIGHT_DECAY_NORM,
|
| 195 |
+
bias_lr_factor=cfg.SOLVER.BIAS_LR_FACTOR,
|
| 196 |
+
weight_decay_bias=cfg.SOLVER.WEIGHT_DECAY_BIAS,
|
| 197 |
+
overrides={
|
| 198 |
+
"features": {
|
| 199 |
+
"lr": cfg.SOLVER.BASE_LR * cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.FEATURES_LR_FACTOR,
|
| 200 |
+
},
|
| 201 |
+
"embeddings": {
|
| 202 |
+
"lr": cfg.SOLVER.BASE_LR * cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDING_LR_FACTOR,
|
| 203 |
+
},
|
| 204 |
+
},
|
| 205 |
+
)
|
| 206 |
+
optimizer = torch.optim.SGD(
|
| 207 |
+
params,
|
| 208 |
+
cfg.SOLVER.BASE_LR,
|
| 209 |
+
momentum=cfg.SOLVER.MOMENTUM,
|
| 210 |
+
nesterov=cfg.SOLVER.NESTEROV,
|
| 211 |
+
weight_decay=cfg.SOLVER.WEIGHT_DECAY,
|
| 212 |
+
)
|
| 213 |
+
# pyre-fixme[6]: For 2nd param expected `Type[Optimizer]` but got `SGD`.
|
| 214 |
+
return maybe_add_gradient_clipping(cfg, optimizer)
|
| 215 |
+
|
| 216 |
+
@classmethod
|
| 217 |
+
def build_test_loader(cls, cfg: CfgNode, dataset_name):
|
| 218 |
+
return build_detection_test_loader(cfg, dataset_name, mapper=DatasetMapper(cfg, False))
|
| 219 |
+
|
| 220 |
+
    @classmethod
    def build_train_loader(cls, cfg: CfgNode):
        """
        Create the training data loader.

        When no inference-based (bootstrap) loaders are configured, this is the
        standard detection train loader. Otherwise a separate bootstrap model is
        built and its predictions feed additional training data sources, which
        are combined with the base loader according to configured ratios.
        """
        data_loader = build_detection_train_loader(cfg, mapper=DatasetMapper(cfg, True))
        if not has_inference_based_loaders(cfg):
            return data_loader
        # Build the bootstrap model whose inference output supplies extra
        # training data; its weights load independently of the trained model.
        model = cls.build_model(cfg)
        model.to(cfg.BOOTSTRAP_MODEL.DEVICE)
        DetectionCheckpointer(model).resume_or_load(cfg.BOOTSTRAP_MODEL.WEIGHTS, resume=False)
        inference_based_loaders, ratios = build_inference_based_loaders(cfg, model)
        # The base loader always participates with ratio 1.0.
        loaders = [data_loader] + inference_based_loaders
        ratios = [1.0] + ratios
        combined_data_loader = build_combined_loader(cfg, loaders, ratios)
        # Wrap to track how many samples are drawn from each source.
        sample_counting_loader = SampleCountingLoader(combined_data_loader)
        return sample_counting_loader
|
| 234 |
+
|
| 235 |
+
def build_writers(self):
|
| 236 |
+
writers = super().build_writers()
|
| 237 |
+
writers.append(SampleCountMetricPrinter())
|
| 238 |
+
return writers
|
| 239 |
+
|
| 240 |
+
@classmethod
|
| 241 |
+
def test_with_TTA(cls, cfg: CfgNode, model):
|
| 242 |
+
logger = logging.getLogger("detectron2.trainer")
|
| 243 |
+
# In the end of training, run an evaluation with TTA
|
| 244 |
+
# Only support some R-CNN models.
|
| 245 |
+
logger.info("Running inference with test-time augmentation ...")
|
| 246 |
+
transform_data = load_from_cfg(cfg)
|
| 247 |
+
model = DensePoseGeneralizedRCNNWithTTA(
|
| 248 |
+
cfg, model, transform_data, DensePoseDatasetMapperTTA(cfg)
|
| 249 |
+
)
|
| 250 |
+
evaluators = [
|
| 251 |
+
cls.build_evaluator(
|
| 252 |
+
cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA")
|
| 253 |
+
)
|
| 254 |
+
for name in cfg.DATASETS.TEST
|
| 255 |
+
]
|
| 256 |
+
res = cls.test(cfg, model, evaluators) # pyre-ignore[6]
|
| 257 |
+
res = OrderedDict({k + "_TTA": v for k, v in res.items()})
|
| 258 |
+
return res
|
Leffa/3rdparty/densepose/modeling/__init__.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from .confidence import DensePoseConfidenceModelConfig, DensePoseUVConfidenceType
|
| 4 |
+
from .filter import DensePoseDataFilter
|
| 5 |
+
from .inference import densepose_inference
|
| 6 |
+
from .utils import initialize_module_params
|
| 7 |
+
from .build import (
|
| 8 |
+
build_densepose_data_filter,
|
| 9 |
+
build_densepose_embedder,
|
| 10 |
+
build_densepose_head,
|
| 11 |
+
build_densepose_losses,
|
| 12 |
+
build_densepose_predictor,
|
| 13 |
+
)
|
Leffa/3rdparty/densepose/modeling/build.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from typing import Optional
|
| 4 |
+
from torch import nn
|
| 5 |
+
|
| 6 |
+
from detectron2.config import CfgNode
|
| 7 |
+
|
| 8 |
+
from .cse.embedder import Embedder
|
| 9 |
+
from .filter import DensePoseDataFilter
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def build_densepose_predictor(cfg: CfgNode, input_channels: int):
    """
    Create an instance of DensePose predictor based on configuration options.

    Args:
        cfg (CfgNode): configuration options
        input_channels (int): input tensor size along the channel dimension
    Return:
        An instance of DensePose predictor
    """
    # Imported lazily to avoid a circular import at module load time.
    from .predictors import DENSEPOSE_PREDICTOR_REGISTRY

    predictor_cls = DENSEPOSE_PREDICTOR_REGISTRY.get(
        cfg.MODEL.ROI_DENSEPOSE_HEAD.PREDICTOR_NAME
    )
    return predictor_cls(cfg, input_channels)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def build_densepose_data_filter(cfg: CfgNode):
    """
    Build DensePose data filter which selects data for training

    Args:
        cfg (CfgNode): configuration options

    Return:
        Callable: list(Tensor), list(Instances) -> list(Tensor), list(Instances)
        An instance of DensePose filter, which takes feature tensors and proposals
        as an input and returns filtered features and proposals
    """
    return DensePoseDataFilter(cfg)
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def build_densepose_head(cfg: CfgNode, input_channels: int):
    """
    Build DensePose head based on configuration options.

    Args:
        cfg (CfgNode): configuration options
        input_channels (int): input tensor size along the channel dimension
    Return:
        An instance of DensePose head
    """
    # Imported lazily to avoid a circular import at module load time.
    from .roi_heads.registry import ROI_DENSEPOSE_HEAD_REGISTRY

    head_cls = ROI_DENSEPOSE_HEAD_REGISTRY.get(cfg.MODEL.ROI_DENSEPOSE_HEAD.NAME)
    return head_cls(cfg, input_channels)
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def build_densepose_losses(cfg: CfgNode):
    """
    Build DensePose loss based on configuration options.

    Args:
        cfg (CfgNode): configuration options
    Return:
        An instance of DensePose loss
    """
    # Imported lazily to avoid a circular import at module load time.
    from .losses import DENSEPOSE_LOSS_REGISTRY

    loss_cls = DENSEPOSE_LOSS_REGISTRY.get(cfg.MODEL.ROI_DENSEPOSE_HEAD.LOSS_NAME)
    return loss_cls(cfg)
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def build_densepose_embedder(cfg: CfgNode) -> Optional[nn.Module]:
    """
    Build embedder used to embed mesh vertices into an embedding space.
    Embedder contains sub-embedders, one for each mesh ID.

    Args:
        cfg (cfgNode): configuration options
    Return:
        Embedding module, or None when no CSE embedders are configured
    """
    # An empty EMBEDDERS mapping means CSE is not in use.
    if not cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDERS:
        return None
    return Embedder(cfg)
|
Leffa/3rdparty/densepose/modeling/confidence.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
from enum import Enum
|
| 5 |
+
|
| 6 |
+
from detectron2.config import CfgNode
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class DensePoseUVConfidenceType(Enum):
    """
    Statistical model used when learning confidence for UV coordinates.

    Possible values:
     - "iid_iso": statistically independent identically distributed residuals
       with anisotropic covariance
     - "indep_aniso": statistically independent residuals with anisotropic
       covariances
    For details, see:
    N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning
    Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019
    """

    IID_ISO = "iid_iso"
    INDEP_ANISO = "indep_aniso"
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
@dataclass
class DensePoseUVConfidenceConfig:
    """
    Configuration options for confidence on UV data
    """

    # whether UV confidence estimation is enabled
    enabled: bool = False
    # lower bound on UV confidences
    epsilon: float = 0.01
    # statistical model used for the UV confidence estimate
    # (see DensePoseUVConfidenceType)
    type: DensePoseUVConfidenceType = DensePoseUVConfidenceType.IID_ISO
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
@dataclass
class DensePoseSegmConfidenceConfig:
    """
    Configuration options for confidence on segmentation
    """

    # whether segmentation confidence estimation is enabled
    enabled: bool = False
    # lower bound on confidence values
    epsilon: float = 0.01
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
@dataclass
class DensePoseConfidenceModelConfig:
    """
    Configuration options for confidence models
    """

    # confidence for U and V values
    uv_confidence: DensePoseUVConfidenceConfig
    # segmentation confidence
    segm_confidence: DensePoseSegmConfidenceConfig

    @staticmethod
    def from_cfg(cfg: CfgNode) -> "DensePoseConfidenceModelConfig":
        """Read confidence settings from the ROI_DENSEPOSE_HEAD config node."""
        uv_node = cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE
        segm_node = cfg.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE
        uv = DensePoseUVConfidenceConfig(
            enabled=uv_node.ENABLED,
            epsilon=uv_node.EPSILON,
            type=DensePoseUVConfidenceType(uv_node.TYPE),
        )
        segm = DensePoseSegmConfidenceConfig(
            enabled=segm_node.ENABLED,
            epsilon=segm_node.EPSILON,
        )
        return DensePoseConfidenceModelConfig(uv_confidence=uv, segm_confidence=segm)
|
Leffa/3rdparty/densepose/modeling/densepose_checkpoint.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
from collections import OrderedDict
|
| 3 |
+
|
| 4 |
+
from detectron2.checkpoint import DetectionCheckpointer
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def _rename_HRNet_weights(weights):
|
| 8 |
+
# We detect and rename HRNet weights for DensePose. 1956 and 1716 are values that are
|
| 9 |
+
# common to all HRNet pretrained weights, and should be enough to accurately identify them
|
| 10 |
+
if (
|
| 11 |
+
len(weights["model"].keys()) == 1956
|
| 12 |
+
and len([k for k in weights["model"].keys() if k.startswith("stage")]) == 1716
|
| 13 |
+
):
|
| 14 |
+
hrnet_weights = OrderedDict()
|
| 15 |
+
for k in weights["model"].keys():
|
| 16 |
+
hrnet_weights["backbone.bottom_up." + str(k)] = weights["model"][k]
|
| 17 |
+
return {"model": hrnet_weights}
|
| 18 |
+
else:
|
| 19 |
+
return weights
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class DensePoseCheckpointer(DetectionCheckpointer):
    """
    Same as :class:`DetectionCheckpointer`, but is able to handle HRNet weights
    """

    def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables):
        super().__init__(model, save_dir, save_to_disk=save_to_disk, **checkpointables)

    def _load_file(self, filename: str) -> object:
        """Load a checkpoint file, re-rooting HRNet keys when detected."""
        raw_weights = super()._load_file(filename)
        return _rename_HRNet_weights(raw_weights)
|
Leffa/3rdparty/densepose/modeling/filter.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from typing import List
|
| 4 |
+
import torch
|
| 5 |
+
|
| 6 |
+
from detectron2.config import CfgNode
|
| 7 |
+
from detectron2.structures import Instances
|
| 8 |
+
from detectron2.structures.boxes import matched_pairwise_iou
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class DensePoseDataFilter:
    """
    Selects training proposals usable for DensePose supervision: proposals
    must match their GT box with sufficiently high IoU and must carry
    DensePose (or, optionally, mask) annotations.
    """

    def __init__(self, cfg: CfgNode):
        # minimum IoU between a proposal and its GT box for the proposal
        # to be used in DensePose training
        self.iou_threshold = cfg.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD
        # if True, proposals that only have mask annotations (no DensePose)
        # are also kept, to train the coarse segmentation head from masks
        self.keep_masks = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS

    @torch.no_grad()
    def __call__(self, features: List[torch.Tensor], proposals_with_targets: List[Instances]):
        """
        Filters proposals with targets to keep only the ones relevant for
        DensePose training

        Args:
            features (list[Tensor]): input data as a list of features,
                each feature is a tensor. Axis 0 represents the number of
                images `N` in the input data; axes 1-3 are channels,
                height, and width, which may vary between features
                (e.g., if a feature pyramid is used).
            proposals_with_targets (list[Instances]): length `N` list of
                `Instances`. The i-th `Instances` contains instances
                (proposals, GT) for the i-th input image,
        Returns:
            list[Tensor]: filtered features (currently returned unchanged)
            list[Instances]: filtered proposals
        """
        proposals_filtered = []
        # TODO: the commented out code was supposed to correctly deal with situations
        # where no valid DensePose GT is available for certain images. The corresponding
        # image features were sliced and proposals were filtered. This led to performance
        # deterioration, both in terms of runtime and in terms of evaluation results.
        #
        # feature_mask = torch.ones(
        #    len(proposals_with_targets),
        #    dtype=torch.bool,
        #    device=features[0].device if len(features) > 0 else torch.device("cpu"),
        # )
        for i, proposals_per_image in enumerate(proposals_with_targets):
            # skip images with neither DensePose nor (usable) mask annotations
            if not proposals_per_image.has("gt_densepose") and (
                not proposals_per_image.has("gt_masks") or not self.keep_masks
            ):
                # feature_mask[i] = 0
                continue
            gt_boxes = proposals_per_image.gt_boxes
            est_boxes = proposals_per_image.proposal_boxes
            # apply match threshold for densepose head
            iou = matched_pairwise_iou(gt_boxes, est_boxes)
            iou_select = iou > self.iou_threshold
            proposals_per_image = proposals_per_image[iou_select]  # pyre-ignore[6]

            N_gt_boxes = len(proposals_per_image.gt_boxes)
            assert N_gt_boxes == len(proposals_per_image.proposal_boxes), (
                f"The number of GT boxes {N_gt_boxes} is different from the "
                f"number of proposal boxes {len(proposals_per_image.proposal_boxes)}"
            )
            # filter out any target without suitable annotation
            if self.keep_masks:
                gt_masks = (
                    proposals_per_image.gt_masks
                    if hasattr(proposals_per_image, "gt_masks")
                    else [None] * N_gt_boxes
                )
            else:
                gt_masks = [None] * N_gt_boxes
            gt_densepose = (
                proposals_per_image.gt_densepose
                if hasattr(proposals_per_image, "gt_densepose")
                else [None] * N_gt_boxes
            )
            assert len(gt_masks) == N_gt_boxes
            assert len(gt_densepose) == N_gt_boxes
            # keep only instances that have at least one usable annotation
            selected_indices = [
                i
                for i, (dp_target, mask_target) in enumerate(zip(gt_densepose, gt_masks))
                if (dp_target is not None) or (mask_target is not None)
            ]
            # if not len(selected_indices):
            #     feature_mask[i] = 0
            #     continue
            if len(selected_indices) != N_gt_boxes:
                proposals_per_image = proposals_per_image[selected_indices]  # pyre-ignore[6]
            assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.proposal_boxes)
            proposals_filtered.append(proposals_per_image)
        # features_filtered = [feature[feature_mask] for feature in features]
        # return features_filtered, proposals_filtered
        return features, proposals_filtered
|
Leffa/3rdparty/densepose/modeling/hrfpn.py
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
"""
|
| 3 |
+
MIT License
|
| 4 |
+
Copyright (c) 2019 Microsoft
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
The above copyright notice and this permission notice shall be included in all
|
| 12 |
+
copies or substantial portions of the Software.
|
| 13 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 14 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 15 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 16 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 17 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 18 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 19 |
+
SOFTWARE.
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
import torch
|
| 23 |
+
import torch.nn as nn
|
| 24 |
+
import torch.nn.functional as F
|
| 25 |
+
|
| 26 |
+
from detectron2.layers import ShapeSpec
|
| 27 |
+
from detectron2.modeling.backbone import BACKBONE_REGISTRY
|
| 28 |
+
from detectron2.modeling.backbone.backbone import Backbone
|
| 29 |
+
|
| 30 |
+
from .hrnet import build_pose_hrnet_backbone
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class HRFPN(Backbone):
    """HRFPN (High Resolution Feature Pyramids)
    Transforms outputs of HRNet backbone so they are suitable for the ROI_heads
    arXiv: https://arxiv.org/abs/1904.04514
    Adapted from https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/necks/hrfpn.py
    Args:
        bottom_up: (list) output of HRNet
        in_features (list): names of the input features (output of HRNet)
        in_channels (list): number of channels for each branch
        out_channels (int): output channels of feature pyramids
        n_out_features (int): number of output stages
        pooling (str): pooling for generating feature pyramids (from {MAX, AVG})
        share_conv (bool): Have one conv per output, or share one with all the outputs
    """

    def __init__(
        self,
        bottom_up,
        in_features,
        n_out_features,
        in_channels,
        out_channels,
        pooling="AVG",
        share_conv=False,
    ):
        super(HRFPN, self).__init__()
        assert isinstance(in_channels, list)
        self.bottom_up = bottom_up
        self.in_features = in_features
        self.n_out_features = n_out_features
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_ins = len(in_channels)
        self.share_conv = share_conv

        if self.share_conv:
            # a single 3x3 output conv shared across all pyramid levels
            self.fpn_conv = nn.Conv2d(
                in_channels=out_channels, out_channels=out_channels, kernel_size=3, padding=1
            )
        else:
            # one dedicated 3x3 output conv per pyramid level
            self.fpn_conv = nn.ModuleList()
            for _ in range(self.n_out_features):
                self.fpn_conv.append(
                    nn.Conv2d(
                        in_channels=out_channels,
                        out_channels=out_channels,
                        kernel_size=3,
                        padding=1,
                    )
                )

        # Custom change: Replaces a simple bilinear interpolation.
        # Branch i (stride 2**i relative to the finest branch) is upsampled
        # by a learned transposed conv with stride 2**i.
        self.interp_conv = nn.ModuleList()
        for i in range(len(self.in_features)):
            self.interp_conv.append(
                nn.Sequential(
                    nn.ConvTranspose2d(
                        in_channels=in_channels[i],
                        out_channels=in_channels[i],
                        kernel_size=4,
                        stride=2**i,
                        padding=0,
                        output_padding=0,
                        bias=False,
                    ),
                    nn.BatchNorm2d(in_channels[i], momentum=0.1),
                    nn.ReLU(inplace=True),
                )
            )

        # Custom change: Replaces a couple (reduction conv + pooling) by one conv.
        # Level i is produced by a conv with kernel/stride 2**i over the
        # concatenated branches, downsampling and reducing channels at once.
        self.reduction_pooling_conv = nn.ModuleList()
        for i in range(self.n_out_features):
            self.reduction_pooling_conv.append(
                nn.Sequential(
                    nn.Conv2d(sum(in_channels), out_channels, kernel_size=2**i, stride=2**i),
                    nn.BatchNorm2d(out_channels, momentum=0.1),
                    nn.ReLU(inplace=True),
                )
            )

        # NOTE(review): self.pooling is selected here but not used in forward()
        # (the reduction convs above replaced it) — kept for compatibility.
        if pooling == "MAX":
            self.pooling = F.max_pool2d
        else:
            self.pooling = F.avg_pool2d

        # Backbone metadata: output names "p1..pK", their channel counts and
        # strides (level i has stride 2**(i+2) w.r.t. the input image).
        self._out_features = []
        self._out_feature_channels = {}
        self._out_feature_strides = {}

        for i in range(self.n_out_features):
            self._out_features.append("p%d" % (i + 1))
            self._out_feature_channels.update({self._out_features[-1]: self.out_channels})
            self._out_feature_strides.update({self._out_features[-1]: 2 ** (i + 2)})

    # default init_weights for conv(msra) and norm in ConvModule
    def init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, a=1)
                nn.init.constant_(m.bias, 0)

    def forward(self, inputs):
        bottom_up_features = self.bottom_up(inputs)
        assert len(bottom_up_features) == len(self.in_features)
        inputs = [bottom_up_features[f] for f in self.in_features]

        # Upsample every branch to (roughly) the finest resolution, then crop
        # to the smallest common spatial size and concatenate along channels.
        outs = []
        for i in range(len(inputs)):
            outs.append(self.interp_conv[i](inputs[i]))
        shape_2 = min(o.shape[2] for o in outs)
        shape_3 = min(o.shape[3] for o in outs)
        out = torch.cat([o[:, :, :shape_2, :shape_3] for o in outs], dim=1)
        # Produce each pyramid level by strided reduction convolution.
        outs = []
        for i in range(self.n_out_features):
            outs.append(self.reduction_pooling_conv[i](out))
        for i in range(len(outs)):  # Make shapes consistent
            outs[-1 - i] = outs[-1 - i][
                :, :, : outs[-1].shape[2] * 2**i, : outs[-1].shape[3] * 2**i
            ]
        # Apply the (shared or per-level) output convolution.
        outputs = []
        for i in range(len(outs)):
            if self.share_conv:
                outputs.append(self.fpn_conv(outs[i]))
            else:
                outputs.append(self.fpn_conv[i](outs[i]))

        assert len(self._out_features) == len(outputs)
        return dict(zip(self._out_features, outputs))
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
@BACKBONE_REGISTRY.register()
def build_hrfpn_backbone(cfg, input_shape: ShapeSpec) -> HRFPN:
    """Build an HRNet backbone and wrap it with an HRFPN neck."""
    branch_channels = cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS
    feature_names = ["p%d" % (i + 1) for i in range(cfg.MODEL.HRNET.STAGE4.NUM_BRANCHES)]
    hrnet = build_pose_hrnet_backbone(cfg, input_shape)
    return HRFPN(
        hrnet,
        feature_names,
        len(cfg.MODEL.ROI_HEADS.IN_FEATURES),
        branch_channels,
        cfg.MODEL.HRNET.HRFPN.OUT_CHANNELS,
        pooling="AVG",
        share_conv=False,
    )
|
Leffa/3rdparty/densepose/modeling/hrnet.py
ADDED
|
@@ -0,0 +1,474 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
# ------------------------------------------------------------------------------
|
| 3 |
+
# Copyright (c) Microsoft
|
| 4 |
+
# Licensed under the MIT License.
|
| 5 |
+
# Written by Bin Xiao (leoxiaobin@gmail.com)
|
| 6 |
+
# Modified by Bowen Cheng (bcheng9@illinois.edu)
|
| 7 |
+
# Adapted from https://github.com/HRNet/Higher-HRNet-Human-Pose-Estimation/blob/master/lib/models/pose_higher_hrnet.py # noqa
|
| 8 |
+
# ------------------------------------------------------------------------------
|
| 9 |
+
|
| 10 |
+
from __future__ import absolute_import, division, print_function
|
| 11 |
+
import logging
|
| 12 |
+
import torch.nn as nn
|
| 13 |
+
|
| 14 |
+
from detectron2.layers import ShapeSpec
|
| 15 |
+
from detectron2.modeling.backbone import BACKBONE_REGISTRY
|
| 16 |
+
from detectron2.modeling.backbone.backbone import Backbone
|
| 17 |
+
|
| 18 |
+
BN_MOMENTUM = 0.1
|
| 19 |
+
logger = logging.getLogger(__name__)
|
| 20 |
+
|
| 21 |
+
__all__ = ["build_pose_hrnet_backbone", "PoseHigherResolutionNet"]
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def conv3x3(in_planes, out_planes, stride=1):
|
| 25 |
+
"""3x3 convolution with padding"""
|
| 26 |
+
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class BasicBlock(nn.Module):
|
| 30 |
+
expansion = 1
|
| 31 |
+
|
| 32 |
+
def __init__(self, inplanes, planes, stride=1, downsample=None):
|
| 33 |
+
super(BasicBlock, self).__init__()
|
| 34 |
+
self.conv1 = conv3x3(inplanes, planes, stride)
|
| 35 |
+
self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
|
| 36 |
+
self.relu = nn.ReLU(inplace=True)
|
| 37 |
+
self.conv2 = conv3x3(planes, planes)
|
| 38 |
+
self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
|
| 39 |
+
self.downsample = downsample
|
| 40 |
+
self.stride = stride
|
| 41 |
+
|
| 42 |
+
def forward(self, x):
|
| 43 |
+
residual = x
|
| 44 |
+
|
| 45 |
+
out = self.conv1(x)
|
| 46 |
+
out = self.bn1(out)
|
| 47 |
+
out = self.relu(out)
|
| 48 |
+
|
| 49 |
+
out = self.conv2(out)
|
| 50 |
+
out = self.bn2(out)
|
| 51 |
+
|
| 52 |
+
if self.downsample is not None:
|
| 53 |
+
residual = self.downsample(x)
|
| 54 |
+
|
| 55 |
+
out += residual
|
| 56 |
+
out = self.relu(out)
|
| 57 |
+
|
| 58 |
+
return out
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
class Bottleneck(nn.Module):
|
| 62 |
+
expansion = 4
|
| 63 |
+
|
| 64 |
+
def __init__(self, inplanes, planes, stride=1, downsample=None):
|
| 65 |
+
super(Bottleneck, self).__init__()
|
| 66 |
+
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
|
| 67 |
+
self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
|
| 68 |
+
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
|
| 69 |
+
self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
|
| 70 |
+
self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
|
| 71 |
+
self.bn3 = nn.BatchNorm2d(planes * self.expansion, momentum=BN_MOMENTUM)
|
| 72 |
+
self.relu = nn.ReLU(inplace=True)
|
| 73 |
+
self.downsample = downsample
|
| 74 |
+
self.stride = stride
|
| 75 |
+
|
| 76 |
+
def forward(self, x):
|
| 77 |
+
residual = x
|
| 78 |
+
|
| 79 |
+
out = self.conv1(x)
|
| 80 |
+
out = self.bn1(out)
|
| 81 |
+
out = self.relu(out)
|
| 82 |
+
|
| 83 |
+
out = self.conv2(out)
|
| 84 |
+
out = self.bn2(out)
|
| 85 |
+
out = self.relu(out)
|
| 86 |
+
|
| 87 |
+
out = self.conv3(out)
|
| 88 |
+
out = self.bn3(out)
|
| 89 |
+
|
| 90 |
+
if self.downsample is not None:
|
| 91 |
+
residual = self.downsample(x)
|
| 92 |
+
|
| 93 |
+
out += residual
|
| 94 |
+
out = self.relu(out)
|
| 95 |
+
|
| 96 |
+
return out
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
class HighResolutionModule(nn.Module):
|
| 100 |
+
"""HighResolutionModule
|
| 101 |
+
Building block of the PoseHigherResolutionNet (see lower)
|
| 102 |
+
arXiv: https://arxiv.org/abs/1908.10357
|
| 103 |
+
Args:
|
| 104 |
+
num_branches (int): number of branches of the modyle
|
| 105 |
+
blocks (str): type of block of the module
|
| 106 |
+
num_blocks (int): number of blocks of the module
|
| 107 |
+
num_inchannels (int): number of input channels of the module
|
| 108 |
+
num_channels (list): number of channels of each branch
|
| 109 |
+
multi_scale_output (bool): only used by the last module of PoseHigherResolutionNet
|
| 110 |
+
"""
|
| 111 |
+
|
| 112 |
+
def __init__(
|
| 113 |
+
self,
|
| 114 |
+
num_branches,
|
| 115 |
+
blocks,
|
| 116 |
+
num_blocks,
|
| 117 |
+
num_inchannels,
|
| 118 |
+
num_channels,
|
| 119 |
+
multi_scale_output=True,
|
| 120 |
+
):
|
| 121 |
+
super(HighResolutionModule, self).__init__()
|
| 122 |
+
self._check_branches(num_branches, blocks, num_blocks, num_inchannels, num_channels)
|
| 123 |
+
|
| 124 |
+
self.num_inchannels = num_inchannels
|
| 125 |
+
self.num_branches = num_branches
|
| 126 |
+
|
| 127 |
+
self.multi_scale_output = multi_scale_output
|
| 128 |
+
|
| 129 |
+
self.branches = self._make_branches(num_branches, blocks, num_blocks, num_channels)
|
| 130 |
+
self.fuse_layers = self._make_fuse_layers()
|
| 131 |
+
self.relu = nn.ReLU(True)
|
| 132 |
+
|
| 133 |
+
def _check_branches(self, num_branches, blocks, num_blocks, num_inchannels, num_channels):
|
| 134 |
+
if num_branches != len(num_blocks):
|
| 135 |
+
error_msg = "NUM_BRANCHES({}) <> NUM_BLOCKS({})".format(num_branches, len(num_blocks))
|
| 136 |
+
logger.error(error_msg)
|
| 137 |
+
raise ValueError(error_msg)
|
| 138 |
+
|
| 139 |
+
if num_branches != len(num_channels):
|
| 140 |
+
error_msg = "NUM_BRANCHES({}) <> NUM_CHANNELS({})".format(
|
| 141 |
+
num_branches, len(num_channels)
|
| 142 |
+
)
|
| 143 |
+
logger.error(error_msg)
|
| 144 |
+
raise ValueError(error_msg)
|
| 145 |
+
|
| 146 |
+
if num_branches != len(num_inchannels):
|
| 147 |
+
error_msg = "NUM_BRANCHES({}) <> NUM_INCHANNELS({})".format(
|
| 148 |
+
num_branches, len(num_inchannels)
|
| 149 |
+
)
|
| 150 |
+
logger.error(error_msg)
|
| 151 |
+
raise ValueError(error_msg)
|
| 152 |
+
|
| 153 |
+
def _make_one_branch(self, branch_index, block, num_blocks, num_channels, stride=1):
|
| 154 |
+
downsample = None
|
| 155 |
+
if (
|
| 156 |
+
stride != 1
|
| 157 |
+
or self.num_inchannels[branch_index] != num_channels[branch_index] * block.expansion
|
| 158 |
+
):
|
| 159 |
+
downsample = nn.Sequential(
|
| 160 |
+
nn.Conv2d(
|
| 161 |
+
self.num_inchannels[branch_index],
|
| 162 |
+
num_channels[branch_index] * block.expansion,
|
| 163 |
+
kernel_size=1,
|
| 164 |
+
stride=stride,
|
| 165 |
+
bias=False,
|
| 166 |
+
),
|
| 167 |
+
nn.BatchNorm2d(num_channels[branch_index] * block.expansion, momentum=BN_MOMENTUM),
|
| 168 |
+
)
|
| 169 |
+
|
| 170 |
+
layers = []
|
| 171 |
+
layers.append(
|
| 172 |
+
block(self.num_inchannels[branch_index], num_channels[branch_index], stride, downsample)
|
| 173 |
+
)
|
| 174 |
+
self.num_inchannels[branch_index] = num_channels[branch_index] * block.expansion
|
| 175 |
+
for _ in range(1, num_blocks[branch_index]):
|
| 176 |
+
layers.append(block(self.num_inchannels[branch_index], num_channels[branch_index]))
|
| 177 |
+
|
| 178 |
+
return nn.Sequential(*layers)
|
| 179 |
+
|
| 180 |
+
def _make_branches(self, num_branches, block, num_blocks, num_channels):
|
| 181 |
+
branches = []
|
| 182 |
+
|
| 183 |
+
for i in range(num_branches):
|
| 184 |
+
branches.append(self._make_one_branch(i, block, num_blocks, num_channels))
|
| 185 |
+
|
| 186 |
+
return nn.ModuleList(branches)
|
| 187 |
+
|
| 188 |
+
def _make_fuse_layers(self):
|
| 189 |
+
if self.num_branches == 1:
|
| 190 |
+
return None
|
| 191 |
+
|
| 192 |
+
num_branches = self.num_branches
|
| 193 |
+
num_inchannels = self.num_inchannels
|
| 194 |
+
fuse_layers = []
|
| 195 |
+
for i in range(num_branches if self.multi_scale_output else 1):
|
| 196 |
+
fuse_layer = []
|
| 197 |
+
for j in range(num_branches):
|
| 198 |
+
if j > i:
|
| 199 |
+
fuse_layer.append(
|
| 200 |
+
nn.Sequential(
|
| 201 |
+
nn.Conv2d(num_inchannels[j], num_inchannels[i], 1, 1, 0, bias=False),
|
| 202 |
+
nn.BatchNorm2d(num_inchannels[i]),
|
| 203 |
+
nn.Upsample(scale_factor=2 ** (j - i), mode="nearest"),
|
| 204 |
+
)
|
| 205 |
+
)
|
| 206 |
+
elif j == i:
|
| 207 |
+
fuse_layer.append(None)
|
| 208 |
+
else:
|
| 209 |
+
conv3x3s = []
|
| 210 |
+
for k in range(i - j):
|
| 211 |
+
if k == i - j - 1:
|
| 212 |
+
num_outchannels_conv3x3 = num_inchannels[i]
|
| 213 |
+
conv3x3s.append(
|
| 214 |
+
nn.Sequential(
|
| 215 |
+
nn.Conv2d(
|
| 216 |
+
num_inchannels[j],
|
| 217 |
+
num_outchannels_conv3x3,
|
| 218 |
+
3,
|
| 219 |
+
2,
|
| 220 |
+
1,
|
| 221 |
+
bias=False,
|
| 222 |
+
),
|
| 223 |
+
nn.BatchNorm2d(num_outchannels_conv3x3),
|
| 224 |
+
)
|
| 225 |
+
)
|
| 226 |
+
else:
|
| 227 |
+
num_outchannels_conv3x3 = num_inchannels[j]
|
| 228 |
+
conv3x3s.append(
|
| 229 |
+
nn.Sequential(
|
| 230 |
+
nn.Conv2d(
|
| 231 |
+
num_inchannels[j],
|
| 232 |
+
num_outchannels_conv3x3,
|
| 233 |
+
3,
|
| 234 |
+
2,
|
| 235 |
+
1,
|
| 236 |
+
bias=False,
|
| 237 |
+
),
|
| 238 |
+
nn.BatchNorm2d(num_outchannels_conv3x3),
|
| 239 |
+
nn.ReLU(True),
|
| 240 |
+
)
|
| 241 |
+
)
|
| 242 |
+
fuse_layer.append(nn.Sequential(*conv3x3s))
|
| 243 |
+
fuse_layers.append(nn.ModuleList(fuse_layer))
|
| 244 |
+
|
| 245 |
+
return nn.ModuleList(fuse_layers)
|
| 246 |
+
|
| 247 |
+
def get_num_inchannels(self):
|
| 248 |
+
return self.num_inchannels
|
| 249 |
+
|
| 250 |
+
def forward(self, x):
|
| 251 |
+
if self.num_branches == 1:
|
| 252 |
+
return [self.branches[0](x[0])]
|
| 253 |
+
|
| 254 |
+
for i in range(self.num_branches):
|
| 255 |
+
x[i] = self.branches[i](x[i])
|
| 256 |
+
|
| 257 |
+
x_fuse = []
|
| 258 |
+
|
| 259 |
+
for i in range(len(self.fuse_layers)):
|
| 260 |
+
y = x[0] if i == 0 else self.fuse_layers[i][0](x[0])
|
| 261 |
+
for j in range(1, self.num_branches):
|
| 262 |
+
if i == j:
|
| 263 |
+
y = y + x[j]
|
| 264 |
+
else:
|
| 265 |
+
z = self.fuse_layers[i][j](x[j])[:, :, : y.shape[2], : y.shape[3]]
|
| 266 |
+
y = y + z
|
| 267 |
+
x_fuse.append(self.relu(y))
|
| 268 |
+
|
| 269 |
+
return x_fuse
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
blocks_dict = {"BASIC": BasicBlock, "BOTTLENECK": Bottleneck}
|
| 273 |
+
|
| 274 |
+
|
| 275 |
+
class PoseHigherResolutionNet(Backbone):
|
| 276 |
+
"""PoseHigherResolutionNet
|
| 277 |
+
Composed of several HighResolutionModule tied together with ConvNets
|
| 278 |
+
Adapted from the GitHub version to fit with HRFPN and the Detectron2 infrastructure
|
| 279 |
+
arXiv: https://arxiv.org/abs/1908.10357
|
| 280 |
+
"""
|
| 281 |
+
|
| 282 |
+
def __init__(self, cfg, **kwargs):
|
| 283 |
+
self.inplanes = cfg.MODEL.HRNET.STEM_INPLANES
|
| 284 |
+
super(PoseHigherResolutionNet, self).__init__()
|
| 285 |
+
|
| 286 |
+
# stem net
|
| 287 |
+
self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
|
| 288 |
+
self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
|
| 289 |
+
self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, bias=False)
|
| 290 |
+
self.bn2 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
|
| 291 |
+
self.relu = nn.ReLU(inplace=True)
|
| 292 |
+
self.layer1 = self._make_layer(Bottleneck, 64, 4)
|
| 293 |
+
|
| 294 |
+
self.stage2_cfg = cfg.MODEL.HRNET.STAGE2
|
| 295 |
+
num_channels = self.stage2_cfg.NUM_CHANNELS
|
| 296 |
+
block = blocks_dict[self.stage2_cfg.BLOCK]
|
| 297 |
+
num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
|
| 298 |
+
self.transition1 = self._make_transition_layer([256], num_channels)
|
| 299 |
+
self.stage2, pre_stage_channels = self._make_stage(self.stage2_cfg, num_channels)
|
| 300 |
+
|
| 301 |
+
self.stage3_cfg = cfg.MODEL.HRNET.STAGE3
|
| 302 |
+
num_channels = self.stage3_cfg.NUM_CHANNELS
|
| 303 |
+
block = blocks_dict[self.stage3_cfg.BLOCK]
|
| 304 |
+
num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
|
| 305 |
+
self.transition2 = self._make_transition_layer(pre_stage_channels, num_channels)
|
| 306 |
+
self.stage3, pre_stage_channels = self._make_stage(self.stage3_cfg, num_channels)
|
| 307 |
+
|
| 308 |
+
self.stage4_cfg = cfg.MODEL.HRNET.STAGE4
|
| 309 |
+
num_channels = self.stage4_cfg.NUM_CHANNELS
|
| 310 |
+
block = blocks_dict[self.stage4_cfg.BLOCK]
|
| 311 |
+
num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
|
| 312 |
+
self.transition3 = self._make_transition_layer(pre_stage_channels, num_channels)
|
| 313 |
+
self.stage4, pre_stage_channels = self._make_stage(
|
| 314 |
+
self.stage4_cfg, num_channels, multi_scale_output=True
|
| 315 |
+
)
|
| 316 |
+
|
| 317 |
+
self._out_features = []
|
| 318 |
+
self._out_feature_channels = {}
|
| 319 |
+
self._out_feature_strides = {}
|
| 320 |
+
|
| 321 |
+
for i in range(cfg.MODEL.HRNET.STAGE4.NUM_BRANCHES):
|
| 322 |
+
self._out_features.append("p%d" % (i + 1))
|
| 323 |
+
self._out_feature_channels.update(
|
| 324 |
+
{self._out_features[-1]: cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS[i]}
|
| 325 |
+
)
|
| 326 |
+
self._out_feature_strides.update({self._out_features[-1]: 1})
|
| 327 |
+
|
| 328 |
+
def _get_deconv_cfg(self, deconv_kernel):
|
| 329 |
+
if deconv_kernel == 4:
|
| 330 |
+
padding = 1
|
| 331 |
+
output_padding = 0
|
| 332 |
+
elif deconv_kernel == 3:
|
| 333 |
+
padding = 1
|
| 334 |
+
output_padding = 1
|
| 335 |
+
elif deconv_kernel == 2:
|
| 336 |
+
padding = 0
|
| 337 |
+
output_padding = 0
|
| 338 |
+
|
| 339 |
+
return deconv_kernel, padding, output_padding
|
| 340 |
+
|
| 341 |
+
def _make_transition_layer(self, num_channels_pre_layer, num_channels_cur_layer):
|
| 342 |
+
num_branches_cur = len(num_channels_cur_layer)
|
| 343 |
+
num_branches_pre = len(num_channels_pre_layer)
|
| 344 |
+
|
| 345 |
+
transition_layers = []
|
| 346 |
+
for i in range(num_branches_cur):
|
| 347 |
+
if i < num_branches_pre:
|
| 348 |
+
if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
|
| 349 |
+
transition_layers.append(
|
| 350 |
+
nn.Sequential(
|
| 351 |
+
nn.Conv2d(
|
| 352 |
+
num_channels_pre_layer[i],
|
| 353 |
+
num_channels_cur_layer[i],
|
| 354 |
+
3,
|
| 355 |
+
1,
|
| 356 |
+
1,
|
| 357 |
+
bias=False,
|
| 358 |
+
),
|
| 359 |
+
nn.BatchNorm2d(num_channels_cur_layer[i]),
|
| 360 |
+
nn.ReLU(inplace=True),
|
| 361 |
+
)
|
| 362 |
+
)
|
| 363 |
+
else:
|
| 364 |
+
transition_layers.append(None)
|
| 365 |
+
else:
|
| 366 |
+
conv3x3s = []
|
| 367 |
+
for j in range(i + 1 - num_branches_pre):
|
| 368 |
+
inchannels = num_channels_pre_layer[-1]
|
| 369 |
+
outchannels = (
|
| 370 |
+
num_channels_cur_layer[i] if j == i - num_branches_pre else inchannels
|
| 371 |
+
)
|
| 372 |
+
conv3x3s.append(
|
| 373 |
+
nn.Sequential(
|
| 374 |
+
nn.Conv2d(inchannels, outchannels, 3, 2, 1, bias=False),
|
| 375 |
+
nn.BatchNorm2d(outchannels),
|
| 376 |
+
nn.ReLU(inplace=True),
|
| 377 |
+
)
|
| 378 |
+
)
|
| 379 |
+
transition_layers.append(nn.Sequential(*conv3x3s))
|
| 380 |
+
|
| 381 |
+
return nn.ModuleList(transition_layers)
|
| 382 |
+
|
| 383 |
+
def _make_layer(self, block, planes, blocks, stride=1):
|
| 384 |
+
downsample = None
|
| 385 |
+
if stride != 1 or self.inplanes != planes * block.expansion:
|
| 386 |
+
downsample = nn.Sequential(
|
| 387 |
+
nn.Conv2d(
|
| 388 |
+
self.inplanes,
|
| 389 |
+
planes * block.expansion,
|
| 390 |
+
kernel_size=1,
|
| 391 |
+
stride=stride,
|
| 392 |
+
bias=False,
|
| 393 |
+
),
|
| 394 |
+
nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
|
| 395 |
+
)
|
| 396 |
+
|
| 397 |
+
layers = []
|
| 398 |
+
layers.append(block(self.inplanes, planes, stride, downsample))
|
| 399 |
+
self.inplanes = planes * block.expansion
|
| 400 |
+
for _ in range(1, blocks):
|
| 401 |
+
layers.append(block(self.inplanes, planes))
|
| 402 |
+
|
| 403 |
+
return nn.Sequential(*layers)
|
| 404 |
+
|
| 405 |
+
def _make_stage(self, layer_config, num_inchannels, multi_scale_output=True):
|
| 406 |
+
num_modules = layer_config["NUM_MODULES"]
|
| 407 |
+
num_branches = layer_config["NUM_BRANCHES"]
|
| 408 |
+
num_blocks = layer_config["NUM_BLOCKS"]
|
| 409 |
+
num_channels = layer_config["NUM_CHANNELS"]
|
| 410 |
+
block = blocks_dict[layer_config["BLOCK"]]
|
| 411 |
+
|
| 412 |
+
modules = []
|
| 413 |
+
for i in range(num_modules):
|
| 414 |
+
# multi_scale_output is only used last module
|
| 415 |
+
if not multi_scale_output and i == num_modules - 1:
|
| 416 |
+
reset_multi_scale_output = False
|
| 417 |
+
else:
|
| 418 |
+
reset_multi_scale_output = True
|
| 419 |
+
|
| 420 |
+
modules.append(
|
| 421 |
+
HighResolutionModule(
|
| 422 |
+
num_branches,
|
| 423 |
+
block,
|
| 424 |
+
num_blocks,
|
| 425 |
+
num_inchannels,
|
| 426 |
+
num_channels,
|
| 427 |
+
reset_multi_scale_output,
|
| 428 |
+
)
|
| 429 |
+
)
|
| 430 |
+
num_inchannels = modules[-1].get_num_inchannels()
|
| 431 |
+
|
| 432 |
+
return nn.Sequential(*modules), num_inchannels
|
| 433 |
+
|
| 434 |
+
def forward(self, x):
|
| 435 |
+
x = self.conv1(x)
|
| 436 |
+
x = self.bn1(x)
|
| 437 |
+
x = self.relu(x)
|
| 438 |
+
x = self.conv2(x)
|
| 439 |
+
x = self.bn2(x)
|
| 440 |
+
x = self.relu(x)
|
| 441 |
+
x = self.layer1(x)
|
| 442 |
+
|
| 443 |
+
x_list = []
|
| 444 |
+
for i in range(self.stage2_cfg.NUM_BRANCHES):
|
| 445 |
+
if self.transition1[i] is not None:
|
| 446 |
+
x_list.append(self.transition1[i](x))
|
| 447 |
+
else:
|
| 448 |
+
x_list.append(x)
|
| 449 |
+
y_list = self.stage2(x_list)
|
| 450 |
+
|
| 451 |
+
x_list = []
|
| 452 |
+
for i in range(self.stage3_cfg.NUM_BRANCHES):
|
| 453 |
+
if self.transition2[i] is not None:
|
| 454 |
+
x_list.append(self.transition2[i](y_list[-1]))
|
| 455 |
+
else:
|
| 456 |
+
x_list.append(y_list[i])
|
| 457 |
+
y_list = self.stage3(x_list)
|
| 458 |
+
|
| 459 |
+
x_list = []
|
| 460 |
+
for i in range(self.stage4_cfg.NUM_BRANCHES):
|
| 461 |
+
if self.transition3[i] is not None:
|
| 462 |
+
x_list.append(self.transition3[i](y_list[-1]))
|
| 463 |
+
else:
|
| 464 |
+
x_list.append(y_list[i])
|
| 465 |
+
y_list = self.stage4(x_list)
|
| 466 |
+
|
| 467 |
+
assert len(self._out_features) == len(y_list)
|
| 468 |
+
return dict(zip(self._out_features, y_list)) # final_outputs
|
| 469 |
+
|
| 470 |
+
|
| 471 |
+
@BACKBONE_REGISTRY.register()
|
| 472 |
+
def build_pose_hrnet_backbone(cfg, input_shape: ShapeSpec):
|
| 473 |
+
model = PoseHigherResolutionNet(cfg)
|
| 474 |
+
return model
|
Leffa/3rdparty/densepose/modeling/inference.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
from dataclasses import fields
|
| 3 |
+
from typing import Any, List
|
| 4 |
+
import torch
|
| 5 |
+
|
| 6 |
+
from detectron2.structures import Instances
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def densepose_inference(densepose_predictor_output: Any, detections: List[Instances]) -> None:
|
| 10 |
+
"""
|
| 11 |
+
Splits DensePose predictor outputs into chunks, each chunk corresponds to
|
| 12 |
+
detections on one image. Predictor output chunks are stored in `pred_densepose`
|
| 13 |
+
attribute of the corresponding `Instances` object.
|
| 14 |
+
|
| 15 |
+
Args:
|
| 16 |
+
densepose_predictor_output: a dataclass instance (can be of different types,
|
| 17 |
+
depending on predictor used for inference). Each field can be `None`
|
| 18 |
+
(if the corresponding output was not inferred) or a tensor of size
|
| 19 |
+
[N, ...], where N = N_1 + N_2 + .. + N_k is a total number of
|
| 20 |
+
detections on all images, N_1 is the number of detections on image 1,
|
| 21 |
+
N_2 is the number of detections on image 2, etc.
|
| 22 |
+
detections: a list of objects of type `Instance`, k-th object corresponds
|
| 23 |
+
to detections on k-th image.
|
| 24 |
+
"""
|
| 25 |
+
k = 0
|
| 26 |
+
for detection_i in detections:
|
| 27 |
+
if densepose_predictor_output is None:
|
| 28 |
+
# don't add `pred_densepose` attribute
|
| 29 |
+
continue
|
| 30 |
+
n_i = detection_i.__len__()
|
| 31 |
+
|
| 32 |
+
PredictorOutput = type(densepose_predictor_output)
|
| 33 |
+
output_i_dict = {}
|
| 34 |
+
# we assume here that `densepose_predictor_output` is a dataclass object
|
| 35 |
+
for field in fields(densepose_predictor_output):
|
| 36 |
+
field_value = getattr(densepose_predictor_output, field.name)
|
| 37 |
+
# slice tensors
|
| 38 |
+
if isinstance(field_value, torch.Tensor):
|
| 39 |
+
output_i_dict[field.name] = field_value[k : k + n_i]
|
| 40 |
+
# leave others as is
|
| 41 |
+
else:
|
| 42 |
+
output_i_dict[field.name] = field_value
|
| 43 |
+
detection_i.pred_densepose = PredictorOutput(**output_i_dict)
|
| 44 |
+
k += n_i
|
Leffa/3rdparty/densepose/modeling/losses/__init__.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from .chart import DensePoseChartLoss
|
| 4 |
+
from .chart_with_confidences import DensePoseChartWithConfidenceLoss
|
| 5 |
+
from .cse import DensePoseCseLoss
|
| 6 |
+
from .registry import DENSEPOSE_LOSS_REGISTRY
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
__all__ = [
|
| 10 |
+
"DensePoseChartLoss",
|
| 11 |
+
"DensePoseChartWithConfidenceLoss",
|
| 12 |
+
"DensePoseCseLoss",
|
| 13 |
+
"DENSEPOSE_LOSS_REGISTRY",
|
| 14 |
+
]
|
Leffa/3rdparty/densepose/modeling/losses/chart.py
ADDED
|
@@ -0,0 +1,291 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from typing import Any, List
|
| 4 |
+
import torch
|
| 5 |
+
from torch.nn import functional as F
|
| 6 |
+
|
| 7 |
+
from detectron2.config import CfgNode
|
| 8 |
+
from detectron2.structures import Instances
|
| 9 |
+
|
| 10 |
+
from .mask_or_segm import MaskOrSegmentationLoss
|
| 11 |
+
from .registry import DENSEPOSE_LOSS_REGISTRY
|
| 12 |
+
from .utils import (
|
| 13 |
+
BilinearInterpolationHelper,
|
| 14 |
+
ChartBasedAnnotationsAccumulator,
|
| 15 |
+
LossDict,
|
| 16 |
+
extract_packed_annotations_from_matches,
|
| 17 |
+
)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
@DENSEPOSE_LOSS_REGISTRY.register()
|
| 21 |
+
class DensePoseChartLoss:
|
| 22 |
+
"""
|
| 23 |
+
DensePose loss for chart-based training. A mesh is split into charts,
|
| 24 |
+
each chart is given a label (I) and parametrized by 2 coordinates referred to
|
| 25 |
+
as U and V. Ground truth consists of a number of points annotated with
|
| 26 |
+
I, U and V values and coarse segmentation S defined for all pixels of the
|
| 27 |
+
object bounding box. In some cases (see `COARSE_SEGM_TRAINED_BY_MASKS`),
|
| 28 |
+
semantic segmentation annotations can be used as ground truth inputs as well.
|
| 29 |
+
|
| 30 |
+
Estimated values are tensors:
|
| 31 |
+
* U coordinates, tensor of shape [N, C, S, S]
|
| 32 |
+
* V coordinates, tensor of shape [N, C, S, S]
|
| 33 |
+
* fine segmentation estimates, tensor of shape [N, C, S, S] with raw unnormalized
|
| 34 |
+
scores for each fine segmentation label at each location
|
| 35 |
+
* coarse segmentation estimates, tensor of shape [N, D, S, S] with raw unnormalized
|
| 36 |
+
scores for each coarse segmentation label at each location
|
| 37 |
+
where N is the number of detections, C is the number of fine segmentation
|
| 38 |
+
labels, S is the estimate size ( = width = height) and D is the number of
|
| 39 |
+
coarse segmentation channels.
|
| 40 |
+
|
| 41 |
+
The losses are:
|
| 42 |
+
* regression (smooth L1) loss for U and V coordinates
|
| 43 |
+
* cross entropy loss for fine (I) and coarse (S) segmentations
|
| 44 |
+
Each loss has an associated weight
|
| 45 |
+
"""
|
| 46 |
+
|
| 47 |
+
def __init__(self, cfg: CfgNode):
|
| 48 |
+
"""
|
| 49 |
+
Initialize chart-based loss from configuration options
|
| 50 |
+
|
| 51 |
+
Args:
|
| 52 |
+
cfg (CfgNode): configuration options
|
| 53 |
+
"""
|
| 54 |
+
# fmt: off
|
| 55 |
+
self.heatmap_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE
|
| 56 |
+
self.w_points = cfg.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS
|
| 57 |
+
self.w_part = cfg.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS
|
| 58 |
+
self.w_segm = cfg.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS
|
| 59 |
+
self.n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
|
| 60 |
+
# fmt: on
|
| 61 |
+
self.segm_trained_by_masks = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS
|
| 62 |
+
self.segm_loss = MaskOrSegmentationLoss(cfg)
|
| 63 |
+
|
| 64 |
+
def __call__(
|
| 65 |
+
self, proposals_with_gt: List[Instances], densepose_predictor_outputs: Any, **kwargs
|
| 66 |
+
) -> LossDict:
|
| 67 |
+
"""
|
| 68 |
+
Produce chart-based DensePose losses
|
| 69 |
+
|
| 70 |
+
Args:
|
| 71 |
+
proposals_with_gt (list of Instances): detections with associated ground truth data
|
| 72 |
+
densepose_predictor_outputs: an object of a dataclass that contains predictor outputs
|
| 73 |
+
with estimated values; assumed to have the following attributes:
|
| 74 |
+
* coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S]
|
| 75 |
+
* fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
|
| 76 |
+
* u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
|
| 77 |
+
* v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
|
| 78 |
+
where N is the number of detections, C is the number of fine segmentation
|
| 79 |
+
labels, S is the estimate size ( = width = height) and D is the number of
|
| 80 |
+
coarse segmentation channels.
|
| 81 |
+
|
| 82 |
+
Return:
|
| 83 |
+
dict: str -> tensor: dict of losses with the following entries:
|
| 84 |
+
* `loss_densepose_U`: smooth L1 loss for U coordinate estimates
|
| 85 |
+
* `loss_densepose_V`: smooth L1 loss for V coordinate estimates
|
| 86 |
+
* `loss_densepose_I`: cross entropy for raw unnormalized scores for fine
|
| 87 |
+
segmentation estimates given ground truth labels;
|
| 88 |
+
* `loss_densepose_S`: cross entropy for raw unnormalized scores for coarse
|
| 89 |
+
segmentation estimates given ground truth labels;
|
| 90 |
+
"""
|
| 91 |
+
# densepose outputs are computed for all images and all bounding boxes;
|
| 92 |
+
# i.e. if a batch has 4 images with (3, 1, 2, 1) proposals respectively,
|
| 93 |
+
# the outputs will have size(0) == 3+1+2+1 == 7
|
| 94 |
+
|
| 95 |
+
if not len(proposals_with_gt):
|
| 96 |
+
return self.produce_fake_densepose_losses(densepose_predictor_outputs)
|
| 97 |
+
|
| 98 |
+
accumulator = ChartBasedAnnotationsAccumulator()
|
| 99 |
+
packed_annotations = extract_packed_annotations_from_matches(proposals_with_gt, accumulator)
|
| 100 |
+
|
| 101 |
+
# NOTE: we need to keep the same computation graph on all the GPUs to
|
| 102 |
+
# perform reduction properly. Hence even if we have no data on one
|
| 103 |
+
# of the GPUs, we still need to generate the computation graph.
|
| 104 |
+
# Add fake (zero) loss in the form Tensor.sum() * 0
|
| 105 |
+
if packed_annotations is None:
|
| 106 |
+
return self.produce_fake_densepose_losses(densepose_predictor_outputs)
|
| 107 |
+
|
| 108 |
+
h, w = densepose_predictor_outputs.u.shape[2:]
|
| 109 |
+
interpolator = BilinearInterpolationHelper.from_matches(
|
| 110 |
+
packed_annotations,
|
| 111 |
+
(h, w),
|
| 112 |
+
)
|
| 113 |
+
|
| 114 |
+
j_valid_fg = interpolator.j_valid * ( # pyre-ignore[16]
|
| 115 |
+
packed_annotations.fine_segm_labels_gt > 0
|
| 116 |
+
)
|
| 117 |
+
# pyre-fixme[6]: For 1st param expected `Tensor` but got `int`.
|
| 118 |
+
if not torch.any(j_valid_fg):
|
| 119 |
+
return self.produce_fake_densepose_losses(densepose_predictor_outputs)
|
| 120 |
+
|
| 121 |
+
losses_uv = self.produce_densepose_losses_uv(
|
| 122 |
+
proposals_with_gt,
|
| 123 |
+
densepose_predictor_outputs,
|
| 124 |
+
packed_annotations,
|
| 125 |
+
interpolator,
|
| 126 |
+
j_valid_fg, # pyre-ignore[6]
|
| 127 |
+
)
|
| 128 |
+
|
| 129 |
+
losses_segm = self.produce_densepose_losses_segm(
|
| 130 |
+
proposals_with_gt,
|
| 131 |
+
densepose_predictor_outputs,
|
| 132 |
+
packed_annotations,
|
| 133 |
+
interpolator,
|
| 134 |
+
j_valid_fg, # pyre-ignore[6]
|
| 135 |
+
)
|
| 136 |
+
|
| 137 |
+
return {**losses_uv, **losses_segm}
|
| 138 |
+
|
| 139 |
+
def produce_fake_densepose_losses(self, densepose_predictor_outputs: Any) -> LossDict:
|
| 140 |
+
"""
|
| 141 |
+
Fake losses for fine segmentation and U/V coordinates. These are used when
|
| 142 |
+
no suitable ground truth data was found in a batch. The loss has a value 0
|
| 143 |
+
and is primarily used to construct the computation graph, so that
|
| 144 |
+
`DistributedDataParallel` has similar graphs on all GPUs and can perform
|
| 145 |
+
reduction properly.
|
| 146 |
+
|
| 147 |
+
Args:
|
| 148 |
+
densepose_predictor_outputs: DensePose predictor outputs, an object
|
| 149 |
+
of a dataclass that is assumed to have the following attributes:
|
| 150 |
+
* fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
|
| 151 |
+
* u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
|
| 152 |
+
* v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
|
| 153 |
+
Return:
|
| 154 |
+
dict: str -> tensor: dict of losses with the following entries:
|
| 155 |
+
* `loss_densepose_U`: has value 0
|
| 156 |
+
* `loss_densepose_V`: has value 0
|
| 157 |
+
* `loss_densepose_I`: has value 0
|
| 158 |
+
* `loss_densepose_S`: has value 0
|
| 159 |
+
"""
|
| 160 |
+
losses_uv = self.produce_fake_densepose_losses_uv(densepose_predictor_outputs)
|
| 161 |
+
losses_segm = self.produce_fake_densepose_losses_segm(densepose_predictor_outputs)
|
| 162 |
+
return {**losses_uv, **losses_segm}
|
| 163 |
+
|
| 164 |
+
def produce_fake_densepose_losses_uv(self, densepose_predictor_outputs: Any) -> LossDict:
|
| 165 |
+
"""
|
| 166 |
+
Fake losses for U/V coordinates. These are used when no suitable ground
|
| 167 |
+
truth data was found in a batch. The loss has a value 0
|
| 168 |
+
and is primarily used to construct the computation graph, so that
|
| 169 |
+
`DistributedDataParallel` has similar graphs on all GPUs and can perform
|
| 170 |
+
reduction properly.
|
| 171 |
+
|
| 172 |
+
Args:
|
| 173 |
+
densepose_predictor_outputs: DensePose predictor outputs, an object
|
| 174 |
+
of a dataclass that is assumed to have the following attributes:
|
| 175 |
+
* u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
|
| 176 |
+
* v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
|
| 177 |
+
Return:
|
| 178 |
+
dict: str -> tensor: dict of losses with the following entries:
|
| 179 |
+
* `loss_densepose_U`: has value 0
|
| 180 |
+
* `loss_densepose_V`: has value 0
|
| 181 |
+
"""
|
| 182 |
+
return {
|
| 183 |
+
"loss_densepose_U": densepose_predictor_outputs.u.sum() * 0,
|
| 184 |
+
"loss_densepose_V": densepose_predictor_outputs.v.sum() * 0,
|
| 185 |
+
}
|
| 186 |
+
|
| 187 |
+
def produce_fake_densepose_losses_segm(self, densepose_predictor_outputs: Any) -> LossDict:
    """
    Zero-valued stand-in losses for fine / coarse segmentation, used when a
    batch contains no usable ground truth. The values are 0 but remain
    attached to the computation graph so that `DistributedDataParallel`
    sees the same graph on every GPU and can reduce gradients properly.

    Args:
        densepose_predictor_outputs: DensePose predictor outputs, an object
            of a dataclass that is assumed to have the following attributes:
            * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
            * coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S]
    Return:
        dict: str -> tensor: dict of losses with the following entries:
            * `loss_densepose_I`: has value 0
            * `loss_densepose_S`: has value 0, added only if `segm_trained_by_masks` is False
    """
    fine_segm_scores = densepose_predictor_outputs.fine_segm
    return {
        # zero loss that still touches the fine segmentation tensor
        "loss_densepose_I": fine_segm_scores.sum() * 0,
        # coarse segmentation fake loss is delegated to the segm loss object
        "loss_densepose_S": self.segm_loss.fake_value(densepose_predictor_outputs),
    }
|
| 210 |
+
|
| 211 |
+
def produce_densepose_losses_uv(
    self,
    proposals_with_gt: List[Instances],
    densepose_predictor_outputs: Any,
    packed_annotations: Any,
    interpolator: BilinearInterpolationHelper,
    j_valid_fg: torch.Tensor,
) -> LossDict:
    """
    U/V coordinate losses: smooth L1 between values interpolated at the
    annotated points and the ground truth, restricted to valid foreground
    points (`j_valid_fg`).

    Args:
        proposals_with_gt (list of Instances): detections with associated ground truth data
        densepose_predictor_outputs: DensePose predictor outputs, an object
            of a dataclass that is assumed to have the following attributes:
            * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
            * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
        packed_annotations: packed ground truth annotations with `u_gt` / `v_gt`
        interpolator: helper that extracts estimates at annotated point locations
        j_valid_fg: boolean mask of valid foreground points
    Return:
        dict: str -> tensor: dict of losses with the following entries:
            * `loss_densepose_U`: smooth L1 loss for U coordinate estimates
            * `loss_densepose_V`: smooth L1 loss for V coordinate estimates
    """

    def _point_loss(estimates: torch.Tensor, ground_truth: torch.Tensor) -> torch.Tensor:
        # interpolate estimates at annotated points, keep valid FG points only
        est = interpolator.extract_at_points(estimates)[j_valid_fg]
        gt = ground_truth[j_valid_fg]
        return F.smooth_l1_loss(est, gt, reduction="sum") * self.w_points

    return {
        "loss_densepose_U": _point_loss(densepose_predictor_outputs.u, packed_annotations.u_gt),
        "loss_densepose_V": _point_loss(densepose_predictor_outputs.v, packed_annotations.v_gt),
    }
|
| 242 |
+
|
| 243 |
+
def produce_densepose_losses_segm(
    self,
    proposals_with_gt: List[Instances],
    densepose_predictor_outputs: Any,
    packed_annotations: Any,
    interpolator: BilinearInterpolationHelper,
    j_valid_fg: torch.Tensor,
) -> LossDict:
    """
    Fine / coarse segmentation losses: cross entropy on unnormalized scores.
    Fine segmentation is supervised at annotated points; coarse segmentation
    is supervised by dense mask annotations via `self.segm_loss`.

    Args:
        proposals_with_gt (list of Instances): detections with associated ground truth data
        densepose_predictor_outputs: DensePose predictor outputs, an object
            of a dataclass that is assumed to have the following attributes:
            * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
            * coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S]
        packed_annotations: packed ground truth with `fine_segm_labels_gt`
        interpolator: helper that extracts estimates at annotated point locations
        j_valid_fg: boolean mask of valid foreground points (unused here;
            fine segmentation uses `interpolator.j_valid` instead)
    Return:
        dict: str -> tensor: dict of losses with the following entries:
            * `loss_densepose_I`: cross entropy for raw unnormalized scores for fine
                segmentation estimates given ground truth labels
            * `loss_densepose_S`: cross entropy for raw unnormalized scores for coarse
                segmentation estimates given ground truth labels; included only if
                coarse segmentation is trained from DensePose ground truth
                (otherwise handled by `produce_mask_losses`)
    """
    j_valid = interpolator.j_valid  # pyre-ignore[16]
    fine_segm_gt = packed_annotations.fine_segm_labels_gt[j_valid]
    # interpolate all C fine-segmentation channels at the annotated points;
    # explicit per-corner bilinear weights broadcast across channels
    fine_segm_est = interpolator.extract_at_points(
        densepose_predictor_outputs.fine_segm,
        slice_fine_segm=slice(None),
        w_ylo_xlo=interpolator.w_ylo_xlo[:, None],  # pyre-ignore[16]
        w_ylo_xhi=interpolator.w_ylo_xhi[:, None],  # pyre-ignore[16]
        w_yhi_xlo=interpolator.w_yhi_xlo[:, None],  # pyre-ignore[16]
        w_yhi_xhi=interpolator.w_yhi_xhi[:, None],  # pyre-ignore[16]
    )[j_valid, :]
    loss_fine = F.cross_entropy(fine_segm_est, fine_segm_gt.long()) * self.w_part
    loss_coarse = (
        self.segm_loss(proposals_with_gt, densepose_predictor_outputs, packed_annotations)
        * self.w_segm
    )
    return {
        "loss_densepose_I": loss_fine,
        "loss_densepose_S": loss_coarse,
    }
|
Leffa/3rdparty/densepose/modeling/losses/chart_with_confidences.py
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
import math
|
| 3 |
+
from typing import Any, List
|
| 4 |
+
import torch
|
| 5 |
+
from torch import nn
|
| 6 |
+
from torch.nn import functional as F
|
| 7 |
+
|
| 8 |
+
from detectron2.config import CfgNode
|
| 9 |
+
from detectron2.structures import Instances
|
| 10 |
+
|
| 11 |
+
from .. import DensePoseConfidenceModelConfig, DensePoseUVConfidenceType
|
| 12 |
+
from .chart import DensePoseChartLoss
|
| 13 |
+
from .registry import DENSEPOSE_LOSS_REGISTRY
|
| 14 |
+
from .utils import BilinearInterpolationHelper, LossDict
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
@DENSEPOSE_LOSS_REGISTRY.register()
class DensePoseChartWithConfidenceLoss(DensePoseChartLoss):
    """
    Chart-based DensePose loss with optional confidence modeling.

    Extends `DensePoseChartLoss`: when UV confidence estimation is enabled in
    the config, the plain U/V losses are replaced by a single Gaussian
    negative-log-likelihood loss (`loss_densepose_UV`) that also trains the
    predicted confidence channels (`sigma_2`, and for the anisotropic case
    `kappa_u` / `kappa_v`). When confidence is disabled, behavior falls back
    to the parent class.
    """

    def __init__(self, cfg: CfgNode):
        super().__init__(cfg)
        self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg)
        # Select the NLL flavor matching the configured confidence type.
        # NOTE(review): if confidences are enabled with a type other than
        # IID_ISO / INDEP_ANISO, `uv_loss_with_confidences` is never set —
        # presumably config validation upstream guarantees one of these two;
        # verify.
        if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO:
            self.uv_loss_with_confidences = IIDIsotropicGaussianUVLoss(
                self.confidence_model_cfg.uv_confidence.epsilon
            )
        elif self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.INDEP_ANISO:
            self.uv_loss_with_confidences = IndepAnisotropicGaussianUVLoss(
                self.confidence_model_cfg.uv_confidence.epsilon
            )

    def produce_fake_densepose_losses_uv(self, densepose_predictor_outputs: Any) -> LossDict:
        """
        Overrides fake losses for fine segmentation and U/V coordinates to
        include computation graphs for additional confidence parameters.
        These are used when no suitable ground truth data was found in a batch.
        The loss has a value 0 and is primarily used to construct the computation graph,
        so that `DistributedDataParallel` has similar graphs on all GPUs and can
        perform reduction properly.

        Args:
            densepose_predictor_outputs: DensePose predictor outputs, an object
                of a dataclass that is assumed to have the following attributes:
             * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
             * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
             * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
        Return:
            dict: str -> tensor: dict of losses with the following entries:
             * `loss_densepose_UV`: has value 0 (replaces the separate U/V
               entries when confidence modeling is enabled)
        """
        conf_type = self.confidence_model_cfg.uv_confidence.type
        if self.confidence_model_cfg.uv_confidence.enabled:
            # zero-valued loss that still references u/v (and the confidence
            # channels below) so the DDP graph matches batches with GT
            loss_uv = (
                densepose_predictor_outputs.u.sum() + densepose_predictor_outputs.v.sum()
            ) * 0
            if conf_type == DensePoseUVConfidenceType.IID_ISO:
                loss_uv += densepose_predictor_outputs.sigma_2.sum() * 0
            elif conf_type == DensePoseUVConfidenceType.INDEP_ANISO:
                loss_uv += (
                    densepose_predictor_outputs.sigma_2.sum()
                    + densepose_predictor_outputs.kappa_u.sum()
                    + densepose_predictor_outputs.kappa_v.sum()
                ) * 0
            return {"loss_densepose_UV": loss_uv}
        else:
            return super().produce_fake_densepose_losses_uv(densepose_predictor_outputs)

    def produce_densepose_losses_uv(
        self,
        proposals_with_gt: List[Instances],
        densepose_predictor_outputs: Any,
        packed_annotations: Any,
        interpolator: BilinearInterpolationHelper,
        j_valid_fg: torch.Tensor,
    ) -> LossDict:
        """
        U/V losses. With confidence modeling enabled, returns a single
        `loss_densepose_UV` entry computed by the configured Gaussian NLL
        loss (weighted by `self.w_points`); otherwise delegates to the parent
        smooth-L1 implementation.
        """
        conf_type = self.confidence_model_cfg.uv_confidence.type
        if self.confidence_model_cfg.uv_confidence.enabled:
            # interpolate estimates at annotated points, keep valid FG points
            u_gt = packed_annotations.u_gt[j_valid_fg]
            u_est = interpolator.extract_at_points(densepose_predictor_outputs.u)[j_valid_fg]
            v_gt = packed_annotations.v_gt[j_valid_fg]
            v_est = interpolator.extract_at_points(densepose_predictor_outputs.v)[j_valid_fg]
            sigma_2_est = interpolator.extract_at_points(densepose_predictor_outputs.sigma_2)[
                j_valid_fg
            ]
            if conf_type == DensePoseUVConfidenceType.IID_ISO:
                return {
                    "loss_densepose_UV": (
                        self.uv_loss_with_confidences(u_est, v_est, sigma_2_est, u_gt, v_gt)
                        * self.w_points
                    )
                }
            elif conf_type in [DensePoseUVConfidenceType.INDEP_ANISO]:
                # anisotropic case additionally uses the kappa_u/kappa_v channels
                kappa_u_est = interpolator.extract_at_points(densepose_predictor_outputs.kappa_u)[
                    j_valid_fg
                ]
                kappa_v_est = interpolator.extract_at_points(densepose_predictor_outputs.kappa_v)[
                    j_valid_fg
                ]
                return {
                    "loss_densepose_UV": (
                        self.uv_loss_with_confidences(
                            u_est, v_est, sigma_2_est, kappa_u_est, kappa_v_est, u_gt, v_gt
                        )
                        * self.w_points
                    )
                }
        # confidence disabled (or unrecognized type): fall back to smooth L1
        return super().produce_densepose_losses_uv(
            proposals_with_gt,
            densepose_predictor_outputs,
            packed_annotations,
            interpolator,
            j_valid_fg,
        )
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
class IIDIsotropicGaussianUVLoss(nn.Module):
    """
    Negative log likelihood for iid residuals with isotropic covariance:
    $Sigma_i = sigma_i^2 I$
    The loss is:
    $1/2 sum_{i=1}^n (log(2 pi) + 2 log sigma_i^2 + ||delta_i||^2 / sigma_i^2)$,
    where $delta_i=(u - u', v - v')$ is the 2D difference between estimated
    and ground truth UV values.
    For details, see:
    N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning
    Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019
    """

    def __init__(self, sigma_lower_bound: float):
        super().__init__()
        # lower bound keeps the variance away from the degenerate sigma -> 0
        self.sigma_lower_bound = sigma_lower_bound
        self.log2pi = math.log(2 * math.pi)

    def forward(
        self,
        u: torch.Tensor,
        v: torch.Tensor,
        sigma_u: torch.Tensor,
        target_u: torch.Tensor,
        target_v: torch.Tensor,
    ):
        # sigma_i^2 = softplus(raw) + lower bound (always positive)
        sigma2 = F.softplus(sigma_u) + self.sigma_lower_bound
        # squared residual norm ||delta_i||^2
        delta_u = u - target_u
        delta_v = v - target_v
        delta_t_delta = delta_u * delta_u + delta_v * delta_v
        # per-point NLL from the formula in the class docstring, summed
        nll = 0.5 * (self.log2pi + 2 * torch.log(sigma2) + delta_t_delta / sigma2)
        return nll.sum()
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
class IndepAnisotropicGaussianUVLoss(nn.Module):
    """
    Negative log likelihood for independent residuals with anisotropic
    covariances:
    $Sigma_i = sigma_i^2 I + r_i r_i^T$
    The loss is:
    $1/2 sum_{i=1}^n (log(2 pi)
      + log sigma_i^2 (sigma_i^2 + ||r_i||^2)
      + ||delta_i||^2 / sigma_i^2
      - <delta_i, r_i>^2 / (sigma_i^2 * (sigma_i^2 + ||r_i||^2)))$,
    where $delta_i=(u - u', v - v')$ is the 2D difference between estimated
    and ground truth UV values.
    For details, see:
    N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning
    Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019
    """

    def __init__(self, sigma_lower_bound: float):
        super().__init__()
        # lower bound keeps the variance away from the degenerate sigma -> 0
        self.sigma_lower_bound = sigma_lower_bound
        self.log2pi = math.log(2 * math.pi)

    def forward(
        self,
        u: torch.Tensor,
        v: torch.Tensor,
        sigma_u: torch.Tensor,
        kappa_u_est: torch.Tensor,
        kappa_v_est: torch.Tensor,
        target_u: torch.Tensor,
        target_v: torch.Tensor,
    ):
        # sigma_i^2 = softplus(raw) + lower bound (always positive)
        sigma2 = F.softplus(sigma_u) + self.sigma_lower_bound
        # ||r_i||^2 from the anisotropy direction (kappa_u, kappa_v)
        r_sqnorm2 = kappa_u_est * kappa_u_est + kappa_v_est * kappa_v_est
        delta_u = u - target_u
        delta_v = v - target_v
        # ||delta_i||^2
        delta_sqnorm = delta_u * delta_u + delta_v * delta_v
        # scalar product <delta_i, r_i> and its square
        delta_r = delta_u * kappa_u_est + delta_v * kappa_v_est
        delta_r_sqnorm = delta_r * delta_r
        # determinant-like term sigma^2 * (sigma^2 + ||r||^2)
        denom2 = sigma2 * (sigma2 + r_sqnorm2)
        nll = 0.5 * (
            self.log2pi + torch.log(denom2) + delta_sqnorm / sigma2 - delta_r_sqnorm / denom2
        )
        return nll.sum()
|
Leffa/3rdparty/densepose/modeling/losses/cse.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
| 2 |
+
|
| 3 |
+
from typing import Any, List
|
| 4 |
+
from torch import nn
|
| 5 |
+
|
| 6 |
+
from detectron2.config import CfgNode
|
| 7 |
+
from detectron2.structures import Instances
|
| 8 |
+
|
| 9 |
+
from .cycle_pix2shape import PixToShapeCycleLoss
|
| 10 |
+
from .cycle_shape2shape import ShapeToShapeCycleLoss
|
| 11 |
+
from .embed import EmbeddingLoss
|
| 12 |
+
from .embed_utils import CseAnnotationsAccumulator
|
| 13 |
+
from .mask_or_segm import MaskOrSegmentationLoss
|
| 14 |
+
from .registry import DENSEPOSE_LOSS_REGISTRY
|
| 15 |
+
from .soft_embed import SoftEmbeddingLoss
|
| 16 |
+
from .utils import BilinearInterpolationHelper, LossDict, extract_packed_annotations_from_matches
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
@DENSEPOSE_LOSS_REGISTRY.register()
class DensePoseCseLoss:
    """
    Continuous Surface Embeddings (CSE) loss.

    Combines: a coarse segmentation loss (`loss_densepose_S`, via
    `MaskOrSegmentationLoss`), per-mesh embedding losses
    (`loss_densepose_E<meshid>`), and optional shape-to-shape
    (`loss_shape2shape`) and pixel-to-shape (`loss_pix2shape`) cycle losses,
    all weighted by the corresponding config values.
    """

    # maps config EMBED_LOSS_NAME to the local embedding loss class;
    # a plain dict is used instead of the registry (see create_embed_loss)
    _EMBED_LOSS_REGISTRY = {
        EmbeddingLoss.__name__: EmbeddingLoss,
        SoftEmbeddingLoss.__name__: SoftEmbeddingLoss,
    }

    def __init__(self, cfg: CfgNode):
        """
        Initialize CSE loss from configuration options

        Args:
            cfg (CfgNode): configuration options
        """
        self.w_segm = cfg.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS
        self.w_embed = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_LOSS_WEIGHT
        self.segm_loss = MaskOrSegmentationLoss(cfg)
        self.embed_loss = DensePoseCseLoss.create_embed_loss(cfg)
        self.do_shape2shape = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.ENABLED
        if self.do_shape2shape:
            self.w_shape2shape = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.WEIGHT
            self.shape2shape_loss = ShapeToShapeCycleLoss(cfg)
        self.do_pix2shape = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.ENABLED
        if self.do_pix2shape:
            self.w_pix2shape = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.WEIGHT
            self.pix2shape_loss = PixToShapeCycleLoss(cfg)

    @classmethod
    def create_embed_loss(cls, cfg: CfgNode):
        """Instantiate the embedding loss class named by EMBED_LOSS_NAME."""
        # registry not used here, since embedding losses are currently local
        # and are not used anywhere else
        return cls._EMBED_LOSS_REGISTRY[cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_LOSS_NAME](cfg)

    def __call__(
        self,
        proposals_with_gt: List[Instances],
        densepose_predictor_outputs: Any,
        embedder: nn.Module,
    ) -> LossDict:
        """
        Compute all CSE losses for a batch. Falls back to zero-valued fake
        losses (same keys, graph-attached) when the batch has no proposals
        or no usable packed annotations.
        """
        if not len(proposals_with_gt):
            return self.produce_fake_losses(densepose_predictor_outputs, embedder)
        accumulator = CseAnnotationsAccumulator()
        packed_annotations = extract_packed_annotations_from_matches(proposals_with_gt, accumulator)
        if packed_annotations is None:
            return self.produce_fake_losses(densepose_predictor_outputs, embedder)
        # interpolation grid size is taken from the embedding output (H, W)
        h, w = densepose_predictor_outputs.embedding.shape[2:]
        interpolator = BilinearInterpolationHelper.from_matches(
            packed_annotations,
            (h, w),
        )
        meshid_to_embed_losses = self.embed_loss(
            proposals_with_gt,
            densepose_predictor_outputs,
            packed_annotations,
            interpolator,
            embedder,
        )
        # one weighted loss entry per mesh id present in the annotations
        embed_loss_dict = {
            f"loss_densepose_E{meshid}": self.w_embed * meshid_to_embed_losses[meshid]
            for meshid in meshid_to_embed_losses
        }
        all_loss_dict = {
            "loss_densepose_S": self.w_segm
            * self.segm_loss(proposals_with_gt, densepose_predictor_outputs, packed_annotations),
            **embed_loss_dict,
        }
        if self.do_shape2shape:
            all_loss_dict["loss_shape2shape"] = self.w_shape2shape * self.shape2shape_loss(embedder)
        if self.do_pix2shape:
            all_loss_dict["loss_pix2shape"] = self.w_pix2shape * self.pix2shape_loss(
                proposals_with_gt, densepose_predictor_outputs, packed_annotations, embedder
            )
        return all_loss_dict

    def produce_fake_losses(
        self, densepose_predictor_outputs: Any, embedder: nn.Module
    ) -> LossDict:
        """
        Zero-valued losses with the same keys as `__call__`, used when no
        ground truth is available so `DistributedDataParallel` sees identical
        graphs on all GPUs. Note the weights are intentionally not applied
        here (values are already 0).
        """
        meshname_to_embed_losses = self.embed_loss.fake_values(
            densepose_predictor_outputs, embedder=embedder
        )
        embed_loss_dict = {
            f"loss_densepose_E{mesh_name}": meshname_to_embed_losses[mesh_name]
            for mesh_name in meshname_to_embed_losses
        }
        all_loss_dict = {
            "loss_densepose_S": self.segm_loss.fake_value(densepose_predictor_outputs),
            **embed_loss_dict,
        }
        if self.do_shape2shape:
            all_loss_dict["loss_shape2shape"] = self.shape2shape_loss.fake_value(embedder)
        if self.do_pix2shape:
            all_loss_dict["loss_pix2shape"] = self.pix2shape_loss.fake_value(
                densepose_predictor_outputs, embedder
            )
        return all_loss_dict
|
Leffa/3rdparty/densepose/modeling/losses/cycle_pix2shape.py
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
| 2 |
+
|
| 3 |
+
from typing import Any, List
|
| 4 |
+
import torch
|
| 5 |
+
from torch import nn
|
| 6 |
+
from torch.nn import functional as F
|
| 7 |
+
|
| 8 |
+
from detectron2.config import CfgNode
|
| 9 |
+
from detectron2.structures import Instances
|
| 10 |
+
|
| 11 |
+
from densepose.data.meshes.catalog import MeshCatalog
|
| 12 |
+
from densepose.modeling.cse.utils import normalize_embeddings, squared_euclidean_distance_matrix
|
| 13 |
+
|
| 14 |
+
from .embed_utils import PackedCseAnnotations
|
| 15 |
+
from .mask import extract_data_for_mask_loss_from_matches
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def _create_pixel_dist_matrix(grid_size: int) -> torch.Tensor:
    """
    Squared pairwise distances between all pixels of a grid_size x grid_size
    grid, as a [grid_size^2, grid_size^2] tensor.

    Pixels are enumerated row-major: flattened index i corresponds to
    row = i // grid_size, col = i % grid_size.
    """
    axis = torch.arange(grid_size)
    coords_2d = torch.stack(torch.meshgrid(axis, axis), -1)
    pix_coords = coords_2d.reshape((grid_size * grid_size, 2)).float()
    return squared_euclidean_distance_matrix(pix_coords, pix_coords)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def _sample_fg_pixels_randperm(fg_mask: torch.Tensor, sample_size: int) -> torch.Tensor:
|
| 31 |
+
fg_mask_flattened = fg_mask.reshape((-1,))
|
| 32 |
+
num_pixels = int(fg_mask_flattened.sum().item())
|
| 33 |
+
fg_pixel_indices = fg_mask_flattened.nonzero(as_tuple=True)[0]
|
| 34 |
+
if (sample_size <= 0) or (num_pixels <= sample_size):
|
| 35 |
+
return fg_pixel_indices
|
| 36 |
+
sample_indices = torch.randperm(num_pixels, device=fg_mask.device)[:sample_size]
|
| 37 |
+
return fg_pixel_indices[sample_indices]
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def _sample_fg_pixels_multinomial(fg_mask: torch.Tensor, sample_size: int) -> torch.Tensor:
|
| 41 |
+
fg_mask_flattened = fg_mask.reshape((-1,))
|
| 42 |
+
num_pixels = int(fg_mask_flattened.sum().item())
|
| 43 |
+
if (sample_size <= 0) or (num_pixels <= sample_size):
|
| 44 |
+
return fg_mask_flattened.nonzero(as_tuple=True)[0]
|
| 45 |
+
return fg_mask_flattened.float().multinomial(sample_size, replacement=False)
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
class PixToShapeCycleLoss(nn.Module):
    """
    Cycle loss for pixel-vertex correspondence.

    For each instance and each considered mesh, samples foreground pixels,
    maps pixel embeddings to mesh vertices and back via softmax similarity,
    and penalizes cycles that land far (in image space) from where they
    started.
    """

    def __init__(self, cfg: CfgNode):
        super().__init__()
        self.shape_names = list(cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDERS.keys())
        self.embed_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE
        self.norm_p = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.NORM_P
        # if True, cycle over every configured mesh, not only those present in GT
        self.use_all_meshes_not_gt_only = (
            cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.USE_ALL_MESHES_NOT_GT_ONLY
        )
        self.num_pixels_to_sample = (
            cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.NUM_PIXELS_TO_SAMPLE
        )
        # NOTE(review): pix_sigma is read from config but not used anywhere
        # in this module — confirm whether it is dead config or used elsewhere
        self.pix_sigma = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.PIXEL_SIGMA
        self.temperature_pix_to_vertex = (
            cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.TEMPERATURE_PIXEL_TO_VERTEX
        )
        self.temperature_vertex_to_pix = (
            cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.TEMPERATURE_VERTEX_TO_PIXEL
        )
        # precomputed [S^2, S^2] squared pixel distance matrix for the output grid
        self.pixel_dists = _create_pixel_dist_matrix(cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE)

    def forward(
        self,
        proposals_with_gt: List[Instances],
        densepose_predictor_outputs: Any,
        packed_annotations: PackedCseAnnotations,
        embedder: nn.Module,
    ):
        """
        Args:
            proposals_with_gt (list of Instances): detections with associated
                ground truth data; each item corresponds to instances detected
                on 1 image; the number of items corresponds to the number of
                images in a batch
            densepose_predictor_outputs: an object of a dataclass that contains predictor
                outputs with estimated values; assumed to have the following attributes:
                * embedding - embedding estimates, tensor of shape [N, D, S, S], where
                  N = number of instances (= sum N_i, where N_i is the number of
                  instances on image i)
                  D = embedding space dimensionality (MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE)
                  S = output size (width and height)
            packed_annotations (PackedCseAnnotations): contains various data useful
                for loss computation, each data is packed into a single tensor
            embedder (nn.Module): module that computes vertex embeddings for different meshes
        """
        pix_embeds = densepose_predictor_outputs.embedding
        if self.pixel_dists.device != pix_embeds.device:
            # should normally be done only once
            self.pixel_dists = self.pixel_dists.to(device=pix_embeds.device)
        with torch.no_grad():
            mask_loss_data = extract_data_for_mask_loss_from_matches(
                proposals_with_gt, densepose_predictor_outputs.coarse_segm
            )
        # GT masks - tensor of shape [N, S, S] of int64
        masks_gt = mask_loss_data.masks_gt.long()  # pyre-ignore[16]
        assert len(pix_embeds) == len(masks_gt), (
            f"Number of instances with embeddings {len(pix_embeds)} != "
            f"number of instances with GT masks {len(masks_gt)}"
        )
        losses = []
        # choose which meshes participate: all configured, or only those in GT
        mesh_names = (
            self.shape_names
            if self.use_all_meshes_not_gt_only
            else [
                MeshCatalog.get_mesh_name(mesh_id.item())
                for mesh_id in packed_annotations.vertex_mesh_ids_gt.unique()
            ]
        )
        for pixel_embeddings, mask_gt in zip(pix_embeds, masks_gt):
            # pixel_embeddings [D, S, S]
            # mask_gt [S, S]
            for mesh_name in mesh_names:
                mesh_vertex_embeddings = embedder(mesh_name)
                # pixel indices [M]
                pixel_indices_flattened = _sample_fg_pixels_randperm(
                    mask_gt, self.num_pixels_to_sample
                )
                # pixel distances [M, M]
                pixel_dists = self.pixel_dists.to(pixel_embeddings.device)[
                    torch.meshgrid(pixel_indices_flattened, pixel_indices_flattened)
                ]
                # pixel embeddings [M, D]
                pixel_embeddings_sampled = normalize_embeddings(
                    pixel_embeddings.reshape((self.embed_size, -1))[:, pixel_indices_flattened].T
                )
                # pixel-vertex similarity [M, K]
                sim_matrix = pixel_embeddings_sampled.mm(mesh_vertex_embeddings.T)
                # soft pixel->vertex and vertex->pixel assignment matrices
                c_pix_vertex = F.softmax(sim_matrix / self.temperature_pix_to_vertex, dim=1)
                c_vertex_pix = F.softmax(sim_matrix.T / self.temperature_vertex_to_pix, dim=1)
                # round-trip pixel->vertex->pixel assignment [M, M]
                c_cycle = c_pix_vertex.mm(c_vertex_pix)
                # penalize cycle mass landing on spatially distant pixels
                loss_cycle = torch.norm(pixel_dists * c_cycle, p=self.norm_p)
                losses.append(loss_cycle)

        if len(losses) == 0:
            # keep graph attached even when nothing was sampled
            return pix_embeds.sum() * 0
        return torch.stack(losses, dim=0).mean()

    def fake_value(self, densepose_predictor_outputs: Any, embedder: nn.Module):
        # zero-valued loss touching the embedder and the embedding outputs,
        # so DDP graphs match batches that have ground truth
        losses = [embedder(mesh_name).sum() * 0 for mesh_name in embedder.mesh_names]
        losses.append(densepose_predictor_outputs.embedding.sum() * 0)
        return torch.mean(torch.stack(losses))
|
Leffa/3rdparty/densepose/modeling/losses/cycle_shape2shape.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
| 2 |
+
|
| 3 |
+
import random
|
| 4 |
+
from typing import Tuple
|
| 5 |
+
import torch
|
| 6 |
+
from torch import nn
|
| 7 |
+
from torch.nn import functional as F
|
| 8 |
+
|
| 9 |
+
from detectron2.config import CfgNode
|
| 10 |
+
|
| 11 |
+
from densepose.structures.mesh import create_mesh
|
| 12 |
+
|
| 13 |
+
from .utils import sample_random_indices
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class ShapeToShapeCycleLoss(nn.Module):
|
| 17 |
+
"""
|
| 18 |
+
Cycle Loss for Shapes.
|
| 19 |
+
Inspired by:
|
| 20 |
+
"Mapping in a Cycle: Sinkhorn Regularized Unsupervised Learning for Point Cloud Shapes".
|
| 21 |
+
"""
|
| 22 |
+
|
| 23 |
+
def __init__(self, cfg: CfgNode):
|
| 24 |
+
super().__init__()
|
| 25 |
+
self.shape_names = list(cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDERS.keys())
|
| 26 |
+
self.all_shape_pairs = [
|
| 27 |
+
(x, y) for i, x in enumerate(self.shape_names) for y in self.shape_names[i + 1 :]
|
| 28 |
+
]
|
| 29 |
+
random.shuffle(self.all_shape_pairs)
|
| 30 |
+
self.cur_pos = 0
|
| 31 |
+
self.norm_p = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.NORM_P
|
| 32 |
+
self.temperature = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.TEMPERATURE
|
| 33 |
+
self.max_num_vertices = (
|
| 34 |
+
cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.MAX_NUM_VERTICES
|
| 35 |
+
)
|
| 36 |
+
|
| 37 |
+
def _sample_random_pair(self) -> Tuple[str, str]:
|
| 38 |
+
"""
|
| 39 |
+
Produce a random pair of different mesh names
|
| 40 |
+
|
| 41 |
+
Return:
|
| 42 |
+
tuple(str, str): a pair of different mesh names
|
| 43 |
+
"""
|
| 44 |
+
if self.cur_pos >= len(self.all_shape_pairs):
|
| 45 |
+
random.shuffle(self.all_shape_pairs)
|
| 46 |
+
self.cur_pos = 0
|
| 47 |
+
shape_pair = self.all_shape_pairs[self.cur_pos]
|
| 48 |
+
self.cur_pos += 1
|
| 49 |
+
return shape_pair
|
| 50 |
+
|
| 51 |
+
def forward(self, embedder: nn.Module):
|
| 52 |
+
"""
|
| 53 |
+
Do a forward pass with a random pair (src, dst) pair of shapes
|
| 54 |
+
Args:
|
| 55 |
+
embedder (nn.Module): module that computes vertex embeddings for different meshes
|
| 56 |
+
"""
|
| 57 |
+
src_mesh_name, dst_mesh_name = self._sample_random_pair()
|
| 58 |
+
return self._forward_one_pair(embedder, src_mesh_name, dst_mesh_name)
|
| 59 |
+
|
| 60 |
+
def fake_value(self, embedder: nn.Module):
|
| 61 |
+
losses = []
|
| 62 |
+
for mesh_name in embedder.mesh_names:
|
| 63 |
+
losses.append(embedder(mesh_name).sum() * 0)
|
| 64 |
+
return torch.mean(torch.stack(losses))
|
| 65 |
+
|
| 66 |
+
def _get_embeddings_and_geodists_for_mesh(
|
| 67 |
+
self, embedder: nn.Module, mesh_name: str
|
| 68 |
+
) -> Tuple[torch.Tensor, torch.Tensor]:
|
| 69 |
+
"""
|
| 70 |
+
Produces embeddings and geodesic distance tensors for a given mesh. May subsample
|
| 71 |
+
the mesh, if it contains too many vertices (controlled by
|
| 72 |
+
SHAPE_CYCLE_LOSS_MAX_NUM_VERTICES parameter).
|
| 73 |
+
Args:
|
| 74 |
+
embedder (nn.Module): module that computes embeddings for mesh vertices
|
| 75 |
+
mesh_name (str): mesh name
|
| 76 |
+
Return:
|
| 77 |
+
embeddings (torch.Tensor of size [N, D]): embeddings for selected mesh
|
| 78 |
+
vertices (N = number of selected vertices, D = embedding space dim)
|
| 79 |
+
geodists (torch.Tensor of size [N, N]): geodesic distances for the selected
|
| 80 |
+
mesh vertices (N = number of selected vertices)
|
| 81 |
+
"""
|
| 82 |
+
embeddings = embedder(mesh_name)
|
| 83 |
+
indices = sample_random_indices(
|
| 84 |
+
embeddings.shape[0], self.max_num_vertices, embeddings.device
|
| 85 |
+
)
|
| 86 |
+
mesh = create_mesh(mesh_name, embeddings.device)
|
| 87 |
+
geodists = mesh.geodists
|
| 88 |
+
if indices is not None:
|
| 89 |
+
embeddings = embeddings[indices]
|
| 90 |
+
geodists = geodists[torch.meshgrid(indices, indices)]
|
| 91 |
+
return embeddings, geodists
|
| 92 |
+
|
| 93 |
+
def _forward_one_pair(
|
| 94 |
+
self, embedder: nn.Module, mesh_name_1: str, mesh_name_2: str
|
| 95 |
+
) -> torch.Tensor:
|
| 96 |
+
"""
|
| 97 |
+
Do a forward pass with a selected pair of meshes
|
| 98 |
+
Args:
|
| 99 |
+
embedder (nn.Module): module that computes vertex embeddings for different meshes
|
| 100 |
+
mesh_name_1 (str): first mesh name
|
| 101 |
+
mesh_name_2 (str): second mesh name
|
| 102 |
+
Return:
|
| 103 |
+
Tensor containing the loss value
|
| 104 |
+
"""
|
| 105 |
+
embeddings_1, geodists_1 = self._get_embeddings_and_geodists_for_mesh(embedder, mesh_name_1)
|
| 106 |
+
embeddings_2, geodists_2 = self._get_embeddings_and_geodists_for_mesh(embedder, mesh_name_2)
|
| 107 |
+
sim_matrix_12 = embeddings_1.mm(embeddings_2.T)
|
| 108 |
+
|
| 109 |
+
c_12 = F.softmax(sim_matrix_12 / self.temperature, dim=1)
|
| 110 |
+
c_21 = F.softmax(sim_matrix_12.T / self.temperature, dim=1)
|
| 111 |
+
c_11 = c_12.mm(c_21)
|
| 112 |
+
c_22 = c_21.mm(c_12)
|
| 113 |
+
|
| 114 |
+
loss_cycle_11 = torch.norm(geodists_1 * c_11, p=self.norm_p)
|
| 115 |
+
loss_cycle_22 = torch.norm(geodists_2 * c_22, p=self.norm_p)
|
| 116 |
+
|
| 117 |
+
return loss_cycle_11 + loss_cycle_22
|
Leffa/3rdparty/densepose/modeling/losses/embed.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
| 2 |
+
|
| 3 |
+
from typing import Any, Dict, List
|
| 4 |
+
import torch
|
| 5 |
+
from torch import nn
|
| 6 |
+
from torch.nn import functional as F
|
| 7 |
+
|
| 8 |
+
from detectron2.config import CfgNode
|
| 9 |
+
from detectron2.structures import Instances
|
| 10 |
+
|
| 11 |
+
from densepose.data.meshes.catalog import MeshCatalog
|
| 12 |
+
from densepose.modeling.cse.utils import normalize_embeddings, squared_euclidean_distance_matrix
|
| 13 |
+
|
| 14 |
+
from .embed_utils import PackedCseAnnotations
|
| 15 |
+
from .utils import BilinearInterpolationHelper
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class EmbeddingLoss:
|
| 19 |
+
"""
|
| 20 |
+
Computes losses for estimated embeddings given annotated vertices.
|
| 21 |
+
Instances in a minibatch that correspond to the same mesh are grouped
|
| 22 |
+
together. For each group, loss is computed as cross-entropy for
|
| 23 |
+
unnormalized scores given ground truth mesh vertex ids.
|
| 24 |
+
Scores are based on squared distances between estimated vertex embeddings
|
| 25 |
+
and mesh vertex embeddings.
|
| 26 |
+
"""
|
| 27 |
+
|
| 28 |
+
def __init__(self, cfg: CfgNode):
|
| 29 |
+
"""
|
| 30 |
+
Initialize embedding loss from config
|
| 31 |
+
"""
|
| 32 |
+
self.embdist_gauss_sigma = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDING_DIST_GAUSS_SIGMA
|
| 33 |
+
|
| 34 |
+
def __call__(
|
| 35 |
+
self,
|
| 36 |
+
proposals_with_gt: List[Instances],
|
| 37 |
+
densepose_predictor_outputs: Any,
|
| 38 |
+
packed_annotations: PackedCseAnnotations,
|
| 39 |
+
interpolator: BilinearInterpolationHelper,
|
| 40 |
+
embedder: nn.Module,
|
| 41 |
+
) -> Dict[int, torch.Tensor]:
|
| 42 |
+
"""
|
| 43 |
+
Produces losses for estimated embeddings given annotated vertices.
|
| 44 |
+
Embeddings for all the vertices of a mesh are computed by the embedder.
|
| 45 |
+
Embeddings for observed pixels are estimated by a predictor.
|
| 46 |
+
Losses are computed as cross-entropy for squared distances between
|
| 47 |
+
observed vertex embeddings and all mesh vertex embeddings given
|
| 48 |
+
ground truth vertex IDs.
|
| 49 |
+
|
| 50 |
+
Args:
|
| 51 |
+
proposals_with_gt (list of Instances): detections with associated
|
| 52 |
+
ground truth data; each item corresponds to instances detected
|
| 53 |
+
on 1 image; the number of items corresponds to the number of
|
| 54 |
+
images in a batch
|
| 55 |
+
densepose_predictor_outputs: an object of a dataclass that contains predictor
|
| 56 |
+
outputs with estimated values; assumed to have the following attributes:
|
| 57 |
+
* embedding - embedding estimates, tensor of shape [N, D, S, S], where
|
| 58 |
+
N = number of instances (= sum N_i, where N_i is the number of
|
| 59 |
+
instances on image i)
|
| 60 |
+
D = embedding space dimensionality (MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE)
|
| 61 |
+
S = output size (width and height)
|
| 62 |
+
packed_annotations (PackedCseAnnotations): contains various data useful
|
| 63 |
+
for loss computation, each data is packed into a single tensor
|
| 64 |
+
interpolator (BilinearInterpolationHelper): bilinear interpolation helper
|
| 65 |
+
embedder (nn.Module): module that computes vertex embeddings for different meshes
|
| 66 |
+
Return:
|
| 67 |
+
dict(int -> tensor): losses for different mesh IDs
|
| 68 |
+
"""
|
| 69 |
+
losses = {}
|
| 70 |
+
for mesh_id_tensor in packed_annotations.vertex_mesh_ids_gt.unique():
|
| 71 |
+
mesh_id = mesh_id_tensor.item()
|
| 72 |
+
mesh_name = MeshCatalog.get_mesh_name(mesh_id)
|
| 73 |
+
# valid points are those that fall into estimated bbox
|
| 74 |
+
# and correspond to the current mesh
|
| 75 |
+
j_valid = interpolator.j_valid * ( # pyre-ignore[16]
|
| 76 |
+
packed_annotations.vertex_mesh_ids_gt == mesh_id
|
| 77 |
+
)
|
| 78 |
+
if not torch.any(j_valid):
|
| 79 |
+
continue
|
| 80 |
+
# extract estimated embeddings for valid points
|
| 81 |
+
# -> tensor [J, D]
|
| 82 |
+
vertex_embeddings_i = normalize_embeddings(
|
| 83 |
+
interpolator.extract_at_points(
|
| 84 |
+
densepose_predictor_outputs.embedding,
|
| 85 |
+
slice_fine_segm=slice(None),
|
| 86 |
+
w_ylo_xlo=interpolator.w_ylo_xlo[:, None], # pyre-ignore[16]
|
| 87 |
+
w_ylo_xhi=interpolator.w_ylo_xhi[:, None], # pyre-ignore[16]
|
| 88 |
+
w_yhi_xlo=interpolator.w_yhi_xlo[:, None], # pyre-ignore[16]
|
| 89 |
+
w_yhi_xhi=interpolator.w_yhi_xhi[:, None], # pyre-ignore[16]
|
| 90 |
+
)[j_valid, :]
|
| 91 |
+
)
|
| 92 |
+
# extract vertex ids for valid points
|
| 93 |
+
# -> tensor [J]
|
| 94 |
+
vertex_indices_i = packed_annotations.vertex_ids_gt[j_valid]
|
| 95 |
+
# embeddings for all mesh vertices
|
| 96 |
+
# -> tensor [K, D]
|
| 97 |
+
mesh_vertex_embeddings = embedder(mesh_name)
|
| 98 |
+
# unnormalized scores for valid points
|
| 99 |
+
# -> tensor [J, K]
|
| 100 |
+
scores = squared_euclidean_distance_matrix(
|
| 101 |
+
vertex_embeddings_i, mesh_vertex_embeddings
|
| 102 |
+
) / (-self.embdist_gauss_sigma)
|
| 103 |
+
losses[mesh_name] = F.cross_entropy(scores, vertex_indices_i, ignore_index=-1)
|
| 104 |
+
|
| 105 |
+
for mesh_name in embedder.mesh_names:
|
| 106 |
+
if mesh_name not in losses:
|
| 107 |
+
losses[mesh_name] = self.fake_value(
|
| 108 |
+
densepose_predictor_outputs, embedder, mesh_name
|
| 109 |
+
)
|
| 110 |
+
return losses
|
| 111 |
+
|
| 112 |
+
def fake_values(self, densepose_predictor_outputs: Any, embedder: nn.Module):
|
| 113 |
+
losses = {}
|
| 114 |
+
for mesh_name in embedder.mesh_names:
|
| 115 |
+
losses[mesh_name] = self.fake_value(densepose_predictor_outputs, embedder, mesh_name)
|
| 116 |
+
return losses
|
| 117 |
+
|
| 118 |
+
def fake_value(self, densepose_predictor_outputs: Any, embedder: nn.Module, mesh_name: str):
|
| 119 |
+
return densepose_predictor_outputs.embedding.sum() * 0 + embedder(mesh_name).sum() * 0
|
Leffa/3rdparty/densepose/modeling/losses/embed_utils.py
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
from typing import Any, Optional
|
| 5 |
+
import torch
|
| 6 |
+
|
| 7 |
+
from detectron2.structures import BoxMode, Instances
|
| 8 |
+
|
| 9 |
+
from .utils import AnnotationsAccumulator
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
@dataclass
|
| 13 |
+
class PackedCseAnnotations:
|
| 14 |
+
x_gt: torch.Tensor
|
| 15 |
+
y_gt: torch.Tensor
|
| 16 |
+
coarse_segm_gt: Optional[torch.Tensor]
|
| 17 |
+
vertex_mesh_ids_gt: torch.Tensor
|
| 18 |
+
vertex_ids_gt: torch.Tensor
|
| 19 |
+
bbox_xywh_gt: torch.Tensor
|
| 20 |
+
bbox_xywh_est: torch.Tensor
|
| 21 |
+
point_bbox_with_dp_indices: torch.Tensor
|
| 22 |
+
point_bbox_indices: torch.Tensor
|
| 23 |
+
bbox_indices: torch.Tensor
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class CseAnnotationsAccumulator(AnnotationsAccumulator):
|
| 27 |
+
"""
|
| 28 |
+
Accumulates annotations by batches that correspond to objects detected on
|
| 29 |
+
individual images. Can pack them together into single tensors.
|
| 30 |
+
"""
|
| 31 |
+
|
| 32 |
+
def __init__(self):
|
| 33 |
+
self.x_gt = []
|
| 34 |
+
self.y_gt = []
|
| 35 |
+
self.s_gt = []
|
| 36 |
+
self.vertex_mesh_ids_gt = []
|
| 37 |
+
self.vertex_ids_gt = []
|
| 38 |
+
self.bbox_xywh_gt = []
|
| 39 |
+
self.bbox_xywh_est = []
|
| 40 |
+
self.point_bbox_with_dp_indices = []
|
| 41 |
+
self.point_bbox_indices = []
|
| 42 |
+
self.bbox_indices = []
|
| 43 |
+
self.nxt_bbox_with_dp_index = 0
|
| 44 |
+
self.nxt_bbox_index = 0
|
| 45 |
+
|
| 46 |
+
def accumulate(self, instances_one_image: Instances):
|
| 47 |
+
"""
|
| 48 |
+
Accumulate instances data for one image
|
| 49 |
+
|
| 50 |
+
Args:
|
| 51 |
+
instances_one_image (Instances): instances data to accumulate
|
| 52 |
+
"""
|
| 53 |
+
boxes_xywh_est = BoxMode.convert(
|
| 54 |
+
instances_one_image.proposal_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
|
| 55 |
+
)
|
| 56 |
+
boxes_xywh_gt = BoxMode.convert(
|
| 57 |
+
instances_one_image.gt_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
|
| 58 |
+
)
|
| 59 |
+
n_matches = len(boxes_xywh_gt)
|
| 60 |
+
assert n_matches == len(
|
| 61 |
+
boxes_xywh_est
|
| 62 |
+
), f"Got {len(boxes_xywh_est)} proposal boxes and {len(boxes_xywh_gt)} GT boxes"
|
| 63 |
+
if not n_matches:
|
| 64 |
+
# no detection - GT matches
|
| 65 |
+
return
|
| 66 |
+
if (
|
| 67 |
+
not hasattr(instances_one_image, "gt_densepose")
|
| 68 |
+
or instances_one_image.gt_densepose is None
|
| 69 |
+
):
|
| 70 |
+
# no densepose GT for the detections, just increase the bbox index
|
| 71 |
+
self.nxt_bbox_index += n_matches
|
| 72 |
+
return
|
| 73 |
+
for box_xywh_est, box_xywh_gt, dp_gt in zip(
|
| 74 |
+
boxes_xywh_est, boxes_xywh_gt, instances_one_image.gt_densepose
|
| 75 |
+
):
|
| 76 |
+
if (dp_gt is not None) and (len(dp_gt.x) > 0):
|
| 77 |
+
# pyre-fixme[6]: For 1st argument expected `Tensor` but got `float`.
|
| 78 |
+
# pyre-fixme[6]: For 2nd argument expected `Tensor` but got `float`.
|
| 79 |
+
self._do_accumulate(box_xywh_gt, box_xywh_est, dp_gt)
|
| 80 |
+
self.nxt_bbox_index += 1
|
| 81 |
+
|
| 82 |
+
def _do_accumulate(self, box_xywh_gt: torch.Tensor, box_xywh_est: torch.Tensor, dp_gt: Any):
|
| 83 |
+
"""
|
| 84 |
+
Accumulate instances data for one image, given that the data is not empty
|
| 85 |
+
|
| 86 |
+
Args:
|
| 87 |
+
box_xywh_gt (tensor): GT bounding box
|
| 88 |
+
box_xywh_est (tensor): estimated bounding box
|
| 89 |
+
dp_gt: GT densepose data with the following attributes:
|
| 90 |
+
- x: normalized X coordinates
|
| 91 |
+
- y: normalized Y coordinates
|
| 92 |
+
- segm: tensor of size [S, S] with coarse segmentation
|
| 93 |
+
-
|
| 94 |
+
"""
|
| 95 |
+
self.x_gt.append(dp_gt.x)
|
| 96 |
+
self.y_gt.append(dp_gt.y)
|
| 97 |
+
if hasattr(dp_gt, "segm"):
|
| 98 |
+
self.s_gt.append(dp_gt.segm.unsqueeze(0))
|
| 99 |
+
self.vertex_ids_gt.append(dp_gt.vertex_ids)
|
| 100 |
+
self.vertex_mesh_ids_gt.append(torch.full_like(dp_gt.vertex_ids, dp_gt.mesh_id))
|
| 101 |
+
self.bbox_xywh_gt.append(box_xywh_gt.view(-1, 4))
|
| 102 |
+
self.bbox_xywh_est.append(box_xywh_est.view(-1, 4))
|
| 103 |
+
self.point_bbox_with_dp_indices.append(
|
| 104 |
+
torch.full_like(dp_gt.vertex_ids, self.nxt_bbox_with_dp_index)
|
| 105 |
+
)
|
| 106 |
+
self.point_bbox_indices.append(torch.full_like(dp_gt.vertex_ids, self.nxt_bbox_index))
|
| 107 |
+
self.bbox_indices.append(self.nxt_bbox_index)
|
| 108 |
+
self.nxt_bbox_with_dp_index += 1
|
| 109 |
+
|
| 110 |
+
def pack(self) -> Optional[PackedCseAnnotations]:
|
| 111 |
+
"""
|
| 112 |
+
Pack data into tensors
|
| 113 |
+
"""
|
| 114 |
+
if not len(self.x_gt):
|
| 115 |
+
# TODO:
|
| 116 |
+
# returning proper empty annotations would require
|
| 117 |
+
# creating empty tensors of appropriate shape and
|
| 118 |
+
# type on an appropriate device;
|
| 119 |
+
# we return None so far to indicate empty annotations
|
| 120 |
+
return None
|
| 121 |
+
return PackedCseAnnotations(
|
| 122 |
+
x_gt=torch.cat(self.x_gt, 0),
|
| 123 |
+
y_gt=torch.cat(self.y_gt, 0),
|
| 124 |
+
vertex_mesh_ids_gt=torch.cat(self.vertex_mesh_ids_gt, 0),
|
| 125 |
+
vertex_ids_gt=torch.cat(self.vertex_ids_gt, 0),
|
| 126 |
+
# ignore segmentation annotations, if not all the instances contain those
|
| 127 |
+
coarse_segm_gt=torch.cat(self.s_gt, 0)
|
| 128 |
+
if len(self.s_gt) == len(self.bbox_xywh_gt)
|
| 129 |
+
else None,
|
| 130 |
+
bbox_xywh_gt=torch.cat(self.bbox_xywh_gt, 0),
|
| 131 |
+
bbox_xywh_est=torch.cat(self.bbox_xywh_est, 0),
|
| 132 |
+
point_bbox_with_dp_indices=torch.cat(self.point_bbox_with_dp_indices, 0),
|
| 133 |
+
point_bbox_indices=torch.cat(self.point_bbox_indices, 0),
|
| 134 |
+
bbox_indices=torch.as_tensor(
|
| 135 |
+
self.bbox_indices, dtype=torch.long, device=self.x_gt[0].device
|
| 136 |
+
),
|
| 137 |
+
)
|
Leffa/3rdparty/densepose/modeling/losses/mask.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
from typing import Any, Iterable, List, Optional
|
| 5 |
+
import torch
|
| 6 |
+
from torch.nn import functional as F
|
| 7 |
+
|
| 8 |
+
from detectron2.structures import Instances
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
@dataclass
|
| 12 |
+
class DataForMaskLoss:
|
| 13 |
+
"""
|
| 14 |
+
Contains mask GT and estimated data for proposals from multiple images:
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
# tensor of size (K, H, W) containing GT labels
|
| 18 |
+
masks_gt: Optional[torch.Tensor] = None
|
| 19 |
+
# tensor of size (K, C, H, W) containing estimated scores
|
| 20 |
+
masks_est: Optional[torch.Tensor] = None
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def extract_data_for_mask_loss_from_matches(
|
| 24 |
+
proposals_targets: Iterable[Instances], estimated_segm: torch.Tensor
|
| 25 |
+
) -> DataForMaskLoss:
|
| 26 |
+
"""
|
| 27 |
+
Extract data for mask loss from instances that contain matched GT and
|
| 28 |
+
estimated bounding boxes.
|
| 29 |
+
Args:
|
| 30 |
+
proposals_targets: Iterable[Instances]
|
| 31 |
+
matched GT and estimated results, each item in the iterable
|
| 32 |
+
corresponds to data in 1 image
|
| 33 |
+
estimated_segm: tensor(K, C, S, S) of float - raw unnormalized
|
| 34 |
+
segmentation scores, here S is the size to which GT masks are
|
| 35 |
+
to be resized
|
| 36 |
+
Return:
|
| 37 |
+
masks_est: tensor(K, C, S, S) of float - class scores
|
| 38 |
+
masks_gt: tensor(K, S, S) of int64 - labels
|
| 39 |
+
"""
|
| 40 |
+
data = DataForMaskLoss()
|
| 41 |
+
masks_gt = []
|
| 42 |
+
offset = 0
|
| 43 |
+
assert estimated_segm.shape[2] == estimated_segm.shape[3], (
|
| 44 |
+
f"Expected estimated segmentation to have a square shape, "
|
| 45 |
+
f"but the actual shape is {estimated_segm.shape[2:]}"
|
| 46 |
+
)
|
| 47 |
+
mask_size = estimated_segm.shape[2]
|
| 48 |
+
num_proposals = sum(inst.proposal_boxes.tensor.size(0) for inst in proposals_targets)
|
| 49 |
+
num_estimated = estimated_segm.shape[0]
|
| 50 |
+
assert (
|
| 51 |
+
num_proposals == num_estimated
|
| 52 |
+
), "The number of proposals {} must be equal to the number of estimates {}".format(
|
| 53 |
+
num_proposals, num_estimated
|
| 54 |
+
)
|
| 55 |
+
|
| 56 |
+
for proposals_targets_per_image in proposals_targets:
|
| 57 |
+
n_i = proposals_targets_per_image.proposal_boxes.tensor.size(0)
|
| 58 |
+
if not n_i:
|
| 59 |
+
continue
|
| 60 |
+
gt_masks_per_image = proposals_targets_per_image.gt_masks.crop_and_resize(
|
| 61 |
+
proposals_targets_per_image.proposal_boxes.tensor, mask_size
|
| 62 |
+
).to(device=estimated_segm.device)
|
| 63 |
+
masks_gt.append(gt_masks_per_image)
|
| 64 |
+
offset += n_i
|
| 65 |
+
if masks_gt:
|
| 66 |
+
data.masks_est = estimated_segm
|
| 67 |
+
data.masks_gt = torch.cat(masks_gt, dim=0)
|
| 68 |
+
return data
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
class MaskLoss:
|
| 72 |
+
"""
|
| 73 |
+
Mask loss as cross-entropy for raw unnormalized scores given ground truth labels.
|
| 74 |
+
Mask ground truth labels are defined for the whole image and not only the
|
| 75 |
+
bounding box of interest. They are stored as objects that are assumed to implement
|
| 76 |
+
the `crop_and_resize` interface (e.g. BitMasks, PolygonMasks).
|
| 77 |
+
"""
|
| 78 |
+
|
| 79 |
+
def __call__(
|
| 80 |
+
self, proposals_with_gt: List[Instances], densepose_predictor_outputs: Any
|
| 81 |
+
) -> torch.Tensor:
|
| 82 |
+
"""
|
| 83 |
+
Computes segmentation loss as cross-entropy for raw unnormalized
|
| 84 |
+
scores given ground truth labels.
|
| 85 |
+
|
| 86 |
+
Args:
|
| 87 |
+
proposals_with_gt (list of Instances): detections with associated ground truth data
|
| 88 |
+
densepose_predictor_outputs: an object of a dataclass that contains predictor outputs
|
| 89 |
+
with estimated values; assumed to have the following attribute:
|
| 90 |
+
* coarse_segm (tensor of shape [N, D, S, S]): coarse segmentation estimates
|
| 91 |
+
as raw unnormalized scores
|
| 92 |
+
where N is the number of detections, S is the estimate size ( = width = height)
|
| 93 |
+
and D is the number of coarse segmentation channels.
|
| 94 |
+
Return:
|
| 95 |
+
Cross entropy for raw unnormalized scores for coarse segmentation given
|
| 96 |
+
ground truth labels from masks
|
| 97 |
+
"""
|
| 98 |
+
if not len(proposals_with_gt):
|
| 99 |
+
return self.fake_value(densepose_predictor_outputs)
|
| 100 |
+
# densepose outputs are computed for all images and all bounding boxes;
|
| 101 |
+
# i.e. if a batch has 4 images with (3, 1, 2, 1) proposals respectively,
|
| 102 |
+
# the outputs will have size(0) == 3+1+2+1 == 7
|
| 103 |
+
with torch.no_grad():
|
| 104 |
+
mask_loss_data = extract_data_for_mask_loss_from_matches(
|
| 105 |
+
proposals_with_gt, densepose_predictor_outputs.coarse_segm
|
| 106 |
+
)
|
| 107 |
+
if (mask_loss_data.masks_gt is None) or (mask_loss_data.masks_est is None):
|
| 108 |
+
return self.fake_value(densepose_predictor_outputs)
|
| 109 |
+
return F.cross_entropy(mask_loss_data.masks_est, mask_loss_data.masks_gt.long())
|
| 110 |
+
|
| 111 |
+
def fake_value(self, densepose_predictor_outputs: Any) -> torch.Tensor:
|
| 112 |
+
"""
|
| 113 |
+
Fake segmentation loss used when no suitable ground truth data
|
| 114 |
+
was found in a batch. The loss has a value 0 and is primarily used to
|
| 115 |
+
construct the computation graph, so that `DistributedDataParallel`
|
| 116 |
+
has similar graphs on all GPUs and can perform reduction properly.
|
| 117 |
+
|
| 118 |
+
Args:
|
| 119 |
+
densepose_predictor_outputs: DensePose predictor outputs, an object
|
| 120 |
+
of a dataclass that is assumed to have `coarse_segm`
|
| 121 |
+
attribute
|
| 122 |
+
Return:
|
| 123 |
+
Zero value loss with proper computation graph
|
| 124 |
+
"""
|
| 125 |
+
return densepose_predictor_outputs.coarse_segm.sum() * 0
|
Leffa/3rdparty/densepose/modeling/losses/mask_or_segm.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
| 2 |
+
|
| 3 |
+
from typing import Any, List
|
| 4 |
+
|
| 5 |
+
import torch
|
| 6 |
+
|
| 7 |
+
from detectron2.config import CfgNode
|
| 8 |
+
from detectron2.structures import Instances
|
| 9 |
+
|
| 10 |
+
from .mask import MaskLoss
|
| 11 |
+
from .segm import SegmentationLoss
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class MaskOrSegmentationLoss:
|
| 15 |
+
"""
|
| 16 |
+
Mask or segmentation loss as cross-entropy for raw unnormalized scores
|
| 17 |
+
given ground truth labels. Ground truth labels are either defined by coarse
|
| 18 |
+
segmentation annotation, or by mask annotation, depending on the config
|
| 19 |
+
value MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
def __init__(self, cfg: CfgNode):
|
| 23 |
+
"""
|
| 24 |
+
Initialize segmentation loss from configuration options
|
| 25 |
+
|
| 26 |
+
Args:
|
| 27 |
+
cfg (CfgNode): configuration options
|
| 28 |
+
"""
|
| 29 |
+
self.segm_trained_by_masks = (
|
| 30 |
+
cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS
|
| 31 |
+
)
|
| 32 |
+
if self.segm_trained_by_masks:
|
| 33 |
+
self.mask_loss = MaskLoss()
|
| 34 |
+
self.segm_loss = SegmentationLoss(cfg)
|
| 35 |
+
|
| 36 |
+
def __call__(
|
| 37 |
+
self,
|
| 38 |
+
proposals_with_gt: List[Instances],
|
| 39 |
+
densepose_predictor_outputs: Any,
|
| 40 |
+
packed_annotations: Any,
|
| 41 |
+
) -> torch.Tensor:
|
| 42 |
+
"""
|
| 43 |
+
Compute segmentation loss as cross-entropy between aligned unnormalized
|
| 44 |
+
score estimates and ground truth; with ground truth given
|
| 45 |
+
either by masks, or by coarse segmentation annotations.
|
| 46 |
+
|
| 47 |
+
Args:
|
| 48 |
+
proposals_with_gt (list of Instances): detections with associated ground truth data
|
| 49 |
+
densepose_predictor_outputs: an object of a dataclass that contains predictor outputs
|
| 50 |
+
with estimated values; assumed to have the following attributes:
|
| 51 |
+
* coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S]
|
| 52 |
+
packed_annotations: packed annotations for efficient loss computation
|
| 53 |
+
Return:
|
| 54 |
+
tensor: loss value as cross-entropy for raw unnormalized scores
|
| 55 |
+
given ground truth labels
|
| 56 |
+
"""
|
| 57 |
+
if self.segm_trained_by_masks:
|
| 58 |
+
return self.mask_loss(proposals_with_gt, densepose_predictor_outputs)
|
| 59 |
+
return self.segm_loss(
|
| 60 |
+
proposals_with_gt, densepose_predictor_outputs, packed_annotations
|
| 61 |
+
)
|
| 62 |
+
|
| 63 |
+
def fake_value(self, densepose_predictor_outputs: Any) -> torch.Tensor:
|
| 64 |
+
"""
|
| 65 |
+
Fake segmentation loss used when no suitable ground truth data
|
| 66 |
+
was found in a batch. The loss has a value 0 and is primarily used to
|
| 67 |
+
construct the computation graph, so that `DistributedDataParallel`
|
| 68 |
+
has similar graphs on all GPUs and can perform reduction properly.
|
| 69 |
+
|
| 70 |
+
Args:
|
| 71 |
+
densepose_predictor_outputs: DensePose predictor outputs, an object
|
| 72 |
+
of a dataclass that is assumed to have `coarse_segm`
|
| 73 |
+
attribute
|
| 74 |
+
Return:
|
| 75 |
+
Zero value loss with proper computation graph
|
| 76 |
+
"""
|
| 77 |
+
return densepose_predictor_outputs.coarse_segm.sum() * 0
|
Leffa/3rdparty/densepose/modeling/losses/registry.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from detectron2.utils.registry import Registry
|
| 4 |
+
|
| 5 |
+
DENSEPOSE_LOSS_REGISTRY = Registry("DENSEPOSE_LOSS")
|
Leffa/3rdparty/densepose/modeling/losses/soft_embed.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
| 2 |
+
|
| 3 |
+
from typing import Any, Dict, List
|
| 4 |
+
import torch
|
| 5 |
+
from torch import nn
|
| 6 |
+
from torch.nn import functional as F
|
| 7 |
+
|
| 8 |
+
from detectron2.config import CfgNode
|
| 9 |
+
from detectron2.structures import Instances
|
| 10 |
+
|
| 11 |
+
from densepose.data.meshes.catalog import MeshCatalog
|
| 12 |
+
from densepose.modeling.cse.utils import normalize_embeddings, squared_euclidean_distance_matrix
|
| 13 |
+
from densepose.structures.mesh import create_mesh
|
| 14 |
+
|
| 15 |
+
from .embed_utils import PackedCseAnnotations
|
| 16 |
+
from .utils import BilinearInterpolationHelper
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class SoftEmbeddingLoss:
    """
    Computes losses for estimated embeddings given annotated vertices.
    Instances in a minibatch that correspond to the same mesh are grouped
    together. For each group, loss is computed as cross-entropy for
    unnormalized scores given ground truth mesh vertex ids.
    Scores are based on:
     1) squared distances between estimated vertex embeddings
        and mesh vertex embeddings;
     2) geodesic distances between vertices of a mesh
    """

    def __init__(self, cfg: CfgNode):
        """
        Initialize embedding loss from config.

        Args:
            cfg (CfgNode): configuration options
        """
        # Gaussian kernel bandwidths used to convert squared embedding
        # distances and mesh geodesic distances into (log-)softmax scores
        self.embdist_gauss_sigma = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDING_DIST_GAUSS_SIGMA
        self.geodist_gauss_sigma = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.GEODESIC_DIST_GAUSS_SIGMA

    def __call__(
        self,
        proposals_with_gt: List[Instances],
        densepose_predictor_outputs: Any,
        packed_annotations: PackedCseAnnotations,
        interpolator: BilinearInterpolationHelper,
        embedder: nn.Module,
    ) -> Dict[str, torch.Tensor]:
        """
        Produces losses for estimated embeddings given annotated vertices.
        Embeddings for all the vertices of a mesh are computed by the embedder.
        Embeddings for observed pixels are estimated by a predictor.
        Losses are computed as cross-entropy for unnormalized scores given
        ground truth vertex IDs.
         1) squared distances between estimated vertex embeddings
            and mesh vertex embeddings;
         2) geodesic distances between vertices of a mesh

        Args:
            proposals_with_gt (list of Instances): detections with associated
                ground truth data; each item corresponds to instances detected
                on 1 image; the number of items corresponds to the number of
                images in a batch
            densepose_predictor_outputs: an object of a dataclass that contains predictor
                outputs with estimated values; assumed to have the following attributes:
                * embedding - embedding estimates, tensor of shape [N, D, S, S], where
                  N = number of instances (= sum N_i, where N_i is the number of
                  instances on image i)
                  D = embedding space dimensionality (MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE)
                  S = output size (width and height)
            packed_annotations (PackedCseAnnotations): contains various data useful
                for loss computation, each data is packed into a single tensor
            interpolator (BilinearInterpolationHelper): bilinear interpolation helper
            embedder (nn.Module): module that computes vertex embeddings for different meshes
        Return:
            dict(str -> tensor): losses keyed by mesh name
        """
        # NOTE(fix): the return annotation was `Dict[int, torch.Tensor]`, but the
        # dictionary is keyed by `mesh_name` (a string), see assignments below.
        losses = {}
        for mesh_id_tensor in packed_annotations.vertex_mesh_ids_gt.unique():
            mesh_id = mesh_id_tensor.item()
            mesh_name = MeshCatalog.get_mesh_name(mesh_id)
            # valid points are those that fall into estimated bbox
            # and correspond to the current mesh
            j_valid = interpolator.j_valid * (  # pyre-ignore[16]
                packed_annotations.vertex_mesh_ids_gt == mesh_id
            )
            if not torch.any(j_valid):
                continue
            # extract estimated embeddings for valid points
            # -> tensor [J, D]
            vertex_embeddings_i = normalize_embeddings(
                interpolator.extract_at_points(
                    densepose_predictor_outputs.embedding,
                    slice_fine_segm=slice(None),
                    w_ylo_xlo=interpolator.w_ylo_xlo[:, None],  # pyre-ignore[16]
                    w_ylo_xhi=interpolator.w_ylo_xhi[:, None],  # pyre-ignore[16]
                    w_yhi_xlo=interpolator.w_yhi_xlo[:, None],  # pyre-ignore[16]
                    w_yhi_xhi=interpolator.w_yhi_xhi[:, None],  # pyre-ignore[16]
                )[j_valid, :]
            )
            # extract vertex ids for valid points
            # -> tensor [J]
            vertex_indices_i = packed_annotations.vertex_ids_gt[j_valid]
            # embeddings for all mesh vertices
            # -> tensor [K, D]
            mesh_vertex_embeddings = embedder(mesh_name)
            # softmax values of geodesic distances for GT mesh vertices
            # -> tensor [J, K]
            mesh = create_mesh(mesh_name, mesh_vertex_embeddings.device)
            geodist_softmax_values = F.softmax(
                mesh.geodists[vertex_indices_i] / (-self.geodist_gauss_sigma), dim=1
            )
            # logsoftmax values for valid points
            # -> tensor [J, K]
            embdist_logsoftmax_values = F.log_softmax(
                squared_euclidean_distance_matrix(vertex_embeddings_i, mesh_vertex_embeddings)
                / (-self.embdist_gauss_sigma),
                dim=1,
            )
            # soft cross-entropy between the geodesic target distribution and
            # the embedding-based predicted distribution, averaged over points
            losses[mesh_name] = (-geodist_softmax_values * embdist_logsoftmax_values).sum(1).mean()

        # meshes absent from this batch still get a zero-valued loss that
        # depends on the predictor outputs and embedder, so gradients (zeros)
        # propagate to all parameters
        for mesh_name in embedder.mesh_names:
            if mesh_name not in losses:
                losses[mesh_name] = self.fake_value(
                    densepose_predictor_outputs, embedder, mesh_name
                )
        return losses

    def fake_values(self, densepose_predictor_outputs: Any, embedder: nn.Module):
        """
        Produce zero-valued losses for all meshes known to the embedder.
        """
        losses = {}
        for mesh_name in embedder.mesh_names:
            losses[mesh_name] = self.fake_value(densepose_predictor_outputs, embedder, mesh_name)
        return losses

    def fake_value(self, densepose_predictor_outputs: Any, embedder: nn.Module, mesh_name: str):
        """
        Produce a zero loss value that still references the predictor outputs
        and the embedder for the given mesh (keeps the graph connected).
        """
        return densepose_predictor_outputs.embedding.sum() * 0 + embedder(mesh_name).sum() * 0
|
Leffa/3rdparty/densepose/modeling/losses/utils.py
ADDED
|
@@ -0,0 +1,443 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from abc import ABC, abstractmethod
|
| 4 |
+
from dataclasses import dataclass
|
| 5 |
+
from typing import Any, Dict, List, Optional, Tuple
|
| 6 |
+
import torch
|
| 7 |
+
from torch.nn import functional as F
|
| 8 |
+
|
| 9 |
+
from detectron2.structures import BoxMode, Instances
|
| 10 |
+
|
| 11 |
+
from densepose import DensePoseDataRelative
|
| 12 |
+
|
| 13 |
+
LossDict = Dict[str, torch.Tensor]
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def _linear_interpolation_utilities(v_norm, v0_src, size_src, v0_dst, size_dst, size_z):
|
| 17 |
+
"""
|
| 18 |
+
Computes utility values for linear interpolation at points v.
|
| 19 |
+
The points are given as normalized offsets in the source interval
|
| 20 |
+
(v0_src, v0_src + size_src), more precisely:
|
| 21 |
+
v = v0_src + v_norm * size_src / 256.0
|
| 22 |
+
The computed utilities include lower points v_lo, upper points v_hi,
|
| 23 |
+
interpolation weights v_w and flags j_valid indicating whether the
|
| 24 |
+
points falls into the destination interval (v0_dst, v0_dst + size_dst).
|
| 25 |
+
|
| 26 |
+
Args:
|
| 27 |
+
v_norm (:obj: `torch.Tensor`): tensor of size N containing
|
| 28 |
+
normalized point offsets
|
| 29 |
+
v0_src (:obj: `torch.Tensor`): tensor of size N containing
|
| 30 |
+
left bounds of source intervals for normalized points
|
| 31 |
+
size_src (:obj: `torch.Tensor`): tensor of size N containing
|
| 32 |
+
source interval sizes for normalized points
|
| 33 |
+
v0_dst (:obj: `torch.Tensor`): tensor of size N containing
|
| 34 |
+
left bounds of destination intervals
|
| 35 |
+
size_dst (:obj: `torch.Tensor`): tensor of size N containing
|
| 36 |
+
destination interval sizes
|
| 37 |
+
size_z (int): interval size for data to be interpolated
|
| 38 |
+
|
| 39 |
+
Returns:
|
| 40 |
+
v_lo (:obj: `torch.Tensor`): int tensor of size N containing
|
| 41 |
+
indices of lower values used for interpolation, all values are
|
| 42 |
+
integers from [0, size_z - 1]
|
| 43 |
+
v_hi (:obj: `torch.Tensor`): int tensor of size N containing
|
| 44 |
+
indices of upper values used for interpolation, all values are
|
| 45 |
+
integers from [0, size_z - 1]
|
| 46 |
+
v_w (:obj: `torch.Tensor`): float tensor of size N containing
|
| 47 |
+
interpolation weights
|
| 48 |
+
j_valid (:obj: `torch.Tensor`): uint8 tensor of size N containing
|
| 49 |
+
0 for points outside the estimation interval
|
| 50 |
+
(v0_est, v0_est + size_est) and 1 otherwise
|
| 51 |
+
"""
|
| 52 |
+
v = v0_src + v_norm * size_src / 256.0
|
| 53 |
+
j_valid = (v - v0_dst >= 0) * (v - v0_dst < size_dst)
|
| 54 |
+
v_grid = (v - v0_dst) * size_z / size_dst
|
| 55 |
+
v_lo = v_grid.floor().long().clamp(min=0, max=size_z - 1)
|
| 56 |
+
v_hi = (v_lo + 1).clamp(max=size_z - 1)
|
| 57 |
+
v_grid = torch.min(v_hi.float(), v_grid)
|
| 58 |
+
v_w = v_grid - v_lo.float()
|
| 59 |
+
return v_lo, v_hi, v_w, j_valid
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
class BilinearInterpolationHelper:
    """
    Bilinear interpolation of predictor outputs at annotated point locations.

    Args:
        packed_annotations: object that contains packed annotations
        j_valid (:obj: `torch.Tensor`): size-M tensor, truthy for points to be
            selected and falsy for points to be discarded
        y_lo (:obj: `torch.Tensor`): int tensor with the upper row index
            in z_est for each point
        y_hi (:obj: `torch.Tensor`): int tensor with the lower row index
            in z_est for each point
        x_lo (:obj: `torch.Tensor`): int tensor with the left column index
            in z_est for each point
        x_hi (:obj: `torch.Tensor`): int tensor with the right column index
            in z_est for each point
        w_ylo_xlo (:obj: `torch.Tensor`): size-M float tensor of upper-left weights
        w_ylo_xhi (:obj: `torch.Tensor`): size-M float tensor of upper-right weights
        w_yhi_xlo (:obj: `torch.Tensor`): size-M float tensor of lower-left weights
        w_yhi_xhi (:obj: `torch.Tensor`): size-M float tensor of lower-right weights
    """

    def __init__(
        self,
        packed_annotations: Any,
        j_valid: torch.Tensor,
        y_lo: torch.Tensor,
        y_hi: torch.Tensor,
        x_lo: torch.Tensor,
        x_hi: torch.Tensor,
        w_ylo_xlo: torch.Tensor,
        w_ylo_xhi: torch.Tensor,
        w_yhi_xlo: torch.Tensor,
        w_yhi_xhi: torch.Tensor,
    ):
        # store each constructor argument as an attribute of the same name
        self.packed_annotations = packed_annotations
        self.j_valid = j_valid
        self.y_lo = y_lo
        self.y_hi = y_hi
        self.x_lo = x_lo
        self.x_hi = x_hi
        self.w_ylo_xlo = w_ylo_xlo
        self.w_ylo_xhi = w_ylo_xhi
        self.w_yhi_xlo = w_yhi_xlo
        self.w_yhi_xhi = w_yhi_xhi

    @staticmethod
    def from_matches(
        packed_annotations: Any, densepose_outputs_size_hw: Tuple[int, int]
    ) -> "BilinearInterpolationHelper":
        """
        Build an interpolation helper from packed annotations and output size.

        Args:
            packed_annotations: annotations packed into tensors; the following
                attributes are required: bbox_xywh_gt, bbox_xywh_est, x_gt,
                y_gt, point_bbox_with_dp_indices, point_bbox_indices
            densepose_outputs_size_hw (tuple [int, int]): resolution of
                DensePose predictor outputs (H, W)
        Return:
            An instance of `BilinearInterpolationHelper` used to perform
            interpolation for the given annotation points and output resolution
        """
        out_h, out_w = densepose_outputs_size_hw
        point_to_bbox = packed_annotations.point_bbox_with_dp_indices
        x0_gt, y0_gt, w_gt, h_gt = packed_annotations.bbox_xywh_gt[point_to_bbox].unbind(dim=1)
        x0_est, y0_est, w_est, h_est = packed_annotations.bbox_xywh_est[point_to_bbox].unbind(
            dim=1
        )
        x_lo, x_hi, x_w, jx_valid = _linear_interpolation_utilities(
            packed_annotations.x_gt, x0_gt, w_gt, x0_est, w_est, out_w
        )
        y_lo, y_hi, y_w, jy_valid = _linear_interpolation_utilities(
            packed_annotations.y_gt, y0_gt, h_gt, y0_est, h_est, out_h
        )
        # a point is valid only if it is valid along both axes
        j_valid = jx_valid * jy_valid

        return BilinearInterpolationHelper(
            packed_annotations,
            j_valid,
            y_lo,
            y_hi,
            x_lo,
            x_hi,
            (1.0 - x_w) * (1.0 - y_w),  # upper-left corner weight
            x_w * (1.0 - y_w),  # upper-right corner weight
            (1.0 - x_w) * y_w,  # lower-left corner weight
            x_w * y_w,  # lower-right corner weight
        )

    def extract_at_points(
        self,
        z_est,
        slice_fine_segm=None,
        w_ylo_xlo=None,
        w_ylo_xhi=None,
        w_yhi_xlo=None,
        w_yhi_xhi=None,
    ):
        """
        Sample z_est at the annotated points via bilinear interpolation over
        the four neighboring grid values — top-left (y_lo, x_lo), top-right
        (y_lo, x_hi), bottom-left (y_hi, x_lo) and bottom-right (y_hi, x_hi) —
        with weights w_ylo_xlo, w_ylo_xhi, w_yhi_xlo and w_yhi_xhi.
        Use slice_fine_segm to slice dim=1 in z_est.
        """
        # fall back to the stored defaults for any argument not supplied
        if slice_fine_segm is None:
            slice_fine_segm = self.packed_annotations.fine_segm_labels_gt
        if w_ylo_xlo is None:
            w_ylo_xlo = self.w_ylo_xlo
        if w_ylo_xhi is None:
            w_ylo_xhi = self.w_ylo_xhi
        if w_yhi_xlo is None:
            w_yhi_xlo = self.w_yhi_xlo
        if w_yhi_xhi is None:
            w_yhi_xhi = self.w_yhi_xhi

        index_bbox = self.packed_annotations.point_bbox_indices
        top_left = z_est[index_bbox, slice_fine_segm, self.y_lo, self.x_lo]
        top_right = z_est[index_bbox, slice_fine_segm, self.y_lo, self.x_hi]
        bottom_left = z_est[index_bbox, slice_fine_segm, self.y_hi, self.x_lo]
        bottom_right = z_est[index_bbox, slice_fine_segm, self.y_hi, self.x_hi]
        return (
            top_left * w_ylo_xlo
            + top_right * w_ylo_xhi
            + bottom_left * w_yhi_xlo
            + bottom_right * w_yhi_xhi
        )
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
def resample_data(
    z, bbox_xywh_src, bbox_xywh_dst, wout, hout, mode: str = "nearest", padding_mode: str = "zeros"
):
    """
    Resample per-box data from source boxes into destination boxes.

    Args:
        z (:obj: `torch.Tensor`): tensor of size (N,C,H,W) with data to be
            resampled
        bbox_xywh_src (:obj: `torch.Tensor`): tensor of size (N,4) containing
            source bounding boxes in format XYWH
        bbox_xywh_dst (:obj: `torch.Tensor`): tensor of size (N,4) containing
            destination bounding boxes in format XYWH
        wout (int): output width
        hout (int): output height
        mode (str): sampling mode passed to `F.grid_sample`
        padding_mode (str): padding mode passed to `F.grid_sample`
    Return:
        zresampled (:obj: `torch.Tensor`): tensor of size (N, C, Hout, Wout)
            with resampled values of z
    """
    n = bbox_xywh_src.size(0)
    assert n == bbox_xywh_dst.size(0), (
        "The number of "
        "source ROIs for resampling ({}) should be equal to the number "
        "of destination ROIs ({})".format(bbox_xywh_src.size(0), bbox_xywh_dst.size(0))
    )
    x0_src, y0_src, w_src, h_src = bbox_xywh_src.unbind(dim=1)
    x0_dst, y0_dst, w_dst, h_dst = bbox_xywh_dst.unbind(dim=1)
    # destination box corners expressed in the normalized [-1, 1] frame
    # of the corresponding source box (the frame grid_sample expects)
    left = 2 * (x0_dst - x0_src) / w_src - 1
    top = 2 * (y0_dst - y0_src) / h_src - 1
    right = 2 * (x0_dst + w_dst - x0_src) / w_src - 1
    bottom = 2 * (y0_dst + h_dst - y0_src) / h_src - 1
    # fractional positions of output pixels along each axis
    col_frac = torch.arange(wout, device=z.device, dtype=torch.float) / wout
    row_frac = torch.arange(hout, device=z.device, dtype=torch.float) / hout
    grid_x = (
        col_frac[None, None, :] * (right - left)[:, None, None] + left[:, None, None]
    ).expand(n, hout, wout)
    grid_y = (
        row_frac[None, :, None] * (bottom - top)[:, None, None] + top[:, None, None]
    ).expand(n, hout, wout)
    grid = torch.stack((grid_x, grid_y), dim=3)
    # resample Z from (N, C, H, W) into (N, C, Hout, Wout)
    return F.grid_sample(z, grid, mode=mode, padding_mode=padding_mode, align_corners=True)
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
class AnnotationsAccumulator(ABC):
    """
    Interface for accumulators that gather per-image annotation data and
    later pack it into dense tensors.
    """

    @abstractmethod
    def accumulate(self, instances_one_image: Instances):
        """
        Add annotation data of a single image to the accumulator.

        Args:
            instances_one_image (Instances): instances data to accumulate
        """

    @abstractmethod
    def pack(self) -> Any:
        """
        Combine all accumulated data into tensors.
        """
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
@dataclass
class PackedChartBasedAnnotations:
    """
    Packed annotations for chart-based model training. The following attributes
    are defined:
     - fine_segm_labels_gt (tensor [K] of `int64`): GT fine segmentation point labels
     - x_gt (tensor [K] of `float32`): GT normalized X point coordinates
     - y_gt (tensor [K] of `float32`): GT normalized Y point coordinates
     - u_gt (tensor [K] of `float32`): GT point U values
     - v_gt (tensor [K] of `float32`): GT point V values
     - coarse_segm_gt (tensor [N, S, S] of `float32`): GT segmentation for bounding boxes
     - bbox_xywh_gt (tensor [N, 4] of `float32`): selected GT bounding boxes in
       XYWH format
     - bbox_xywh_est (tensor [N, 4] of `float32`): selected matching estimated
       bounding boxes in XYWH format
     - point_bbox_with_dp_indices (tensor [K] of `int64`): indices of bounding boxes
       with DensePose annotations that correspond to the point data
     - point_bbox_indices (tensor [K] of `int64`): indices of bounding boxes
       (not necessarily the selected ones with DensePose data) that correspond
       to the point data
     - bbox_indices (tensor [N] of `int64`): global indices of selected bounding
       boxes with DensePose annotations; these indices could be used to access
       features that are computed for all bounding boxes, not only the ones with
       DensePose annotations.
    Here K is the total number of points and N is the total number of instances
    with DensePose annotations.
    """

    # per-point ground truth (length K)
    fine_segm_labels_gt: torch.Tensor
    x_gt: torch.Tensor
    y_gt: torch.Tensor
    u_gt: torch.Tensor
    v_gt: torch.Tensor
    # per-instance ground truth (length N); coarse segmentation may be missing
    coarse_segm_gt: Optional[torch.Tensor]
    bbox_xywh_gt: torch.Tensor
    bbox_xywh_est: torch.Tensor
    # point -> instance index mappings (length K)
    point_bbox_with_dp_indices: torch.Tensor
    point_bbox_indices: torch.Tensor
    # global indices of instances with DensePose data (length N)
    bbox_indices: torch.Tensor
|
| 301 |
+
|
| 302 |
+
|
| 303 |
+
class ChartBasedAnnotationsAccumulator(AnnotationsAccumulator):
    """
    Accumulates annotations by batches that correspond to objects detected on
    individual images. Can pack them together into single tensors.
    """

    def __init__(self):
        # per-point lists, appended to by `_do_accumulate`
        self.i_gt = []  # fine segmentation labels
        self.x_gt = []  # normalized X coordinates
        self.y_gt = []  # normalized Y coordinates
        self.u_gt = []  # U values
        self.v_gt = []  # V values
        self.s_gt = []  # coarse segmentations (only for instances that have them)
        # per-instance lists (instances with DensePose data only)
        self.bbox_xywh_gt = []
        self.bbox_xywh_est = []
        # per-point indices mapping points back to instances
        self.point_bbox_with_dp_indices = []
        self.point_bbox_indices = []
        self.bbox_indices = []
        # running counters: index among instances with DensePose data, and
        # global index among all matched instances
        self.nxt_bbox_with_dp_index = 0
        self.nxt_bbox_index = 0

    def accumulate(self, instances_one_image: Instances):
        """
        Accumulate instances data for one image

        Args:
            instances_one_image (Instances): instances data to accumulate;
                expected to carry `proposal_boxes`, `gt_boxes` and (optionally)
                `gt_densepose` fields
        """
        # convert both estimated and GT boxes to XYWH for packing
        boxes_xywh_est = BoxMode.convert(
            instances_one_image.proposal_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
        )
        boxes_xywh_gt = BoxMode.convert(
            instances_one_image.gt_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
        )
        n_matches = len(boxes_xywh_gt)
        assert n_matches == len(
            boxes_xywh_est
        ), f"Got {len(boxes_xywh_est)} proposal boxes and {len(boxes_xywh_gt)} GT boxes"
        if not n_matches:
            # no detection - GT matches
            return
        if (
            not hasattr(instances_one_image, "gt_densepose")
            or instances_one_image.gt_densepose is None
        ):
            # no densepose GT for the detections, just increase the bbox index
            self.nxt_bbox_index += n_matches
            return
        for box_xywh_est, box_xywh_gt, dp_gt in zip(
            boxes_xywh_est, boxes_xywh_gt, instances_one_image.gt_densepose
        ):
            # only instances with at least one annotated point contribute data;
            # the global bbox counter advances for every matched instance
            if (dp_gt is not None) and (len(dp_gt.x) > 0):
                # pyre-fixme[6]: For 1st argument expected `Tensor` but got `float`.
                # pyre-fixme[6]: For 2nd argument expected `Tensor` but got `float`.
                self._do_accumulate(box_xywh_gt, box_xywh_est, dp_gt)
            self.nxt_bbox_index += 1

    def _do_accumulate(
        self, box_xywh_gt: torch.Tensor, box_xywh_est: torch.Tensor, dp_gt: DensePoseDataRelative
    ):
        """
        Accumulate instances data for one image, given that the data is not empty

        Args:
            box_xywh_gt (tensor): GT bounding box
            box_xywh_est (tensor): estimated bounding box
            dp_gt (DensePoseDataRelative): GT densepose data
        """
        self.i_gt.append(dp_gt.i)
        self.x_gt.append(dp_gt.x)
        self.y_gt.append(dp_gt.y)
        self.u_gt.append(dp_gt.u)
        self.v_gt.append(dp_gt.v)
        # coarse segmentation is optional in the GT data
        if hasattr(dp_gt, "segm"):
            self.s_gt.append(dp_gt.segm.unsqueeze(0))
        self.bbox_xywh_gt.append(box_xywh_gt.view(-1, 4))
        self.bbox_xywh_est.append(box_xywh_est.view(-1, 4))
        # every point of this instance maps to the current counters
        self.point_bbox_with_dp_indices.append(
            torch.full_like(dp_gt.i, self.nxt_bbox_with_dp_index)
        )
        self.point_bbox_indices.append(torch.full_like(dp_gt.i, self.nxt_bbox_index))
        self.bbox_indices.append(self.nxt_bbox_index)
        self.nxt_bbox_with_dp_index += 1

    def pack(self) -> Optional[PackedChartBasedAnnotations]:
        """
        Pack data into tensors

        Return:
            PackedChartBasedAnnotations with all accumulated data concatenated,
            or `None` if nothing was accumulated
        """
        if not len(self.i_gt):
            # TODO:
            # returning proper empty annotations would require
            # creating empty tensors of appropriate shape and
            # type on an appropriate device;
            # we return None so far to indicate empty annotations
            return None
        return PackedChartBasedAnnotations(
            fine_segm_labels_gt=torch.cat(self.i_gt, 0).long(),
            x_gt=torch.cat(self.x_gt, 0),
            y_gt=torch.cat(self.y_gt, 0),
            u_gt=torch.cat(self.u_gt, 0),
            v_gt=torch.cat(self.v_gt, 0),
            # ignore segmentation annotations, if not all the instances contain those
            coarse_segm_gt=torch.cat(self.s_gt, 0)
            if len(self.s_gt) == len(self.bbox_xywh_gt)
            else None,
            bbox_xywh_gt=torch.cat(self.bbox_xywh_gt, 0),
            bbox_xywh_est=torch.cat(self.bbox_xywh_est, 0),
            point_bbox_with_dp_indices=torch.cat(self.point_bbox_with_dp_indices, 0).long(),
            point_bbox_indices=torch.cat(self.point_bbox_indices, 0).long(),
            bbox_indices=torch.as_tensor(
                self.bbox_indices, dtype=torch.long, device=self.x_gt[0].device
            ).long(),
        )
|
| 416 |
+
|
| 417 |
+
|
| 418 |
+
def extract_packed_annotations_from_matches(
    proposals_with_targets: List[Instances], accumulator: AnnotationsAccumulator
) -> Any:
    """
    Feed all per-image proposals-with-targets into the accumulator and return
    the packed result.
    """
    for instances_per_image in proposals_with_targets:
        accumulator.accumulate(instances_per_image)
    return accumulator.pack()
|
| 424 |
+
|
| 425 |
+
|
| 426 |
+
def sample_random_indices(
    n_indices: int, n_samples: int, device: Optional[torch.device] = None
) -> Optional[torch.Tensor]:
    """
    Sample `n_samples` random indices from range `[0..n_indices - 1]`.

    If `n_samples` is non-positive or `n_indices` does not exceed `n_samples`,
    returns `None`, meaning all indices are selected.

    Args:
        n_indices (int): total number of indices
        n_samples (int): number of indices to sample
        device (torch.device): the desired device of returned tensor
    Return:
        Tensor of selected vertex indices, or `None`, if all vertices are selected
    """
    if (n_samples <= 0) or (n_indices <= n_samples):
        # nothing to subsample — caller should use all indices
        return None
    # take the first n_samples entries of a random permutation
    return torch.randperm(n_indices, device=device)[:n_samples]
|
Leffa/3rdparty/densepose/modeling/predictors/__init__.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from .chart import DensePoseChartPredictor
|
| 4 |
+
from .chart_confidence import DensePoseChartConfidencePredictorMixin
|
| 5 |
+
from .chart_with_confidence import DensePoseChartWithConfidencePredictor
|
| 6 |
+
from .cse import DensePoseEmbeddingPredictor
|
| 7 |
+
from .cse_confidence import DensePoseEmbeddingConfidencePredictorMixin
|
| 8 |
+
from .cse_with_confidence import DensePoseEmbeddingWithConfidencePredictor
|
| 9 |
+
from .registry import DENSEPOSE_PREDICTOR_REGISTRY
|
Leffa/3rdparty/densepose/modeling/predictors/chart.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
+
from torch import nn
|
| 5 |
+
|
| 6 |
+
from detectron2.config import CfgNode
|
| 7 |
+
from detectron2.layers import ConvTranspose2d, interpolate
|
| 8 |
+
|
| 9 |
+
from ...structures import DensePoseChartPredictorOutput
|
| 10 |
+
from ..utils import initialize_module_params
|
| 11 |
+
from .registry import DENSEPOSE_PREDICTOR_REGISTRY
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@DENSEPOSE_PREDICTOR_REGISTRY.register()
class DensePoseChartPredictor(nn.Module):
    """
    Predictor (last layers of a DensePose model) that takes DensePose head outputs as an input
    and produces 4 tensors which represent DensePose results for predefined body parts
    (patches / charts):
     * coarse segmentation, a tensor of shape [N, K, Hout, Wout]
     * fine segmentation, a tensor of shape [N, C, Hout, Wout]
     * U coordinates, a tensor of shape [N, C, Hout, Wout]
     * V coordinates, a tensor of shape [N, C, Hout, Wout]
    where
     - N is the number of instances
     - K is the number of coarse segmentation channels (
         2 = foreground / background,
         15 = one of 14 body parts / background)
     - C is the number of fine segmentation channels (
         24 fine body parts / background)
     - Hout and Wout are height and width of predictions
    """

    def __init__(self, cfg: CfgNode, input_channels: int):
        """
        Initialize predictor using configuration options

        Args:
            cfg (CfgNode): configuration options
            input_channels (int): input tensor size along the channel dimension
        """
        super().__init__()
        dim_in = input_channels
        n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
        dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1
        kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL

        def make_deconv(out_channels):
            # all four heads share the same stride-2 transposed convolution shape
            return ConvTranspose2d(
                dim_in, out_channels, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
            )

        self.ann_index_lowres = make_deconv(n_segm_chan)  # coarse segmentation
        self.index_uv_lowres = make_deconv(dim_out_patches)  # fine segmentation
        self.u_lowres = make_deconv(dim_out_patches)  # U coordinates
        self.v_lowres = make_deconv(dim_out_patches)  # V coordinates
        self.scale_factor = cfg.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE
        initialize_module_params(self)

    def interp2d(self, tensor_nchw: torch.Tensor):
        """
        Bilinear interpolation method to be used for upscaling

        Args:
            tensor_nchw (tensor): tensor of shape (N, C, H, W)
        Return:
            tensor of shape (N, C, Hout, Wout), where Hout and Wout are computed
            by applying the scale factor to H and W
        """
        return interpolate(
            tensor_nchw, scale_factor=self.scale_factor, mode="bilinear", align_corners=False
        )

    def forward(self, head_outputs: torch.Tensor):
        """
        Perform forward step on DensePose head outputs

        Args:
            head_outputs (tensor): DensePose head outputs, tensor of shape [N, D, H, W]
        Return:
            An instance of DensePoseChartPredictorOutput
        """
        upscale = self.interp2d
        return DensePoseChartPredictorOutput(
            coarse_segm=upscale(self.ann_index_lowres(head_outputs)),
            fine_segm=upscale(self.index_uv_lowres(head_outputs)),
            u=upscale(self.u_lowres(head_outputs)),
            v=upscale(self.v_lowres(head_outputs)),
        )
|
Leffa/3rdparty/densepose/modeling/predictors/chart_confidence.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from typing import Any
|
| 4 |
+
import torch
|
| 5 |
+
from torch.nn import functional as F
|
| 6 |
+
|
| 7 |
+
from detectron2.config import CfgNode
|
| 8 |
+
from detectron2.layers import ConvTranspose2d
|
| 9 |
+
|
| 10 |
+
from ...structures import decorate_predictor_output_class_with_confidences
|
| 11 |
+
from ..confidence import DensePoseConfidenceModelConfig, DensePoseUVConfidenceType
|
| 12 |
+
from ..utils import initialize_module_params
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class DensePoseChartConfidencePredictorMixin:
|
| 16 |
+
"""
|
| 17 |
+
Predictor contains the last layers of a DensePose model that take DensePose head
|
| 18 |
+
outputs as an input and produce model outputs. Confidence predictor mixin is used
|
| 19 |
+
to generate confidences for segmentation and UV tensors estimated by some
|
| 20 |
+
base predictor. Several assumptions need to hold for the base predictor:
|
| 21 |
+
1) the `forward` method must return SIUV tuple as the first result (
|
| 22 |
+
S = coarse segmentation, I = fine segmentation, U and V are intrinsic
|
| 23 |
+
chart coordinates)
|
| 24 |
+
2) `interp2d` method must be defined to perform bilinear interpolation;
|
| 25 |
+
the same method is typically used for SIUV and confidences
|
| 26 |
+
Confidence predictor mixin provides confidence estimates, as described in:
|
| 27 |
+
N. Neverova et al., Correlated Uncertainty for Learning Dense Correspondences
|
| 28 |
+
from Noisy Labels, NeurIPS 2019
|
| 29 |
+
A. Sanakoyeu et al., Transferring Dense Pose to Proximal Animal Classes, CVPR 2020
|
| 30 |
+
"""
|
| 31 |
+
|
| 32 |
+
def __init__(self, cfg: CfgNode, input_channels: int):
|
| 33 |
+
"""
|
| 34 |
+
Initialize confidence predictor using configuration options.
|
| 35 |
+
|
| 36 |
+
Args:
|
| 37 |
+
cfg (CfgNode): configuration options
|
| 38 |
+
input_channels (int): number of input channels
|
| 39 |
+
"""
|
| 40 |
+
# we rely on base predictor to call nn.Module.__init__
|
| 41 |
+
super().__init__(cfg, input_channels) # pyre-ignore[19]
|
| 42 |
+
self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg)
|
| 43 |
+
self._initialize_confidence_estimation_layers(cfg, input_channels)
|
| 44 |
+
self._registry = {}
|
| 45 |
+
initialize_module_params(self) # pyre-ignore[6]
|
| 46 |
+
|
| 47 |
+
def _initialize_confidence_estimation_layers(self, cfg: CfgNode, dim_in: int):
|
| 48 |
+
"""
|
| 49 |
+
Initialize confidence estimation layers based on configuration options
|
| 50 |
+
|
| 51 |
+
Args:
|
| 52 |
+
cfg (CfgNode): configuration options
|
| 53 |
+
dim_in (int): number of input channels
|
| 54 |
+
"""
|
| 55 |
+
dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1
|
| 56 |
+
kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
|
| 57 |
+
if self.confidence_model_cfg.uv_confidence.enabled:
|
| 58 |
+
if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO:
|
| 59 |
+
self.sigma_2_lowres = ConvTranspose2d( # pyre-ignore[16]
|
| 60 |
+
dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
|
| 61 |
+
)
|
| 62 |
+
elif (
|
| 63 |
+
self.confidence_model_cfg.uv_confidence.type
|
| 64 |
+
== DensePoseUVConfidenceType.INDEP_ANISO
|
| 65 |
+
):
|
| 66 |
+
self.sigma_2_lowres = ConvTranspose2d(
|
| 67 |
+
dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
|
| 68 |
+
)
|
| 69 |
+
self.kappa_u_lowres = ConvTranspose2d( # pyre-ignore[16]
|
| 70 |
+
dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
|
| 71 |
+
)
|
| 72 |
+
self.kappa_v_lowres = ConvTranspose2d( # pyre-ignore[16]
|
| 73 |
+
dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
|
| 74 |
+
)
|
| 75 |
+
else:
|
| 76 |
+
raise ValueError(
|
| 77 |
+
f"Unknown confidence model type: "
|
| 78 |
+
f"{self.confidence_model_cfg.confidence_model_type}"
|
| 79 |
+
)
|
| 80 |
+
if self.confidence_model_cfg.segm_confidence.enabled:
|
| 81 |
+
self.fine_segm_confidence_lowres = ConvTranspose2d( # pyre-ignore[16]
|
| 82 |
+
dim_in, 1, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
|
| 83 |
+
)
|
| 84 |
+
self.coarse_segm_confidence_lowres = ConvTranspose2d( # pyre-ignore[16]
|
| 85 |
+
dim_in, 1, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
|
| 86 |
+
)
|
| 87 |
+
|
| 88 |
+
def forward(self, head_outputs: torch.Tensor):
|
| 89 |
+
"""
|
| 90 |
+
Perform forward operation on head outputs used as inputs for the predictor.
|
| 91 |
+
Calls forward method from the base predictor and uses its outputs to compute
|
| 92 |
+
confidences.
|
| 93 |
+
|
| 94 |
+
Args:
|
| 95 |
+
head_outputs (Tensor): head outputs used as predictor inputs
|
| 96 |
+
Return:
|
| 97 |
+
An instance of outputs with confidences,
|
| 98 |
+
see `decorate_predictor_output_class_with_confidences`
|
| 99 |
+
"""
|
| 100 |
+
# assuming base class returns SIUV estimates in its first result
|
| 101 |
+
base_predictor_outputs = super().forward(head_outputs) # pyre-ignore[16]
|
| 102 |
+
|
| 103 |
+
# create output instance by extending base predictor outputs:
|
| 104 |
+
output = self._create_output_instance(base_predictor_outputs)
|
| 105 |
+
|
| 106 |
+
if self.confidence_model_cfg.uv_confidence.enabled:
|
| 107 |
+
if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO:
|
| 108 |
+
# assuming base class defines interp2d method for bilinear interpolation
|
| 109 |
+
output.sigma_2 = self.interp2d(self.sigma_2_lowres(head_outputs)) # pyre-ignore[16]
|
| 110 |
+
elif (
|
| 111 |
+
self.confidence_model_cfg.uv_confidence.type
|
| 112 |
+
== DensePoseUVConfidenceType.INDEP_ANISO
|
| 113 |
+
):
|
| 114 |
+
# assuming base class defines interp2d method for bilinear interpolation
|
| 115 |
+
output.sigma_2 = self.interp2d(self.sigma_2_lowres(head_outputs))
|
| 116 |
+
output.kappa_u = self.interp2d(self.kappa_u_lowres(head_outputs)) # pyre-ignore[16]
|
| 117 |
+
output.kappa_v = self.interp2d(self.kappa_v_lowres(head_outputs)) # pyre-ignore[16]
|
| 118 |
+
else:
|
| 119 |
+
raise ValueError(
|
| 120 |
+
f"Unknown confidence model type: "
|
| 121 |
+
f"{self.confidence_model_cfg.confidence_model_type}"
|
| 122 |
+
)
|
| 123 |
+
if self.confidence_model_cfg.segm_confidence.enabled:
|
| 124 |
+
# base predictor outputs are assumed to have `fine_segm` and `coarse_segm` attributes
|
| 125 |
+
# base predictor is assumed to define `interp2d` method for bilinear interpolation
|
| 126 |
+
output.fine_segm_confidence = (
|
| 127 |
+
F.softplus(
|
| 128 |
+
self.interp2d(self.fine_segm_confidence_lowres(head_outputs)) # pyre-ignore[16]
|
| 129 |
+
)
|
| 130 |
+
+ self.confidence_model_cfg.segm_confidence.epsilon
|
| 131 |
+
)
|
| 132 |
+
output.fine_segm = base_predictor_outputs.fine_segm * torch.repeat_interleave(
|
| 133 |
+
output.fine_segm_confidence, base_predictor_outputs.fine_segm.shape[1], dim=1
|
| 134 |
+
)
|
| 135 |
+
output.coarse_segm_confidence = (
|
| 136 |
+
F.softplus(
|
| 137 |
+
self.interp2d(
|
| 138 |
+
self.coarse_segm_confidence_lowres(head_outputs) # pyre-ignore[16]
|
| 139 |
+
)
|
| 140 |
+
)
|
| 141 |
+
+ self.confidence_model_cfg.segm_confidence.epsilon
|
| 142 |
+
)
|
| 143 |
+
output.coarse_segm = base_predictor_outputs.coarse_segm * torch.repeat_interleave(
|
| 144 |
+
output.coarse_segm_confidence, base_predictor_outputs.coarse_segm.shape[1], dim=1
|
| 145 |
+
)
|
| 146 |
+
|
| 147 |
+
return output
|
| 148 |
+
|
| 149 |
+
def _create_output_instance(self, base_predictor_outputs: Any):
|
| 150 |
+
"""
|
| 151 |
+
Create an instance of predictor outputs by copying the outputs from the
|
| 152 |
+
base predictor and initializing confidence
|
| 153 |
+
|
| 154 |
+
Args:
|
| 155 |
+
base_predictor_outputs: an instance of base predictor outputs
|
| 156 |
+
(the outputs type is assumed to be a dataclass)
|
| 157 |
+
Return:
|
| 158 |
+
An instance of outputs with confidences
|
| 159 |
+
"""
|
| 160 |
+
PredictorOutput = decorate_predictor_output_class_with_confidences(
|
| 161 |
+
type(base_predictor_outputs) # pyre-ignore[6]
|
| 162 |
+
)
|
| 163 |
+
# base_predictor_outputs is assumed to be a dataclass
|
| 164 |
+
# reassign all the fields from base_predictor_outputs (no deep copy!), add new fields
|
| 165 |
+
output = PredictorOutput(
|
| 166 |
+
**base_predictor_outputs.__dict__,
|
| 167 |
+
coarse_segm_confidence=None,
|
| 168 |
+
fine_segm_confidence=None,
|
| 169 |
+
sigma_1=None,
|
| 170 |
+
sigma_2=None,
|
| 171 |
+
kappa_u=None,
|
| 172 |
+
kappa_v=None,
|
| 173 |
+
)
|
| 174 |
+
return output
|
Leffa/3rdparty/densepose/modeling/predictors/chart_with_confidence.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from . import DensePoseChartConfidencePredictorMixin, DensePoseChartPredictor
|
| 4 |
+
from .registry import DENSEPOSE_PREDICTOR_REGISTRY
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
@DENSEPOSE_PREDICTOR_REGISTRY.register()
|
| 8 |
+
class DensePoseChartWithConfidencePredictor(
|
| 9 |
+
DensePoseChartConfidencePredictorMixin, DensePoseChartPredictor
|
| 10 |
+
):
|
| 11 |
+
"""
|
| 12 |
+
Predictor that combines chart and chart confidence estimation
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
pass
|
Leffa/3rdparty/densepose/modeling/predictors/cse.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
+
from torch import nn
|
| 5 |
+
|
| 6 |
+
from detectron2.config import CfgNode
|
| 7 |
+
from detectron2.layers import ConvTranspose2d, interpolate
|
| 8 |
+
|
| 9 |
+
from ...structures import DensePoseEmbeddingPredictorOutput
|
| 10 |
+
from ..utils import initialize_module_params
|
| 11 |
+
from .registry import DENSEPOSE_PREDICTOR_REGISTRY
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@DENSEPOSE_PREDICTOR_REGISTRY.register()
|
| 15 |
+
class DensePoseEmbeddingPredictor(nn.Module):
|
| 16 |
+
"""
|
| 17 |
+
Last layers of a DensePose model that take DensePose head outputs as an input
|
| 18 |
+
and produce model outputs for continuous surface embeddings (CSE).
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
def __init__(self, cfg: CfgNode, input_channels: int):
|
| 22 |
+
"""
|
| 23 |
+
Initialize predictor using configuration options
|
| 24 |
+
|
| 25 |
+
Args:
|
| 26 |
+
cfg (CfgNode): configuration options
|
| 27 |
+
input_channels (int): input tensor size along the channel dimension
|
| 28 |
+
"""
|
| 29 |
+
super().__init__()
|
| 30 |
+
dim_in = input_channels
|
| 31 |
+
n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
|
| 32 |
+
embed_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE
|
| 33 |
+
kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
|
| 34 |
+
# coarse segmentation
|
| 35 |
+
self.coarse_segm_lowres = ConvTranspose2d(
|
| 36 |
+
dim_in, n_segm_chan, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
|
| 37 |
+
)
|
| 38 |
+
# embedding
|
| 39 |
+
self.embed_lowres = ConvTranspose2d(
|
| 40 |
+
dim_in, embed_size, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
|
| 41 |
+
)
|
| 42 |
+
self.scale_factor = cfg.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE
|
| 43 |
+
initialize_module_params(self)
|
| 44 |
+
|
| 45 |
+
def interp2d(self, tensor_nchw: torch.Tensor):
|
| 46 |
+
"""
|
| 47 |
+
Bilinear interpolation method to be used for upscaling
|
| 48 |
+
|
| 49 |
+
Args:
|
| 50 |
+
tensor_nchw (tensor): tensor of shape (N, C, H, W)
|
| 51 |
+
Return:
|
| 52 |
+
tensor of shape (N, C, Hout, Wout), where Hout and Wout are computed
|
| 53 |
+
by applying the scale factor to H and W
|
| 54 |
+
"""
|
| 55 |
+
return interpolate(
|
| 56 |
+
tensor_nchw, scale_factor=self.scale_factor, mode="bilinear", align_corners=False
|
| 57 |
+
)
|
| 58 |
+
|
| 59 |
+
def forward(self, head_outputs):
|
| 60 |
+
"""
|
| 61 |
+
Perform forward step on DensePose head outputs
|
| 62 |
+
|
| 63 |
+
Args:
|
| 64 |
+
head_outputs (tensor): DensePose head outputs, tensor of shape [N, D, H, W]
|
| 65 |
+
"""
|
| 66 |
+
embed_lowres = self.embed_lowres(head_outputs)
|
| 67 |
+
coarse_segm_lowres = self.coarse_segm_lowres(head_outputs)
|
| 68 |
+
embed = self.interp2d(embed_lowres)
|
| 69 |
+
coarse_segm = self.interp2d(coarse_segm_lowres)
|
| 70 |
+
return DensePoseEmbeddingPredictorOutput(embedding=embed, coarse_segm=coarse_segm)
|
Leffa/3rdparty/densepose/modeling/predictors/cse_confidence.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from typing import Any
|
| 4 |
+
import torch
|
| 5 |
+
from torch.nn import functional as F
|
| 6 |
+
|
| 7 |
+
from detectron2.config import CfgNode
|
| 8 |
+
from detectron2.layers import ConvTranspose2d
|
| 9 |
+
|
| 10 |
+
from densepose.modeling.confidence import DensePoseConfidenceModelConfig
|
| 11 |
+
from densepose.modeling.utils import initialize_module_params
|
| 12 |
+
from densepose.structures import decorate_cse_predictor_output_class_with_confidences
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class DensePoseEmbeddingConfidencePredictorMixin:
|
| 16 |
+
"""
|
| 17 |
+
Predictor contains the last layers of a DensePose model that take DensePose head
|
| 18 |
+
outputs as an input and produce model outputs. Confidence predictor mixin is used
|
| 19 |
+
to generate confidences for coarse segmentation estimated by some
|
| 20 |
+
base predictor. Several assumptions need to hold for the base predictor:
|
| 21 |
+
1) the `forward` method must return CSE DensePose head outputs,
|
| 22 |
+
tensor of shape [N, D, H, W]
|
| 23 |
+
2) `interp2d` method must be defined to perform bilinear interpolation;
|
| 24 |
+
the same method is typically used for masks and confidences
|
| 25 |
+
Confidence predictor mixin provides confidence estimates, as described in:
|
| 26 |
+
N. Neverova et al., Correlated Uncertainty for Learning Dense Correspondences
|
| 27 |
+
from Noisy Labels, NeurIPS 2019
|
| 28 |
+
A. Sanakoyeu et al., Transferring Dense Pose to Proximal Animal Classes, CVPR 2020
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
def __init__(self, cfg: CfgNode, input_channels: int):
|
| 32 |
+
"""
|
| 33 |
+
Initialize confidence predictor using configuration options.
|
| 34 |
+
|
| 35 |
+
Args:
|
| 36 |
+
cfg (CfgNode): configuration options
|
| 37 |
+
input_channels (int): number of input channels
|
| 38 |
+
"""
|
| 39 |
+
# we rely on base predictor to call nn.Module.__init__
|
| 40 |
+
super().__init__(cfg, input_channels) # pyre-ignore[19]
|
| 41 |
+
self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg)
|
| 42 |
+
self._initialize_confidence_estimation_layers(cfg, input_channels)
|
| 43 |
+
self._registry = {}
|
| 44 |
+
initialize_module_params(self) # pyre-ignore[6]
|
| 45 |
+
|
| 46 |
+
def _initialize_confidence_estimation_layers(self, cfg: CfgNode, dim_in: int):
|
| 47 |
+
"""
|
| 48 |
+
Initialize confidence estimation layers based on configuration options
|
| 49 |
+
|
| 50 |
+
Args:
|
| 51 |
+
cfg (CfgNode): configuration options
|
| 52 |
+
dim_in (int): number of input channels
|
| 53 |
+
"""
|
| 54 |
+
kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
|
| 55 |
+
if self.confidence_model_cfg.segm_confidence.enabled:
|
| 56 |
+
self.coarse_segm_confidence_lowres = ConvTranspose2d( # pyre-ignore[16]
|
| 57 |
+
dim_in, 1, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
|
| 58 |
+
)
|
| 59 |
+
|
| 60 |
+
def forward(self, head_outputs: torch.Tensor):
|
| 61 |
+
"""
|
| 62 |
+
Perform forward operation on head outputs used as inputs for the predictor.
|
| 63 |
+
Calls forward method from the base predictor and uses its outputs to compute
|
| 64 |
+
confidences.
|
| 65 |
+
|
| 66 |
+
Args:
|
| 67 |
+
head_outputs (Tensor): head outputs used as predictor inputs
|
| 68 |
+
Return:
|
| 69 |
+
An instance of outputs with confidences,
|
| 70 |
+
see `decorate_cse_predictor_output_class_with_confidences`
|
| 71 |
+
"""
|
| 72 |
+
# assuming base class returns SIUV estimates in its first result
|
| 73 |
+
base_predictor_outputs = super().forward(head_outputs) # pyre-ignore[16]
|
| 74 |
+
|
| 75 |
+
# create output instance by extending base predictor outputs:
|
| 76 |
+
output = self._create_output_instance(base_predictor_outputs)
|
| 77 |
+
|
| 78 |
+
if self.confidence_model_cfg.segm_confidence.enabled:
|
| 79 |
+
# base predictor outputs are assumed to have `coarse_segm` attribute
|
| 80 |
+
# base predictor is assumed to define `interp2d` method for bilinear interpolation
|
| 81 |
+
output.coarse_segm_confidence = (
|
| 82 |
+
F.softplus(
|
| 83 |
+
self.interp2d( # pyre-ignore[16]
|
| 84 |
+
self.coarse_segm_confidence_lowres(head_outputs) # pyre-ignore[16]
|
| 85 |
+
)
|
| 86 |
+
)
|
| 87 |
+
+ self.confidence_model_cfg.segm_confidence.epsilon
|
| 88 |
+
)
|
| 89 |
+
output.coarse_segm = base_predictor_outputs.coarse_segm * torch.repeat_interleave(
|
| 90 |
+
output.coarse_segm_confidence, base_predictor_outputs.coarse_segm.shape[1], dim=1
|
| 91 |
+
)
|
| 92 |
+
|
| 93 |
+
return output
|
| 94 |
+
|
| 95 |
+
def _create_output_instance(self, base_predictor_outputs: Any):
|
| 96 |
+
"""
|
| 97 |
+
Create an instance of predictor outputs by copying the outputs from the
|
| 98 |
+
base predictor and initializing confidence
|
| 99 |
+
|
| 100 |
+
Args:
|
| 101 |
+
base_predictor_outputs: an instance of base predictor outputs
|
| 102 |
+
(the outputs type is assumed to be a dataclass)
|
| 103 |
+
Return:
|
| 104 |
+
An instance of outputs with confidences
|
| 105 |
+
"""
|
| 106 |
+
PredictorOutput = decorate_cse_predictor_output_class_with_confidences(
|
| 107 |
+
type(base_predictor_outputs) # pyre-ignore[6]
|
| 108 |
+
)
|
| 109 |
+
# base_predictor_outputs is assumed to be a dataclass
|
| 110 |
+
# reassign all the fields from base_predictor_outputs (no deep copy!), add new fields
|
| 111 |
+
output = PredictorOutput(
|
| 112 |
+
**base_predictor_outputs.__dict__,
|
| 113 |
+
coarse_segm_confidence=None,
|
| 114 |
+
)
|
| 115 |
+
return output
|
Leffa/3rdparty/densepose/modeling/predictors/cse_with_confidence.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from . import DensePoseEmbeddingConfidencePredictorMixin, DensePoseEmbeddingPredictor
|
| 4 |
+
from .registry import DENSEPOSE_PREDICTOR_REGISTRY
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
@DENSEPOSE_PREDICTOR_REGISTRY.register()
|
| 8 |
+
class DensePoseEmbeddingWithConfidencePredictor(
|
| 9 |
+
DensePoseEmbeddingConfidencePredictorMixin, DensePoseEmbeddingPredictor
|
| 10 |
+
):
|
| 11 |
+
"""
|
| 12 |
+
Predictor that combines CSE and CSE confidence estimation
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
pass
|
Leffa/3rdparty/densepose/modeling/predictors/registry.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from detectron2.utils.registry import Registry
|
| 4 |
+
|
| 5 |
+
DENSEPOSE_PREDICTOR_REGISTRY = Registry("DENSEPOSE_PREDICTOR")
|
Leffa/3rdparty/densepose/modeling/roi_heads/__init__.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from .v1convx import DensePoseV1ConvXHead
|
| 4 |
+
from .deeplab import DensePoseDeepLabHead
|
| 5 |
+
from .registry import ROI_DENSEPOSE_HEAD_REGISTRY
|
| 6 |
+
from .roi_head import Decoder, DensePoseROIHeads
|
Leffa/3rdparty/densepose/modeling/roi_heads/deeplab.py
ADDED
|
@@ -0,0 +1,263 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
import fvcore.nn.weight_init as weight_init
|
| 4 |
+
import torch
|
| 5 |
+
from torch import nn
|
| 6 |
+
from torch.nn import functional as F
|
| 7 |
+
|
| 8 |
+
from detectron2.config import CfgNode
|
| 9 |
+
from detectron2.layers import Conv2d
|
| 10 |
+
|
| 11 |
+
from .registry import ROI_DENSEPOSE_HEAD_REGISTRY
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@ROI_DENSEPOSE_HEAD_REGISTRY.register()
|
| 15 |
+
class DensePoseDeepLabHead(nn.Module):
|
| 16 |
+
"""
|
| 17 |
+
DensePose head using DeepLabV3 model from
|
| 18 |
+
"Rethinking Atrous Convolution for Semantic Image Segmentation"
|
| 19 |
+
<https://arxiv.org/abs/1706.05587>.
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
def __init__(self, cfg: CfgNode, input_channels: int):
|
| 23 |
+
super(DensePoseDeepLabHead, self).__init__()
|
| 24 |
+
# fmt: off
|
| 25 |
+
hidden_dim = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM
|
| 26 |
+
kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL
|
| 27 |
+
norm = cfg.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NORM
|
| 28 |
+
self.n_stacked_convs = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS
|
| 29 |
+
self.use_nonlocal = cfg.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NONLOCAL_ON
|
| 30 |
+
# fmt: on
|
| 31 |
+
pad_size = kernel_size // 2
|
| 32 |
+
n_channels = input_channels
|
| 33 |
+
|
| 34 |
+
self.ASPP = ASPP(input_channels, [6, 12, 56], n_channels) # 6, 12, 56
|
| 35 |
+
self.add_module("ASPP", self.ASPP)
|
| 36 |
+
|
| 37 |
+
if self.use_nonlocal:
|
| 38 |
+
self.NLBlock = NONLocalBlock2D(input_channels, bn_layer=True)
|
| 39 |
+
self.add_module("NLBlock", self.NLBlock)
|
| 40 |
+
# weight_init.c2_msra_fill(self.ASPP)
|
| 41 |
+
|
| 42 |
+
for i in range(self.n_stacked_convs):
|
| 43 |
+
norm_module = nn.GroupNorm(32, hidden_dim) if norm == "GN" else None
|
| 44 |
+
layer = Conv2d(
|
| 45 |
+
n_channels,
|
| 46 |
+
hidden_dim,
|
| 47 |
+
kernel_size,
|
| 48 |
+
stride=1,
|
| 49 |
+
padding=pad_size,
|
| 50 |
+
bias=not norm,
|
| 51 |
+
norm=norm_module,
|
| 52 |
+
)
|
| 53 |
+
weight_init.c2_msra_fill(layer)
|
| 54 |
+
n_channels = hidden_dim
|
| 55 |
+
layer_name = self._get_layer_name(i)
|
| 56 |
+
self.add_module(layer_name, layer)
|
| 57 |
+
self.n_out_channels = hidden_dim
|
| 58 |
+
# initialize_module_params(self)
|
| 59 |
+
|
| 60 |
+
def forward(self, features):
|
| 61 |
+
x0 = features
|
| 62 |
+
x = self.ASPP(x0)
|
| 63 |
+
if self.use_nonlocal:
|
| 64 |
+
x = self.NLBlock(x)
|
| 65 |
+
output = x
|
| 66 |
+
for i in range(self.n_stacked_convs):
|
| 67 |
+
layer_name = self._get_layer_name(i)
|
| 68 |
+
x = getattr(self, layer_name)(x)
|
| 69 |
+
x = F.relu(x)
|
| 70 |
+
output = x
|
| 71 |
+
return output
|
| 72 |
+
|
| 73 |
+
def _get_layer_name(self, i: int):
|
| 74 |
+
layer_name = "body_conv_fcn{}".format(i + 1)
|
| 75 |
+
return layer_name
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
# Copied from
|
| 79 |
+
# https://github.com/pytorch/vision/blob/master/torchvision/models/segmentation/deeplabv3.py
|
| 80 |
+
# See https://arxiv.org/pdf/1706.05587.pdf for details
|
| 81 |
+
class ASPPConv(nn.Sequential):
|
| 82 |
+
def __init__(self, in_channels, out_channels, dilation):
|
| 83 |
+
modules = [
|
| 84 |
+
nn.Conv2d(
|
| 85 |
+
in_channels, out_channels, 3, padding=dilation, dilation=dilation, bias=False
|
| 86 |
+
),
|
| 87 |
+
nn.GroupNorm(32, out_channels),
|
| 88 |
+
nn.ReLU(),
|
| 89 |
+
]
|
| 90 |
+
super(ASPPConv, self).__init__(*modules)
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
class ASPPPooling(nn.Sequential):
|
| 94 |
+
def __init__(self, in_channels, out_channels):
|
| 95 |
+
super(ASPPPooling, self).__init__(
|
| 96 |
+
nn.AdaptiveAvgPool2d(1),
|
| 97 |
+
nn.Conv2d(in_channels, out_channels, 1, bias=False),
|
| 98 |
+
nn.GroupNorm(32, out_channels),
|
| 99 |
+
nn.ReLU(),
|
| 100 |
+
)
|
| 101 |
+
|
| 102 |
+
def forward(self, x):
|
| 103 |
+
size = x.shape[-2:]
|
| 104 |
+
x = super(ASPPPooling, self).forward(x)
|
| 105 |
+
return F.interpolate(x, size=size, mode="bilinear", align_corners=False)
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
class ASPP(nn.Module):
|
| 109 |
+
def __init__(self, in_channels, atrous_rates, out_channels):
|
| 110 |
+
super(ASPP, self).__init__()
|
| 111 |
+
modules = []
|
| 112 |
+
modules.append(
|
| 113 |
+
nn.Sequential(
|
| 114 |
+
nn.Conv2d(in_channels, out_channels, 1, bias=False),
|
| 115 |
+
nn.GroupNorm(32, out_channels),
|
| 116 |
+
nn.ReLU(),
|
| 117 |
+
)
|
| 118 |
+
)
|
| 119 |
+
|
| 120 |
+
rate1, rate2, rate3 = tuple(atrous_rates)
|
| 121 |
+
modules.append(ASPPConv(in_channels, out_channels, rate1))
|
| 122 |
+
modules.append(ASPPConv(in_channels, out_channels, rate2))
|
| 123 |
+
modules.append(ASPPConv(in_channels, out_channels, rate3))
|
| 124 |
+
modules.append(ASPPPooling(in_channels, out_channels))
|
| 125 |
+
|
| 126 |
+
self.convs = nn.ModuleList(modules)
|
| 127 |
+
|
| 128 |
+
self.project = nn.Sequential(
|
| 129 |
+
nn.Conv2d(5 * out_channels, out_channels, 1, bias=False),
|
| 130 |
+
# nn.BatchNorm2d(out_channels),
|
| 131 |
+
nn.ReLU()
|
| 132 |
+
# nn.Dropout(0.5)
|
| 133 |
+
)
|
| 134 |
+
|
| 135 |
+
def forward(self, x):
|
| 136 |
+
res = []
|
| 137 |
+
for conv in self.convs:
|
| 138 |
+
res.append(conv(x))
|
| 139 |
+
res = torch.cat(res, dim=1)
|
| 140 |
+
return self.project(res)
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
# copied from
|
| 144 |
+
# https://github.com/AlexHex7/Non-local_pytorch/blob/master/lib/non_local_embedded_gaussian.py
|
| 145 |
+
# See https://arxiv.org/abs/1711.07971 for details
|
| 146 |
+
class _NonLocalBlockND(nn.Module):
|
| 147 |
+
def __init__(
|
| 148 |
+
self, in_channels, inter_channels=None, dimension=3, sub_sample=True, bn_layer=True
|
| 149 |
+
):
|
| 150 |
+
super(_NonLocalBlockND, self).__init__()
|
| 151 |
+
|
| 152 |
+
assert dimension in [1, 2, 3]
|
| 153 |
+
|
| 154 |
+
self.dimension = dimension
|
| 155 |
+
self.sub_sample = sub_sample
|
| 156 |
+
|
| 157 |
+
self.in_channels = in_channels
|
| 158 |
+
self.inter_channels = inter_channels
|
| 159 |
+
|
| 160 |
+
if self.inter_channels is None:
|
| 161 |
+
self.inter_channels = in_channels // 2
|
| 162 |
+
if self.inter_channels == 0:
|
| 163 |
+
self.inter_channels = 1
|
| 164 |
+
|
| 165 |
+
if dimension == 3:
|
| 166 |
+
conv_nd = nn.Conv3d
|
| 167 |
+
max_pool_layer = nn.MaxPool3d(kernel_size=(1, 2, 2))
|
| 168 |
+
bn = nn.GroupNorm # (32, hidden_dim) #nn.BatchNorm3d
|
| 169 |
+
elif dimension == 2:
|
| 170 |
+
conv_nd = nn.Conv2d
|
| 171 |
+
max_pool_layer = nn.MaxPool2d(kernel_size=(2, 2))
|
| 172 |
+
bn = nn.GroupNorm # (32, hidden_dim)nn.BatchNorm2d
|
| 173 |
+
else:
|
| 174 |
+
conv_nd = nn.Conv1d
|
| 175 |
+
max_pool_layer = nn.MaxPool1d(kernel_size=2)
|
| 176 |
+
bn = nn.GroupNorm # (32, hidden_dim)nn.BatchNorm1d
|
| 177 |
+
|
| 178 |
+
self.g = conv_nd(
|
| 179 |
+
in_channels=self.in_channels,
|
| 180 |
+
out_channels=self.inter_channels,
|
| 181 |
+
kernel_size=1,
|
| 182 |
+
stride=1,
|
| 183 |
+
padding=0,
|
| 184 |
+
)
|
| 185 |
+
|
| 186 |
+
if bn_layer:
|
| 187 |
+
self.W = nn.Sequential(
|
| 188 |
+
conv_nd(
|
| 189 |
+
in_channels=self.inter_channels,
|
| 190 |
+
out_channels=self.in_channels,
|
| 191 |
+
kernel_size=1,
|
| 192 |
+
stride=1,
|
| 193 |
+
padding=0,
|
| 194 |
+
),
|
| 195 |
+
bn(32, self.in_channels),
|
| 196 |
+
)
|
| 197 |
+
nn.init.constant_(self.W[1].weight, 0)
|
| 198 |
+
nn.init.constant_(self.W[1].bias, 0)
|
| 199 |
+
else:
|
| 200 |
+
self.W = conv_nd(
|
| 201 |
+
in_channels=self.inter_channels,
|
| 202 |
+
out_channels=self.in_channels,
|
| 203 |
+
kernel_size=1,
|
| 204 |
+
stride=1,
|
| 205 |
+
padding=0,
|
| 206 |
+
)
|
| 207 |
+
nn.init.constant_(self.W.weight, 0)
|
| 208 |
+
nn.init.constant_(self.W.bias, 0)
|
| 209 |
+
|
| 210 |
+
self.theta = conv_nd(
|
| 211 |
+
in_channels=self.in_channels,
|
| 212 |
+
out_channels=self.inter_channels,
|
| 213 |
+
kernel_size=1,
|
| 214 |
+
stride=1,
|
| 215 |
+
padding=0,
|
| 216 |
+
)
|
| 217 |
+
self.phi = conv_nd(
|
| 218 |
+
in_channels=self.in_channels,
|
| 219 |
+
out_channels=self.inter_channels,
|
| 220 |
+
kernel_size=1,
|
| 221 |
+
stride=1,
|
| 222 |
+
padding=0,
|
| 223 |
+
)
|
| 224 |
+
|
| 225 |
+
if sub_sample:
|
| 226 |
+
self.g = nn.Sequential(self.g, max_pool_layer)
|
| 227 |
+
self.phi = nn.Sequential(self.phi, max_pool_layer)
|
| 228 |
+
|
| 229 |
+
def forward(self, x):
    """
    Apply the non-local (self-attention) operation.

    :param x: input tensor, e.g. (b, c, t, h, w) for the 3D variant
        (or (b, c, h, w) / (b, c, t) for the 2D/1D variants)
    :return: tensor with the same shape as ``x`` (residual connection)
    """
    b = x.size(0)

    # Value projection, flattened over spatial dims: (b, HW', inter)
    value = self.g(x).view(b, self.inter_channels, -1).permute(0, 2, 1)

    # Query / key projections: (b, HW, inter) and (b, inter, HW')
    query = self.theta(x).view(b, self.inter_channels, -1).permute(0, 2, 1)
    key = self.phi(x).view(b, self.inter_channels, -1)

    # Pairwise affinities, softmax-normalized over the key positions
    attention = F.softmax(torch.matmul(query, key), dim=-1)

    # Aggregate values, restore the spatial layout of x
    out = torch.matmul(attention, value).permute(0, 2, 1).contiguous()
    out = out.view(b, self.inter_channels, *x.size()[2:])

    # Project back to in_channels and add the residual
    return self.W(out) + x
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
class NONLocalBlock2D(_NonLocalBlockND):
    """2D specialization of the generic non-local block.

    Thin wrapper that fixes ``dimension=2`` so the parent builds Conv2d /
    MaxPool2d internals; all other options are forwarded unchanged.
    """

    def __init__(self, in_channels, inter_channels=None, sub_sample=True, bn_layer=True):
        super().__init__(
            in_channels,
            inter_channels=inter_channels,
            dimension=2,
            sub_sample=sub_sample,
            bn_layer=bn_layer,
        )
|
Leffa/3rdparty/densepose/modeling/roi_heads/registry.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.

from detectron2.utils.registry import Registry

# Registry of DensePose head architectures. Head classes register themselves
# via @ROI_DENSEPOSE_HEAD_REGISTRY.register(); presumably the builder looks
# them up by the configured head name — verify against build_densepose_head.
ROI_DENSEPOSE_HEAD_REGISTRY = Registry("ROI_DENSEPOSE_HEAD")
|
Leffa/3rdparty/densepose/modeling/roi_heads/roi_head.py
ADDED
|
@@ -0,0 +1,218 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
from typing import Dict, List, Optional
|
| 5 |
+
import fvcore.nn.weight_init as weight_init
|
| 6 |
+
import torch
|
| 7 |
+
import torch.nn as nn
|
| 8 |
+
from torch.nn import functional as F
|
| 9 |
+
|
| 10 |
+
from detectron2.layers import Conv2d, ShapeSpec, get_norm
|
| 11 |
+
from detectron2.modeling import ROI_HEADS_REGISTRY, StandardROIHeads
|
| 12 |
+
from detectron2.modeling.poolers import ROIPooler
|
| 13 |
+
from detectron2.modeling.roi_heads import select_foreground_proposals
|
| 14 |
+
from detectron2.structures import ImageList, Instances
|
| 15 |
+
|
| 16 |
+
from .. import (
|
| 17 |
+
build_densepose_data_filter,
|
| 18 |
+
build_densepose_embedder,
|
| 19 |
+
build_densepose_head,
|
| 20 |
+
build_densepose_losses,
|
| 21 |
+
build_densepose_predictor,
|
| 22 |
+
densepose_inference,
|
| 23 |
+
)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class Decoder(nn.Module):
    """
    A semantic segmentation head described in detail in the Panoptic Feature Pyramid Networks paper
    (https://arxiv.org/abs/1901.02446). It takes FPN features as input and merges information from
    all levels of the FPN into single output.
    """

    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec], in_features):
        """
        Args:
            cfg: detectron2 config node; decoder options are read from
                cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_*.
            input_shape: mapping from feature-map name to its ShapeSpec
                (stride and channel count are used here).
            in_features: names of the FPN levels this decoder consumes.
        """
        super(Decoder, self).__init__()

        # fmt: off
        self.in_features = in_features
        feature_strides = {k: v.stride for k, v in input_shape.items()}
        feature_channels = {k: v.channels for k, v in input_shape.items()}
        num_classes = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES
        conv_dims = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS
        self.common_stride = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE
        norm = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM
        # fmt: on

        # One "scale head" per input level: a stack of 3x3 convs (with 2x
        # upsampling) that brings the level from its native stride down to
        # the common stride so all levels can be summed.
        self.scale_heads = []
        for in_feature in self.in_features:
            head_ops = []
            # Number of conv(+upsample) stages needed for this level.
            head_length = max(
                1, int(np.log2(feature_strides[in_feature]) - np.log2(self.common_stride))
            )
            for k in range(head_length):
                conv = Conv2d(
                    feature_channels[in_feature] if k == 0 else conv_dims,
                    conv_dims,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    bias=not norm,  # bias is redundant when a norm layer follows
                    norm=get_norm(norm, conv_dims),
                    activation=F.relu,
                )
                weight_init.c2_msra_fill(conv)
                head_ops.append(conv)
                if feature_strides[in_feature] != self.common_stride:
                    head_ops.append(
                        nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False)
                    )
            self.scale_heads.append(nn.Sequential(*head_ops))
            # Register under the feature name so parameters are tracked.
            self.add_module(in_feature, self.scale_heads[-1])
        # Final 1x1 conv producing per-pixel class scores.
        self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0)
        weight_init.c2_msra_fill(self.predictor)

    def forward(self, features: List[torch.Tensor]):
        """Sum the rescaled per-level maps and predict per-pixel classes.

        ``features`` must be ordered to match ``self.in_features``.
        """
        for i, _ in enumerate(self.in_features):
            if i == 0:
                x = self.scale_heads[i](features[i])
            else:
                x = x + self.scale_heads[i](features[i])
        x = self.predictor(x)
        return x
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
@ROI_HEADS_REGISTRY.register()
class DensePoseROIHeads(StandardROIHeads):
    """
    A Standard ROIHeads which contains an addition of DensePose head.
    """

    def __init__(self, cfg, input_shape):
        super().__init__(cfg, input_shape)
        self._init_densepose_head(cfg, input_shape)

    def _init_densepose_head(self, cfg, input_shape):
        """
        Build the DensePose sub-modules (data filter, pooler, head, predictor,
        losses, embedder) from the config. No-op when MODEL.DENSEPOSE_ON is
        False.
        """
        # fmt: off
        self.densepose_on = cfg.MODEL.DENSEPOSE_ON
        if not self.densepose_on:
            return
        self.densepose_data_filter = build_densepose_data_filter(cfg)
        dp_pooler_resolution = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION
        dp_pooler_sampling_ratio = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO
        dp_pooler_type = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE
        self.use_decoder = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON
        # fmt: on
        if self.use_decoder:
            # The decoder merges all levels into a single map, so the pooler
            # only needs the scale of the finest input level.
            dp_pooler_scales = (1.0 / input_shape[self.in_features[0]].stride,)
        else:
            dp_pooler_scales = tuple(1.0 / input_shape[k].stride for k in self.in_features)
        in_channels = [input_shape[f].channels for f in self.in_features][0]

        if self.use_decoder:
            self.decoder = Decoder(cfg, input_shape, self.in_features)

        self.densepose_pooler = ROIPooler(
            output_size=dp_pooler_resolution,
            scales=dp_pooler_scales,
            sampling_ratio=dp_pooler_sampling_ratio,
            pooler_type=dp_pooler_type,
        )
        self.densepose_head = build_densepose_head(cfg, in_channels)
        self.densepose_predictor = build_densepose_predictor(
            cfg, self.densepose_head.n_out_channels
        )
        self.densepose_losses = build_densepose_losses(cfg)
        self.embedder = build_densepose_embedder(cfg)

    def _forward_densepose(self, features: Dict[str, torch.Tensor], instances: List[Instances]):
        """
        Forward logic of the densepose prediction branch.

        Args:
            features (dict[str, Tensor]): input data as a mapping from feature
                map name to tensor. Axis 0 represents the number of images `N` in
                the input data; axes 1-3 are channels, height, and width, which may
                vary between feature maps (e.g., if a feature pyramid is used).
            instances (list[Instances]): length `N` list of `Instances`. The i-th
                `Instances` contains instances for the i-th input image,
                In training, they can be the proposals.
                In inference, they can be the predicted boxes.

        Returns:
            In training, a dict of losses.
            In inference, update `instances` with new fields "densepose" and return it.
        """
        if not self.densepose_on:
            return {} if self.training else instances

        features_list = [features[f] for f in self.in_features]
        if self.training:
            proposals, _ = select_foreground_proposals(instances, self.num_classes)
            features_list, proposals = self.densepose_data_filter(features_list, proposals)
            if len(proposals) == 0:
                # Bug fix: the original fell through and implicitly returned
                # None here, which crashes `losses.update(...)` in `forward`
                # when the data filter removes every proposal. An empty loss
                # dict is the correct "no contribution" result.
                return {}
            proposal_boxes = [x.proposal_boxes for x in proposals]

            if self.use_decoder:
                features_list = [self.decoder(features_list)]

            features_dp = self.densepose_pooler(features_list, proposal_boxes)
            densepose_head_outputs = self.densepose_head(features_dp)
            densepose_predictor_outputs = self.densepose_predictor(densepose_head_outputs)
            densepose_loss_dict = self.densepose_losses(
                proposals, densepose_predictor_outputs, embedder=self.embedder
            )
            return densepose_loss_dict
        else:
            pred_boxes = [x.pred_boxes for x in instances]

            if self.use_decoder:
                features_list = [self.decoder(features_list)]

            features_dp = self.densepose_pooler(features_list, pred_boxes)
            if len(features_dp) > 0:
                densepose_head_outputs = self.densepose_head(features_dp)
                densepose_predictor_outputs = self.densepose_predictor(densepose_head_outputs)
            else:
                # No detections: let the inference helper attach empty results.
                densepose_predictor_outputs = None

            densepose_inference(densepose_predictor_outputs, instances)
            return instances

    def forward(
        self,
        images: ImageList,
        features: Dict[str, torch.Tensor],
        proposals: List[Instances],
        targets: Optional[List[Instances]] = None,
    ):
        """Run the standard ROI heads, then add DensePose losses in training."""
        instances, losses = super().forward(images, features, proposals, targets)
        del targets, images  # no longer needed; free memory early

        if self.training:
            losses.update(self._forward_densepose(features, instances))
        return instances, losses

    def forward_with_given_boxes(
        self, features: Dict[str, torch.Tensor], instances: List[Instances]
    ):
        """
        Use the given boxes in `instances` to produce other (non-box) per-ROI outputs.

        This is useful for downstream tasks where a box is known, but need to obtain
        other attributes (outputs of other heads).
        Test-time augmentation also uses this.

        Args:
            features: same as in `forward()`
            instances (list[Instances]): instances to predict other outputs. Expect the keys
                "pred_boxes" and "pred_classes" to exist.

        Returns:
            instances (list[Instances]):
                the same `Instances` objects, with extra
                fields such as `pred_masks` or `pred_keypoints`.
        """

        instances = super().forward_with_given_boxes(features, instances)
        instances = self._forward_densepose(features, instances)
        return instances
|
Leffa/3rdparty/densepose/modeling/roi_heads/v1convx.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
+
from torch import nn
|
| 5 |
+
from torch.nn import functional as F
|
| 6 |
+
|
| 7 |
+
from detectron2.config import CfgNode
|
| 8 |
+
from detectron2.layers import Conv2d
|
| 9 |
+
|
| 10 |
+
from ..utils import initialize_module_params
|
| 11 |
+
from .registry import ROI_DENSEPOSE_HEAD_REGISTRY
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@ROI_DENSEPOSE_HEAD_REGISTRY.register()
class DensePoseV1ConvXHead(nn.Module):
    """
    Fully convolutional DensePose head.
    """

    def __init__(self, cfg: CfgNode, input_channels: int):
        """
        Initialize DensePose fully convolutional head

        Args:
            cfg (CfgNode): configuration options
            input_channels (int): number of input channels
        """
        super(DensePoseV1ConvXHead, self).__init__()
        # fmt: off
        hidden_dim = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM
        kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL
        self.n_stacked_convs = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS
        # fmt: on
        padding = kernel_size // 2  # "same" padding for odd kernel sizes
        channels = input_channels
        for idx in range(self.n_stacked_convs):
            self.add_module(
                self._get_layer_name(idx),
                Conv2d(channels, hidden_dim, kernel_size, stride=1, padding=padding),
            )
            channels = hidden_dim
        self.n_out_channels = channels
        initialize_module_params(self)

    def forward(self, features: torch.Tensor):
        """
        Apply DensePose fully convolutional head to the input features

        Args:
            features (tensor): input features
        Result:
            A tensor of DensePose head outputs
        """
        # With zero stacked convs the input passes through unchanged.
        out = features
        for idx in range(self.n_stacked_convs):
            out = F.relu(getattr(self, self._get_layer_name(idx))(out))
        return out

    def _get_layer_name(self, i: int):
        return "body_conv_fcn{}".format(i + 1)
|
Leffa/3rdparty/densepose/modeling/test_time_augmentation.py
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
import copy
|
| 3 |
+
import numpy as np
|
| 4 |
+
import torch
|
| 5 |
+
from fvcore.transforms import HFlipTransform, TransformList
|
| 6 |
+
from torch.nn import functional as F
|
| 7 |
+
|
| 8 |
+
from detectron2.data.transforms import RandomRotation, RotationTransform, apply_transform_gens
|
| 9 |
+
from detectron2.modeling.postprocessing import detector_postprocess
|
| 10 |
+
from detectron2.modeling.test_time_augmentation import DatasetMapperTTA, GeneralizedRCNNWithTTA
|
| 11 |
+
|
| 12 |
+
from ..converters import HFlipConverter
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class DensePoseDatasetMapperTTA(DatasetMapperTTA):
    """TTA mapper that extends the standard flip/resize augmentations with
    extra rotated copies of the input image (angles from
    cfg.TEST.AUG.ROTATION_ANGLES)."""

    def __init__(self, cfg):
        super().__init__(cfg=cfg)
        self.angles = cfg.TEST.AUG.ROTATION_ANGLES

    def __call__(self, dataset_dict):
        outputs = super().__call__(dataset_dict=dataset_dict)
        hwc_image = dataset_dict["image"].permute(1, 2, 0).numpy()
        for angle in self.angles:
            rotated_image, tfms = apply_transform_gens(
                [RandomRotation(angle=angle, expand=True)], np.copy(hwc_image)
            )
            entry = copy.deepcopy(dataset_dict)
            # In DatasetMapperTTA, there is a pre_tfm transform (resize or
            # no-op) that is added at the beginning of each TransformList.
            # That's '.transforms[0]'.
            entry["transforms"] = TransformList(
                [outputs[-1]["transforms"].transforms[0]] + tfms.transforms
            )
            entry["image"] = torch.from_numpy(
                np.ascontiguousarray(rotated_image.transpose(2, 0, 1))
            )
            outputs.append(entry)
        return outputs
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class DensePoseGeneralizedRCNNWithTTA(GeneralizedRCNNWithTTA):
    def __init__(self, cfg, model, transform_data, tta_mapper=None, batch_size=1):
        """
        Args:
            cfg (CfgNode):
            model (GeneralizedRCNN): a GeneralizedRCNN to apply TTA on.
            transform_data (DensePoseTransformData): contains symmetry label
                transforms used for horizontal flip
            tta_mapper (callable): takes a dataset dict and returns a list of
                augmented versions of the dataset dict. Defaults to
                `DatasetMapperTTA(cfg)`.
            batch_size (int): batch the augmented images into this batch size for inference.
        """
        self._transform_data = transform_data.to(model.device)
        super().__init__(cfg=cfg, model=model, tta_mapper=tta_mapper, batch_size=batch_size)

    # the implementation follows closely the one from detectron2/modeling
    def _inference_one_image(self, input):
        """
        Args:
            input (dict): one dataset dict with "image" field being a CHW tensor

        Returns:
            dict: one output dict
        """
        orig_shape = (input["height"], input["width"])
        # For some reason, resize with uint8 slightly increases box AP but decreases densepose AP
        input["image"] = input["image"].to(torch.uint8)
        augmented_inputs, tfms = self._get_augmented_inputs(input)
        # Detect boxes from all augmented versions
        with self._turn_off_roi_heads(["mask_on", "keypoint_on", "densepose_on"]):
            # temporarily disable roi heads
            all_boxes, all_scores, all_classes = self._get_augmented_boxes(augmented_inputs, tfms)
        merged_instances = self._merge_detections(all_boxes, all_scores, all_classes, orig_shape)

        if self.cfg.MODEL.MASK_ON or self.cfg.MODEL.DENSEPOSE_ON:
            # Use the detected boxes to obtain new fields
            augmented_instances = self._rescale_detected_boxes(
                augmented_inputs, merged_instances, tfms
            )
            # run forward on the detected boxes
            outputs = self._batch_inference(augmented_inputs, augmented_instances)
            # Delete now useless variables to avoid being out of memory
            del augmented_inputs, augmented_instances
            # average the predictions
            if self.cfg.MODEL.MASK_ON:
                merged_instances.pred_masks = self._reduce_pred_masks(outputs, tfms)
            if self.cfg.MODEL.DENSEPOSE_ON:
                merged_instances.pred_densepose = self._reduce_pred_densepose(outputs, tfms)
            # postprocess
            merged_instances = detector_postprocess(merged_instances, *orig_shape)
            return {"instances": merged_instances}
        else:
            return {"instances": merged_instances}

    def _get_augmented_boxes(self, augmented_inputs, tfms):
        # Heavily based on detectron2/modeling/test_time_augmentation.py
        # Only difference is that RotationTransform is excluded from bbox computation
        # 1: forward with all augmented images
        outputs = self._batch_inference(augmented_inputs)
        # 2: union the results
        all_boxes = []
        all_scores = []
        all_classes = []
        for output, tfm in zip(outputs, tfms):
            # Need to inverse the transforms on boxes, to obtain results on original image
            if not any(isinstance(t, RotationTransform) for t in tfm.transforms):
                # Some transforms can't compute bbox correctly
                pred_boxes = output.pred_boxes.tensor
                original_pred_boxes = tfm.inverse().apply_box(pred_boxes.cpu().numpy())
                all_boxes.append(torch.from_numpy(original_pred_boxes).to(pred_boxes.device))
                all_scores.extend(output.scores)
                all_classes.extend(output.pred_classes)
        all_boxes = torch.cat(all_boxes, dim=0)
        return all_boxes, all_scores, all_classes

    def _reduce_pred_densepose(self, outputs, tfms):
        # Should apply inverse transforms on densepose preds.
        # We assume only rotation, resize & flip are used. pred_masks is a scale-invariant
        # representation, so we handle the other ones specially
        for idx, (output, tfm) in enumerate(zip(outputs, tfms)):
            # Undo each rotation on every densepose attribute map; non-rotation
            # transforms pass through _inverse_rotation unchanged.
            for t in tfm.transforms:
                for attr in ["coarse_segm", "fine_segm", "u", "v"]:
                    setattr(
                        output.pred_densepose,
                        attr,
                        _inverse_rotation(
                            getattr(output.pred_densepose, attr), output.pred_boxes.tensor, t
                        ),
                    )
            if any(isinstance(t, HFlipTransform) for t in tfm.transforms):
                # Horizontal flips require remapping symmetric body-part labels.
                output.pred_densepose = HFlipConverter.convert(
                    output.pred_densepose, self._transform_data
                )
            # Accumulate the running average into outputs[0].pred_densepose.
            self._incremental_avg_dp(outputs[0].pred_densepose, output.pred_densepose, idx)
        return outputs[0].pred_densepose

    # incrementally computed average: u_(n + 1) = u_n + (x_(n+1) - u_n) / (n + 1).
    def _incremental_avg_dp(self, avg, new_el, idx):
        for attr in ["coarse_segm", "fine_segm", "u", "v"]:
            setattr(avg, attr, (getattr(avg, attr) * idx + getattr(new_el, attr)) / (idx + 1))
            if idx:
                # Deletion of the > 0 index intermediary values to prevent GPU OOM
                setattr(new_el, attr, None)
        return avg
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
def _inverse_rotation(densepose_attrs, boxes, transform):
    # resample outputs to image size and rotate back the densepose preds
    # on the rotated images to the space of the original image
    # `densepose_attrs` is a per-instance batch of attribute maps (N, C, H, W);
    # `boxes` are the corresponding predicted boxes in the rotated image.
    # Non-rotation transforms (and empty inputs) pass through unchanged.
    if len(boxes) == 0 or not isinstance(transform, RotationTransform):
        return densepose_attrs
    boxes = boxes.int().cpu().numpy()
    wh_boxes = boxes[:, 2:] - boxes[:, :2]  # bboxes in the rotated space
    inv_boxes = rotate_box_inverse(transform, boxes).astype(int)  # bboxes in original image
    wh_diff = (inv_boxes[:, 2:] - inv_boxes[:, :2] - wh_boxes) // 2  # diff between new/old bboxes
    # Rotation-only affine matrix; the translation column is zeroed because
    # grid_sample rotates around the (padded) patch center.
    rotation_matrix = torch.tensor([transform.rm_image]).to(device=densepose_attrs.device).float()
    rotation_matrix[:, :, -1] = 0
    # To apply grid_sample for rotation, we need to have enough space to fit the original and
    # rotated bboxes. l_bds and r_bds are the left/right bounds that will be used to
    # crop the difference once the rotation is done
    l_bds = np.maximum(0, -wh_diff)
    for i in range(len(densepose_attrs)):
        if min(wh_boxes[i]) <= 0:
            # degenerate box in the rotated space — nothing to resample
            continue
        densepose_attr = densepose_attrs[[i]].clone()
        # 1. Interpolate densepose attribute to size of the rotated bbox
        densepose_attr = F.interpolate(densepose_attr, wh_boxes[i].tolist()[::-1], mode="bilinear")
        # 2. Pad the interpolated attribute so it has room for the original + rotated bbox
        densepose_attr = F.pad(densepose_attr, tuple(np.repeat(np.maximum(0, wh_diff[i]), 2)))
        # 3. Compute rotation grid and transform
        grid = F.affine_grid(rotation_matrix, size=densepose_attr.shape)
        densepose_attr = F.grid_sample(densepose_attr, grid)
        # 4. Compute right bounds and crop the densepose_attr to the size of the original bbox
        r_bds = densepose_attr.shape[2:][::-1] - l_bds[i]
        densepose_attr = densepose_attr[:, :, l_bds[i][1] : r_bds[1], l_bds[i][0] : r_bds[0]]
        if min(densepose_attr.shape) > 0:
            # Interpolate back to the original size of the densepose attribute
            densepose_attr = F.interpolate(
                densepose_attr, densepose_attrs.shape[-2:], mode="bilinear"
            )
            # Adding a very small probability to the background class to fill padded zones
            densepose_attr[:, 0] += 1e-10
            densepose_attrs[i] = densepose_attr
    return densepose_attrs
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
def rotate_box_inverse(rot_tfm, rotated_box):
    """
    rotated_box is a N * 4 array of [x0, y0, x1, y1] boxes
    When a bbox is rotated, it gets bigger, because we need to surround the tilted bbox
    So when a bbox is rotated then inverse-rotated, it is much bigger than the original
    This function aims to invert the rotation on the box, but also resize it to its original size
    """
    # 1. Compute the inverse rotation of the rotated bboxes (bigger than it)
    inverted = rot_tfm.inverse().apply_box(rotated_box)

    # Heights/widths of the boxes before and after inverse rotation.
    rot_h = rotated_box[:, 3] - rotated_box[:, 1]
    rot_w = rotated_box[:, 2] - rotated_box[:, 0]
    inv_h = inverted[:, 3] - inverted[:, 1]
    inv_w = inverted[:, 2] - inverted[:, 0]

    assert 2 * rot_tfm.abs_sin**2 != 1, "45 degrees angle can't be inverted"

    # 2. Inverse the corresponding computation in the rotation transform
    # to get the original height/width of the rotated boxes.
    denom = 1 - 2 * rot_tfm.abs_sin**2
    orig_h = (rot_h * rot_tfm.abs_cos - rot_w * rot_tfm.abs_sin) / denom
    orig_w = (rot_w * rot_tfm.abs_cos - rot_h * rot_tfm.abs_sin) / denom

    # 3. Shrink the inverse-rotated boxes symmetrically back to original size.
    excess_w = (inv_w - orig_w) / 2
    excess_h = (inv_h - orig_h) / 2
    inverted[:, 0] += excess_w
    inverted[:, 1] += excess_h
    inverted[:, 2] -= excess_w
    inverted[:, 3] -= excess_h

    return inverted
|
Leffa/3rdparty/densepose/modeling/utils.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from torch import nn
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def initialize_module_params(module: nn.Module) -> None:
    """Initialize a module's parameters Caffe2/MSRA-style.

    Every parameter whose name contains "bias" is zeroed; every parameter
    whose name contains "weight" gets Kaiming-normal initialization
    (fan_out, ReLU). Other parameters are left untouched.
    """
    for name, param in module.named_parameters():
        if "bias" in name:
            nn.init.constant_(param, 0)
            continue
        if "weight" in name:
            nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")
|
Leffa/3rdparty/densepose/utils/__init__.py
ADDED
|
File without changes
|