Add files using upload-large-folder tool
This view is limited to 50 files because it contains too many changes. See raw diff
- Leffa/densepose/__init__.py +20 -0
- Leffa/densepose/config.py +277 -0
- Leffa/densepose/converters/__init__.py +15 -0
- Leffa/densepose/converters/base.py +93 -0
- Leffa/densepose/converters/builtin.py +31 -0
- Leffa/densepose/converters/chart_output_hflip.py +71 -0
- Leffa/densepose/converters/chart_output_to_chart_result.py +188 -0
- Leffa/densepose/converters/hflip.py +34 -0
- Leffa/densepose/converters/segm_to_mask.py +150 -0
- Leffa/densepose/converters/to_chart_result.py +70 -0
- Leffa/densepose/converters/to_mask.py +49 -0
- Leffa/densepose/engine/__init__.py +3 -0
- Leffa/densepose/engine/trainer.py +258 -0
- Leffa/densepose/modeling/__init__.py +13 -0
- Leffa/densepose/modeling/build.py +87 -0
- Leffa/densepose/modeling/confidence.py +73 -0
- Leffa/densepose/modeling/densepose_checkpoint.py +35 -0
- Leffa/densepose/modeling/filter.py +94 -0
- Leffa/densepose/modeling/hrfpn.py +182 -0
- Leffa/densepose/modeling/hrnet.py +474 -0
- Leffa/densepose/modeling/inference.py +44 -0
- Leffa/densepose/modeling/losses/__init__.py +14 -0
- Leffa/densepose/modeling/losses/chart.py +291 -0
- Leffa/densepose/modeling/losses/embed_utils.py +137 -0
- Leffa/densepose/modeling/losses/mask_or_segm.py +77 -0
- Leffa/densepose/modeling/predictors/__init__.py +9 -0
- Leffa/densepose/modeling/predictors/chart.py +94 -0
- Leffa/densepose/modeling/predictors/chart_confidence.py +174 -0
- Leffa/densepose/modeling/predictors/chart_with_confidence.py +15 -0
- Leffa/densepose/modeling/predictors/cse.py +70 -0
- Leffa/densepose/modeling/predictors/cse_confidence.py +115 -0
- Leffa/densepose/modeling/predictors/cse_with_confidence.py +15 -0
- Leffa/densepose/modeling/predictors/registry.py +5 -0
- Leffa/densepose/modeling/roi_heads/__init__.py +6 -0
- Leffa/densepose/modeling/roi_heads/deeplab.py +263 -0
- Leffa/densepose/modeling/roi_heads/registry.py +5 -0
- Leffa/densepose/modeling/roi_heads/roi_head.py +218 -0
- Leffa/densepose/modeling/roi_heads/v1convx.py +64 -0
- Leffa/densepose/modeling/test_time_augmentation.py +207 -0
- Leffa/densepose/modeling/utils.py +11 -0
- Leffa/densepose/utils/__init__.py +0 -0
- Leffa/densepose/utils/dbhelper.py +147 -0
- Leffa/densepose/utils/logger.py +13 -0
- Leffa/densepose/utils/transform.py +15 -0
- Leffa/leffa_utils/densepose_for_mask.py +170 -0
- Leffa/leffa_utils/densepose_predictor.py +77 -0
- Leffa/leffa_utils/garment_agnostic_mask_predictor.py +415 -0
- Leffa/leffa_utils/utils.py +379 -0
- Leffa/preprocess/humanparsing/mhp_extension/detectron2/docker/Dockerfile +49 -0
- Leffa/preprocess/humanparsing/mhp_extension/detectron2/docker/Dockerfile-circleci +17 -0
Leffa/densepose/__init__.py
ADDED
@@ -0,0 +1,20 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+from .data.datasets import builtin  # just to register data
+from .converters import builtin as builtin_converters  # register converters
+from .config import (
+    add_densepose_config,
+    add_densepose_head_config,
+    add_hrnet_config,
+    add_dataset_category_config,
+    add_bootstrap_config,
+    load_bootstrap_config,
+)
+from .structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData
+from .evaluation import DensePoseCOCOEvaluator
+from .modeling.roi_heads import DensePoseROIHeads
+from .modeling.test_time_augmentation import (
+    DensePoseGeneralizedRCNNWithTTA,
+    DensePoseDatasetMapperTTA,
+)
+from .utils.transform import load_from_cfg
+from .modeling.hrfpn import build_hrfpn_backbone
Leffa/densepose/config.py
ADDED
@@ -0,0 +1,277 @@
+# -*- coding = utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates.
+# pyre-ignore-all-errors
+
+from detectron2.config import CfgNode as CN
+
+
+def add_dataset_category_config(cfg: CN) -> None:
+    """
+    Add config for additional category-related dataset options
+     - category whitelisting
+     - category mapping
+    """
+    _C = cfg
+    _C.DATASETS.CATEGORY_MAPS = CN(new_allowed=True)
+    _C.DATASETS.WHITELISTED_CATEGORIES = CN(new_allowed=True)
+    # class to mesh mapping
+    _C.DATASETS.CLASS_TO_MESH_NAME_MAPPING = CN(new_allowed=True)
+
+
+def add_evaluation_config(cfg: CN) -> None:
+    _C = cfg
+    _C.DENSEPOSE_EVALUATION = CN()
+    # evaluator type, possible values:
+    #  - "iou": evaluator for models that produce iou data
+    #  - "cse": evaluator for models that produce cse data
+    _C.DENSEPOSE_EVALUATION.TYPE = "iou"
+    # storage for DensePose results, possible values:
+    #  - "none": no explicit storage, all the results are stored in the
+    #            dictionary with predictions, memory intensive;
+    #            historically the default storage type
+    #  - "ram": RAM storage, uses per-process RAM storage, which is
+    #           reduced to a single process storage on later stages,
+    #           less memory intensive
+    #  - "file": file storage, uses per-process file-based storage,
+    #            the least memory intensive, but may create bottlenecks
+    #            on file system accesses
+    _C.DENSEPOSE_EVALUATION.STORAGE = "none"
+    # minimum threshold for IOU values: the lower its value is,
+    # the more matches are produced (and the higher the AP score)
+    _C.DENSEPOSE_EVALUATION.MIN_IOU_THRESHOLD = 0.5
+    # Non-distributed inference is slower (at inference time) but can avoid RAM OOM
+    _C.DENSEPOSE_EVALUATION.DISTRIBUTED_INFERENCE = True
+    # evaluate mesh alignment based on vertex embeddings, only makes sense in CSE context
+    _C.DENSEPOSE_EVALUATION.EVALUATE_MESH_ALIGNMENT = False
+    # meshes to compute mesh alignment for
+    _C.DENSEPOSE_EVALUATION.MESH_ALIGNMENT_MESH_NAMES = []
+
+
+def add_bootstrap_config(cfg: CN) -> None:
+    """ """
+    _C = cfg
+    _C.BOOTSTRAP_DATASETS = []
+    _C.BOOTSTRAP_MODEL = CN()
+    _C.BOOTSTRAP_MODEL.WEIGHTS = ""
+    _C.BOOTSTRAP_MODEL.DEVICE = "cuda"
+
+
+def get_bootstrap_dataset_config() -> CN:
+    _C = CN()
+    _C.DATASET = ""
+    # ratio used to mix data loaders
+    _C.RATIO = 0.1
+    # image loader
+    _C.IMAGE_LOADER = CN(new_allowed=True)
+    _C.IMAGE_LOADER.TYPE = ""
+    _C.IMAGE_LOADER.BATCH_SIZE = 4
+    _C.IMAGE_LOADER.NUM_WORKERS = 4
+    _C.IMAGE_LOADER.CATEGORIES = []
+    _C.IMAGE_LOADER.MAX_COUNT_PER_CATEGORY = 1_000_000
+    _C.IMAGE_LOADER.CATEGORY_TO_CLASS_MAPPING = CN(new_allowed=True)
+    # inference
+    _C.INFERENCE = CN()
+    # batch size for model inputs
+    _C.INFERENCE.INPUT_BATCH_SIZE = 4
+    # batch size to group model outputs
+    _C.INFERENCE.OUTPUT_BATCH_SIZE = 2
+    # sampled data
+    _C.DATA_SAMPLER = CN(new_allowed=True)
+    _C.DATA_SAMPLER.TYPE = ""
+    _C.DATA_SAMPLER.USE_GROUND_TRUTH_CATEGORIES = False
+    # filter
+    _C.FILTER = CN(new_allowed=True)
+    _C.FILTER.TYPE = ""
+    return _C
+
+
+def load_bootstrap_config(cfg: CN) -> None:
+    """
+    Bootstrap datasets are given as a list of `dict` that are not automatically
+    converted into CfgNode. This method processes all bootstrap dataset entries
+    and ensures that they are in CfgNode format and comply with the specification
+    """
+    if not cfg.BOOTSTRAP_DATASETS:
+        return
+
+    bootstrap_datasets_cfgnodes = []
+    for dataset_cfg in cfg.BOOTSTRAP_DATASETS:
+        _C = get_bootstrap_dataset_config().clone()
+        _C.merge_from_other_cfg(CN(dataset_cfg))
+        bootstrap_datasets_cfgnodes.append(_C)
+    cfg.BOOTSTRAP_DATASETS = bootstrap_datasets_cfgnodes
+
+
+def add_densepose_head_cse_config(cfg: CN) -> None:
+    """
+    Add configuration options for Continuous Surface Embeddings (CSE)
+    """
+    _C = cfg
+    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE = CN()
+    # Dimensionality D of the embedding space
+    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE = 16
+    # Embedder specifications for various mesh IDs
+    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDERS = CN(new_allowed=True)
+    # normalization coefficient for embedding distances
+    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDING_DIST_GAUSS_SIGMA = 0.01
+    # normalization coefficient for geodesic distances
+    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.GEODESIC_DIST_GAUSS_SIGMA = 0.01
+    # embedding loss weight
+    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_LOSS_WEIGHT = 0.6
+    # embedding loss name, currently the following options are supported:
+    #  - EmbeddingLoss: cross-entropy on vertex labels
+    #  - SoftEmbeddingLoss: cross-entropy on vertex label combined with
+    #       Gaussian penalty on distance between vertices
+    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_LOSS_NAME = "EmbeddingLoss"
+    # optimizer hyperparameters
+    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.FEATURES_LR_FACTOR = 1.0
+    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDING_LR_FACTOR = 1.0
+    # Shape to shape cycle consistency loss parameters:
+    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS = CN({"ENABLED": False})
+    # shape to shape cycle consistency loss weight
+    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.WEIGHT = 0.025
+    # norm type used for loss computation
+    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.NORM_P = 2
+    # normalization term for embedding similarity matrices
+    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.TEMPERATURE = 0.05
+    # maximum number of vertices to include into shape to shape cycle loss
+    # if negative or zero, all vertices are considered
+    # if positive, random subset of vertices of given size is considered
+    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.MAX_NUM_VERTICES = 4936
+    # Pixel to shape cycle consistency loss parameters:
+    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS = CN({"ENABLED": False})
+    # pixel to shape cycle consistency loss weight
+    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.WEIGHT = 0.0001
+    # norm type used for loss computation
+    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.NORM_P = 2
+    # map images to all meshes and back (if false, use only gt meshes from the batch)
+    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.USE_ALL_MESHES_NOT_GT_ONLY = False
+    # Randomly select at most this number of pixels from every instance
+    # if negative or zero, all vertices are considered
+    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.NUM_PIXELS_TO_SAMPLE = 100
+    # normalization factor for pixel to pixel distances (higher value = smoother distribution)
+    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.PIXEL_SIGMA = 5.0
+    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.TEMPERATURE_PIXEL_TO_VERTEX = 0.05
+    _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.TEMPERATURE_VERTEX_TO_PIXEL = 0.05
+
+
+def add_densepose_head_config(cfg: CN) -> None:
+    """
+    Add config for densepose head.
+    """
+    _C = cfg
+
+    _C.MODEL.DENSEPOSE_ON = True
+
+    _C.MODEL.ROI_DENSEPOSE_HEAD = CN()
+    _C.MODEL.ROI_DENSEPOSE_HEAD.NAME = ""
+    _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS = 8
+    # Number of parts used for point labels
+    _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES = 24
+    _C.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL = 4
+    _C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM = 512
+    _C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL = 3
+    _C.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE = 2
+    _C.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE = 112
+    _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE = "ROIAlignV2"
+    _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION = 28
+    _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO = 2
+    _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS = 2  # 15 or 2
+    # Overlap threshold for an RoI to be considered foreground (if >= FG_IOU_THRESHOLD)
+    _C.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD = 0.7
+    # Loss weights for annotation masks (14 parts)
+    _C.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS = 5.0
+    # Loss weights for surface parts (24 parts)
+    _C.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS = 1.0
+    # Loss weights for UV regression.
+    _C.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS = 0.01
+    # Coarse segmentation is trained using instance segmentation task data
+    _C.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS = False
+    # For Decoder
+    _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON = True
+    _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES = 256
+    _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS = 256
+    _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM = ""
+    _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE = 4
+    # For DeepLab head
+    _C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB = CN()
+    _C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NORM = "GN"
+    _C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NONLOCAL_ON = 0
+    # Predictor class name, must be registered in DENSEPOSE_PREDICTOR_REGISTRY
+    # Some registered predictors:
+    #   "DensePoseChartPredictor": predicts segmentation and UV coordinates for predefined charts
+    #   "DensePoseChartWithConfidencePredictor": predicts segmentation, UV coordinates
+    #       and associated confidences for predefined charts (default)
+    #   "DensePoseEmbeddingWithConfidencePredictor": predicts segmentation, embeddings
+    #       and associated confidences for CSE
+    _C.MODEL.ROI_DENSEPOSE_HEAD.PREDICTOR_NAME = "DensePoseChartWithConfidencePredictor"
+    # Loss class name, must be registered in DENSEPOSE_LOSS_REGISTRY
+    # Some registered losses:
+    #   "DensePoseChartLoss": loss for chart-based models that estimate
+    #       segmentation and UV coordinates
+    #   "DensePoseChartWithConfidenceLoss": loss for chart-based models that estimate
+    #       segmentation, UV coordinates and the corresponding confidences (default)
+    _C.MODEL.ROI_DENSEPOSE_HEAD.LOSS_NAME = "DensePoseChartWithConfidenceLoss"
+    # Confidences
+    # Enable learning UV confidences (variances) along with the actual values
+    _C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE = CN({"ENABLED": False})
+    # UV confidence lower bound
+    _C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.EPSILON = 0.01
+    # Enable learning segmentation confidences (variances) along with the actual values
+    _C.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE = CN({"ENABLED": False})
+    # Segmentation confidence lower bound
+    _C.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE.EPSILON = 0.01
+    # Statistical model type for confidence learning, possible values:
+    #  - "iid_iso": statistically independent identically distributed residuals
+    #       with isotropic covariance
+    #  - "indep_aniso": statistically independent residuals with anisotropic
+    #       covariances
+    _C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.TYPE = "iid_iso"
+    # List of angles for rotation in data augmentation during training
+    _C.INPUT.ROTATION_ANGLES = [0]
+    _C.TEST.AUG.ROTATION_ANGLES = ()  # Rotation TTA
+
+    add_densepose_head_cse_config(cfg)
+
+
+def add_hrnet_config(cfg: CN) -> None:
+    """
+    Add config for HRNet backbone.
+    """
+    _C = cfg
+
+    # For HigherHRNet w32
+    _C.MODEL.HRNET = CN()
+    _C.MODEL.HRNET.STEM_INPLANES = 64
+    _C.MODEL.HRNET.STAGE2 = CN()
+    _C.MODEL.HRNET.STAGE2.NUM_MODULES = 1
+    _C.MODEL.HRNET.STAGE2.NUM_BRANCHES = 2
+    _C.MODEL.HRNET.STAGE2.BLOCK = "BASIC"
+    _C.MODEL.HRNET.STAGE2.NUM_BLOCKS = [4, 4]
+    _C.MODEL.HRNET.STAGE2.NUM_CHANNELS = [32, 64]
+    _C.MODEL.HRNET.STAGE2.FUSE_METHOD = "SUM"
+    _C.MODEL.HRNET.STAGE3 = CN()
+    _C.MODEL.HRNET.STAGE3.NUM_MODULES = 4
+    _C.MODEL.HRNET.STAGE3.NUM_BRANCHES = 3
+    _C.MODEL.HRNET.STAGE3.BLOCK = "BASIC"
+    _C.MODEL.HRNET.STAGE3.NUM_BLOCKS = [4, 4, 4]
+    _C.MODEL.HRNET.STAGE3.NUM_CHANNELS = [32, 64, 128]
+    _C.MODEL.HRNET.STAGE3.FUSE_METHOD = "SUM"
+    _C.MODEL.HRNET.STAGE4 = CN()
+    _C.MODEL.HRNET.STAGE4.NUM_MODULES = 3
+    _C.MODEL.HRNET.STAGE4.NUM_BRANCHES = 4
+    _C.MODEL.HRNET.STAGE4.BLOCK = "BASIC"
+    _C.MODEL.HRNET.STAGE4.NUM_BLOCKS = [4, 4, 4, 4]
+    _C.MODEL.HRNET.STAGE4.NUM_CHANNELS = [32, 64, 128, 256]
+    _C.MODEL.HRNET.STAGE4.FUSE_METHOD = "SUM"
+
+    _C.MODEL.HRNET.HRFPN = CN()
+    _C.MODEL.HRNET.HRFPN.OUT_CHANNELS = 256
+
+
+def add_densepose_config(cfg: CN) -> None:
+    add_densepose_head_config(cfg)
+    add_hrnet_config(cfg)
+    add_bootstrap_config(cfg)
+    add_dataset_category_config(cfg)
+    add_evaluation_config(cfg)
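Usage note (not part of the diff): `add_densepose_config` is the single entry point that installs all of the option groups above on a detectron2 config. A minimal sketch of the expected wiring, assuming detectron2 and this package are importable; the YAML path is a placeholder, not a file from this commit:

from detectron2.config import get_cfg
from densepose import add_densepose_config

cfg = get_cfg()            # stock detectron2 config
add_densepose_config(cfg)  # installs MODEL.ROI_DENSEPOSE_HEAD, MODEL.HRNET, BOOTSTRAP_*, ...
# cfg.merge_from_file("<path-to-densepose-yaml>")  # placeholder, supply a real config file
print(cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE)   # -> 112, the default set above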
Leffa/densepose/converters/__init__.py
ADDED
@@ -0,0 +1,15 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+from .hflip import HFlipConverter
+from .to_mask import ToMaskConverter
+from .to_chart_result import ToChartResultConverter, ToChartResultConverterWithConfidences
+from .segm_to_mask import (
+    predictor_output_with_fine_and_coarse_segm_to_mask,
+    predictor_output_with_coarse_segm_to_mask,
+    resample_fine_and_coarse_segm_to_bbox,
+)
+from .chart_output_to_chart_result import (
+    densepose_chart_predictor_output_to_result,
+    densepose_chart_predictor_output_to_result_with_confidences,
+)
+from .chart_output_hflip import densepose_chart_predictor_output_hflip
Leffa/densepose/converters/base.py
ADDED
@@ -0,0 +1,93 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+from typing import Any, Tuple, Type
+import torch
+
+
+class BaseConverter:
+    """
+    Converter base class to be reused by various converters.
+    Converter allows one to convert data from various source types to a particular
+    destination type. Each source type needs to register its converter. The
+    registration for each source type is valid for all descendants of that type.
+    """
+
+    @classmethod
+    def register(cls, from_type: Type, converter: Any = None):
+        """
+        Registers a converter for the specified type.
+        Can be used as a decorator (if converter is None), or called as a method.
+
+        Args:
+            from_type (type): type to register the converter for;
+                all instances of this type will use the same converter
+            converter (callable): converter to be registered for the given
+                type; if None, this method is assumed to be a decorator for the converter
+        """
+
+        if converter is not None:
+            cls._do_register(from_type, converter)
+
+        def wrapper(converter: Any) -> Any:
+            cls._do_register(from_type, converter)
+            return converter
+
+        return wrapper
+
+    @classmethod
+    def _do_register(cls, from_type: Type, converter: Any):
+        cls.registry[from_type] = converter  # pyre-ignore[16]
+
+    @classmethod
+    def _lookup_converter(cls, from_type: Type) -> Any:
+        """
+        Perform recursive lookup for the given type
+        to find registered converter. If a converter was found for some base
+        class, it gets registered for this class to save on further lookups.
+
+        Args:
+            from_type: type for which to find a converter
+        Return:
+            callable or None - registered converter or None
+                if no suitable entry was found in the registry
+        """
+        if from_type in cls.registry:  # pyre-ignore[16]
+            return cls.registry[from_type]
+        for base in from_type.__bases__:
+            converter = cls._lookup_converter(base)
+            if converter is not None:
+                cls._do_register(from_type, converter)
+                return converter
+        return None
+
+    @classmethod
+    def convert(cls, instance: Any, *args, **kwargs):
+        """
+        Convert an instance to the destination type using some registered
+        converter. Does recursive lookup for base classes, so there's no need
+        for explicit registration for derived classes.
+
+        Args:
+            instance: source instance to convert to the destination type
+        Return:
+            An instance of the destination type obtained from the source instance
+            Raises KeyError, if no suitable converter found
+        """
+        instance_type = type(instance)
+        converter = cls._lookup_converter(instance_type)
+        if converter is None:
+            if cls.dst_type is None:  # pyre-ignore[16]
+                output_type_str = "itself"
+            else:
+                output_type_str = cls.dst_type
+            raise KeyError(f"Could not find converter from {instance_type} to {output_type_str}")
+        return converter(instance, *args, **kwargs)
+
+
+IntTupleBox = Tuple[int, int, int, int]
+
+
+def make_int_box(box: torch.Tensor) -> IntTupleBox:
+    int_box = [0, 0, 0, 0]
+    int_box[0], int_box[1], int_box[2], int_box[3] = tuple(box.long().tolist())
+    return int_box[0], int_box[1], int_box[2], int_box[3]
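Usage note (not part of the diff): a minimal, self-contained sketch of the registration and recursive-lookup mechanics above. The `Celsius`/`Kelvin` types and `ToFloatConverter` are invented for illustration; only `BaseConverter` comes from this file. The decorator form works because `register` returns `wrapper` when called with `converter=None`:

from densepose.converters.base import BaseConverter

class ToFloatConverter(BaseConverter):
    registry = {}     # each concrete converter owns its registry
    dst_type = float  # used only in the KeyError message

class Celsius:
    def __init__(self, value: float) -> None:
        self.value = value

class Kelvin(Celsius):  # derived type: no explicit registration needed
    pass

@ToFloatConverter.register(Celsius)
def celsius_to_float(instance: Celsius, *args, **kwargs) -> float:
    return float(instance.value)

print(ToFloatConverter.convert(Celsius(21.5)))  # 21.5
print(ToFloatConverter.convert(Kelvin(300.0)))  # 300.0, found via __bases__ and then cached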
Leffa/densepose/converters/builtin.py
ADDED
@@ -0,0 +1,31 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+from ..structures import DensePoseChartPredictorOutput, DensePoseEmbeddingPredictorOutput
+from . import (
+    HFlipConverter,
+    ToChartResultConverter,
+    ToChartResultConverterWithConfidences,
+    ToMaskConverter,
+    densepose_chart_predictor_output_hflip,
+    densepose_chart_predictor_output_to_result,
+    densepose_chart_predictor_output_to_result_with_confidences,
+    predictor_output_with_coarse_segm_to_mask,
+    predictor_output_with_fine_and_coarse_segm_to_mask,
+)
+
+ToMaskConverter.register(
+    DensePoseChartPredictorOutput, predictor_output_with_fine_and_coarse_segm_to_mask
+)
+ToMaskConverter.register(
+    DensePoseEmbeddingPredictorOutput, predictor_output_with_coarse_segm_to_mask
+)
+
+ToChartResultConverter.register(
+    DensePoseChartPredictorOutput, densepose_chart_predictor_output_to_result
+)
+
+ToChartResultConverterWithConfidences.register(
+    DensePoseChartPredictorOutput, densepose_chart_predictor_output_to_result_with_confidences
+)
+
+HFlipConverter.register(DensePoseChartPredictorOutput, densepose_chart_predictor_output_hflip)
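Usage note (not part of the diff): these registrations run at import time (`densepose/__init__.py` imports this module purely for its side effects), so a quick sanity check of the wiring is to inspect the registries after importing the package:

import densepose  # noqa: F401 - triggers the registrations above
from densepose.converters import ToChartResultConverter, ToMaskConverter
from densepose.structures import DensePoseChartPredictorOutput

assert DensePoseChartPredictorOutput in ToChartResultConverter.registry
assert DensePoseChartPredictorOutput in ToMaskConverter.registry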
Leffa/densepose/converters/chart_output_hflip.py
ADDED
@@ -0,0 +1,71 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+from dataclasses import fields
+import torch
+
+from densepose.structures import DensePoseChartPredictorOutput, DensePoseTransformData
+
+
+def densepose_chart_predictor_output_hflip(
+    densepose_predictor_output: DensePoseChartPredictorOutput,
+    transform_data: DensePoseTransformData,
+) -> DensePoseChartPredictorOutput:
+    """
+    Adjust the predictor output to take a horizontal flip into account.
+    """
+    if len(densepose_predictor_output) > 0:
+
+        PredictorOutput = type(densepose_predictor_output)
+        output_dict = {}
+
+        for field in fields(densepose_predictor_output):
+            field_value = getattr(densepose_predictor_output, field.name)
+            # flip tensors
+            if isinstance(field_value, torch.Tensor):
+                setattr(densepose_predictor_output, field.name, torch.flip(field_value, [3]))
+
+        densepose_predictor_output = _flip_iuv_semantics_tensor(
+            densepose_predictor_output, transform_data
+        )
+        densepose_predictor_output = _flip_segm_semantics_tensor(
+            densepose_predictor_output, transform_data
+        )
+
+        for field in fields(densepose_predictor_output):
+            output_dict[field.name] = getattr(densepose_predictor_output, field.name)
+
+        return PredictorOutput(**output_dict)
+    else:
+        return densepose_predictor_output
+
+
+def _flip_iuv_semantics_tensor(
+    densepose_predictor_output: DensePoseChartPredictorOutput,
+    dp_transform_data: DensePoseTransformData,
+) -> DensePoseChartPredictorOutput:
+    point_label_symmetries = dp_transform_data.point_label_symmetries
+    uv_symmetries = dp_transform_data.uv_symmetries
+
+    N, C, H, W = densepose_predictor_output.u.shape
+    u_loc = (densepose_predictor_output.u[:, 1:, :, :].clamp(0, 1) * 255).long()
+    v_loc = (densepose_predictor_output.v[:, 1:, :, :].clamp(0, 1) * 255).long()
+    Iindex = torch.arange(C - 1, device=densepose_predictor_output.u.device)[
+        None, :, None, None
+    ].expand(N, C - 1, H, W)
+    densepose_predictor_output.u[:, 1:, :, :] = uv_symmetries["U_transforms"][Iindex, v_loc, u_loc]
+    densepose_predictor_output.v[:, 1:, :, :] = uv_symmetries["V_transforms"][Iindex, v_loc, u_loc]
+
+    for el in ["fine_segm", "u", "v"]:
+        densepose_predictor_output.__dict__[el] = densepose_predictor_output.__dict__[el][
+            :, point_label_symmetries, :, :
+        ]
+    return densepose_predictor_output
+
+
+def _flip_segm_semantics_tensor(
+    densepose_predictor_output: DensePoseChartPredictorOutput, dp_transform_data
+):
+    if densepose_predictor_output.coarse_segm.shape[1] > 2:
+        densepose_predictor_output.coarse_segm = densepose_predictor_output.coarse_segm[
+            :, dp_transform_data.mask_label_symmetries, :, :
+        ]
+    return densepose_predictor_output
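Usage note (not part of the diff): this function is normally reached through `HFlipConverter.convert` during test-time augmentation. A hedged sketch of the call site, assuming `cfg` is a DensePose-enabled config and `flipped_output` is a `DensePoseChartPredictorOutput` computed on a mirrored image:

from densepose import load_from_cfg
from densepose.converters import HFlipConverter

transform_data = load_from_cfg(cfg)  # point-label / UV / mask-label symmetries
restored = HFlipConverter.convert(flipped_output, transform_data)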
Leffa/densepose/converters/chart_output_to_chart_result.py
ADDED
@@ -0,0 +1,188 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+from typing import Dict
+import torch
+from torch.nn import functional as F
+
+from detectron2.structures.boxes import Boxes, BoxMode
+
+from ..structures import (
+    DensePoseChartPredictorOutput,
+    DensePoseChartResult,
+    DensePoseChartResultWithConfidences,
+)
+from . import resample_fine_and_coarse_segm_to_bbox
+from .base import IntTupleBox, make_int_box
+
+
+def resample_uv_tensors_to_bbox(
+    u: torch.Tensor,
+    v: torch.Tensor,
+    labels: torch.Tensor,
+    box_xywh_abs: IntTupleBox,
+) -> torch.Tensor:
+    """
+    Resamples U and V coordinate estimates for the given bounding box
+
+    Args:
+        u (tensor [1, C, H, W] of float): U coordinates
+        v (tensor [1, C, H, W] of float): V coordinates
+        labels (tensor [H, W] of long): labels obtained by resampling segmentation
+            outputs for the given bounding box
+        box_xywh_abs (tuple of 4 int): bounding box that corresponds to predictor outputs
+    Return:
+        Resampled U and V coordinates - a tensor [2, H, W] of float
+    """
+    x, y, w, h = box_xywh_abs
+    w = max(int(w), 1)
+    h = max(int(h), 1)
+    u_bbox = F.interpolate(u, (h, w), mode="bilinear", align_corners=False)
+    v_bbox = F.interpolate(v, (h, w), mode="bilinear", align_corners=False)
+    uv = torch.zeros([2, h, w], dtype=torch.float32, device=u.device)
+    for part_id in range(1, u_bbox.size(1)):
+        uv[0][labels == part_id] = u_bbox[0, part_id][labels == part_id]
+        uv[1][labels == part_id] = v_bbox[0, part_id][labels == part_id]
+    return uv
+
+
+def resample_uv_to_bbox(
+    predictor_output: DensePoseChartPredictorOutput,
+    labels: torch.Tensor,
+    box_xywh_abs: IntTupleBox,
+) -> torch.Tensor:
+    """
+    Resamples U and V coordinate estimates for the given bounding box
+
+    Args:
+        predictor_output (DensePoseChartPredictorOutput): DensePose predictor
+            output to be resampled
+        labels (tensor [H, W] of long): labels obtained by resampling segmentation
+            outputs for the given bounding box
+        box_xywh_abs (tuple of 4 int): bounding box that corresponds to predictor outputs
+    Return:
+        Resampled U and V coordinates - a tensor [2, H, W] of float
+    """
+    return resample_uv_tensors_to_bbox(
+        predictor_output.u,
+        predictor_output.v,
+        labels,
+        box_xywh_abs,
+    )
+
+
+def densepose_chart_predictor_output_to_result(
+    predictor_output: DensePoseChartPredictorOutput, boxes: Boxes
+) -> DensePoseChartResult:
+    """
+    Convert densepose chart predictor outputs to results
+
+    Args:
+        predictor_output (DensePoseChartPredictorOutput): DensePose predictor
+            output to be converted to results, must contain only 1 output
+        boxes (Boxes): bounding box that corresponds to the predictor output,
+            must contain only 1 bounding box
+    Return:
+        DensePose chart-based result (DensePoseChartResult)
+    """
+    assert len(predictor_output) == 1 and len(boxes) == 1, (
+        f"Predictor output to result conversion can operate only on single outputs"
+        f", got {len(predictor_output)} predictor outputs and {len(boxes)} boxes"
+    )
+
+    boxes_xyxy_abs = boxes.tensor.clone()
+    boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
+    box_xywh = make_int_box(boxes_xywh_abs[0])
+
+    labels = resample_fine_and_coarse_segm_to_bbox(predictor_output, box_xywh).squeeze(0)
+    uv = resample_uv_to_bbox(predictor_output, labels, box_xywh)
+    return DensePoseChartResult(labels=labels, uv=uv)
+
+
+def resample_confidences_to_bbox(
+    predictor_output: DensePoseChartPredictorOutput,
+    labels: torch.Tensor,
+    box_xywh_abs: IntTupleBox,
+) -> Dict[str, torch.Tensor]:
+    """
+    Resamples confidences for the given bounding box
+
+    Args:
+        predictor_output (DensePoseChartPredictorOutput): DensePose predictor
+            output to be resampled
+        labels (tensor [H, W] of long): labels obtained by resampling segmentation
+            outputs for the given bounding box
+        box_xywh_abs (tuple of 4 int): bounding box that corresponds to predictor outputs
+    Return:
+        Resampled confidences - a dict of [H, W] tensors of float
+    """
+
+    x, y, w, h = box_xywh_abs
+    w = max(int(w), 1)
+    h = max(int(h), 1)
+
+    confidence_names = [
+        "sigma_1",
+        "sigma_2",
+        "kappa_u",
+        "kappa_v",
+        "fine_segm_confidence",
+        "coarse_segm_confidence",
+    ]
+    confidence_results = {key: None for key in confidence_names}
+    confidence_names = [
+        key for key in confidence_names if getattr(predictor_output, key) is not None
+    ]
+    confidence_base = torch.zeros([h, w], dtype=torch.float32, device=predictor_output.u.device)
+
+    # assign data from channels that correspond to the labels
+    for key in confidence_names:
+        resampled_confidence = F.interpolate(
+            getattr(predictor_output, key),
+            (h, w),
+            mode="bilinear",
+            align_corners=False,
+        )
+        result = confidence_base.clone()
+        for part_id in range(1, predictor_output.u.size(1)):
+            if resampled_confidence.size(1) != predictor_output.u.size(1):
+                # confidence is not part-based, don't try to fill it part by part
+                continue
+            result[labels == part_id] = resampled_confidence[0, part_id][labels == part_id]
+
+        if resampled_confidence.size(1) != predictor_output.u.size(1):
+            # confidence is not part-based, fill the data with the first channel
+            # (targeted for segmentation confidences that have only 1 channel)
+            result = resampled_confidence[0, 0]
+
+        confidence_results[key] = result
+
+    return confidence_results  # pyre-ignore[7]
+
+
+def densepose_chart_predictor_output_to_result_with_confidences(
+    predictor_output: DensePoseChartPredictorOutput, boxes: Boxes
+) -> DensePoseChartResultWithConfidences:
+    """
+    Convert densepose chart predictor outputs to results
+
+    Args:
+        predictor_output (DensePoseChartPredictorOutput): DensePose predictor
+            output with confidences to be converted to results, must contain only 1 output
+        boxes (Boxes): bounding box that corresponds to the predictor output,
+            must contain only 1 bounding box
+    Return:
+        DensePose chart-based result with confidences (DensePoseChartResultWithConfidences)
+    """
+    assert len(predictor_output) == 1 and len(boxes) == 1, (
+        f"Predictor output to result conversion can operate only on single outputs"
+        f", got {len(predictor_output)} predictor outputs and {len(boxes)} boxes"
+    )
+
+    boxes_xyxy_abs = boxes.tensor.clone()
+    boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
+    box_xywh = make_int_box(boxes_xywh_abs[0])
+
+    labels = resample_fine_and_coarse_segm_to_bbox(predictor_output, box_xywh).squeeze(0)
+    uv = resample_uv_to_bbox(predictor_output, labels, box_xywh)
+    confidences = resample_confidences_to_bbox(predictor_output, labels, box_xywh)
+    return DensePoseChartResultWithConfidences(labels=labels, uv=uv, **confidences)
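Usage note (not part of the diff): because the conversion asserts single-instance inputs, callers loop over detections and pass length-1 slices. A hedged sketch, assuming `instances` is the `Instances` object from a DensePose model with `pred_densepose` and `pred_boxes` fields, and that integer indexing of the predictor output keeps the batch dimension (as in the upstream DensePose result extractor):

from densepose.converters import ToChartResultConverter

dpout = instances.pred_densepose  # chart predictor output for N instances
boxes = instances.pred_boxes      # N boxes, XYXY absolute
results = [
    ToChartResultConverter.convert(dpout[i], boxes[[i]])  # length-1 slices
    for i in range(len(dpout))
]
# each result carries per-pixel part labels and a [2, H, W] UV tensor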
Leffa/densepose/converters/hflip.py
ADDED
@@ -0,0 +1,34 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+from typing import Any
+
+from .base import BaseConverter
+
+
+class HFlipConverter(BaseConverter):
+    """
+    Applies horizontal flips to various DensePose predictor outputs.
+    Each DensePose predictor output type has to register its conversion strategy.
+    """
+
+    registry = {}
+    dst_type = None
+
+    @classmethod
+    # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
+    #  inconsistently.
+    def convert(cls, predictor_outputs: Any, transform_data: Any, *args, **kwargs):
+        """
+        Performs a horizontal flip on DensePose predictor outputs.
+        Does recursive lookup for base classes, so there's no need
+        for explicit registration for derived classes.
+
+        Args:
+            predictor_outputs: DensePose predictor output to be flipped
+            transform_data: Anything useful for the flip
+        Return:
+            An instance of the same type as predictor_outputs
+        """
+        return super(HFlipConverter, cls).convert(
+            predictor_outputs, transform_data, *args, **kwargs
+        )
Leffa/densepose/converters/segm_to_mask.py
ADDED
@@ -0,0 +1,150 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+from typing import Any
+import torch
+from torch.nn import functional as F
+
+from detectron2.structures import BitMasks, Boxes, BoxMode
+
+from .base import IntTupleBox, make_int_box
+from .to_mask import ImageSizeType
+
+
+def resample_coarse_segm_tensor_to_bbox(coarse_segm: torch.Tensor, box_xywh_abs: IntTupleBox):
+    """
+    Resample coarse segmentation tensor to the given
+    bounding box and derive labels for each pixel of the bounding box
+
+    Args:
+        coarse_segm: float tensor of shape [1, K, Hout, Wout]
+        box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
+            corner coordinates, width (W) and height (H)
+    Return:
+        Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
+    """
+    x, y, w, h = box_xywh_abs
+    w = max(int(w), 1)
+    h = max(int(h), 1)
+    labels = F.interpolate(coarse_segm, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
+    return labels
+
+
+def resample_fine_and_coarse_segm_tensors_to_bbox(
+    fine_segm: torch.Tensor, coarse_segm: torch.Tensor, box_xywh_abs: IntTupleBox
+):
+    """
+    Resample fine and coarse segmentation tensors to the given
+    bounding box and derive labels for each pixel of the bounding box
+
+    Args:
+        fine_segm: float tensor of shape [1, C, Hout, Wout]
+        coarse_segm: float tensor of shape [1, K, Hout, Wout]
+        box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
+            corner coordinates, width (W) and height (H)
+    Return:
+        Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
+    """
+    x, y, w, h = box_xywh_abs
+    w = max(int(w), 1)
+    h = max(int(h), 1)
+    # coarse segmentation
+    coarse_segm_bbox = F.interpolate(
+        coarse_segm,
+        (h, w),
+        mode="bilinear",
+        align_corners=False,
+    ).argmax(dim=1)
+    # combined coarse and fine segmentation
+    labels = (
+        F.interpolate(fine_segm, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
+        * (coarse_segm_bbox > 0).long()
+    )
+    return labels
+
+
+def resample_fine_and_coarse_segm_to_bbox(predictor_output: Any, box_xywh_abs: IntTupleBox):
+    """
+    Resample fine and coarse segmentation outputs from a predictor to the given
+    bounding box and derive labels for each pixel of the bounding box
+
+    Args:
+        predictor_output: DensePose predictor output that contains segmentation
+            results to be resampled
+        box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
+            corner coordinates, width (W) and height (H)
+    Return:
+        Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
+    """
+    return resample_fine_and_coarse_segm_tensors_to_bbox(
+        predictor_output.fine_segm,
+        predictor_output.coarse_segm,
+        box_xywh_abs,
+    )
+
+
+def predictor_output_with_coarse_segm_to_mask(
+    predictor_output: Any, boxes: Boxes, image_size_hw: ImageSizeType
+) -> BitMasks:
+    """
+    Convert predictor output with coarse segmentation to a mask.
+    Assumes that predictor output has the following attributes:
+     - coarse_segm (tensor of size [N, D, H, W]): coarse segmentation
+         unnormalized scores for N instances; D is the number of coarse
+         segmentation labels, H and W is the resolution of the estimate
+
+    Args:
+        predictor_output: DensePose predictor output to be converted to mask
+        boxes (Boxes): bounding boxes that correspond to the DensePose
+            predictor outputs
+        image_size_hw (tuple [int, int]): image height Himg and width Wimg
+    Return:
+        BitMasks that contain a bool tensor of size [N, Himg, Wimg] with
+        a mask of the size of the image for each instance
+    """
+    H, W = image_size_hw
+    boxes_xyxy_abs = boxes.tensor.clone()
+    boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
+    N = len(boxes_xywh_abs)
+    masks = torch.zeros((N, H, W), dtype=torch.bool, device=boxes.tensor.device)
+    for i in range(len(boxes_xywh_abs)):
+        box_xywh = make_int_box(boxes_xywh_abs[i])
+        box_mask = resample_coarse_segm_tensor_to_bbox(predictor_output[i].coarse_segm, box_xywh)
+        x, y, w, h = box_xywh
+        masks[i, y : y + h, x : x + w] = box_mask
+
+    return BitMasks(masks)
+
+
+def predictor_output_with_fine_and_coarse_segm_to_mask(
+    predictor_output: Any, boxes: Boxes, image_size_hw: ImageSizeType
+) -> BitMasks:
+    """
+    Convert predictor output with coarse and fine segmentation to a mask.
+    Assumes that predictor output has the following attributes:
+     - coarse_segm (tensor of size [N, D, H, W]): coarse segmentation
+         unnormalized scores for N instances; D is the number of coarse
+         segmentation labels, H and W is the resolution of the estimate
+     - fine_segm (tensor of size [N, C, H, W]): fine segmentation
+         unnormalized scores for N instances; C is the number of fine
+         segmentation labels, H and W is the resolution of the estimate
+
+    Args:
+        predictor_output: DensePose predictor output to be converted to mask
+        boxes (Boxes): bounding boxes that correspond to the DensePose
+            predictor outputs
+        image_size_hw (tuple [int, int]): image height Himg and width Wimg
+    Return:
+        BitMasks that contain a bool tensor of size [N, Himg, Wimg] with
+        a mask of the size of the image for each instance
+    """
+    H, W = image_size_hw
+    boxes_xyxy_abs = boxes.tensor.clone()
+    boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
+    N = len(boxes_xywh_abs)
+    masks = torch.zeros((N, H, W), dtype=torch.bool, device=boxes.tensor.device)
+    for i in range(len(boxes_xywh_abs)):
+        box_xywh = make_int_box(boxes_xywh_abs[i])
+        labels_i = resample_fine_and_coarse_segm_to_bbox(predictor_output[i], box_xywh)
+        x, y, w, h = box_xywh
+        masks[i, y : y + h, x : x + w] = labels_i > 0
+    return BitMasks(masks)
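Usage note (not part of the diff): both functions are the registered backends of `ToMaskConverter` (see `builtin.py` above); callers normally go through the converter so that chart and CSE outputs dispatch to the right variant. A hedged sketch, assuming `instances` came from a DensePose model run on an image of height `H` and width `W`:

from densepose.converters import ToMaskConverter

bitmasks = ToMaskConverter.convert(
    instances.pred_densepose, instances.pred_boxes, (H, W)
)  # BitMasks holding a bool [N, H, W] tensor
num_fg_pixels = bitmasks.tensor.sum()  # total foreground pixel count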
Leffa/densepose/converters/to_chart_result.py
ADDED
@@ -0,0 +1,70 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+from typing import Any
+
+from detectron2.structures import Boxes
+
+from ..structures import DensePoseChartResult, DensePoseChartResultWithConfidences
+from .base import BaseConverter
+
+
+class ToChartResultConverter(BaseConverter):
+    """
+    Converts various DensePose predictor outputs to DensePose results.
+    Each DensePose predictor output type has to register its conversion strategy.
+    """
+
+    registry = {}
+    dst_type = DensePoseChartResult
+
+    @classmethod
+    # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
+    #  inconsistently.
+    def convert(cls, predictor_outputs: Any, boxes: Boxes, *args, **kwargs) -> DensePoseChartResult:
+        """
+        Convert DensePose predictor outputs to DensePoseResult using some registered
+        converter. Does recursive lookup for base classes, so there's no need
+        for explicit registration for derived classes.
+
+        Args:
+            predictor_outputs: DensePose predictor output to be
+                converted to DensePose results
+            boxes (Boxes): bounding boxes that correspond to the DensePose
+                predictor outputs
+        Return:
+            An instance of DensePoseResult. If no suitable converter was found, raises KeyError
+        """
+        return super(ToChartResultConverter, cls).convert(predictor_outputs, boxes, *args, **kwargs)
+
+
+class ToChartResultConverterWithConfidences(BaseConverter):
+    """
+    Converts various DensePose predictor outputs to DensePose results.
+    Each DensePose predictor output type has to register its conversion strategy.
+    """
+
+    registry = {}
+    dst_type = DensePoseChartResultWithConfidences
+
+    @classmethod
+    # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
+    #  inconsistently.
+    def convert(
+        cls, predictor_outputs: Any, boxes: Boxes, *args, **kwargs
+    ) -> DensePoseChartResultWithConfidences:
+        """
+        Convert DensePose predictor outputs to DensePoseResult with confidences
+        using some registered converter. Does recursive lookup for base classes,
+        so there's no need for explicit registration for derived classes.
+
+        Args:
+            predictor_outputs: DensePose predictor output with confidences
+                to be converted to DensePose results
+            boxes (Boxes): bounding boxes that correspond to the DensePose
+                predictor outputs
+        Return:
+            An instance of DensePoseResult. If no suitable converter was found, raises KeyError
+        """
+        return super(ToChartResultConverterWithConfidences, cls).convert(
+            predictor_outputs, boxes, *args, **kwargs
+        )
Leffa/densepose/converters/to_mask.py
ADDED
@@ -0,0 +1,49 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+from typing import Any, Tuple
+
+from detectron2.structures import BitMasks, Boxes
+
+from .base import BaseConverter
+
+ImageSizeType = Tuple[int, int]
+
+
+class ToMaskConverter(BaseConverter):
+    """
+    Converts various DensePose predictor outputs to masks
+    in bit mask format (see `BitMasks`). Each DensePose predictor output type
+    has to register its conversion strategy.
+    """
+
+    registry = {}
+    dst_type = BitMasks
+
+    @classmethod
+    # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
+    #  inconsistently.
+    def convert(
+        cls,
+        densepose_predictor_outputs: Any,
+        boxes: Boxes,
+        image_size_hw: ImageSizeType,
+        *args,
+        **kwargs
+    ) -> BitMasks:
+        """
+        Convert DensePose predictor outputs to BitMasks using some registered
+        converter. Does recursive lookup for base classes, so there's no need
+        for explicit registration for derived classes.
+
+        Args:
+            densepose_predictor_outputs: DensePose predictor output to be
+                converted to BitMasks
+            boxes (Boxes): bounding boxes that correspond to the DensePose
+                predictor outputs
+            image_size_hw (tuple [int, int]): image height and width
+        Return:
+            An instance of `BitMasks`. If no suitable converter was found, raises KeyError
+        """
+        return super(ToMaskConverter, cls).convert(
+            densepose_predictor_outputs, boxes, image_size_hw, *args, **kwargs
+        )
Leffa/densepose/engine/__init__.py
ADDED
@@ -0,0 +1,3 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+from .trainer import Trainer
Leffa/densepose/engine/trainer.py
ADDED
@@ -0,0 +1,258 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

import logging
import os
from collections import OrderedDict
from typing import List, Optional, Union
import torch
from torch import nn

from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import CfgNode
from detectron2.engine import DefaultTrainer
from detectron2.evaluation import (
    DatasetEvaluator,
    DatasetEvaluators,
    inference_on_dataset,
    print_csv_format,
)
from detectron2.solver.build import get_default_optimizer_params, maybe_add_gradient_clipping
from detectron2.utils import comm
from detectron2.utils.events import EventWriter, get_event_storage

from densepose import DensePoseDatasetMapperTTA, DensePoseGeneralizedRCNNWithTTA, load_from_cfg
from densepose.data import (
    DatasetMapper,
    build_combined_loader,
    build_detection_test_loader,
    build_detection_train_loader,
    build_inference_based_loaders,
    has_inference_based_loaders,
)
from densepose.evaluation.d2_evaluator_adapter import Detectron2COCOEvaluatorAdapter
from densepose.evaluation.evaluator import DensePoseCOCOEvaluator, build_densepose_evaluator_storage
from densepose.modeling.cse import Embedder


class SampleCountingLoader:
    def __init__(self, loader):
        self.loader = loader

    def __iter__(self):
        it = iter(self.loader)
        storage = get_event_storage()
        while True:
            try:
                batch = next(it)
                num_inst_per_dataset = {}
                for data in batch:
                    dataset_name = data["dataset"]
                    if dataset_name not in num_inst_per_dataset:
                        num_inst_per_dataset[dataset_name] = 0
                    num_inst = len(data["instances"])
                    num_inst_per_dataset[dataset_name] += num_inst
                for dataset_name in num_inst_per_dataset:
                    storage.put_scalar(f"batch/{dataset_name}", num_inst_per_dataset[dataset_name])
                yield batch
            except StopIteration:
                break


class SampleCountMetricPrinter(EventWriter):
    def __init__(self):
        self.logger = logging.getLogger(__name__)

    def write(self):
        storage = get_event_storage()
        batch_stats_strs = []
        for key, buf in storage.histories().items():
            if key.startswith("batch/"):
                batch_stats_strs.append(f"{key} {buf.avg(20)}")
        self.logger.info(", ".join(batch_stats_strs))


class Trainer(DefaultTrainer):
    @classmethod
    def extract_embedder_from_model(cls, model: nn.Module) -> Optional[Embedder]:
        if isinstance(model, nn.parallel.DistributedDataParallel):
            model = model.module
        if hasattr(model, "roi_heads") and hasattr(model.roi_heads, "embedder"):
            return model.roi_heads.embedder
        return None

    # TODO: the only reason to copy the base class code here is to pass the embedder from
    # the model to the evaluator; that should be refactored to avoid unnecessary copy-pasting
    @classmethod
    def test(
        cls,
        cfg: CfgNode,
        model: nn.Module,
        evaluators: Optional[Union[DatasetEvaluator, List[DatasetEvaluator]]] = None,
    ):
        """
        Args:
            cfg (CfgNode):
            model (nn.Module):
            evaluators (DatasetEvaluator, list[DatasetEvaluator] or None): if None, will call
                :meth:`build_evaluator`. Otherwise, must have the same length as
                ``cfg.DATASETS.TEST``.

        Returns:
            dict: a dict of result metrics
        """
        logger = logging.getLogger(__name__)
        if isinstance(evaluators, DatasetEvaluator):
            evaluators = [evaluators]
        if evaluators is not None:
            assert len(cfg.DATASETS.TEST) == len(evaluators), "{} != {}".format(
                len(cfg.DATASETS.TEST), len(evaluators)
            )

        results = OrderedDict()
        for idx, dataset_name in enumerate(cfg.DATASETS.TEST):
            data_loader = cls.build_test_loader(cfg, dataset_name)
            # When evaluators are passed in as arguments,
            # implicitly assume that evaluators can be created before data_loader.
            if evaluators is not None:
                evaluator = evaluators[idx]
            else:
                try:
                    embedder = cls.extract_embedder_from_model(model)
                    evaluator = cls.build_evaluator(cfg, dataset_name, embedder=embedder)
                except NotImplementedError:
                    logger.warning(
                        "No evaluator found. Use `DefaultTrainer.test(evaluators=)`, "
                        "or implement its `build_evaluator` method."
                    )
                    results[dataset_name] = {}
                    continue
            if cfg.DENSEPOSE_EVALUATION.DISTRIBUTED_INFERENCE or comm.is_main_process():
                results_i = inference_on_dataset(model, data_loader, evaluator)
            else:
                results_i = {}
            results[dataset_name] = results_i
            if comm.is_main_process():
                assert isinstance(
                    results_i, dict
                ), "Evaluator must return a dict on the main process. Got {} instead.".format(
                    results_i
                )
                logger.info("Evaluation results for {} in csv format:".format(dataset_name))
                print_csv_format(results_i)

        if len(results) == 1:
            results = list(results.values())[0]
        return results

    @classmethod
    def build_evaluator(
        cls,
        cfg: CfgNode,
        dataset_name: str,
        output_folder: Optional[str] = None,
        embedder: Optional[Embedder] = None,
    ) -> DatasetEvaluators:
        if output_folder is None:
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
        evaluators = []
        distributed = cfg.DENSEPOSE_EVALUATION.DISTRIBUTED_INFERENCE
        # Note: we currently use COCO evaluator for both COCO and LVIS datasets
        # to have compatible metrics. LVIS bbox evaluator could also be used
        # with an adapter to properly handle filtered / mapped categories
        # evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type
        # if evaluator_type == "coco":
        #     evaluators.append(COCOEvaluator(dataset_name, output_dir=output_folder))
        # elif evaluator_type == "lvis":
        #     evaluators.append(LVISEvaluator(dataset_name, output_dir=output_folder))
        evaluators.append(
            Detectron2COCOEvaluatorAdapter(
                dataset_name, output_dir=output_folder, distributed=distributed
            )
        )
        if cfg.MODEL.DENSEPOSE_ON:
            storage = build_densepose_evaluator_storage(cfg, output_folder)
            evaluators.append(
                DensePoseCOCOEvaluator(
                    dataset_name,
                    distributed,
                    output_folder,
                    evaluator_type=cfg.DENSEPOSE_EVALUATION.TYPE,
                    min_iou_threshold=cfg.DENSEPOSE_EVALUATION.MIN_IOU_THRESHOLD,
                    storage=storage,
                    embedder=embedder,
                    should_evaluate_mesh_alignment=cfg.DENSEPOSE_EVALUATION.EVALUATE_MESH_ALIGNMENT,
                    mesh_alignment_mesh_names=cfg.DENSEPOSE_EVALUATION.MESH_ALIGNMENT_MESH_NAMES,
                )
            )
        return DatasetEvaluators(evaluators)

    @classmethod
    def build_optimizer(cls, cfg: CfgNode, model: nn.Module):
        params = get_default_optimizer_params(
            model,
            base_lr=cfg.SOLVER.BASE_LR,
            weight_decay_norm=cfg.SOLVER.WEIGHT_DECAY_NORM,
            bias_lr_factor=cfg.SOLVER.BIAS_LR_FACTOR,
            weight_decay_bias=cfg.SOLVER.WEIGHT_DECAY_BIAS,
            overrides={
                "features": {
                    "lr": cfg.SOLVER.BASE_LR * cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.FEATURES_LR_FACTOR,
                },
                "embeddings": {
                    "lr": cfg.SOLVER.BASE_LR * cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDING_LR_FACTOR,
                },
            },
        )
        optimizer = torch.optim.SGD(
            params,
            cfg.SOLVER.BASE_LR,
            momentum=cfg.SOLVER.MOMENTUM,
            nesterov=cfg.SOLVER.NESTEROV,
            weight_decay=cfg.SOLVER.WEIGHT_DECAY,
        )
        # pyre-fixme[6]: For 2nd param expected `Type[Optimizer]` but got `SGD`.
        return maybe_add_gradient_clipping(cfg, optimizer)

    @classmethod
    def build_test_loader(cls, cfg: CfgNode, dataset_name):
        return build_detection_test_loader(cfg, dataset_name, mapper=DatasetMapper(cfg, False))

    @classmethod
    def build_train_loader(cls, cfg: CfgNode):
        data_loader = build_detection_train_loader(cfg, mapper=DatasetMapper(cfg, True))
        if not has_inference_based_loaders(cfg):
            return data_loader
        model = cls.build_model(cfg)
        model.to(cfg.BOOTSTRAP_MODEL.DEVICE)
        DetectionCheckpointer(model).resume_or_load(cfg.BOOTSTRAP_MODEL.WEIGHTS, resume=False)
        inference_based_loaders, ratios = build_inference_based_loaders(cfg, model)
        loaders = [data_loader] + inference_based_loaders
        ratios = [1.0] + ratios
        combined_data_loader = build_combined_loader(cfg, loaders, ratios)
        sample_counting_loader = SampleCountingLoader(combined_data_loader)
        return sample_counting_loader

    def build_writers(self):
        writers = super().build_writers()
        writers.append(SampleCountMetricPrinter())
        return writers

    @classmethod
    def test_with_TTA(cls, cfg: CfgNode, model):
        logger = logging.getLogger("detectron2.trainer")
        # At the end of training, run an evaluation with TTA.
        # Only supports some R-CNN models.
        logger.info("Running inference with test-time augmentation ...")
        transform_data = load_from_cfg(cfg)
        model = DensePoseGeneralizedRCNNWithTTA(
            cfg, model, transform_data, DensePoseDatasetMapperTTA(cfg)
        )
        evaluators = [
            cls.build_evaluator(
                cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA")
            )
            for name in cfg.DATASETS.TEST
        ]
        res = cls.test(cfg, model, evaluators)  # pyre-ignore[6]
        res = OrderedDict({k + "_TTA": v for k, v in res.items()})
        return res
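For orientation, a minimal launch sketch for this Trainer, assuming a standard detectron2-style setup; the config path below is hypothetical, and add_densepose_config is assumed exported by the densepose package:

from detectron2.config import get_cfg

from densepose import add_densepose_config
from densepose.engine import Trainer

cfg = get_cfg()
add_densepose_config(cfg)  # add DensePose-specific config keys
cfg.merge_from_file("configs/densepose_rcnn_R_50_FPN_s1x.yaml")  # hypothetical path
trainer = Trainer(cfg)                # builds model, optimizer, and train loader
trainer.resume_or_load(resume=False)  # loads cfg.MODEL.WEIGHTS if set
# trainer.train()  # starts the loop; Trainer.test(cfg, trainer.model) runs evaluation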
Leffa/densepose/modeling/__init__.py
ADDED
@@ -0,0 +1,13 @@
# Copyright (c) Facebook, Inc. and its affiliates.

from .confidence import DensePoseConfidenceModelConfig, DensePoseUVConfidenceType
from .filter import DensePoseDataFilter
from .inference import densepose_inference
from .utils import initialize_module_params
from .build import (
    build_densepose_data_filter,
    build_densepose_embedder,
    build_densepose_head,
    build_densepose_losses,
    build_densepose_predictor,
)
Leffa/densepose/modeling/build.py
ADDED
@@ -0,0 +1,87 @@
# Copyright (c) Facebook, Inc. and its affiliates.

from typing import Optional
from torch import nn

from detectron2.config import CfgNode

from .cse.embedder import Embedder
from .filter import DensePoseDataFilter


def build_densepose_predictor(cfg: CfgNode, input_channels: int):
    """
    Create an instance of DensePose predictor based on configuration options.

    Args:
        cfg (CfgNode): configuration options
        input_channels (int): input tensor size along the channel dimension
    Return:
        An instance of DensePose predictor
    """
    from .predictors import DENSEPOSE_PREDICTOR_REGISTRY

    predictor_name = cfg.MODEL.ROI_DENSEPOSE_HEAD.PREDICTOR_NAME
    return DENSEPOSE_PREDICTOR_REGISTRY.get(predictor_name)(cfg, input_channels)


def build_densepose_data_filter(cfg: CfgNode):
    """
    Build DensePose data filter which selects data for training

    Args:
        cfg (CfgNode): configuration options

    Return:
        Callable: list(Tensor), list(Instances) -> list(Tensor), list(Instances)
        An instance of DensePose filter, which takes feature tensors and proposals
        as an input and returns filtered features and proposals
    """
    dp_filter = DensePoseDataFilter(cfg)
    return dp_filter


def build_densepose_head(cfg: CfgNode, input_channels: int):
    """
    Build DensePose head based on configuration options

    Args:
        cfg (CfgNode): configuration options
        input_channels (int): input tensor size along the channel dimension
    Return:
        An instance of DensePose head
    """
    from .roi_heads.registry import ROI_DENSEPOSE_HEAD_REGISTRY

    head_name = cfg.MODEL.ROI_DENSEPOSE_HEAD.NAME
    return ROI_DENSEPOSE_HEAD_REGISTRY.get(head_name)(cfg, input_channels)


def build_densepose_losses(cfg: CfgNode):
    """
    Build DensePose loss based on configuration options

    Args:
        cfg (CfgNode): configuration options
    Return:
        An instance of DensePose loss
    """
    from .losses import DENSEPOSE_LOSS_REGISTRY

    loss_name = cfg.MODEL.ROI_DENSEPOSE_HEAD.LOSS_NAME
    return DENSEPOSE_LOSS_REGISTRY.get(loss_name)(cfg)


def build_densepose_embedder(cfg: CfgNode) -> Optional[nn.Module]:
    """
    Build embedder used to embed mesh vertices into an embedding space.
    Embedder contains sub-embedders, one for each mesh ID.

    Args:
        cfg (CfgNode): configuration options
    Return:
        Embedding module
    """
    if cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDERS:
        return Embedder(cfg)
    return None
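These builders all resolve names through registries, so a custom component only needs to register itself. A hedged sketch of that pattern; MyDensePoseHead is hypothetical and shown only to illustrate the registry/config hand-off:

import torch.nn as nn

from densepose.modeling.roi_heads.registry import ROI_DENSEPOSE_HEAD_REGISTRY


@ROI_DENSEPOSE_HEAD_REGISTRY.register()
class MyDensePoseHead(nn.Module):  # hypothetical head, for illustration only
    def __init__(self, cfg, input_channels):
        super().__init__()
        hidden = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM
        self.conv = nn.Conv2d(input_channels, hidden, kernel_size=3, padding=1)
        self.n_out_channels = hidden

    def forward(self, features):
        return self.conv(features)

# Selecting it is then a config change, picked up by build_densepose_head:
#   cfg.MODEL.ROI_DENSEPOSE_HEAD.NAME = "MyDensePoseHead"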
Leffa/densepose/modeling/confidence.py
ADDED
@@ -0,0 +1,73 @@
# Copyright (c) Facebook, Inc. and its affiliates.

from dataclasses import dataclass
from enum import Enum

from detectron2.config import CfgNode


class DensePoseUVConfidenceType(Enum):
    """
    Statistical model type for confidence learning, possible values:
     - "iid_iso": statistically independent identically distributed residuals
        with isotropic covariance
     - "indep_aniso": statistically independent residuals with anisotropic
        covariances
    For details, see:
    N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning
    Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019
    """

    # fmt: off
    IID_ISO     = "iid_iso"
    INDEP_ANISO = "indep_aniso"
    # fmt: on


@dataclass
class DensePoseUVConfidenceConfig:
    """
    Configuration options for confidence on UV data
    """

    enabled: bool = False
    # lower bound on UV confidences
    epsilon: float = 0.01
    type: DensePoseUVConfidenceType = DensePoseUVConfidenceType.IID_ISO


@dataclass
class DensePoseSegmConfidenceConfig:
    """
    Configuration options for confidence on segmentation
    """

    enabled: bool = False
    # lower bound on confidence values
    epsilon: float = 0.01


@dataclass
class DensePoseConfidenceModelConfig:
    """
    Configuration options for confidence models
    """

    # confidence for U and V values
    uv_confidence: DensePoseUVConfidenceConfig
    # segmentation confidence
    segm_confidence: DensePoseSegmConfidenceConfig

    @staticmethod
    def from_cfg(cfg: CfgNode) -> "DensePoseConfidenceModelConfig":
        return DensePoseConfidenceModelConfig(
            uv_confidence=DensePoseUVConfidenceConfig(
                enabled=cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.ENABLED,
                epsilon=cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.EPSILON,
                type=DensePoseUVConfidenceType(cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.TYPE),
            ),
            segm_confidence=DensePoseSegmConfidenceConfig(
                enabled=cfg.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE.ENABLED,
                epsilon=cfg.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE.EPSILON,
            ),
        )
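A small sketch of how this configuration is typically consumed downstream, assuming cfg already carries the DensePose keys referenced above:

conf_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg)
if conf_cfg.uv_confidence.enabled:
    # the enum round-trips the string stored in the config, e.g. "iid_iso"
    print(conf_cfg.uv_confidence.type.value, conf_cfg.uv_confidence.epsilon)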
Leffa/densepose/modeling/densepose_checkpoint.py
ADDED
@@ -0,0 +1,35 @@
# Copyright (c) Facebook, Inc. and its affiliates.
from collections import OrderedDict

from detectron2.checkpoint import DetectionCheckpointer


def _rename_HRNet_weights(weights):
    # We detect and rename HRNet weights for DensePose. 1956 and 1716 are values that are
    # common to all HRNet pretrained weights, and should be enough to accurately identify them
    if (
        len(weights["model"].keys()) == 1956
        and len([k for k in weights["model"].keys() if k.startswith("stage")]) == 1716
    ):
        hrnet_weights = OrderedDict()
        for k in weights["model"].keys():
            hrnet_weights["backbone.bottom_up." + str(k)] = weights["model"][k]
        return {"model": hrnet_weights}
    else:
        return weights


class DensePoseCheckpointer(DetectionCheckpointer):
    """
    Same as :class:`DetectionCheckpointer`, but is able to handle HRNet weights
    """

    def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables):
        super().__init__(model, save_dir, save_to_disk=save_to_disk, **checkpointables)

    def _load_file(self, filename: str) -> object:
        """
        Adding HRNet support
        """
        weights = super()._load_file(filename)
        return _rename_HRNet_weights(weights)
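Usage sketch: loading HRNet-pretrained weights through this checkpointer remaps the detected keys onto backbone.bottom_up.* automatically; the weights path below is hypothetical:

checkpointer = DensePoseCheckpointer(model, save_dir="output")
checkpointer.load("weights/hrnetv2_w32_imagenet_pretrained.pth")  # hypothetical path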
Leffa/densepose/modeling/filter.py
ADDED
@@ -0,0 +1,94 @@
# Copyright (c) Facebook, Inc. and its affiliates.

from typing import List
import torch

from detectron2.config import CfgNode
from detectron2.structures import Instances
from detectron2.structures.boxes import matched_pairwise_iou


class DensePoseDataFilter:
    def __init__(self, cfg: CfgNode):
        self.iou_threshold = cfg.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD
        self.keep_masks = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS

    @torch.no_grad()
    def __call__(self, features: List[torch.Tensor], proposals_with_targets: List[Instances]):
        """
        Filters proposals with targets to keep only the ones relevant for
        DensePose training

        Args:
            features (list[Tensor]): input data as a list of features,
                each feature is a tensor. Axis 0 represents the number of
                images `N` in the input data; axes 1-3 are channels,
                height, and width, which may vary between features
                (e.g., if a feature pyramid is used).
            proposals_with_targets (list[Instances]): length `N` list of
                `Instances`. The i-th `Instances` contains instances
                (proposals, GT) for the i-th input image.
        Returns:
            list[Tensor]: filtered features
            list[Instances]: filtered proposals
        """
        proposals_filtered = []
        # TODO: the commented out code was supposed to correctly deal with situations
        # where no valid DensePose GT is available for certain images. The corresponding
        # image features were sliced and proposals were filtered. This led to performance
        # deterioration, both in terms of runtime and in terms of evaluation results.
        #
        # feature_mask = torch.ones(
        #    len(proposals_with_targets),
        #    dtype=torch.bool,
        #    device=features[0].device if len(features) > 0 else torch.device("cpu"),
        # )
        for i, proposals_per_image in enumerate(proposals_with_targets):
            if not proposals_per_image.has("gt_densepose") and (
                not proposals_per_image.has("gt_masks") or not self.keep_masks
            ):
                # feature_mask[i] = 0
                continue
            gt_boxes = proposals_per_image.gt_boxes
            est_boxes = proposals_per_image.proposal_boxes
            # apply match threshold for densepose head
            iou = matched_pairwise_iou(gt_boxes, est_boxes)
            iou_select = iou > self.iou_threshold
            proposals_per_image = proposals_per_image[iou_select]  # pyre-ignore[6]

            N_gt_boxes = len(proposals_per_image.gt_boxes)
            assert N_gt_boxes == len(proposals_per_image.proposal_boxes), (
                f"The number of GT boxes {N_gt_boxes} is different from the "
                f"number of proposal boxes {len(proposals_per_image.proposal_boxes)}"
            )
            # filter out any target without suitable annotation
            if self.keep_masks:
                gt_masks = (
                    proposals_per_image.gt_masks
                    if hasattr(proposals_per_image, "gt_masks")
                    else [None] * N_gt_boxes
                )
            else:
                gt_masks = [None] * N_gt_boxes
            gt_densepose = (
                proposals_per_image.gt_densepose
                if hasattr(proposals_per_image, "gt_densepose")
                else [None] * N_gt_boxes
            )
            assert len(gt_masks) == N_gt_boxes
            assert len(gt_densepose) == N_gt_boxes
            selected_indices = [
                i
                for i, (dp_target, mask_target) in enumerate(zip(gt_densepose, gt_masks))
                if (dp_target is not None) or (mask_target is not None)
            ]
            # if not len(selected_indices):
            #     feature_mask[i] = 0
            #     continue
            if len(selected_indices) != N_gt_boxes:
                proposals_per_image = proposals_per_image[selected_indices]  # pyre-ignore[6]
            assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.proposal_boxes)
            proposals_filtered.append(proposals_per_image)
        # features_filtered = [feature[feature_mask] for feature in features]
        # return features_filtered, proposals_filtered
        return features, proposals_filtered
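In a ROI head, this filter is applied before computing the DensePose losses; a usage sketch, with features and proposals_with_targets coming from the detection pipeline:

dp_filter = build_densepose_data_filter(cfg)  # equivalent to DensePoseDataFilter(cfg)
features, proposals = dp_filter(features, proposals_with_targets)
# `proposals` now only keeps instances that have DensePose (or mask) ground truth
# and whose proposal boxes overlap their GT boxes by more than FG_IOU_THRESHOLD.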
Leffa/densepose/modeling/hrfpn.py
ADDED
@@ -0,0 +1,182 @@
# Copyright (c) Facebook, Inc. and its affiliates.
"""
MIT License
Copyright (c) 2019 Microsoft
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""

import torch
import torch.nn as nn
import torch.nn.functional as F

from detectron2.layers import ShapeSpec
from detectron2.modeling.backbone import BACKBONE_REGISTRY
from detectron2.modeling.backbone.backbone import Backbone

from .hrnet import build_pose_hrnet_backbone


class HRFPN(Backbone):
    """HRFPN (High Resolution Feature Pyramids)
    Transforms outputs of HRNet backbone so they are suitable for the ROI_heads
    arXiv: https://arxiv.org/abs/1904.04514
    Adapted from https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/necks/hrfpn.py
    Args:
        bottom_up: (list) output of HRNet
        in_features (list): names of the input features (output of HRNet)
        in_channels (list): number of channels for each branch
        out_channels (int): output channels of feature pyramids
        n_out_features (int): number of output stages
        pooling (str): pooling for generating feature pyramids (from {MAX, AVG})
        share_conv (bool): Have one conv per output, or share one with all the outputs
    """

    def __init__(
        self,
        bottom_up,
        in_features,
        n_out_features,
        in_channels,
        out_channels,
        pooling="AVG",
        share_conv=False,
    ):
        super(HRFPN, self).__init__()
        assert isinstance(in_channels, list)
        self.bottom_up = bottom_up
        self.in_features = in_features
        self.n_out_features = n_out_features
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_ins = len(in_channels)
        self.share_conv = share_conv

        if self.share_conv:
            self.fpn_conv = nn.Conv2d(
                in_channels=out_channels, out_channels=out_channels, kernel_size=3, padding=1
            )
        else:
            self.fpn_conv = nn.ModuleList()
            for _ in range(self.n_out_features):
                self.fpn_conv.append(
                    nn.Conv2d(
                        in_channels=out_channels,
                        out_channels=out_channels,
                        kernel_size=3,
                        padding=1,
                    )
                )

        # Custom change: Replaces a simple bilinear interpolation
        self.interp_conv = nn.ModuleList()
        for i in range(len(self.in_features)):
            self.interp_conv.append(
                nn.Sequential(
                    nn.ConvTranspose2d(
                        in_channels=in_channels[i],
                        out_channels=in_channels[i],
                        kernel_size=4,
                        stride=2**i,
                        padding=0,
                        output_padding=0,
                        bias=False,
                    ),
                    nn.BatchNorm2d(in_channels[i], momentum=0.1),
                    nn.ReLU(inplace=True),
                )
            )

        # Custom change: Replaces a couple (reduction conv + pooling) by one conv
        self.reduction_pooling_conv = nn.ModuleList()
        for i in range(self.n_out_features):
            self.reduction_pooling_conv.append(
                nn.Sequential(
                    nn.Conv2d(sum(in_channels), out_channels, kernel_size=2**i, stride=2**i),
                    nn.BatchNorm2d(out_channels, momentum=0.1),
                    nn.ReLU(inplace=True),
                )
            )

        if pooling == "MAX":
            self.pooling = F.max_pool2d
        else:
            self.pooling = F.avg_pool2d

        self._out_features = []
        self._out_feature_channels = {}
        self._out_feature_strides = {}

        for i in range(self.n_out_features):
            self._out_features.append("p%d" % (i + 1))
            self._out_feature_channels.update({self._out_features[-1]: self.out_channels})
            self._out_feature_strides.update({self._out_features[-1]: 2 ** (i + 2)})

    # default init_weights for conv(msra) and norm in ConvModule
    def init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, a=1)
                nn.init.constant_(m.bias, 0)

    def forward(self, inputs):
        bottom_up_features = self.bottom_up(inputs)
        assert len(bottom_up_features) == len(self.in_features)
        inputs = [bottom_up_features[f] for f in self.in_features]

        outs = []
        for i in range(len(inputs)):
            outs.append(self.interp_conv[i](inputs[i]))
        shape_2 = min(o.shape[2] for o in outs)
        shape_3 = min(o.shape[3] for o in outs)
        out = torch.cat([o[:, :, :shape_2, :shape_3] for o in outs], dim=1)
        outs = []
        for i in range(self.n_out_features):
            outs.append(self.reduction_pooling_conv[i](out))
        for i in range(len(outs)):  # Make shapes consistent
            outs[-1 - i] = outs[-1 - i][
                :, :, : outs[-1].shape[2] * 2**i, : outs[-1].shape[3] * 2**i
            ]
        outputs = []
        for i in range(len(outs)):
            if self.share_conv:
                outputs.append(self.fpn_conv(outs[i]))
            else:
                outputs.append(self.fpn_conv[i](outs[i]))

        assert len(self._out_features) == len(outputs)
        return dict(zip(self._out_features, outputs))


@BACKBONE_REGISTRY.register()
def build_hrfpn_backbone(cfg, input_shape: ShapeSpec) -> HRFPN:
    in_channels = cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS
    in_features = ["p%d" % (i + 1) for i in range(cfg.MODEL.HRNET.STAGE4.NUM_BRANCHES)]
    n_out_features = len(cfg.MODEL.ROI_HEADS.IN_FEATURES)
    out_channels = cfg.MODEL.HRNET.HRFPN.OUT_CHANNELS
    hrnet = build_pose_hrnet_backbone(cfg, input_shape)
    hrfpn = HRFPN(
        hrnet,
        in_features,
        n_out_features,
        in_channels,
        out_channels,
        pooling="AVG",
        share_conv=False,
    )

    return hrfpn
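The resulting pyramid halves in resolution per level (strides 4, 8, 16, ... relative to the input, per the _out_feature_strides set above). A shape sketch, left commented out because it assumes a fully prepared cfg:

# hrfpn = build_hrfpn_backbone(cfg, ShapeSpec(channels=3))  # cfg assumed prepared
# feats = hrfpn(torch.randn(1, 3, 512, 512))
# for name, t in feats.items():
#     print(name, tuple(t.shape))  # p1 -> (1, C, 128, 128), p2 -> (1, C, 64, 64), ...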
Leffa/densepose/modeling/hrnet.py
ADDED
@@ -0,0 +1,474 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (leoxiaobin@gmail.com)
# Modified by Bowen Cheng (bcheng9@illinois.edu)
# Adapted from https://github.com/HRNet/Higher-HRNet-Human-Pose-Estimation/blob/master/lib/models/pose_higher_hrnet.py # noqa
# ------------------------------------------------------------------------------

from __future__ import absolute_import, division, print_function
import logging
import torch.nn as nn

from detectron2.layers import ShapeSpec
from detectron2.modeling.backbone import BACKBONE_REGISTRY
from detectron2.modeling.backbone.backbone import Backbone

BN_MOMENTUM = 0.1
logger = logging.getLogger(__name__)

__all__ = ["build_pose_hrnet_backbone", "PoseHigherResolutionNet"]


def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class HighResolutionModule(nn.Module):
    """HighResolutionModule
    Building block of the PoseHigherResolutionNet (see below)
    arXiv: https://arxiv.org/abs/1908.10357
    Args:
        num_branches (int): number of branches of the module
        blocks (str): type of block of the module
        num_blocks (int): number of blocks of the module
        num_inchannels (int): number of input channels of the module
        num_channels (list): number of channels of each branch
        multi_scale_output (bool): only used by the last module of PoseHigherResolutionNet
    """

    def __init__(
        self,
        num_branches,
        blocks,
        num_blocks,
        num_inchannels,
        num_channels,
        multi_scale_output=True,
    ):
        super(HighResolutionModule, self).__init__()
        self._check_branches(num_branches, blocks, num_blocks, num_inchannels, num_channels)

        self.num_inchannels = num_inchannels
        self.num_branches = num_branches

        self.multi_scale_output = multi_scale_output

        self.branches = self._make_branches(num_branches, blocks, num_blocks, num_channels)
        self.fuse_layers = self._make_fuse_layers()
        self.relu = nn.ReLU(True)

    def _check_branches(self, num_branches, blocks, num_blocks, num_inchannels, num_channels):
        if num_branches != len(num_blocks):
            error_msg = "NUM_BRANCHES({}) <> NUM_BLOCKS({})".format(num_branches, len(num_blocks))
            logger.error(error_msg)
            raise ValueError(error_msg)

        if num_branches != len(num_channels):
            error_msg = "NUM_BRANCHES({}) <> NUM_CHANNELS({})".format(
                num_branches, len(num_channels)
            )
            logger.error(error_msg)
            raise ValueError(error_msg)

        if num_branches != len(num_inchannels):
            error_msg = "NUM_BRANCHES({}) <> NUM_INCHANNELS({})".format(
                num_branches, len(num_inchannels)
            )
            logger.error(error_msg)
            raise ValueError(error_msg)

    def _make_one_branch(self, branch_index, block, num_blocks, num_channels, stride=1):
        downsample = None
        if (
            stride != 1
            or self.num_inchannels[branch_index] != num_channels[branch_index] * block.expansion
        ):
            downsample = nn.Sequential(
                nn.Conv2d(
                    self.num_inchannels[branch_index],
                    num_channels[branch_index] * block.expansion,
                    kernel_size=1,
                    stride=stride,
                    bias=False,
                ),
                nn.BatchNorm2d(num_channels[branch_index] * block.expansion, momentum=BN_MOMENTUM),
            )

        layers = []
        layers.append(
            block(self.num_inchannels[branch_index], num_channels[branch_index], stride, downsample)
        )
        self.num_inchannels[branch_index] = num_channels[branch_index] * block.expansion
        for _ in range(1, num_blocks[branch_index]):
            layers.append(block(self.num_inchannels[branch_index], num_channels[branch_index]))

        return nn.Sequential(*layers)

    def _make_branches(self, num_branches, block, num_blocks, num_channels):
        branches = []

        for i in range(num_branches):
            branches.append(self._make_one_branch(i, block, num_blocks, num_channels))

        return nn.ModuleList(branches)

    def _make_fuse_layers(self):
        if self.num_branches == 1:
            return None

        num_branches = self.num_branches
        num_inchannels = self.num_inchannels
        fuse_layers = []
        for i in range(num_branches if self.multi_scale_output else 1):
            fuse_layer = []
            for j in range(num_branches):
                if j > i:
                    fuse_layer.append(
                        nn.Sequential(
                            nn.Conv2d(num_inchannels[j], num_inchannels[i], 1, 1, 0, bias=False),
                            nn.BatchNorm2d(num_inchannels[i]),
                            nn.Upsample(scale_factor=2 ** (j - i), mode="nearest"),
                        )
                    )
                elif j == i:
                    fuse_layer.append(None)
                else:
                    conv3x3s = []
                    for k in range(i - j):
                        if k == i - j - 1:
                            num_outchannels_conv3x3 = num_inchannels[i]
                            conv3x3s.append(
                                nn.Sequential(
                                    nn.Conv2d(
                                        num_inchannels[j],
                                        num_outchannels_conv3x3,
                                        3,
                                        2,
                                        1,
                                        bias=False,
                                    ),
                                    nn.BatchNorm2d(num_outchannels_conv3x3),
                                )
                            )
                        else:
                            num_outchannels_conv3x3 = num_inchannels[j]
                            conv3x3s.append(
                                nn.Sequential(
                                    nn.Conv2d(
                                        num_inchannels[j],
                                        num_outchannels_conv3x3,
                                        3,
                                        2,
                                        1,
                                        bias=False,
                                    ),
                                    nn.BatchNorm2d(num_outchannels_conv3x3),
                                    nn.ReLU(True),
                                )
                            )
                    fuse_layer.append(nn.Sequential(*conv3x3s))
            fuse_layers.append(nn.ModuleList(fuse_layer))

        return nn.ModuleList(fuse_layers)

    def get_num_inchannels(self):
        return self.num_inchannels

    def forward(self, x):
        if self.num_branches == 1:
            return [self.branches[0](x[0])]

        for i in range(self.num_branches):
            x[i] = self.branches[i](x[i])

        x_fuse = []

        for i in range(len(self.fuse_layers)):
            y = x[0] if i == 0 else self.fuse_layers[i][0](x[0])
            for j in range(1, self.num_branches):
                if i == j:
                    y = y + x[j]
                else:
                    z = self.fuse_layers[i][j](x[j])[:, :, : y.shape[2], : y.shape[3]]
                    y = y + z
            x_fuse.append(self.relu(y))

        return x_fuse


blocks_dict = {"BASIC": BasicBlock, "BOTTLENECK": Bottleneck}


class PoseHigherResolutionNet(Backbone):
    """PoseHigherResolutionNet
    Composed of several HighResolutionModule tied together with ConvNets
    Adapted from the GitHub version to fit with HRFPN and the Detectron2 infrastructure
    arXiv: https://arxiv.org/abs/1908.10357
    """

    def __init__(self, cfg, **kwargs):
        self.inplanes = cfg.MODEL.HRNET.STEM_INPLANES
        super(PoseHigherResolutionNet, self).__init__()

        # stem net
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self._make_layer(Bottleneck, 64, 4)

        self.stage2_cfg = cfg.MODEL.HRNET.STAGE2
        num_channels = self.stage2_cfg.NUM_CHANNELS
        block = blocks_dict[self.stage2_cfg.BLOCK]
        num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
        self.transition1 = self._make_transition_layer([256], num_channels)
        self.stage2, pre_stage_channels = self._make_stage(self.stage2_cfg, num_channels)

        self.stage3_cfg = cfg.MODEL.HRNET.STAGE3
        num_channels = self.stage3_cfg.NUM_CHANNELS
        block = blocks_dict[self.stage3_cfg.BLOCK]
        num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
        self.transition2 = self._make_transition_layer(pre_stage_channels, num_channels)
        self.stage3, pre_stage_channels = self._make_stage(self.stage3_cfg, num_channels)

        self.stage4_cfg = cfg.MODEL.HRNET.STAGE4
        num_channels = self.stage4_cfg.NUM_CHANNELS
        block = blocks_dict[self.stage4_cfg.BLOCK]
        num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
        self.transition3 = self._make_transition_layer(pre_stage_channels, num_channels)
        self.stage4, pre_stage_channels = self._make_stage(
            self.stage4_cfg, num_channels, multi_scale_output=True
        )

        self._out_features = []
        self._out_feature_channels = {}
        self._out_feature_strides = {}

        for i in range(cfg.MODEL.HRNET.STAGE4.NUM_BRANCHES):
            self._out_features.append("p%d" % (i + 1))
            self._out_feature_channels.update(
                {self._out_features[-1]: cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS[i]}
            )
            self._out_feature_strides.update({self._out_features[-1]: 1})

    def _get_deconv_cfg(self, deconv_kernel):
        if deconv_kernel == 4:
            padding = 1
            output_padding = 0
        elif deconv_kernel == 3:
            padding = 1
            output_padding = 1
        elif deconv_kernel == 2:
            padding = 0
            output_padding = 0
        else:
            # defensive fix: the original fell through with unbound locals here
            raise ValueError(f"Unsupported deconv kernel size: {deconv_kernel}")

        return deconv_kernel, padding, output_padding

    def _make_transition_layer(self, num_channels_pre_layer, num_channels_cur_layer):
        num_branches_cur = len(num_channels_cur_layer)
        num_branches_pre = len(num_channels_pre_layer)

        transition_layers = []
        for i in range(num_branches_cur):
            if i < num_branches_pre:
                if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
                    transition_layers.append(
                        nn.Sequential(
                            nn.Conv2d(
                                num_channels_pre_layer[i],
                                num_channels_cur_layer[i],
                                3,
                                1,
                                1,
                                bias=False,
                            ),
                            nn.BatchNorm2d(num_channels_cur_layer[i]),
                            nn.ReLU(inplace=True),
                        )
                    )
                else:
                    transition_layers.append(None)
            else:
                conv3x3s = []
                for j in range(i + 1 - num_branches_pre):
                    inchannels = num_channels_pre_layer[-1]
                    outchannels = (
                        num_channels_cur_layer[i] if j == i - num_branches_pre else inchannels
                    )
                    conv3x3s.append(
                        nn.Sequential(
                            nn.Conv2d(inchannels, outchannels, 3, 2, 1, bias=False),
                            nn.BatchNorm2d(outchannels),
                            nn.ReLU(inplace=True),
                        )
                    )
                transition_layers.append(nn.Sequential(*conv3x3s))

        return nn.ModuleList(transition_layers)

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(
                    self.inplanes,
                    planes * block.expansion,
                    kernel_size=1,
                    stride=stride,
                    bias=False,
                ),
                nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def _make_stage(self, layer_config, num_inchannels, multi_scale_output=True):
        num_modules = layer_config["NUM_MODULES"]
        num_branches = layer_config["NUM_BRANCHES"]
        num_blocks = layer_config["NUM_BLOCKS"]
        num_channels = layer_config["NUM_CHANNELS"]
        block = blocks_dict[layer_config["BLOCK"]]

        modules = []
        for i in range(num_modules):
            # multi_scale_output is only used by the last module
            if not multi_scale_output and i == num_modules - 1:
                reset_multi_scale_output = False
            else:
                reset_multi_scale_output = True

            modules.append(
                HighResolutionModule(
                    num_branches,
                    block,
                    num_blocks,
                    num_inchannels,
                    num_channels,
                    reset_multi_scale_output,
                )
            )
            num_inchannels = modules[-1].get_num_inchannels()

        return nn.Sequential(*modules), num_inchannels

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.layer1(x)

        x_list = []
        for i in range(self.stage2_cfg.NUM_BRANCHES):
            if self.transition1[i] is not None:
                x_list.append(self.transition1[i](x))
            else:
                x_list.append(x)
        y_list = self.stage2(x_list)

        x_list = []
        for i in range(self.stage3_cfg.NUM_BRANCHES):
            if self.transition2[i] is not None:
                x_list.append(self.transition2[i](y_list[-1]))
            else:
                x_list.append(y_list[i])
        y_list = self.stage3(x_list)

        x_list = []
        for i in range(self.stage4_cfg.NUM_BRANCHES):
            if self.transition3[i] is not None:
                x_list.append(self.transition3[i](y_list[-1]))
            else:
                x_list.append(y_list[i])
        y_list = self.stage4(x_list)

        assert len(self._out_features) == len(y_list)
        return dict(zip(self._out_features, y_list))  # final_outputs


@BACKBONE_REGISTRY.register()
def build_pose_hrnet_backbone(cfg, input_shape: ShapeSpec):
    model = PoseHigherResolutionNet(cfg)
    return model
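A self-contained sanity check for the residual blocks above (no cfg required); the downsample branch matches the expansion = 4 channel growth of Bottleneck:

import torch
import torch.nn as nn

downsample = nn.Sequential(
    nn.Conv2d(64, 256, kernel_size=1, bias=False),  # match 64 -> 64 * expansion
    nn.BatchNorm2d(256),
)
block = Bottleneck(inplanes=64, planes=64, downsample=downsample)
y = block(torch.randn(1, 64, 56, 56))
print(y.shape)  # torch.Size([1, 256, 56, 56])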
Leffa/densepose/modeling/inference.py
ADDED
@@ -0,0 +1,44 @@
# Copyright (c) Facebook, Inc. and its affiliates.
from dataclasses import fields
from typing import Any, List
import torch

from detectron2.structures import Instances


def densepose_inference(densepose_predictor_output: Any, detections: List[Instances]) -> None:
    """
    Splits DensePose predictor outputs into chunks, each chunk corresponds to
    detections on one image. Predictor output chunks are stored in the
    `pred_densepose` attribute of the corresponding `Instances` object.

    Args:
        densepose_predictor_output: a dataclass instance (can be of different types,
            depending on predictor used for inference). Each field can be `None`
            (if the corresponding output was not inferred) or a tensor of size
            [N, ...], where N = N_1 + N_2 + .. + N_k is the total number of
            detections on all images, N_1 is the number of detections on image 1,
            N_2 is the number of detections on image 2, etc.
        detections: a list of objects of type `Instances`, k-th object corresponds
            to detections on k-th image.
    """
    k = 0
    for detection_i in detections:
        if densepose_predictor_output is None:
            # don't add `pred_densepose` attribute
            continue
        n_i = len(detection_i)

        PredictorOutput = type(densepose_predictor_output)
        output_i_dict = {}
        # we assume here that `densepose_predictor_output` is a dataclass object
        for field in fields(densepose_predictor_output):
            field_value = getattr(densepose_predictor_output, field.name)
            # slice tensors
            if isinstance(field_value, torch.Tensor):
                output_i_dict[field.name] = field_value[k : k + n_i]
            # leave others as is
            else:
                output_i_dict[field.name] = field_value
        detection_i.pred_densepose = PredictorOutput(**output_i_dict)
        k += n_i
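
To make the slicing behaviour concrete, here is a small self-contained sketch (the `ToyOutput` dataclass and the box counts are made up for illustration): a packed output with N = 3 + 2 rows is split so that each `Instances` object receives its own chunk.

from dataclasses import dataclass
import torch
from detectron2.structures import Boxes, Instances


@dataclass
class ToyOutput:  # hypothetical stand-in for a real predictor output dataclass
    u: torch.Tensor


# two images with 3 and 2 detections -> packed output has N = 5 rows
det1 = Instances((480, 640)); det1.pred_boxes = Boxes(torch.zeros(3, 4))
det2 = Instances((480, 640)); det2.pred_boxes = Boxes(torch.zeros(2, 4))
packed = ToyOutput(u=torch.arange(5).float().view(5, 1, 1, 1))

densepose_inference(packed, [det1, det2])
assert det1.pred_densepose.u.shape[0] == 3  # rows 0..2
assert det2.pred_densepose.u.shape[0] == 2  # rows 3..4
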
Leffa/densepose/modeling/losses/__init__.py
ADDED
@@ -0,0 +1,14 @@
# Copyright (c) Facebook, Inc. and its affiliates.

from .chart import DensePoseChartLoss
from .chart_with_confidences import DensePoseChartWithConfidenceLoss
from .cse import DensePoseCseLoss
from .registry import DENSEPOSE_LOSS_REGISTRY


__all__ = [
    "DensePoseChartLoss",
    "DensePoseChartWithConfidenceLoss",
    "DensePoseCseLoss",
    "DENSEPOSE_LOSS_REGISTRY",
]
Leffa/densepose/modeling/losses/chart.py
ADDED
@@ -0,0 +1,291 @@
# Copyright (c) Facebook, Inc. and its affiliates.

from typing import Any, List
import torch
from torch.nn import functional as F

from detectron2.config import CfgNode
from detectron2.structures import Instances

from .mask_or_segm import MaskOrSegmentationLoss
from .registry import DENSEPOSE_LOSS_REGISTRY
from .utils import (
    BilinearInterpolationHelper,
    ChartBasedAnnotationsAccumulator,
    LossDict,
    extract_packed_annotations_from_matches,
)


@DENSEPOSE_LOSS_REGISTRY.register()
class DensePoseChartLoss:
    """
    DensePose loss for chart-based training. A mesh is split into charts,
    each chart is given a label (I) and parametrized by 2 coordinates referred to
    as U and V. Ground truth consists of a number of points annotated with
    I, U and V values and coarse segmentation S defined for all pixels of the
    object bounding box. In some cases (see `COARSE_SEGM_TRAINED_BY_MASKS`),
    semantic segmentation annotations can be used as ground truth inputs as well.

    Estimated values are tensors:
     * U coordinates, tensor of shape [N, C, S, S]
     * V coordinates, tensor of shape [N, C, S, S]
     * fine segmentation estimates, tensor of shape [N, C, S, S] with raw unnormalized
       scores for each fine segmentation label at each location
     * coarse segmentation estimates, tensor of shape [N, D, S, S] with raw unnormalized
       scores for each coarse segmentation label at each location
    where N is the number of detections, C is the number of fine segmentation
    labels, S is the estimate size ( = width = height) and D is the number of
    coarse segmentation channels.

    The losses are:
    * regression (smooth L1) loss for U and V coordinates
    * cross entropy loss for fine (I) and coarse (S) segmentations
    Each loss has an associated weight
    """

    def __init__(self, cfg: CfgNode):
        """
        Initialize chart-based loss from configuration options

        Args:
            cfg (CfgNode): configuration options
        """
        # fmt: off
        self.heatmap_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE
        self.w_points     = cfg.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS
        self.w_part       = cfg.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS
        self.w_segm       = cfg.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS
        self.n_segm_chan  = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
        # fmt: on
        self.segm_trained_by_masks = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS
        self.segm_loss = MaskOrSegmentationLoss(cfg)

    def __call__(
        self, proposals_with_gt: List[Instances], densepose_predictor_outputs: Any, **kwargs
    ) -> LossDict:
        """
        Produce chart-based DensePose losses

        Args:
            proposals_with_gt (list of Instances): detections with associated ground truth data
            densepose_predictor_outputs: an object of a dataclass that contains predictor outputs
                with estimated values; assumed to have the following attributes:
                * coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S]
                * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
                * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
                * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
            where N is the number of detections, C is the number of fine segmentation
            labels, S is the estimate size ( = width = height) and D is the number of
            coarse segmentation channels.

        Return:
            dict: str -> tensor: dict of losses with the following entries:
             * `loss_densepose_U`: smooth L1 loss for U coordinate estimates
             * `loss_densepose_V`: smooth L1 loss for V coordinate estimates
             * `loss_densepose_I`: cross entropy for raw unnormalized scores for fine
               segmentation estimates given ground truth labels;
             * `loss_densepose_S`: cross entropy for raw unnormalized scores for coarse
               segmentation estimates given ground truth labels;
        """
        # densepose outputs are computed for all images and all bounding boxes;
        # i.e. if a batch has 4 images with (3, 1, 2, 1) proposals respectively,
        # the outputs will have size(0) == 3+1+2+1 == 7

        if not len(proposals_with_gt):
            return self.produce_fake_densepose_losses(densepose_predictor_outputs)

        accumulator = ChartBasedAnnotationsAccumulator()
        packed_annotations = extract_packed_annotations_from_matches(proposals_with_gt, accumulator)

        # NOTE: we need to keep the same computation graph on all the GPUs to
        # perform reduction properly. Hence even if we have no data on one
        # of the GPUs, we still need to generate the computation graph.
        # Add fake (zero) loss in the form Tensor.sum() * 0
        if packed_annotations is None:
            return self.produce_fake_densepose_losses(densepose_predictor_outputs)

        h, w = densepose_predictor_outputs.u.shape[2:]
        interpolator = BilinearInterpolationHelper.from_matches(
            packed_annotations,
            (h, w),
        )

        j_valid_fg = interpolator.j_valid * (  # pyre-ignore[16]
            packed_annotations.fine_segm_labels_gt > 0
        )
        # pyre-fixme[6]: For 1st param expected `Tensor` but got `int`.
        if not torch.any(j_valid_fg):
            return self.produce_fake_densepose_losses(densepose_predictor_outputs)

        losses_uv = self.produce_densepose_losses_uv(
            proposals_with_gt,
            densepose_predictor_outputs,
            packed_annotations,
            interpolator,
            j_valid_fg,  # pyre-ignore[6]
        )

        losses_segm = self.produce_densepose_losses_segm(
            proposals_with_gt,
            densepose_predictor_outputs,
            packed_annotations,
            interpolator,
            j_valid_fg,  # pyre-ignore[6]
        )

        return {**losses_uv, **losses_segm}

    def produce_fake_densepose_losses(self, densepose_predictor_outputs: Any) -> LossDict:
        """
        Fake losses for fine segmentation and U/V coordinates. These are used when
        no suitable ground truth data was found in a batch. The loss has a value 0
        and is primarily used to construct the computation graph, so that
        `DistributedDataParallel` has similar graphs on all GPUs and can perform
        reduction properly.

        Args:
            densepose_predictor_outputs: DensePose predictor outputs, an object
                of a dataclass that is assumed to have the following attributes:
             * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
             * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
             * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
        Return:
            dict: str -> tensor: dict of losses with the following entries:
             * `loss_densepose_U`: has value 0
             * `loss_densepose_V`: has value 0
             * `loss_densepose_I`: has value 0
             * `loss_densepose_S`: has value 0
        """
        losses_uv = self.produce_fake_densepose_losses_uv(densepose_predictor_outputs)
        losses_segm = self.produce_fake_densepose_losses_segm(densepose_predictor_outputs)
        return {**losses_uv, **losses_segm}

    def produce_fake_densepose_losses_uv(self, densepose_predictor_outputs: Any) -> LossDict:
        """
        Fake losses for U/V coordinates. These are used when no suitable ground
        truth data was found in a batch. The loss has a value 0
        and is primarily used to construct the computation graph, so that
        `DistributedDataParallel` has similar graphs on all GPUs and can perform
        reduction properly.

        Args:
            densepose_predictor_outputs: DensePose predictor outputs, an object
                of a dataclass that is assumed to have the following attributes:
             * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
             * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
        Return:
            dict: str -> tensor: dict of losses with the following entries:
             * `loss_densepose_U`: has value 0
             * `loss_densepose_V`: has value 0
        """
        return {
            "loss_densepose_U": densepose_predictor_outputs.u.sum() * 0,
            "loss_densepose_V": densepose_predictor_outputs.v.sum() * 0,
        }

    def produce_fake_densepose_losses_segm(self, densepose_predictor_outputs: Any) -> LossDict:
        """
        Fake losses for fine / coarse segmentation. These are used when
        no suitable ground truth data was found in a batch. The loss has a value 0
        and is primarily used to construct the computation graph, so that
        `DistributedDataParallel` has similar graphs on all GPUs and can perform
        reduction properly.

        Args:
            densepose_predictor_outputs: DensePose predictor outputs, an object
                of a dataclass that is assumed to have the following attributes:
             * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
             * coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S]
        Return:
            dict: str -> tensor: dict of losses with the following entries:
             * `loss_densepose_I`: has value 0
             * `loss_densepose_S`: has value 0, added only if `segm_trained_by_masks` is False
        """
        losses = {
            "loss_densepose_I": densepose_predictor_outputs.fine_segm.sum() * 0,
            "loss_densepose_S": self.segm_loss.fake_value(densepose_predictor_outputs),
        }
        return losses

    def produce_densepose_losses_uv(
        self,
        proposals_with_gt: List[Instances],
        densepose_predictor_outputs: Any,
        packed_annotations: Any,
        interpolator: BilinearInterpolationHelper,
        j_valid_fg: torch.Tensor,
    ) -> LossDict:
        """
        Compute losses for U/V coordinates: smooth L1 loss between
        estimated coordinates and the ground truth.

        Args:
            proposals_with_gt (list of Instances): detections with associated ground truth data
            densepose_predictor_outputs: DensePose predictor outputs, an object
                of a dataclass that is assumed to have the following attributes:
             * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
             * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
        Return:
            dict: str -> tensor: dict of losses with the following entries:
             * `loss_densepose_U`: smooth L1 loss for U coordinate estimates
             * `loss_densepose_V`: smooth L1 loss for V coordinate estimates
        """
        u_gt = packed_annotations.u_gt[j_valid_fg]
        u_est = interpolator.extract_at_points(densepose_predictor_outputs.u)[j_valid_fg]
        v_gt = packed_annotations.v_gt[j_valid_fg]
        v_est = interpolator.extract_at_points(densepose_predictor_outputs.v)[j_valid_fg]
        return {
            "loss_densepose_U": F.smooth_l1_loss(u_est, u_gt, reduction="sum") * self.w_points,
            "loss_densepose_V": F.smooth_l1_loss(v_est, v_gt, reduction="sum") * self.w_points,
        }

    def produce_densepose_losses_segm(
        self,
        proposals_with_gt: List[Instances],
        densepose_predictor_outputs: Any,
        packed_annotations: Any,
        interpolator: BilinearInterpolationHelper,
        j_valid_fg: torch.Tensor,
    ) -> LossDict:
        """
        Losses for fine / coarse segmentation: cross-entropy
        for segmentation unnormalized scores given ground truth labels at
        annotated points for fine segmentation and dense mask annotations
        for coarse segmentation.

        Args:
            proposals_with_gt (list of Instances): detections with associated ground truth data
            densepose_predictor_outputs: DensePose predictor outputs, an object
                of a dataclass that is assumed to have the following attributes:
             * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
             * coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S]
        Return:
            dict: str -> tensor: dict of losses with the following entries:
             * `loss_densepose_I`: cross entropy for raw unnormalized scores for fine
               segmentation estimates given ground truth labels
             * `loss_densepose_S`: cross entropy for raw unnormalized scores for coarse
               segmentation estimates given ground truth labels;
               may be included if coarse segmentation is only trained
               using DensePose ground truth; if additional supervision through
               instance segmentation data is performed (`segm_trained_by_masks` is True),
               this loss is handled by `produce_mask_losses` instead
        """
        fine_segm_gt = packed_annotations.fine_segm_labels_gt[
            interpolator.j_valid  # pyre-ignore[16]
        ]
        fine_segm_est = interpolator.extract_at_points(
            densepose_predictor_outputs.fine_segm,
            slice_fine_segm=slice(None),
            w_ylo_xlo=interpolator.w_ylo_xlo[:, None],  # pyre-ignore[16]
            w_ylo_xhi=interpolator.w_ylo_xhi[:, None],  # pyre-ignore[16]
            w_yhi_xlo=interpolator.w_yhi_xlo[:, None],  # pyre-ignore[16]
            w_yhi_xhi=interpolator.w_yhi_xhi[:, None],  # pyre-ignore[16]
        )[interpolator.j_valid, :]
        return {
            "loss_densepose_I": F.cross_entropy(fine_segm_est, fine_segm_gt.long()) * self.w_part,
            "loss_densepose_S": self.segm_loss(
                proposals_with_gt, densepose_predictor_outputs, packed_annotations
            )
            * self.w_segm,
        }
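
The `produce_fake_densepose_losses*` docstrings above rely on a small but important trick worth spelling out: `tensor.sum() * 0` is a zero-valued loss that still participates in autograd, so `DistributedDataParallel` sees the same computation graph on every replica even when a batch carries no usable ground truth. A standalone illustration (shapes are arbitrary example values):

import torch

pred = torch.randn(2, 25, 112, 112, requires_grad=True)  # e.g. fine_segm scores
fake_loss = pred.sum() * 0   # value 0, but the graph still reaches `pred`
fake_loss.backward()
assert pred.grad is not None and torch.all(pred.grad == 0)
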
Leffa/densepose/modeling/losses/embed_utils.py
ADDED
@@ -0,0 +1,137 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

from dataclasses import dataclass
from typing import Any, Optional
import torch

from detectron2.structures import BoxMode, Instances

from .utils import AnnotationsAccumulator


@dataclass
class PackedCseAnnotations:
    x_gt: torch.Tensor
    y_gt: torch.Tensor
    coarse_segm_gt: Optional[torch.Tensor]
    vertex_mesh_ids_gt: torch.Tensor
    vertex_ids_gt: torch.Tensor
    bbox_xywh_gt: torch.Tensor
    bbox_xywh_est: torch.Tensor
    point_bbox_with_dp_indices: torch.Tensor
    point_bbox_indices: torch.Tensor
    bbox_indices: torch.Tensor


class CseAnnotationsAccumulator(AnnotationsAccumulator):
    """
    Accumulates annotations by batches that correspond to objects detected on
    individual images. Can pack them together into single tensors.
    """

    def __init__(self):
        self.x_gt = []
        self.y_gt = []
        self.s_gt = []
        self.vertex_mesh_ids_gt = []
        self.vertex_ids_gt = []
        self.bbox_xywh_gt = []
        self.bbox_xywh_est = []
        self.point_bbox_with_dp_indices = []
        self.point_bbox_indices = []
        self.bbox_indices = []
        self.nxt_bbox_with_dp_index = 0
        self.nxt_bbox_index = 0

    def accumulate(self, instances_one_image: Instances):
        """
        Accumulate instances data for one image

        Args:
            instances_one_image (Instances): instances data to accumulate
        """
        boxes_xywh_est = BoxMode.convert(
            instances_one_image.proposal_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
        )
        boxes_xywh_gt = BoxMode.convert(
            instances_one_image.gt_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
        )
        n_matches = len(boxes_xywh_gt)
        assert n_matches == len(
            boxes_xywh_est
        ), f"Got {len(boxes_xywh_est)} proposal boxes and {len(boxes_xywh_gt)} GT boxes"
        if not n_matches:
            # no detection - GT matches
            return
        if (
            not hasattr(instances_one_image, "gt_densepose")
            or instances_one_image.gt_densepose is None
        ):
            # no densepose GT for the detections, just increase the bbox index
            self.nxt_bbox_index += n_matches
            return
        for box_xywh_est, box_xywh_gt, dp_gt in zip(
            boxes_xywh_est, boxes_xywh_gt, instances_one_image.gt_densepose
        ):
            if (dp_gt is not None) and (len(dp_gt.x) > 0):
                # pyre-fixme[6]: For 1st argument expected `Tensor` but got `float`.
                # pyre-fixme[6]: For 2nd argument expected `Tensor` but got `float`.
                self._do_accumulate(box_xywh_gt, box_xywh_est, dp_gt)
            self.nxt_bbox_index += 1

    def _do_accumulate(self, box_xywh_gt: torch.Tensor, box_xywh_est: torch.Tensor, dp_gt: Any):
        """
        Accumulate instances data for one image, given that the data is not empty

        Args:
            box_xywh_gt (tensor): GT bounding box
            box_xywh_est (tensor): estimated bounding box
            dp_gt: GT densepose data with the following attributes:
             - x: normalized X coordinates
             - y: normalized Y coordinates
             - segm: tensor of size [S, S] with coarse segmentation
             - vertex_ids: GT vertex indices
             - mesh_id: GT mesh id
        """
        self.x_gt.append(dp_gt.x)
        self.y_gt.append(dp_gt.y)
        if hasattr(dp_gt, "segm"):
            self.s_gt.append(dp_gt.segm.unsqueeze(0))
        self.vertex_ids_gt.append(dp_gt.vertex_ids)
        self.vertex_mesh_ids_gt.append(torch.full_like(dp_gt.vertex_ids, dp_gt.mesh_id))
        self.bbox_xywh_gt.append(box_xywh_gt.view(-1, 4))
        self.bbox_xywh_est.append(box_xywh_est.view(-1, 4))
        self.point_bbox_with_dp_indices.append(
            torch.full_like(dp_gt.vertex_ids, self.nxt_bbox_with_dp_index)
        )
        self.point_bbox_indices.append(torch.full_like(dp_gt.vertex_ids, self.nxt_bbox_index))
        self.bbox_indices.append(self.nxt_bbox_index)
        self.nxt_bbox_with_dp_index += 1

    def pack(self) -> Optional[PackedCseAnnotations]:
        """
        Pack data into tensors
        """
        if not len(self.x_gt):
            # TODO:
            # returning proper empty annotations would require
            # creating empty tensors of appropriate shape and
            # type on an appropriate device;
            # we return None so far to indicate empty annotations
            return None
        return PackedCseAnnotations(
            x_gt=torch.cat(self.x_gt, 0),
            y_gt=torch.cat(self.y_gt, 0),
            vertex_mesh_ids_gt=torch.cat(self.vertex_mesh_ids_gt, 0),
            vertex_ids_gt=torch.cat(self.vertex_ids_gt, 0),
            # ignore segmentation annotations, if not all the instances contain those
            coarse_segm_gt=torch.cat(self.s_gt, 0)
            if len(self.s_gt) == len(self.bbox_xywh_gt)
            else None,
            bbox_xywh_gt=torch.cat(self.bbox_xywh_gt, 0),
            bbox_xywh_est=torch.cat(self.bbox_xywh_est, 0),
            point_bbox_with_dp_indices=torch.cat(self.point_bbox_with_dp_indices, 0),
            point_bbox_indices=torch.cat(self.point_bbox_indices, 0),
            bbox_indices=torch.as_tensor(
                self.bbox_indices, dtype=torch.long, device=self.x_gt[0].device
            ),
        )
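
The accumulator converts boxes from detectron2's native XYXY format to XYWH before storing them, which is the layout the packed annotations expose downstream. A tiny runnable sketch of that conversion (with made-up coordinates):

import torch
from detectron2.structures import BoxMode

boxes_xyxy = torch.tensor([[10.0, 20.0, 50.0, 80.0]])
boxes_xywh = BoxMode.convert(boxes_xyxy.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
print(boxes_xywh)  # tensor([[10., 20., 40., 60.]]): width = x2 - x1, height = y2 - y1
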
Leffa/densepose/modeling/losses/mask_or_segm.py
ADDED
@@ -0,0 +1,77 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

from typing import Any, List

import torch

from detectron2.config import CfgNode
from detectron2.structures import Instances

from .mask import MaskLoss
from .segm import SegmentationLoss


class MaskOrSegmentationLoss:
    """
    Mask or segmentation loss as cross-entropy for raw unnormalized scores
    given ground truth labels. Ground truth labels are either defined by coarse
    segmentation annotation, or by mask annotation, depending on the config
    value MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS
    """

    def __init__(self, cfg: CfgNode):
        """
        Initialize segmentation loss from configuration options

        Args:
            cfg (CfgNode): configuration options
        """
        self.segm_trained_by_masks = (
            cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS
        )
        if self.segm_trained_by_masks:
            self.mask_loss = MaskLoss()
        self.segm_loss = SegmentationLoss(cfg)

    def __call__(
        self,
        proposals_with_gt: List[Instances],
        densepose_predictor_outputs: Any,
        packed_annotations: Any,
    ) -> torch.Tensor:
        """
        Compute segmentation loss as cross-entropy between aligned unnormalized
        score estimates and ground truth; with ground truth given
        either by masks, or by coarse segmentation annotations.

        Args:
            proposals_with_gt (list of Instances): detections with associated ground truth data
            densepose_predictor_outputs: an object of a dataclass that contains predictor outputs
                with estimated values; assumed to have the following attributes:
                * coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S]
            packed_annotations: packed annotations for efficient loss computation
        Return:
            tensor: loss value as cross-entropy for raw unnormalized scores
                given ground truth labels
        """
        if self.segm_trained_by_masks:
            return self.mask_loss(proposals_with_gt, densepose_predictor_outputs)
        return self.segm_loss(
            proposals_with_gt, densepose_predictor_outputs, packed_annotations
        )

    def fake_value(self, densepose_predictor_outputs: Any) -> torch.Tensor:
        """
        Fake segmentation loss used when no suitable ground truth data
        was found in a batch. The loss has a value 0 and is primarily used to
        construct the computation graph, so that `DistributedDataParallel`
        has similar graphs on all GPUs and can perform reduction properly.

        Args:
            densepose_predictor_outputs: DensePose predictor outputs, an object
                of a dataclass that is assumed to have `coarse_segm`
                attribute
        Return:
            Zero value loss with proper computation graph
        """
        return densepose_predictor_outputs.coarse_segm.sum() * 0
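
How the dispatch is selected in practice, as a minimal sketch (assuming the DensePose config extensions have been applied to `cfg`; the flag name is taken from the class docstring above):

from detectron2.config import get_cfg
from densepose import add_densepose_config

cfg = get_cfg()
add_densepose_config(cfg)  # adds the ROI_DENSEPOSE_HEAD options
cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS = True

loss = MaskOrSegmentationLoss(cfg)
# __call__ now routes to MaskLoss; with the flag set to False it routes to SegmentationLoss
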
Leffa/densepose/modeling/predictors/__init__.py
ADDED
@@ -0,0 +1,9 @@
# Copyright (c) Facebook, Inc. and its affiliates.

from .chart import DensePoseChartPredictor
from .chart_confidence import DensePoseChartConfidencePredictorMixin
from .chart_with_confidence import DensePoseChartWithConfidencePredictor
from .cse import DensePoseEmbeddingPredictor
from .cse_confidence import DensePoseEmbeddingConfidencePredictorMixin
from .cse_with_confidence import DensePoseEmbeddingWithConfidencePredictor
from .registry import DENSEPOSE_PREDICTOR_REGISTRY
Leffa/densepose/modeling/predictors/chart.py
ADDED
@@ -0,0 +1,94 @@
# Copyright (c) Facebook, Inc. and its affiliates.

import torch
from torch import nn

from detectron2.config import CfgNode
from detectron2.layers import ConvTranspose2d, interpolate

from ...structures import DensePoseChartPredictorOutput
from ..utils import initialize_module_params
from .registry import DENSEPOSE_PREDICTOR_REGISTRY


@DENSEPOSE_PREDICTOR_REGISTRY.register()
class DensePoseChartPredictor(nn.Module):
    """
    Predictor (last layers of a DensePose model) that takes DensePose head outputs as an input
    and produces 4 tensors which represent DensePose results for predefined body parts
    (patches / charts):
     * coarse segmentation, a tensor of shape [N, K, Hout, Wout]
     * fine segmentation, a tensor of shape [N, C, Hout, Wout]
     * U coordinates, a tensor of shape [N, C, Hout, Wout]
     * V coordinates, a tensor of shape [N, C, Hout, Wout]
    where
     - N is the number of instances
     - K is the number of coarse segmentation channels (
         2 = foreground / background,
         15 = one of 14 body parts / background)
     - C is the number of fine segmentation channels (
         24 fine body parts / background)
     - Hout and Wout are height and width of predictions
    """

    def __init__(self, cfg: CfgNode, input_channels: int):
        """
        Initialize predictor using configuration options

        Args:
            cfg (CfgNode): configuration options
            input_channels (int): input tensor size along the channel dimension
        """
        super().__init__()
        dim_in = input_channels
        n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
        dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1
        kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
        # coarse segmentation
        self.ann_index_lowres = ConvTranspose2d(
            dim_in, n_segm_chan, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
        )
        # fine segmentation
        self.index_uv_lowres = ConvTranspose2d(
            dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
        )
        # U
        self.u_lowres = ConvTranspose2d(
            dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
        )
        # V
        self.v_lowres = ConvTranspose2d(
            dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
        )
        self.scale_factor = cfg.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE
        initialize_module_params(self)

    def interp2d(self, tensor_nchw: torch.Tensor):
        """
        Bilinear interpolation method to be used for upscaling

        Args:
            tensor_nchw (tensor): tensor of shape (N, C, H, W)
        Return:
            tensor of shape (N, C, Hout, Wout), where Hout and Wout are computed
            by applying the scale factor to H and W
        """
        return interpolate(
            tensor_nchw, scale_factor=self.scale_factor, mode="bilinear", align_corners=False
        )

    def forward(self, head_outputs: torch.Tensor):
        """
        Perform forward step on DensePose head outputs

        Args:
            head_outputs (tensor): DensePose head outputs, tensor of shape [N, D, H, W]
        Return:
            An instance of DensePoseChartPredictorOutput
        """
        return DensePoseChartPredictorOutput(
            coarse_segm=self.interp2d(self.ann_index_lowres(head_outputs)),
            fine_segm=self.interp2d(self.index_uv_lowres(head_outputs)),
            u=self.interp2d(self.u_lowres(head_outputs)),
            v=self.interp2d(self.v_lowres(head_outputs)),
        )
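
The predictor upscales in two steps: a stride-2 transposed convolution roughly doubles the spatial size, then `interp2d` applies `UP_SCALE`. The following standalone sketch traces the shapes; the 512 input channels, kernel size 4, and UP_SCALE = 2 are assumed example values, and 25 channels correspond to 24 fine parts plus background as in the docstring above:

import torch
from torch import nn
from torch.nn import functional as F

deconv = nn.ConvTranspose2d(512, 25, kernel_size=4, stride=2, padding=1)  # padding = 4 / 2 - 1
head_outputs = torch.randn(1, 512, 14, 14)
lowres = deconv(head_outputs)            # shape [1, 25, 28, 28]: (14 - 1) * 2 - 2 * 1 + 4 = 28
pred = F.interpolate(lowres, scale_factor=2, mode="bilinear", align_corners=False)
print(pred.shape)                        # torch.Size([1, 25, 56, 56])
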
Leffa/densepose/modeling/predictors/chart_confidence.py
ADDED
@@ -0,0 +1,174 @@
# Copyright (c) Facebook, Inc. and its affiliates.

from typing import Any
import torch
from torch.nn import functional as F

from detectron2.config import CfgNode
from detectron2.layers import ConvTranspose2d

from ...structures import decorate_predictor_output_class_with_confidences
from ..confidence import DensePoseConfidenceModelConfig, DensePoseUVConfidenceType
from ..utils import initialize_module_params


class DensePoseChartConfidencePredictorMixin:
    """
    Predictor contains the last layers of a DensePose model that take DensePose head
    outputs as an input and produce model outputs. Confidence predictor mixin is used
    to generate confidences for segmentation and UV tensors estimated by some
    base predictor. Several assumptions need to hold for the base predictor:
    1) the `forward` method must return SIUV tuple as the first result (
        S = coarse segmentation, I = fine segmentation, U and V are intrinsic
        chart coordinates)
    2) `interp2d` method must be defined to perform bilinear interpolation;
        the same method is typically used for SIUV and confidences
    Confidence predictor mixin provides confidence estimates, as described in:
    N. Neverova et al., Correlated Uncertainty for Learning Dense Correspondences
        from Noisy Labels, NeurIPS 2019
    A. Sanakoyeu et al., Transferring Dense Pose to Proximal Animal Classes, CVPR 2020
    """

    def __init__(self, cfg: CfgNode, input_channels: int):
        """
        Initialize confidence predictor using configuration options.

        Args:
            cfg (CfgNode): configuration options
            input_channels (int): number of input channels
        """
        # we rely on base predictor to call nn.Module.__init__
        super().__init__(cfg, input_channels)  # pyre-ignore[19]
        self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg)
        self._initialize_confidence_estimation_layers(cfg, input_channels)
        self._registry = {}
        initialize_module_params(self)  # pyre-ignore[6]

    def _initialize_confidence_estimation_layers(self, cfg: CfgNode, dim_in: int):
        """
        Initialize confidence estimation layers based on configuration options

        Args:
            cfg (CfgNode): configuration options
            dim_in (int): number of input channels
        """
        dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1
        kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
        if self.confidence_model_cfg.uv_confidence.enabled:
            if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO:
                self.sigma_2_lowres = ConvTranspose2d(  # pyre-ignore[16]
                    dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
                )
            elif (
                self.confidence_model_cfg.uv_confidence.type
                == DensePoseUVConfidenceType.INDEP_ANISO
            ):
                self.sigma_2_lowres = ConvTranspose2d(
                    dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
                )
                self.kappa_u_lowres = ConvTranspose2d(  # pyre-ignore[16]
                    dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
                )
                self.kappa_v_lowres = ConvTranspose2d(  # pyre-ignore[16]
                    dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
                )
            else:
                raise ValueError(
                    f"Unknown confidence model type: "
                    f"{self.confidence_model_cfg.confidence_model_type}"
                )
        if self.confidence_model_cfg.segm_confidence.enabled:
            self.fine_segm_confidence_lowres = ConvTranspose2d(  # pyre-ignore[16]
                dim_in, 1, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
            )
            self.coarse_segm_confidence_lowres = ConvTranspose2d(  # pyre-ignore[16]
                dim_in, 1, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
            )

    def forward(self, head_outputs: torch.Tensor):
        """
        Perform forward operation on head outputs used as inputs for the predictor.
        Calls forward method from the base predictor and uses its outputs to compute
        confidences.

        Args:
            head_outputs (Tensor): head outputs used as predictor inputs
        Return:
            An instance of outputs with confidences,
            see `decorate_predictor_output_class_with_confidences`
        """
        # assuming base class returns SIUV estimates in its first result
        base_predictor_outputs = super().forward(head_outputs)  # pyre-ignore[16]

        # create output instance by extending base predictor outputs:
        output = self._create_output_instance(base_predictor_outputs)

        if self.confidence_model_cfg.uv_confidence.enabled:
            if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO:
                # assuming base class defines interp2d method for bilinear interpolation
                output.sigma_2 = self.interp2d(self.sigma_2_lowres(head_outputs))  # pyre-ignore[16]
            elif (
                self.confidence_model_cfg.uv_confidence.type
                == DensePoseUVConfidenceType.INDEP_ANISO
            ):
                # assuming base class defines interp2d method for bilinear interpolation
                output.sigma_2 = self.interp2d(self.sigma_2_lowres(head_outputs))
                output.kappa_u = self.interp2d(self.kappa_u_lowres(head_outputs))  # pyre-ignore[16]
                output.kappa_v = self.interp2d(self.kappa_v_lowres(head_outputs))  # pyre-ignore[16]
            else:
                raise ValueError(
                    f"Unknown confidence model type: "
                    f"{self.confidence_model_cfg.confidence_model_type}"
                )
        if self.confidence_model_cfg.segm_confidence.enabled:
            # base predictor outputs are assumed to have `fine_segm` and `coarse_segm` attributes
            # base predictor is assumed to define `interp2d` method for bilinear interpolation
            output.fine_segm_confidence = (
                F.softplus(
                    self.interp2d(self.fine_segm_confidence_lowres(head_outputs))  # pyre-ignore[16]
                )
                + self.confidence_model_cfg.segm_confidence.epsilon
            )
            output.fine_segm = base_predictor_outputs.fine_segm * torch.repeat_interleave(
                output.fine_segm_confidence, base_predictor_outputs.fine_segm.shape[1], dim=1
            )
            output.coarse_segm_confidence = (
                F.softplus(
                    self.interp2d(
                        self.coarse_segm_confidence_lowres(head_outputs)  # pyre-ignore[16]
                    )
                )
                + self.confidence_model_cfg.segm_confidence.epsilon
            )
            output.coarse_segm = base_predictor_outputs.coarse_segm * torch.repeat_interleave(
                output.coarse_segm_confidence, base_predictor_outputs.coarse_segm.shape[1], dim=1
            )

        return output

    def _create_output_instance(self, base_predictor_outputs: Any):
        """
        Create an instance of predictor outputs by copying the outputs from the
        base predictor and initializing confidence

        Args:
            base_predictor_outputs: an instance of base predictor outputs
                (the outputs type is assumed to be a dataclass)
        Return:
            An instance of outputs with confidences
        """
        PredictorOutput = decorate_predictor_output_class_with_confidences(
            type(base_predictor_outputs)  # pyre-ignore[6]
        )
        # base_predictor_outputs is assumed to be a dataclass
        # reassign all the fields from base_predictor_outputs (no deep copy!), add new fields
        output = PredictorOutput(
            **base_predictor_outputs.__dict__,
            coarse_segm_confidence=None,
            fine_segm_confidence=None,
            sigma_1=None,
            sigma_2=None,
            kappa_u=None,
            kappa_v=None,
        )
        return output
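
The confidence channels are produced by `softplus(...) + epsilon`, which maps arbitrary raw scores to strictly positive values bounded away from zero. A small numeric illustration (the epsilon value here is an assumed example, not the configured default):

import torch
from torch.nn import functional as F

raw = torch.tensor([-5.0, 0.0, 5.0])
eps = 0.01  # assumed value of segm_confidence.epsilon
conf = F.softplus(raw) + eps
print(conf)  # every entry > eps, and monotonically increasing in the raw score
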
Leffa/densepose/modeling/predictors/chart_with_confidence.py
ADDED
@@ -0,0 +1,15 @@
# Copyright (c) Facebook, Inc. and its affiliates.

from . import DensePoseChartConfidencePredictorMixin, DensePoseChartPredictor
from .registry import DENSEPOSE_PREDICTOR_REGISTRY


@DENSEPOSE_PREDICTOR_REGISTRY.register()
class DensePoseChartWithConfidencePredictor(
    DensePoseChartConfidencePredictorMixin, DensePoseChartPredictor
):
    """
    Predictor that combines chart and chart confidence estimation
    """

    pass
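
The empty class body works because of Python's method resolution order: with the mixin listed first, its `__init__` and `forward` run first and their `super()` calls land in `DensePoseChartPredictor`. A toy illustration of the same pattern (names here are made up):

class Base:
    def forward(self, x):
        return x + 1

class Mixin:
    def forward(self, x):
        y = super().forward(x)  # resolves to Base.forward via the MRO
        return y * 10

class Combined(Mixin, Base):
    pass

print(Combined().forward(1))  # (1 + 1) * 10 = 20
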
Leffa/densepose/modeling/predictors/cse.py
ADDED
@@ -0,0 +1,70 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

import torch
from torch import nn

from detectron2.config import CfgNode
from detectron2.layers import ConvTranspose2d, interpolate

from ...structures import DensePoseEmbeddingPredictorOutput
from ..utils import initialize_module_params
from .registry import DENSEPOSE_PREDICTOR_REGISTRY


@DENSEPOSE_PREDICTOR_REGISTRY.register()
class DensePoseEmbeddingPredictor(nn.Module):
    """
    Last layers of a DensePose model that take DensePose head outputs as an input
    and produce model outputs for continuous surface embeddings (CSE).
    """

    def __init__(self, cfg: CfgNode, input_channels: int):
        """
        Initialize predictor using configuration options

        Args:
            cfg (CfgNode): configuration options
            input_channels (int): input tensor size along the channel dimension
        """
        super().__init__()
        dim_in = input_channels
        n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
        embed_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE
        kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
        # coarse segmentation
        self.coarse_segm_lowres = ConvTranspose2d(
            dim_in, n_segm_chan, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
        )
        # embedding
        self.embed_lowres = ConvTranspose2d(
            dim_in, embed_size, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
        )
        self.scale_factor = cfg.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE
        initialize_module_params(self)

    def interp2d(self, tensor_nchw: torch.Tensor):
        """
        Bilinear interpolation method to be used for upscaling

        Args:
            tensor_nchw (tensor): tensor of shape (N, C, H, W)
        Return:
            tensor of shape (N, C, Hout, Wout), where Hout and Wout are computed
            by applying the scale factor to H and W
        """
        return interpolate(
            tensor_nchw, scale_factor=self.scale_factor, mode="bilinear", align_corners=False
        )

    def forward(self, head_outputs):
        """
        Perform forward step on DensePose head outputs

        Args:
            head_outputs (tensor): DensePose head outputs, tensor of shape [N, D, H, W]
        """
        embed_lowres = self.embed_lowres(head_outputs)
        coarse_segm_lowres = self.coarse_segm_lowres(head_outputs)
        embed = self.interp2d(embed_lowres)
        coarse_segm = self.interp2d(coarse_segm_lowres)
        return DensePoseEmbeddingPredictorOutput(embedding=embed, coarse_segm=coarse_segm)
Leffa/densepose/modeling/predictors/cse_confidence.py
ADDED
@@ -0,0 +1,115 @@
# Copyright (c) Facebook, Inc. and its affiliates.

from typing import Any
import torch
from torch.nn import functional as F

from detectron2.config import CfgNode
from detectron2.layers import ConvTranspose2d

from densepose.modeling.confidence import DensePoseConfidenceModelConfig
from densepose.modeling.utils import initialize_module_params
from densepose.structures import decorate_cse_predictor_output_class_with_confidences


class DensePoseEmbeddingConfidencePredictorMixin:
    """
    Predictor contains the last layers of a DensePose model that take DensePose head
    outputs as an input and produce model outputs. Confidence predictor mixin is used
    to generate confidences for coarse segmentation estimated by some
    base predictor. Several assumptions need to hold for the base predictor:
    1) the `forward` method must return CSE DensePose head outputs,
        tensor of shape [N, D, H, W]
    2) `interp2d` method must be defined to perform bilinear interpolation;
        the same method is typically used for masks and confidences
    Confidence predictor mixin provides confidence estimates, as described in:
    N. Neverova et al., Correlated Uncertainty for Learning Dense Correspondences
        from Noisy Labels, NeurIPS 2019
    A. Sanakoyeu et al., Transferring Dense Pose to Proximal Animal Classes, CVPR 2020
    """

    def __init__(self, cfg: CfgNode, input_channels: int):
        """
        Initialize confidence predictor using configuration options.

        Args:
            cfg (CfgNode): configuration options
            input_channels (int): number of input channels
        """
        # we rely on base predictor to call nn.Module.__init__
        super().__init__(cfg, input_channels)  # pyre-ignore[19]
        self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg)
        self._initialize_confidence_estimation_layers(cfg, input_channels)
        self._registry = {}
        initialize_module_params(self)  # pyre-ignore[6]

    def _initialize_confidence_estimation_layers(self, cfg: CfgNode, dim_in: int):
        """
        Initialize confidence estimation layers based on configuration options

        Args:
            cfg (CfgNode): configuration options
            dim_in (int): number of input channels
        """
        kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
        if self.confidence_model_cfg.segm_confidence.enabled:
            self.coarse_segm_confidence_lowres = ConvTranspose2d(  # pyre-ignore[16]
                dim_in, 1, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
            )

    def forward(self, head_outputs: torch.Tensor):
        """
        Perform forward operation on head outputs used as inputs for the predictor.
        Calls forward method from the base predictor and uses its outputs to compute
        confidences.

        Args:
            head_outputs (Tensor): head outputs used as predictor inputs
        Return:
            An instance of outputs with confidences,
            see `decorate_cse_predictor_output_class_with_confidences`
        """
        # assuming the base class returns CSE predictor outputs in its first result
        base_predictor_outputs = super().forward(head_outputs)  # pyre-ignore[16]

        # create output instance by extending base predictor outputs:
        output = self._create_output_instance(base_predictor_outputs)

        if self.confidence_model_cfg.segm_confidence.enabled:
            # base predictor outputs are assumed to have `coarse_segm` attribute
            # base predictor is assumed to define `interp2d` method for bilinear interpolation
            output.coarse_segm_confidence = (
                F.softplus(
                    self.interp2d(  # pyre-ignore[16]
                        self.coarse_segm_confidence_lowres(head_outputs)  # pyre-ignore[16]
                    )
                )
                + self.confidence_model_cfg.segm_confidence.epsilon
            )
            output.coarse_segm = base_predictor_outputs.coarse_segm * torch.repeat_interleave(
                output.coarse_segm_confidence, base_predictor_outputs.coarse_segm.shape[1], dim=1
            )

        return output

    def _create_output_instance(self, base_predictor_outputs: Any):
        """
        Create an instance of predictor outputs by copying the outputs from the
        base predictor and initializing confidence

        Args:
            base_predictor_outputs: an instance of base predictor outputs
                (the outputs type is assumed to be a dataclass)
        Return:
            An instance of outputs with confidences
        """
        PredictorOutput = decorate_cse_predictor_output_class_with_confidences(
            type(base_predictor_outputs)  # pyre-ignore[6]
        )
        # base_predictor_outputs is assumed to be a dataclass
        # reassign all the fields from base_predictor_outputs (no deep copy!), add new fields
        output = PredictorOutput(
            **base_predictor_outputs.__dict__,
            coarse_segm_confidence=None,
        )
        return output
Leffa/densepose/modeling/predictors/cse_with_confidence.py
ADDED
@@ -0,0 +1,15 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+from . import DensePoseEmbeddingConfidencePredictorMixin, DensePoseEmbeddingPredictor
+from .registry import DENSEPOSE_PREDICTOR_REGISTRY
+
+
+@DENSEPOSE_PREDICTOR_REGISTRY.register()
+class DensePoseEmbeddingWithConfidencePredictor(
+    DensePoseEmbeddingConfidencePredictorMixin, DensePoseEmbeddingPredictor
+):
+    """
+    Predictor that combines CSE and CSE confidence estimation
+    """
+
+    pass
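The file above composes confidence estimation with a base predictor purely through multiple inheritance. Below is a minimal, self-contained sketch (not part of the diff) of that cooperative `super()` pattern: the mixin's `forward` runs first, delegates to the base predictor via the MRO, then decorates the result, which is exactly how the confidence mixin above wraps `DensePoseEmbeddingPredictor`.

```python
# Toy stand-ins for the real predictor classes; illustration only.
class BasePredictor:
    def forward(self, x):
        return {"embedding": x * 2}

class ConfidenceMixin:
    def forward(self, x):
        out = super().forward(x)    # resolves to BasePredictor.forward via the MRO
        out["confidence"] = abs(x)  # then decorates the base output
        return out

class PredictorWithConfidence(ConfidenceMixin, BasePredictor):
    pass

print(PredictorWithConfidence().forward(3))  # {'embedding': 6, 'confidence': 3}
```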
Leffa/densepose/modeling/predictors/registry.py
ADDED
@@ -0,0 +1,5 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+from detectron2.utils.registry import Registry
+
+DENSEPOSE_PREDICTOR_REGISTRY = Registry("DENSEPOSE_PREDICTOR")
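A short sketch (not part of the diff) of how such a detectron2 `Registry` is typically used: classes register themselves by name with a decorator, and a builder later looks them up from a config string (e.g. `cfg.MODEL.ROI_DENSEPOSE_HEAD.PREDICTOR_NAME`).

```python
from detectron2.utils.registry import Registry

DEMO_REGISTRY = Registry("DEMO")  # stands in for DENSEPOSE_PREDICTOR_REGISTRY

@DEMO_REGISTRY.register()
class MyPredictor:
    pass

# A builder resolves the class by its registered name and instantiates it:
cls = DEMO_REGISTRY.get("MyPredictor")
predictor = cls()
```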
Leffa/densepose/modeling/roi_heads/__init__.py
ADDED
@@ -0,0 +1,6 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+from .v1convx import DensePoseV1ConvXHead
+from .deeplab import DensePoseDeepLabHead
+from .registry import ROI_DENSEPOSE_HEAD_REGISTRY
+from .roi_head import Decoder, DensePoseROIHeads
Leffa/densepose/modeling/roi_heads/deeplab.py
ADDED
@@ -0,0 +1,263 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+import fvcore.nn.weight_init as weight_init
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from detectron2.config import CfgNode
+from detectron2.layers import Conv2d
+
+from .registry import ROI_DENSEPOSE_HEAD_REGISTRY
+
+
+@ROI_DENSEPOSE_HEAD_REGISTRY.register()
+class DensePoseDeepLabHead(nn.Module):
+    """
+    DensePose head using DeepLabV3 model from
+    "Rethinking Atrous Convolution for Semantic Image Segmentation"
+    <https://arxiv.org/abs/1706.05587>.
+    """
+
+    def __init__(self, cfg: CfgNode, input_channels: int):
+        super(DensePoseDeepLabHead, self).__init__()
+        # fmt: off
+        hidden_dim = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM
+        kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL
+        norm = cfg.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NORM
+        self.n_stacked_convs = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS
+        self.use_nonlocal = cfg.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NONLOCAL_ON
+        # fmt: on
+        pad_size = kernel_size // 2
+        n_channels = input_channels
+
+        self.ASPP = ASPP(input_channels, [6, 12, 56], n_channels)  # 6, 12, 56
+        self.add_module("ASPP", self.ASPP)
+
+        if self.use_nonlocal:
+            self.NLBlock = NONLocalBlock2D(input_channels, bn_layer=True)
+            self.add_module("NLBlock", self.NLBlock)
+        # weight_init.c2_msra_fill(self.ASPP)
+
+        for i in range(self.n_stacked_convs):
+            norm_module = nn.GroupNorm(32, hidden_dim) if norm == "GN" else None
+            layer = Conv2d(
+                n_channels,
+                hidden_dim,
+                kernel_size,
+                stride=1,
+                padding=pad_size,
+                bias=not norm,
+                norm=norm_module,
+            )
+            weight_init.c2_msra_fill(layer)
+            n_channels = hidden_dim
+            layer_name = self._get_layer_name(i)
+            self.add_module(layer_name, layer)
+        self.n_out_channels = hidden_dim
+        # initialize_module_params(self)
+
+    def forward(self, features):
+        x0 = features
+        x = self.ASPP(x0)
+        if self.use_nonlocal:
+            x = self.NLBlock(x)
+        output = x
+        for i in range(self.n_stacked_convs):
+            layer_name = self._get_layer_name(i)
+            x = getattr(self, layer_name)(x)
+            x = F.relu(x)
+            output = x
+        return output
+
+    def _get_layer_name(self, i: int):
+        layer_name = "body_conv_fcn{}".format(i + 1)
+        return layer_name
+
+
+# Copied from
+# https://github.com/pytorch/vision/blob/master/torchvision/models/segmentation/deeplabv3.py
+# See https://arxiv.org/pdf/1706.05587.pdf for details
+class ASPPConv(nn.Sequential):
+    def __init__(self, in_channels, out_channels, dilation):
+        modules = [
+            nn.Conv2d(
+                in_channels, out_channels, 3, padding=dilation, dilation=dilation, bias=False
+            ),
+            nn.GroupNorm(32, out_channels),
+            nn.ReLU(),
+        ]
+        super(ASPPConv, self).__init__(*modules)
+
+
+class ASPPPooling(nn.Sequential):
+    def __init__(self, in_channels, out_channels):
+        super(ASPPPooling, self).__init__(
+            nn.AdaptiveAvgPool2d(1),
+            nn.Conv2d(in_channels, out_channels, 1, bias=False),
+            nn.GroupNorm(32, out_channels),
+            nn.ReLU(),
+        )
+
+    def forward(self, x):
+        size = x.shape[-2:]
+        x = super(ASPPPooling, self).forward(x)
+        return F.interpolate(x, size=size, mode="bilinear", align_corners=False)
+
+
+class ASPP(nn.Module):
+    def __init__(self, in_channels, atrous_rates, out_channels):
+        super(ASPP, self).__init__()
+        modules = []
+        modules.append(
+            nn.Sequential(
+                nn.Conv2d(in_channels, out_channels, 1, bias=False),
+                nn.GroupNorm(32, out_channels),
+                nn.ReLU(),
+            )
+        )
+
+        rate1, rate2, rate3 = tuple(atrous_rates)
+        modules.append(ASPPConv(in_channels, out_channels, rate1))
+        modules.append(ASPPConv(in_channels, out_channels, rate2))
+        modules.append(ASPPConv(in_channels, out_channels, rate3))
+        modules.append(ASPPPooling(in_channels, out_channels))
+
+        self.convs = nn.ModuleList(modules)
+
+        self.project = nn.Sequential(
+            nn.Conv2d(5 * out_channels, out_channels, 1, bias=False),
+            # nn.BatchNorm2d(out_channels),
+            nn.ReLU()
+            # nn.Dropout(0.5)
+        )
+
+    def forward(self, x):
+        res = []
+        for conv in self.convs:
+            res.append(conv(x))
+        res = torch.cat(res, dim=1)
+        return self.project(res)
+
+
+# copied from
+# https://github.com/AlexHex7/Non-local_pytorch/blob/master/lib/non_local_embedded_gaussian.py
+# See https://arxiv.org/abs/1711.07971 for details
+class _NonLocalBlockND(nn.Module):
+    def __init__(
+        self, in_channels, inter_channels=None, dimension=3, sub_sample=True, bn_layer=True
+    ):
+        super(_NonLocalBlockND, self).__init__()
+
+        assert dimension in [1, 2, 3]
+
+        self.dimension = dimension
+        self.sub_sample = sub_sample
+
+        self.in_channels = in_channels
+        self.inter_channels = inter_channels
+
+        if self.inter_channels is None:
+            self.inter_channels = in_channels // 2
+            if self.inter_channels == 0:
+                self.inter_channels = 1
+
+        if dimension == 3:
+            conv_nd = nn.Conv3d
+            max_pool_layer = nn.MaxPool3d(kernel_size=(1, 2, 2))
+            bn = nn.GroupNorm  # (32, hidden_dim) #nn.BatchNorm3d
+        elif dimension == 2:
+            conv_nd = nn.Conv2d
+            max_pool_layer = nn.MaxPool2d(kernel_size=(2, 2))
+            bn = nn.GroupNorm  # (32, hidden_dim)nn.BatchNorm2d
+        else:
+            conv_nd = nn.Conv1d
+            max_pool_layer = nn.MaxPool1d(kernel_size=2)
+            bn = nn.GroupNorm  # (32, hidden_dim)nn.BatchNorm1d
+
+        self.g = conv_nd(
+            in_channels=self.in_channels,
+            out_channels=self.inter_channels,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+        )
+
+        if bn_layer:
+            self.W = nn.Sequential(
+                conv_nd(
+                    in_channels=self.inter_channels,
+                    out_channels=self.in_channels,
+                    kernel_size=1,
+                    stride=1,
+                    padding=0,
+                ),
+                bn(32, self.in_channels),
+            )
+            nn.init.constant_(self.W[1].weight, 0)
+            nn.init.constant_(self.W[1].bias, 0)
+        else:
+            self.W = conv_nd(
+                in_channels=self.inter_channels,
+                out_channels=self.in_channels,
+                kernel_size=1,
+                stride=1,
+                padding=0,
+            )
+            nn.init.constant_(self.W.weight, 0)
+            nn.init.constant_(self.W.bias, 0)
+
+        self.theta = conv_nd(
+            in_channels=self.in_channels,
+            out_channels=self.inter_channels,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+        )
+        self.phi = conv_nd(
+            in_channels=self.in_channels,
+            out_channels=self.inter_channels,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+        )
+
+        if sub_sample:
+            self.g = nn.Sequential(self.g, max_pool_layer)
+            self.phi = nn.Sequential(self.phi, max_pool_layer)
+
+    def forward(self, x):
+        """
+        :param x: (b, c, t, h, w)
+        :return:
+        """
+
+        batch_size = x.size(0)
+
+        g_x = self.g(x).view(batch_size, self.inter_channels, -1)
+        g_x = g_x.permute(0, 2, 1)
+
+        theta_x = self.theta(x).view(batch_size, self.inter_channels, -1)
+        theta_x = theta_x.permute(0, 2, 1)
+        phi_x = self.phi(x).view(batch_size, self.inter_channels, -1)
+        f = torch.matmul(theta_x, phi_x)
+        f_div_C = F.softmax(f, dim=-1)
+
+        y = torch.matmul(f_div_C, g_x)
+        y = y.permute(0, 2, 1).contiguous()
+        y = y.view(batch_size, self.inter_channels, *x.size()[2:])
+        W_y = self.W(y)
+        z = W_y + x
+
+        return z
+
+
+class NONLocalBlock2D(_NonLocalBlockND):
+    def __init__(self, in_channels, inter_channels=None, sub_sample=True, bn_layer=True):
+        super(NONLocalBlock2D, self).__init__(
+            in_channels,
+            inter_channels=inter_channels,
+            dimension=2,
+            sub_sample=sub_sample,
+            bn_layer=bn_layer,
+        )
+
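A quick shape check (not part of the diff) for the `ASPP` module above, assuming the file is importable as `densepose.modeling.roi_heads.deeplab` given the repo layout. The five parallel branches (1x1 conv, three atrous convs, global pooling) are concatenated to `5 * out_channels` channels and projected back, with spatial size preserved.

```python
import torch
from densepose.modeling.roi_heads.deeplab import ASPP  # import path assumed

aspp = ASPP(in_channels=256, atrous_rates=[6, 12, 56], out_channels=256)
x = torch.randn(1, 256, 28, 28)  # a typical pooled ROI feature map
y = aspp(x)
assert y.shape == (1, 256, 28, 28)  # channels projected back, spatial size kept
```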
Leffa/densepose/modeling/roi_heads/registry.py
ADDED
@@ -0,0 +1,5 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+from detectron2.utils.registry import Registry
+
+ROI_DENSEPOSE_HEAD_REGISTRY = Registry("ROI_DENSEPOSE_HEAD")
Leffa/densepose/modeling/roi_heads/roi_head.py
ADDED
@@ -0,0 +1,218 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+import numpy as np
+from typing import Dict, List, Optional
+import fvcore.nn.weight_init as weight_init
+import torch
+import torch.nn as nn
+from torch.nn import functional as F
+
+from detectron2.layers import Conv2d, ShapeSpec, get_norm
+from detectron2.modeling import ROI_HEADS_REGISTRY, StandardROIHeads
+from detectron2.modeling.poolers import ROIPooler
+from detectron2.modeling.roi_heads import select_foreground_proposals
+from detectron2.structures import ImageList, Instances
+
+from .. import (
+    build_densepose_data_filter,
+    build_densepose_embedder,
+    build_densepose_head,
+    build_densepose_losses,
+    build_densepose_predictor,
+    densepose_inference,
+)
+
+
+class Decoder(nn.Module):
+    """
+    A semantic segmentation head described in detail in the Panoptic Feature Pyramid Networks paper
+    (https://arxiv.org/abs/1901.02446). It takes FPN features as input and merges information from
+    all levels of the FPN into single output.
+    """
+
+    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec], in_features):
+        super(Decoder, self).__init__()
+
+        # fmt: off
+        self.in_features = in_features
+        feature_strides = {k: v.stride for k, v in input_shape.items()}
+        feature_channels = {k: v.channels for k, v in input_shape.items()}
+        num_classes = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES
+        conv_dims = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS
+        self.common_stride = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE
+        norm = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM
+        # fmt: on
+
+        self.scale_heads = []
+        for in_feature in self.in_features:
+            head_ops = []
+            head_length = max(
+                1, int(np.log2(feature_strides[in_feature]) - np.log2(self.common_stride))
+            )
+            for k in range(head_length):
+                conv = Conv2d(
+                    feature_channels[in_feature] if k == 0 else conv_dims,
+                    conv_dims,
+                    kernel_size=3,
+                    stride=1,
+                    padding=1,
+                    bias=not norm,
+                    norm=get_norm(norm, conv_dims),
+                    activation=F.relu,
+                )
+                weight_init.c2_msra_fill(conv)
+                head_ops.append(conv)
+                if feature_strides[in_feature] != self.common_stride:
+                    head_ops.append(
+                        nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False)
+                    )
+            self.scale_heads.append(nn.Sequential(*head_ops))
+            self.add_module(in_feature, self.scale_heads[-1])
+        self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0)
+        weight_init.c2_msra_fill(self.predictor)
+
+    def forward(self, features: List[torch.Tensor]):
+        for i, _ in enumerate(self.in_features):
+            if i == 0:
+                x = self.scale_heads[i](features[i])
+            else:
+                x = x + self.scale_heads[i](features[i])
+        x = self.predictor(x)
+        return x
+
+
+@ROI_HEADS_REGISTRY.register()
+class DensePoseROIHeads(StandardROIHeads):
+    """
+    A Standard ROIHeads which contains an addition of DensePose head.
+    """
+
+    def __init__(self, cfg, input_shape):
+        super().__init__(cfg, input_shape)
+        self._init_densepose_head(cfg, input_shape)
+
+    def _init_densepose_head(self, cfg, input_shape):
+        # fmt: off
+        self.densepose_on = cfg.MODEL.DENSEPOSE_ON
+        if not self.densepose_on:
+            return
+        self.densepose_data_filter = build_densepose_data_filter(cfg)
+        dp_pooler_resolution = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION
+        dp_pooler_sampling_ratio = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO
+        dp_pooler_type = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE
+        self.use_decoder = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON
+        # fmt: on
+        if self.use_decoder:
+            dp_pooler_scales = (1.0 / input_shape[self.in_features[0]].stride,)
+        else:
+            dp_pooler_scales = tuple(1.0 / input_shape[k].stride for k in self.in_features)
+        in_channels = [input_shape[f].channels for f in self.in_features][0]
+
+        if self.use_decoder:
+            self.decoder = Decoder(cfg, input_shape, self.in_features)
+
+        self.densepose_pooler = ROIPooler(
+            output_size=dp_pooler_resolution,
+            scales=dp_pooler_scales,
+            sampling_ratio=dp_pooler_sampling_ratio,
+            pooler_type=dp_pooler_type,
+        )
+        self.densepose_head = build_densepose_head(cfg, in_channels)
+        self.densepose_predictor = build_densepose_predictor(
+            cfg, self.densepose_head.n_out_channels
+        )
+        self.densepose_losses = build_densepose_losses(cfg)
+        self.embedder = build_densepose_embedder(cfg)
+
+    def _forward_densepose(self, features: Dict[str, torch.Tensor], instances: List[Instances]):
+        """
+        Forward logic of the densepose prediction branch.
+
+        Args:
+            features (dict[str, Tensor]): input data as a mapping from feature
+                map name to tensor. Axis 0 represents the number of images `N` in
+                the input data; axes 1-3 are channels, height, and width, which may
+                vary between feature maps (e.g., if a feature pyramid is used).
+            instances (list[Instances]): length `N` list of `Instances`. The i-th
+                `Instances` contains instances for the i-th input image,
+                In training, they can be the proposals.
+                In inference, they can be the predicted boxes.
+
+        Returns:
+            In training, a dict of losses.
+            In inference, update `instances` with new fields "densepose" and return it.
+        """
+        if not self.densepose_on:
+            return {} if self.training else instances
+
+        features_list = [features[f] for f in self.in_features]
+        if self.training:
+            proposals, _ = select_foreground_proposals(instances, self.num_classes)
+            features_list, proposals = self.densepose_data_filter(features_list, proposals)
+            if len(proposals) > 0:
+                proposal_boxes = [x.proposal_boxes for x in proposals]
+
+                if self.use_decoder:
+                    features_list = [self.decoder(features_list)]
+
+                features_dp = self.densepose_pooler(features_list, proposal_boxes)
+                densepose_head_outputs = self.densepose_head(features_dp)
+                densepose_predictor_outputs = self.densepose_predictor(densepose_head_outputs)
+                densepose_loss_dict = self.densepose_losses(
+                    proposals, densepose_predictor_outputs, embedder=self.embedder
+                )
+                return densepose_loss_dict
+        else:
+            pred_boxes = [x.pred_boxes for x in instances]
+
+            if self.use_decoder:
+                features_list = [self.decoder(features_list)]
+
+            features_dp = self.densepose_pooler(features_list, pred_boxes)
+            if len(features_dp) > 0:
+                densepose_head_outputs = self.densepose_head(features_dp)
+                densepose_predictor_outputs = self.densepose_predictor(densepose_head_outputs)
+            else:
+                densepose_predictor_outputs = None
+
+            densepose_inference(densepose_predictor_outputs, instances)
+            return instances
+
+    def forward(
+        self,
+        images: ImageList,
+        features: Dict[str, torch.Tensor],
+        proposals: List[Instances],
+        targets: Optional[List[Instances]] = None,
+    ):
+        instances, losses = super().forward(images, features, proposals, targets)
+        del targets, images
+
+        if self.training:
+            losses.update(self._forward_densepose(features, instances))
+        return instances, losses
+
+    def forward_with_given_boxes(
+        self, features: Dict[str, torch.Tensor], instances: List[Instances]
+    ):
+        """
+        Use the given boxes in `instances` to produce other (non-box) per-ROI outputs.
+
+        This is useful for downstream tasks where a box is known, but need to obtain
+        other attributes (outputs of other heads).
+        Test-time augmentation also uses this.
+
+        Args:
+            features: same as in `forward()`
+            instances (list[Instances]): instances to predict other outputs. Expect the keys
+                "pred_boxes" and "pred_classes" to exist.
+
+        Returns:
+            instances (list[Instances]):
+                the same `Instances` objects, with extra
+                fields such as `pred_masks` or `pred_keypoints`.
+        """
+
+        instances = super().forward_with_given_boxes(features, instances)
+        instances = self._forward_densepose(features, instances)
+        return instances
+
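For reference, a minimal sketch (not part of the diff) of the config keys `_init_densepose_head` reads, assuming the standard densepose config extensions are installed via `add_densepose_config`:

```python
from detectron2.config import get_cfg
from densepose import add_densepose_config

cfg = get_cfg()
add_densepose_config(cfg)                        # adds the ROI_DENSEPOSE_HEAD.* defaults
cfg.MODEL.ROI_HEADS.NAME = "DensePoseROIHeads"   # select the ROI heads registered above
cfg.MODEL.DENSEPOSE_ON = True                    # gates _init_densepose_head / _forward_densepose
cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON = True   # pool from the merged Decoder output only
```

With `DECODER_ON` set, the pooler uses a single scale (that of the first input feature), since the `Decoder` has already merged all FPN levels into one map.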
Leffa/densepose/modeling/roi_heads/v1convx.py
ADDED
@@ -0,0 +1,64 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from detectron2.config import CfgNode
+from detectron2.layers import Conv2d
+
+from ..utils import initialize_module_params
+from .registry import ROI_DENSEPOSE_HEAD_REGISTRY
+
+
+@ROI_DENSEPOSE_HEAD_REGISTRY.register()
+class DensePoseV1ConvXHead(nn.Module):
+    """
+    Fully convolutional DensePose head.
+    """
+
+    def __init__(self, cfg: CfgNode, input_channels: int):
+        """
+        Initialize DensePose fully convolutional head
+
+        Args:
+            cfg (CfgNode): configuration options
+            input_channels (int): number of input channels
+        """
+        super(DensePoseV1ConvXHead, self).__init__()
+        # fmt: off
+        hidden_dim = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM
+        kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL
+        self.n_stacked_convs = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS
+        # fmt: on
+        pad_size = kernel_size // 2
+        n_channels = input_channels
+        for i in range(self.n_stacked_convs):
+            layer = Conv2d(n_channels, hidden_dim, kernel_size, stride=1, padding=pad_size)
+            layer_name = self._get_layer_name(i)
+            self.add_module(layer_name, layer)
+            n_channels = hidden_dim
+        self.n_out_channels = n_channels
+        initialize_module_params(self)
+
+    def forward(self, features: torch.Tensor):
+        """
+        Apply DensePose fully convolutional head to the input features
+
+        Args:
+            features (tensor): input features
+        Result:
+            A tensor of DensePose head outputs
+        """
+        x = features
+        output = x
+        for i in range(self.n_stacked_convs):
+            layer_name = self._get_layer_name(i)
+            x = getattr(self, layer_name)(x)
+            x = F.relu(x)
+            output = x
+        return output
+
+    def _get_layer_name(self, i: int):
+        layer_name = "body_conv_fcn{}".format(i + 1)
+        return layer_name
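A minimal sketch (not part of the diff) that instantiates the head above with the default densepose config and checks its output shape; the import path is assumed from the repo layout.

```python
import torch
from detectron2.config import get_cfg
from densepose import add_densepose_config
from densepose.modeling.roi_heads.v1convx import DensePoseV1ConvXHead  # path assumed

cfg = get_cfg()
add_densepose_config(cfg)              # provides ROI_DENSEPOSE_HEAD.* defaults
head = DensePoseV1ConvXHead(cfg, input_channels=256)
x = torch.randn(2, 256, 28, 28)        # pooled ROI features
y = head(x)                            # stacked convs + ReLU; padding preserves H, W
assert y.shape[1] == head.n_out_channels and y.shape[2:] == x.shape[2:]
```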
Leffa/densepose/modeling/test_time_augmentation.py
ADDED
@@ -0,0 +1,207 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import copy
+import numpy as np
+import torch
+from fvcore.transforms import HFlipTransform, TransformList
+from torch.nn import functional as F
+
+from detectron2.data.transforms import RandomRotation, RotationTransform, apply_transform_gens
+from detectron2.modeling.postprocessing import detector_postprocess
+from detectron2.modeling.test_time_augmentation import DatasetMapperTTA, GeneralizedRCNNWithTTA
+
+from ..converters import HFlipConverter
+
+
+class DensePoseDatasetMapperTTA(DatasetMapperTTA):
+    def __init__(self, cfg):
+        super().__init__(cfg=cfg)
+        self.angles = cfg.TEST.AUG.ROTATION_ANGLES
+
+    def __call__(self, dataset_dict):
+        ret = super().__call__(dataset_dict=dataset_dict)
+        numpy_image = dataset_dict["image"].permute(1, 2, 0).numpy()
+        for angle in self.angles:
+            rotate = RandomRotation(angle=angle, expand=True)
+            new_numpy_image, tfms = apply_transform_gens([rotate], np.copy(numpy_image))
+            torch_image = torch.from_numpy(np.ascontiguousarray(new_numpy_image.transpose(2, 0, 1)))
+            dic = copy.deepcopy(dataset_dict)
+            # In DatasetMapperTTA, there is a pre_tfm transform (resize or no-op) that is
+            # added at the beginning of each TransformList. That's '.transforms[0]'.
+            dic["transforms"] = TransformList(
+                [ret[-1]["transforms"].transforms[0]] + tfms.transforms
+            )
+            dic["image"] = torch_image
+            ret.append(dic)
+        return ret
+
+
+class DensePoseGeneralizedRCNNWithTTA(GeneralizedRCNNWithTTA):
+    def __init__(self, cfg, model, transform_data, tta_mapper=None, batch_size=1):
+        """
+        Args:
+            cfg (CfgNode):
+            model (GeneralizedRCNN): a GeneralizedRCNN to apply TTA on.
+            transform_data (DensePoseTransformData): contains symmetry label
+                transforms used for horizontal flip
+            tta_mapper (callable): takes a dataset dict and returns a list of
+                augmented versions of the dataset dict. Defaults to
+                `DatasetMapperTTA(cfg)`.
+            batch_size (int): batch the augmented images into this batch size for inference.
+        """
+        self._transform_data = transform_data.to(model.device)
+        super().__init__(cfg=cfg, model=model, tta_mapper=tta_mapper, batch_size=batch_size)
+
+    # the implementation follows closely the one from detectron2/modeling
+    def _inference_one_image(self, input):
+        """
+        Args:
+            input (dict): one dataset dict with "image" field being a CHW tensor
+
+        Returns:
+            dict: one output dict
+        """
+        orig_shape = (input["height"], input["width"])
+        # For some reason, resize with uint8 slightly increases box AP but decreases densepose AP
+        input["image"] = input["image"].to(torch.uint8)
+        augmented_inputs, tfms = self._get_augmented_inputs(input)
+        # Detect boxes from all augmented versions
+        with self._turn_off_roi_heads(["mask_on", "keypoint_on", "densepose_on"]):
+            # temporarily disable roi heads
+            all_boxes, all_scores, all_classes = self._get_augmented_boxes(augmented_inputs, tfms)
+        merged_instances = self._merge_detections(all_boxes, all_scores, all_classes, orig_shape)
+
+        if self.cfg.MODEL.MASK_ON or self.cfg.MODEL.DENSEPOSE_ON:
+            # Use the detected boxes to obtain new fields
+            augmented_instances = self._rescale_detected_boxes(
+                augmented_inputs, merged_instances, tfms
+            )
+            # run forward on the detected boxes
+            outputs = self._batch_inference(augmented_inputs, augmented_instances)
+            # Delete now useless variables to avoid being out of memory
+            del augmented_inputs, augmented_instances
+            # average the predictions
+            if self.cfg.MODEL.MASK_ON:
+                merged_instances.pred_masks = self._reduce_pred_masks(outputs, tfms)
+            if self.cfg.MODEL.DENSEPOSE_ON:
+                merged_instances.pred_densepose = self._reduce_pred_densepose(outputs, tfms)
+            # postprocess
+            merged_instances = detector_postprocess(merged_instances, *orig_shape)
+            return {"instances": merged_instances}
+        else:
+            return {"instances": merged_instances}
+
+    def _get_augmented_boxes(self, augmented_inputs, tfms):
+        # Heavily based on detectron2/modeling/test_time_augmentation.py
+        # Only difference is that RotationTransform is excluded from bbox computation
+        # 1: forward with all augmented images
+        outputs = self._batch_inference(augmented_inputs)
+        # 2: union the results
+        all_boxes = []
+        all_scores = []
+        all_classes = []
+        for output, tfm in zip(outputs, tfms):
+            # Need to inverse the transforms on boxes, to obtain results on original image
+            if not any(isinstance(t, RotationTransform) for t in tfm.transforms):
+                # Some transforms can't compute bbox correctly
+                pred_boxes = output.pred_boxes.tensor
+                original_pred_boxes = tfm.inverse().apply_box(pred_boxes.cpu().numpy())
+                all_boxes.append(torch.from_numpy(original_pred_boxes).to(pred_boxes.device))
+                all_scores.extend(output.scores)
+                all_classes.extend(output.pred_classes)
+        all_boxes = torch.cat(all_boxes, dim=0)
+        return all_boxes, all_scores, all_classes
+
+    def _reduce_pred_densepose(self, outputs, tfms):
+        # Should apply inverse transforms on densepose preds.
+        # We assume only rotation, resize & flip are used. pred_masks is a scale-invariant
+        # representation, so we handle the other ones specially
+        for idx, (output, tfm) in enumerate(zip(outputs, tfms)):
+            for t in tfm.transforms:
+                for attr in ["coarse_segm", "fine_segm", "u", "v"]:
+                    setattr(
+                        output.pred_densepose,
+                        attr,
+                        _inverse_rotation(
+                            getattr(output.pred_densepose, attr), output.pred_boxes.tensor, t
+                        ),
+                    )
+            if any(isinstance(t, HFlipTransform) for t in tfm.transforms):
+                output.pred_densepose = HFlipConverter.convert(
+                    output.pred_densepose, self._transform_data
+                )
+            self._incremental_avg_dp(outputs[0].pred_densepose, output.pred_densepose, idx)
+        return outputs[0].pred_densepose
+
+    # incrementally computed average: u_(n + 1) = u_n + (x_(n+1) - u_n) / (n + 1).
+    def _incremental_avg_dp(self, avg, new_el, idx):
+        for attr in ["coarse_segm", "fine_segm", "u", "v"]:
+            setattr(avg, attr, (getattr(avg, attr) * idx + getattr(new_el, attr)) / (idx + 1))
+            if idx:
+                # Deletion of the > 0 index intermediary values to prevent GPU OOM
+                setattr(new_el, attr, None)
+        return avg
+
+
+def _inverse_rotation(densepose_attrs, boxes, transform):
+    # resample outputs to image size and rotate back the densepose preds
+    # on the rotated images to the space of the original image
+    if len(boxes) == 0 or not isinstance(transform, RotationTransform):
+        return densepose_attrs
+    boxes = boxes.int().cpu().numpy()
+    wh_boxes = boxes[:, 2:] - boxes[:, :2]  # bboxes in the rotated space
+    inv_boxes = rotate_box_inverse(transform, boxes).astype(int)  # bboxes in original image
+    wh_diff = (inv_boxes[:, 2:] - inv_boxes[:, :2] - wh_boxes) // 2  # diff between new/old bboxes
+    rotation_matrix = torch.tensor([transform.rm_image]).to(device=densepose_attrs.device).float()
+    rotation_matrix[:, :, -1] = 0
+    # To apply grid_sample for rotation, we need to have enough space to fit the original and
+    # rotated bboxes. l_bds and r_bds are the left/right bounds that will be used to
+    # crop the difference once the rotation is done
+    l_bds = np.maximum(0, -wh_diff)
+    for i in range(len(densepose_attrs)):
+        if min(wh_boxes[i]) <= 0:
+            continue
+        densepose_attr = densepose_attrs[[i]].clone()
+        # 1. Interpolate densepose attribute to size of the rotated bbox
+        densepose_attr = F.interpolate(densepose_attr, wh_boxes[i].tolist()[::-1], mode="bilinear")
+        # 2. Pad the interpolated attribute so it has room for the original + rotated bbox
+        densepose_attr = F.pad(densepose_attr, tuple(np.repeat(np.maximum(0, wh_diff[i]), 2)))
+        # 3. Compute rotation grid and transform
+        grid = F.affine_grid(rotation_matrix, size=densepose_attr.shape)
+        densepose_attr = F.grid_sample(densepose_attr, grid)
+        # 4. Compute right bounds and crop the densepose_attr to the size of the original bbox
+        r_bds = densepose_attr.shape[2:][::-1] - l_bds[i]
+        densepose_attr = densepose_attr[:, :, l_bds[i][1] : r_bds[1], l_bds[i][0] : r_bds[0]]
+        if min(densepose_attr.shape) > 0:
+            # Interpolate back to the original size of the densepose attribute
+            densepose_attr = F.interpolate(
+                densepose_attr, densepose_attrs.shape[-2:], mode="bilinear"
+            )
+            # Adding a very small probability to the background class to fill padded zones
+            densepose_attr[:, 0] += 1e-10
+            densepose_attrs[i] = densepose_attr
+    return densepose_attrs
+
+
+def rotate_box_inverse(rot_tfm, rotated_box):
+    """
+    rotated_box is a N * 4 array of [x0, y0, x1, y1] boxes
+    When a bbox is rotated, it gets bigger, because we need to surround the tilted bbox
+    So when a bbox is rotated then inverse-rotated, it is much bigger than the original
+    This function aims to invert the rotation on the box, but also resize it to its original size
+    """
+    # 1. Compute the inverse rotation of the rotated bboxes (bigger than it)
+    invrot_box = rot_tfm.inverse().apply_box(rotated_box)
+    h, w = rotated_box[:, 3] - rotated_box[:, 1], rotated_box[:, 2] - rotated_box[:, 0]
+    ih, iw = invrot_box[:, 3] - invrot_box[:, 1], invrot_box[:, 2] - invrot_box[:, 0]
+    assert 2 * rot_tfm.abs_sin**2 != 1, "45 degrees angle can't be inverted"
+    # 2. Inverse the corresponding computation in the rotation transform
+    # to get the original height/width of the rotated boxes
+    orig_h = (h * rot_tfm.abs_cos - w * rot_tfm.abs_sin) / (1 - 2 * rot_tfm.abs_sin**2)
+    orig_w = (w * rot_tfm.abs_cos - h * rot_tfm.abs_sin) / (1 - 2 * rot_tfm.abs_sin**2)
+    # 3. Resize the inverse-rotated bboxes to their original size
+    invrot_box[:, 0] += (iw - orig_w) / 2
+    invrot_box[:, 1] += (ih - orig_h) / 2
+    invrot_box[:, 2] -= (iw - orig_w) / 2
+    invrot_box[:, 3] -= (ih - orig_h) / 2
+
+    return invrot_box
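A tiny numeric check (not part of the diff) of the running-average identity that `_incremental_avg_dp` relies on, u_(n+1) = (u_n * n + x_(n+1)) / (n + 1), which lets the TTA wrapper average predictions without keeping all augmented outputs in memory:

```python
import torch

xs = [torch.randn(3) for _ in range(5)]
avg = xs[0].clone()
for idx, x in enumerate(xs[1:], start=1):
    avg = (avg * idx + x) / (idx + 1)  # same update as _incremental_avg_dp
assert torch.allclose(avg, torch.stack(xs).mean(dim=0), atol=1e-6)
```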
Leffa/densepose/modeling/utils.py
ADDED
@@ -0,0 +1,11 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+from torch import nn
+
+
+def initialize_module_params(module: nn.Module) -> None:
+    for name, param in module.named_parameters():
+        if "bias" in name:
+            nn.init.constant_(param, 0)
+        elif "weight" in name:
+            nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")
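A short usage sketch (not part of the diff): apply the initializer above to a small module and confirm biases are zeroed while weights get Kaiming initialization. The import path is assumed from the repo layout.

```python
import torch
from torch import nn
from densepose.modeling.utils import initialize_module_params  # path assumed

m = nn.Sequential(nn.Conv2d(3, 8, 3, padding=1), nn.Conv2d(8, 8, 3, padding=1))
initialize_module_params(m)  # Kaiming-normal weights, zero biases
assert all(torch.all(c.bias == 0) for c in m)
```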
Leffa/densepose/utils/__init__.py
ADDED
File without changes
Leffa/densepose/utils/dbhelper.py
ADDED
@@ -0,0 +1,147 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+from typing import Any, Dict, Optional, Tuple
+
+
+class EntrySelector:
+    """
+    Base class for entry selectors
+    """
+
+    @staticmethod
+    def from_string(spec: str) -> "EntrySelector":
+        if spec == "*":
+            return AllEntrySelector()
+        return FieldEntrySelector(spec)
+
+
+class AllEntrySelector(EntrySelector):
+    """
+    Selector that accepts all entries
+    """
+
+    SPECIFIER = "*"
+
+    def __call__(self, entry):
+        return True
+
+
+class FieldEntrySelector(EntrySelector):
+    """
+    Selector that accepts only entries that match provided field
+    specifier(s). Only a limited set of specifiers is supported for now:
+      <specifiers>::=<specifier>[<comma><specifiers>]
+      <specifier>::=<field_name>[<type_delim><type>]<equal><value_or_range>
+      <field_name> is a valid identifier
+      <type> ::= "int" | "str"
+      <equal> ::= "="
+      <comma> ::= ","
+      <type_delim> ::= ":"
+      <value_or_range> ::= <value> | <range>
+      <range> ::= <value><range_delim><value>
+      <range_delim> ::= "-"
+      <value> is a string without spaces and special symbols
+        (e.g. <comma>, <equal>, <type_delim>, <range_delim>)
+    """
+
+    _SPEC_DELIM = ","
+    _TYPE_DELIM = ":"
+    _RANGE_DELIM = "-"
+    _EQUAL = "="
+    _ERROR_PREFIX = "Invalid field selector specifier"
+
+    class _FieldEntryValuePredicate:
+        """
+        Predicate that checks strict equality for the specified entry field
+        """
+
+        def __init__(self, name: str, typespec: Optional[str], value: str):
+            import builtins
+
+            self.name = name
+            self.type = getattr(builtins, typespec) if typespec is not None else str
+            self.value = value
+
+        def __call__(self, entry):
+            return entry[self.name] == self.type(self.value)
+
+    class _FieldEntryRangePredicate:
+        """
+        Predicate that checks whether an entry field falls into the specified range
+        """
+
+        def __init__(self, name: str, typespec: Optional[str], vmin: str, vmax: str):
+            import builtins
+
+            self.name = name
+            self.type = getattr(builtins, typespec) if typespec is not None else str
+            self.vmin = vmin
+            self.vmax = vmax
+
+        def __call__(self, entry):
+            return (entry[self.name] >= self.type(self.vmin)) and (
+                entry[self.name] <= self.type(self.vmax)
+            )
+
+    def __init__(self, spec: str):
+        self._predicates = self._parse_specifier_into_predicates(spec)
+
+    def __call__(self, entry: Dict[str, Any]):
+        for predicate in self._predicates:
+            if not predicate(entry):
+                return False
+        return True
+
+    def _parse_specifier_into_predicates(self, spec: str):
+        predicates = []
+        specs = spec.split(self._SPEC_DELIM)
+        for subspec in specs:
+            eq_idx = subspec.find(self._EQUAL)
+            if eq_idx > 0:
+                field_name_with_type = subspec[:eq_idx]
+                field_name, field_type = self._parse_field_name_type(field_name_with_type)
+                field_value_or_range = subspec[eq_idx + 1 :]
+                if self._is_range_spec(field_value_or_range):
+                    vmin, vmax = self._get_range_spec(field_value_or_range)
+                    predicate = FieldEntrySelector._FieldEntryRangePredicate(
+                        field_name, field_type, vmin, vmax
+                    )
+                else:
+                    predicate = FieldEntrySelector._FieldEntryValuePredicate(
+                        field_name, field_type, field_value_or_range
+                    )
+                predicates.append(predicate)
+            elif eq_idx == 0:
+                self._parse_error(f'"{subspec}", field name is empty!')
+            else:
+                self._parse_error(f'"{subspec}", should have format ' "<field>=<value_or_range>!")
+        return predicates
+
+    def _parse_field_name_type(self, field_name_with_type: str) -> Tuple[str, Optional[str]]:
+        type_delim_idx = field_name_with_type.find(self._TYPE_DELIM)
+        if type_delim_idx > 0:
+            field_name = field_name_with_type[:type_delim_idx]
+            field_type = field_name_with_type[type_delim_idx + 1 :]
+        elif type_delim_idx == 0:
+            self._parse_error(f'"{field_name_with_type}", field name is empty!')
+        else:
+            field_name = field_name_with_type
+            field_type = None
+        # pyre-fixme[61]: `field_name` may not be initialized here.
+        # pyre-fixme[61]: `field_type` may not be initialized here.
+        return field_name, field_type
+
+    def _is_range_spec(self, field_value_or_range):
+        delim_idx = field_value_or_range.find(self._RANGE_DELIM)
+        return delim_idx > 0
+
+    def _get_range_spec(self, field_value_or_range):
+        if self._is_range_spec(field_value_or_range):
+            delim_idx = field_value_or_range.find(self._RANGE_DELIM)
+            vmin = field_value_or_range[:delim_idx]
+            vmax = field_value_or_range[delim_idx + 1 :]
+            return vmin, vmax
+        else:
+            self._parse_error('"field_value_or_range", range of values expected!')
+
+    def _parse_error(self, msg):
+        raise ValueError(f"{self._ERROR_PREFIX}: {msg}")
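A small sketch (not part of the diff) of the specifier grammar documented above, assuming the module is importable as `densepose.utils.dbhelper`. The spec combines a typed range predicate with an equality predicate:

```python
from densepose.utils.dbhelper import EntrySelector  # import path assumed

select = EntrySelector.from_string("score:int=3-7,split=train")
entries = [
    {"score": 5, "split": "train"},   # matches: 3 <= 5 <= 7 and split == "train"
    {"score": 9, "split": "train"},   # rejected by the range predicate
]
print([e for e in entries if select(e)])  # keeps only the first entry
```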
Leffa/densepose/utils/logger.py
ADDED
@@ -0,0 +1,13 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import logging
+
+
+def verbosity_to_level(verbosity) -> int:
+    if verbosity is not None:
+        if verbosity == 0:
+            return logging.WARNING
+        elif verbosity == 1:
+            return logging.INFO
+        elif verbosity >= 2:
+            return logging.DEBUG
+    return logging.WARNING
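The helper above maps a `-v`/`-vv` style CLI verbosity count to a logging level; a one-line usage sketch (not part of the diff):

```python
import logging
from densepose.utils.logger import verbosity_to_level  # import path assumed

logging.basicConfig(level=verbosity_to_level(2))  # -vv maps to DEBUG
```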
Leffa/densepose/utils/transform.py
ADDED
@@ -0,0 +1,15 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+from detectron2.data import MetadataCatalog
+from detectron2.utils.file_io import PathManager
+
+from densepose import DensePoseTransformData
+
+
+def load_for_dataset(dataset_name):
+    path = MetadataCatalog.get(dataset_name).densepose_transform_src
+    densepose_transform_data_fpath = PathManager.get_local_path(path)
+    return DensePoseTransformData.load(densepose_transform_data_fpath)
+
+
+def load_from_cfg(cfg):
+    return load_for_dataset(cfg.DATASETS.TEST[0])
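A wiring sketch (not part of the diff) connecting this loader to the TTA wrapper defined in `modeling/test_time_augmentation.py`; `cfg` is assumed to be a densepose config whose `DATASETS.TEST` names a registered densepose dataset, and `model` a built `GeneralizedRCNN`:

```python
from densepose.utils.transform import load_from_cfg
from densepose.modeling.test_time_augmentation import DensePoseGeneralizedRCNNWithTTA

transform_data = load_from_cfg(cfg)  # symmetry labels used for horizontal flips
model_tta = DensePoseGeneralizedRCNNWithTTA(cfg, model, transform_data)
```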
Leffa/leffa_utils/densepose_for_mask.py
ADDED
@@ -0,0 +1,170 @@
+import glob
+import os
+import shutil
+import time
+from random import randint
+
+import cv2
+import numpy as np
+import torch
+from densepose import add_densepose_config
+from densepose.vis.base import CompoundVisualizer
+from densepose.vis.densepose_results import DensePoseResultsFineSegmentationVisualizer
+from densepose.vis.extractor import CompoundExtractor, create_extractor
+from detectron2.config import get_cfg
+from detectron2.data.detection_utils import read_image
+from detectron2.engine.defaults import DefaultPredictor
+from PIL import Image
+
+
+class DensePose:
+    """
+    DensePose used in this project is from Detectron2 (https://github.com/facebookresearch/detectron2).
+    These codes are modified from https://github.com/facebookresearch/detectron2/tree/main/projects/DensePose.
+    The checkpoint is downloaded from https://github.com/facebookresearch/detectron2/blob/main/projects/DensePose/doc/DENSEPOSE_IUV.md#ModelZoo.
+
+    We use the model R_50_FPN_s1x with id 165712039, but other models should also work.
+    The config file is downloaded from https://github.com/facebookresearch/detectron2/tree/main/projects/DensePose/configs.
+    Note that the config file should match the model checkpoint; Base-DensePose-RCNN-FPN.yaml is also needed.
+    """
+
+    def __init__(self, model_path="./checkpoints/densepose_", device="cuda"):
+        self.device = device
+        self.config_path = os.path.join(model_path, "densepose_rcnn_R_50_FPN_s1x.yaml")
+        self.model_path = os.path.join(model_path, "model_final_162be9.pkl")
+        self.visualizations = ["dp_segm"]
+        self.VISUALIZERS = {"dp_segm": DensePoseResultsFineSegmentationVisualizer}
+        self.min_score = 0.8
+
+        self.cfg = self.setup_config()
+        self.predictor = DefaultPredictor(self.cfg)
+        self.predictor.model.to(self.device)
+
+    def setup_config(self):
+        opts = ["MODEL.ROI_HEADS.SCORE_THRESH_TEST", str(self.min_score)]
+        cfg = get_cfg()
+        add_densepose_config(cfg)
+        cfg.merge_from_file(self.config_path)
+        cfg.merge_from_list(opts)
+        cfg.MODEL.WEIGHTS = self.model_path
+        cfg.freeze()
+        return cfg
+
+    @staticmethod
+    def _get_input_file_list(input_spec: str):
+        if os.path.isdir(input_spec):
+            file_list = [
+                os.path.join(input_spec, fname)
+                for fname in os.listdir(input_spec)
+                if os.path.isfile(os.path.join(input_spec, fname))
+            ]
+        elif os.path.isfile(input_spec):
+            file_list = [input_spec]
+        else:
+            file_list = glob.glob(input_spec)
+        return file_list
+
+    def create_context(self, cfg, output_path):
+        vis_specs = self.visualizations
+        visualizers = []
+        extractors = []
+        for vis_spec in vis_specs:
+            texture_atlas = texture_atlases_dict = None
+            vis = self.VISUALIZERS[vis_spec](
+                cfg=cfg,
+                texture_atlas=texture_atlas,
+                texture_atlases_dict=texture_atlases_dict,
+                alpha=1.0,
+            )
+            visualizers.append(vis)
+            extractor = create_extractor(vis)
+            extractors.append(extractor)
+        visualizer = CompoundVisualizer(visualizers)
+        extractor = CompoundExtractor(extractors)
+        context = {
+            "extractor": extractor,
+            "visualizer": visualizer,
+            "out_fname": output_path,
+            "entry_idx": 0,
+        }
+        return context
+
+    def execute_on_outputs(self, context, entry, outputs):
+        extractor = context["extractor"]
+
+        data = extractor(outputs)
+
+        H, W, _ = entry["image"].shape
+        result = np.zeros((H, W), dtype=np.uint8)
+
+        data, box = data[0]
+        x, y, w, h = [int(_) for _ in box[0].cpu().numpy()]
+        i_array = data[0].labels[None].cpu().numpy()[0]
+        result[y : y + h, x : x + w] = i_array
+        result = Image.fromarray(result)
+        result.save(context["out_fname"])
+
+    def __call__(self, image_or_path, resize=512) -> Image.Image:
+        """
+        :param image_or_path: Path of the input image.
+        :param resize: Resize the input image if its max size is larger than this value.
+        :return: Dense pose image.
+        """
+        # random tmp path with timestamp
+        tmp_path = "./densepose_/tmp/"
+        if not os.path.exists(tmp_path):
+            os.makedirs(tmp_path)
+
+        image_path = os.path.join(
+            tmp_path, f"{int(time.time())}-{self.device}-{randint(0, 100000)}.png"
+        )
+        if isinstance(image_or_path, str):
+            assert image_or_path.split(".")[-1] in [
+                "jpg",
+                "png",
+            ], "Only support jpg and png images."
+            shutil.copy(image_or_path, image_path)
+        elif isinstance(image_or_path, Image.Image):
+            image_or_path.save(image_path)
+        else:
+            shutil.rmtree(tmp_path)
+            raise TypeError("image_path must be str or PIL.Image.Image")
+
+        output_path = image_path.replace(".png", "_dense.png").replace(
+            ".jpg", "_dense.png"
+        )
+        w, h = Image.open(image_path).size
+
+        file_list = self._get_input_file_list(image_path)
+        assert len(file_list), "No input images found!"
+        context = self.create_context(self.cfg, output_path)
+        for file_name in file_list:
+            img = read_image(file_name, format="BGR")  # predictor expects BGR image.
+            # resize
+            if (_ := max(img.shape)) > resize:
+                scale = resize / _
+                img = cv2.resize(
+                    img, (int(img.shape[1] * scale), int(img.shape[0] * scale))
+                )
+
+            with torch.no_grad():
+                outputs = self.predictor(img)["instances"]
+            try:
+                self.execute_on_outputs(
+                    context, {"file_name": file_name, "image": img}, outputs
+                )
+            except Exception:
+                # fall back to a 1x1 placeholder when no person is detected
+                null_gray = Image.new("L", (1, 1))
+                null_gray.save(output_path)
+
+        dense_gray = Image.open(output_path).convert("L")
+        dense_gray = dense_gray.resize((w, h), Image.NEAREST)
+        # remove image_path and output_path
+        os.remove(image_path)
+        os.remove(output_path)
+
+        return dense_gray
+
+
+if __name__ == "__main__":
+    pass
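Example use (not part of the diff), assuming the R_50_FPN_s1x config and weights have been downloaded into `./checkpoints/densepose_` as the docstring describes; `person.jpg` is a hypothetical input image and the import path follows the repo layout.

```python
from PIL import Image
from leffa_utils.densepose_for_mask import DensePose  # import path assumed

dp = DensePose(model_path="./checkpoints/densepose_", device="cuda")
person = Image.open("person.jpg").convert("RGB")   # hypothetical input image
mask = dp(person, resize=512)                      # grayscale part-label map ("L" mode)
mask.save("person_densepose.png")
```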
Leffa/leffa_utils/densepose_predictor.py
ADDED
@@ -0,0 +1,77 @@
+import cv2
+import numpy as np
+import torch
+from densepose import add_densepose_config
+from densepose.vis.densepose_results import (
+    DensePoseResultsFineSegmentationVisualizer as Visualizer,
+)
+from densepose.vis.extractor import DensePoseResultExtractor
+from detectron2.config import get_cfg
+from detectron2.engine import DefaultPredictor
+
+
+class DensePosePredictor(object):
+    def __init__(
+        self,
+        config_path="./ckpts/densepose/densepose_rcnn_R_50_FPN_s1x.yaml",
+        weights_path="./ckpts/densepose/model_final_162be9.pkl",
+    ):
+        cfg = get_cfg()
+        add_densepose_config(cfg)
+        cfg.merge_from_file(config_path)  # path to the DensePose config file
+        cfg.MODEL.WEIGHTS = weights_path  # path to the pre-trained model weights
+        cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # adjust as needed
+        self.predictor = DefaultPredictor(cfg)
+        self.extractor = DensePoseResultExtractor()
+        self.visualizer = Visualizer()
+
+    def predict(self, image):
+        if isinstance(image, str):
+            image = cv2.imread(image)
+        with torch.no_grad():
+            outputs = self.predictor(image)["instances"]
+        outputs = self.extractor(outputs)
+        return outputs
+
+    def predict_iuv(self, image):
+        outputs = self.predict(image)
+
+        img_i = outputs[0][0].labels[None, ...]
+        img_uv = outputs[0][0].uv
+        img_uv = (img_uv - img_uv.min()) / (img_uv.max() - img_uv.min())
+        img_uv *= 255
+        img_iuv = torch.cat([img_i, img_uv], dim=0)
+        img_iuv = img_iuv.permute(1, 2, 0)
+        img_iuv = img_iuv.cpu().numpy()
+
+        # paste the per-box IUV patch back onto a full-size canvas
+        position = [int(x) for x in outputs[1][0].cpu().numpy().tolist()]
+        x1, y1, w, h = position
+        x2 = x1 + w
+        y2 = y1 + h
+        image_iuv = np.zeros(image.shape, dtype=image.dtype)
+        image_iuv[y1:y2, x1:x2, :] = img_iuv
+        image_iuv = image_iuv[:, :, [0, 2, 1]]
+
+        return image_iuv
+
+    def predict_seg(self, image):
+        outputs = self.predict(image)
+
+        image_seg = np.zeros(image.shape, dtype=image.dtype)
+        # the visualizer draws the fine segmentation onto image_seg in place
+        self.visualizer.visualize(image_seg, outputs)
+
+        return image_seg
+
+
+if __name__ == "__main__":
+    import sys
+
+    image_path = sys.argv[1]
+    image = cv2.imread(image_path)
+    predictor = DensePosePredictor()
+    image_iuv = predictor.predict_iuv(image)
+    image_seg = predictor.predict_seg(image)
+    cv2.imwrite(".".join(image_path.split(".")[:-1]) + "_iuv.jpg", image_iuv)
+    cv2.imwrite(".".join(image_path.split(".")[:-1]) + "_seg.jpg", image_seg)
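
`DensePoseResultExtractor` returns a pair: index 0 holds the per-instance DensePose results (with `labels` as the 24-part I map and `uv` as the two chart coordinates) and index 1 the matching boxes in XYWH format, which is why `predict_iuv` pastes the per-box patch back onto a full-size canvas before swapping the channel order. A hedged usage sketch (the image path is hypothetical; checkpoints are expected under ./ckpts/densepose as in the defaults above):

    import cv2
    from leffa_utils.densepose_predictor import DensePosePredictor

    predictor = DensePosePredictor()
    image = cv2.imread("person.jpg")          # BGR, as DefaultPredictor expects
    image_iuv = predictor.predict_iuv(image)  # HxWx3 IUV image on a black canvas
    image_seg = predictor.predict_seg(image)  # fine-segmentation visualization
    cv2.imwrite("person_iuv.jpg", image_iuv)
    cv2.imwrite("person_seg.jpg", image_seg)
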
Leffa/leffa_utils/garment_agnostic_mask_predictor.py
ADDED
@@ -0,0 +1,415 @@
+import os
+from typing import Union
+
+import cv2
+import numpy as np
+import torch
+from diffusers.image_processor import VaeImageProcessor
+from PIL import Image
+from SCHP import SCHP  # type: ignore
+
+from leffa_utils.densepose_for_mask import DensePose  # type: ignore
+
+DENSE_INDEX_MAP = {
+    "background": [0],
+    "torso": [1, 2],
+    "right hand": [3],
+    "left hand": [4],
+    "right foot": [5],
+    "left foot": [6],
+    "right thigh": [7, 9],
+    "left thigh": [8, 10],
+    "right leg": [11, 13],
+    "left leg": [12, 14],
+    "left big arm": [15, 17],
+    "right big arm": [16, 18],
+    "left forearm": [19, 21],
+    "right forearm": [20, 22],
+    "face": [23, 24],
+    "thighs": [7, 8, 9, 10],
+    "legs": [11, 12, 13, 14],
+    "hands": [3, 4],
+    "feet": [5, 6],
+    "big arms": [15, 16, 17, 18],
+    "forearms": [19, 20, 21, 22],
+}
+
+ATR_MAPPING = {
+    "Background": 0,
+    "Hat": 1,
+    "Hair": 2,
+    "Sunglasses": 3,
+    "Upper-clothes": 4,
+    "Skirt": 5,
+    "Pants": 6,
+    "Dress": 7,
+    "Belt": 8,
+    "Left-shoe": 9,
+    "Right-shoe": 10,
+    "Face": 11,
+    "Left-leg": 12,
+    "Right-leg": 13,
+    "Left-arm": 14,
+    "Right-arm": 15,
+    "Bag": 16,
+    "Scarf": 17,
+}
+
+LIP_MAPPING = {
+    "Background": 0,
+    "Hat": 1,
+    "Hair": 2,
+    "Glove": 3,
+    "Sunglasses": 4,
+    "Upper-clothes": 5,
+    "Dress": 6,
+    "Coat": 7,
+    "Socks": 8,
+    "Pants": 9,
+    "Jumpsuits": 10,
+    "Scarf": 11,
+    "Skirt": 12,
+    "Face": 13,
+    "Left-arm": 14,
+    "Right-arm": 15,
+    "Left-leg": 16,
+    "Right-leg": 17,
+    "Left-shoe": 18,
+    "Right-shoe": 19,
+}
+
+PROTECT_BODY_PARTS = {
+    "upper": ["Left-leg", "Right-leg"],
+    "lower": ["Right-arm", "Left-arm", "Face"],
+    "overall": [],
+    "inner": ["Left-leg", "Right-leg"],
+    "outer": ["Left-leg", "Right-leg"],
+}
+PROTECT_CLOTH_PARTS = {
+    "upper": {"ATR": ["Skirt", "Pants"], "LIP": ["Skirt", "Pants"]},
+    "lower": {"ATR": ["Upper-clothes"], "LIP": ["Upper-clothes", "Coat"]},
+    "overall": {"ATR": [], "LIP": []},
+    "inner": {
+        "ATR": ["Dress", "Coat", "Skirt", "Pants"],
+        "LIP": ["Dress", "Coat", "Skirt", "Pants", "Jumpsuits"],
+    },
+    "outer": {
+        "ATR": ["Dress", "Pants", "Skirt"],
+        "LIP": ["Upper-clothes", "Dress", "Pants", "Skirt", "Jumpsuits"],
+    },
+}
+MASK_CLOTH_PARTS = {
+    "upper": ["Upper-clothes", "Coat", "Dress", "Jumpsuits"],
+    "lower": ["Pants", "Skirt", "Dress", "Jumpsuits"],
+    "overall": ["Upper-clothes", "Dress", "Pants", "Skirt", "Coat", "Jumpsuits"],
+    "inner": ["Upper-clothes"],
+    "outer": [
+        "Coat",
+    ],
+}
+MASK_DENSE_PARTS = {
+    "upper": ["torso", "big arms", "forearms"],
+    "lower": ["thighs", "legs"],
+    "overall": ["torso", "thighs", "legs", "big arms", "forearms"],
+    "inner": ["torso"],
+    "outer": ["torso", "big arms", "forearms"],
+}
+
+schp_public_protect_parts = [
+    "Hat",
+    "Hair",
+    "Sunglasses",
+    "Left-shoe",
+    "Right-shoe",
+    "Bag",
+    "Glove",
+    "Scarf",
+]
+schp_protect_parts = {
+    "upper": ["Left-leg", "Right-leg", "Skirt", "Pants", "Jumpsuits"],
+    "lower": ["Left-arm", "Right-arm", "Upper-clothes", "Coat"],
+    "overall": [],
+    "inner": ["Left-leg", "Right-leg", "Skirt", "Pants", "Jumpsuits", "Coat"],
+    "outer": ["Left-leg", "Right-leg", "Skirt", "Pants", "Jumpsuits", "Upper-clothes"],
+}
+schp_mask_parts = {
+    "upper": ["Upper-clothes", "Dress", "Coat", "Jumpsuits"],
+    "lower": ["Pants", "Skirt", "Dress", "Jumpsuits", "Socks"],
+    "overall": [
+        "Upper-clothes",
+        "Dress",
+        "Pants",
+        "Skirt",
+        "Coat",
+        "Jumpsuits",
+        "Socks",
+    ],
+    "inner": ["Upper-clothes"],
+    "outer": [
+        "Coat",
+    ],
+}
+
+dense_mask_parts = {
+    "upper": ["torso", "big arms", "forearms"],
+    "lower": ["thighs", "legs"],
+    "overall": ["torso", "thighs", "legs", "big arms", "forearms"],
+    "inner": ["torso"],
+    "outer": ["torso", "big arms", "forearms"],
+}
+
+
+def vis_mask(image, mask):
+    image = np.array(image).astype(np.uint8)
+    mask = np.array(mask).astype(np.uint8)
+    mask[mask > 127] = 255
+    mask[mask <= 127] = 0
+    mask = np.expand_dims(mask, axis=-1)
+    mask = np.repeat(mask, 3, axis=-1)
+    mask = mask / 255
+    return Image.fromarray((image * (1 - mask)).astype(np.uint8))
+
+
+def part_mask_of(part: Union[str, list], parse: np.ndarray, mapping: dict):
+    if isinstance(part, str):
+        part = [part]
+    mask = np.zeros_like(parse)
+    for _ in part:
+        if _ not in mapping:
+            continue
+        if isinstance(mapping[_], list):
+            for i in mapping[_]:
+                mask += parse == i
+        else:
+            mask += parse == mapping[_]
+    return mask
+
+
+def hull_mask(mask_area: np.ndarray):
+    _, binary = cv2.threshold(mask_area, 127, 255, cv2.THRESH_BINARY)
+    contours, _ = cv2.findContours(
+        binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
+    )
+    hull_image = np.zeros_like(mask_area)
+    for c in contours:
+        hull = cv2.convexHull(c)
+        hull_image = cv2.fillPoly(np.zeros_like(mask_area), [hull], 255) | hull_image
+    return hull_image
+
+
+class AutoMasker:
+    def __init__(
+        self,
+        densepose_path: str = "./ckpts/densepose",
+        schp_path: str = "./ckpts/schp",
+        device="cuda",
+    ):
+        np.random.seed(0)
+        torch.manual_seed(0)
+        torch.cuda.manual_seed(0)
+
+        self.densepose_processor = DensePose(densepose_path, device)
+        self.schp_processor_atr = SCHP(
+            ckpt_path=os.path.join(schp_path, "exp-schp-201908301523-atr.pth"),
+            device=device,
+        )
+        self.schp_processor_lip = SCHP(
+            ckpt_path=os.path.join(schp_path, "exp-schp-201908261155-lip.pth"),
+            device=device,
+        )
+
+        self.mask_processor = VaeImageProcessor(
+            vae_scale_factor=8,
+            do_normalize=False,
+            do_binarize=True,
+            do_convert_grayscale=True,
+        )
+
+    def process_densepose(self, image_or_path):
+        return self.densepose_processor(image_or_path, resize=1024)
+
+    def process_schp_lip(self, image_or_path):
+        return self.schp_processor_lip(image_or_path)
+
+    def process_schp_atr(self, image_or_path):
+        return self.schp_processor_atr(image_or_path)
+
+    def preprocess_image(self, image_or_path):
+        return {
+            "densepose": self.densepose_processor(image_or_path, resize=1024),
+            "schp_atr": self.schp_processor_atr(image_or_path),
+            "schp_lip": self.schp_processor_lip(image_or_path),
+        }
+
+    @staticmethod
+    def cloth_agnostic_mask(
+        densepose_mask: Image.Image,
+        schp_lip_mask: Image.Image,
+        schp_atr_mask: Image.Image,
+        part: str = "overall",
+        **kwargs,
+    ):
+        assert part in [
+            "upper",
+            "lower",
+            "overall",
+            "inner",
+            "outer",
+        ], f"part should be one of ['upper', 'lower', 'overall', 'inner', 'outer'], but got {part}"
+        w, h = densepose_mask.size
+
+        dilate_kernel = max(w, h) // 250
+        dilate_kernel = dilate_kernel if dilate_kernel % 2 == 1 else dilate_kernel + 1
+        dilate_kernel = np.ones((dilate_kernel, dilate_kernel), np.uint8)
+
+        kernel_size = max(w, h) // 25
+        kernel_size = kernel_size if kernel_size % 2 == 1 else kernel_size + 1
+
+        densepose_mask = np.array(densepose_mask)
+        schp_lip_mask = np.array(schp_lip_mask)
+        schp_atr_mask = np.array(schp_atr_mask)
+
+        # Strong Protect Area (Hands, Face, Accessory, Feet)
+        hands_protect_area = part_mask_of(
+            ["hands", "feet"], densepose_mask, DENSE_INDEX_MAP
+        )
+        hands_protect_area = cv2.dilate(hands_protect_area, dilate_kernel, iterations=1)
+        hands_protect_area = hands_protect_area & (
+            part_mask_of(
+                ["Left-arm", "Right-arm", "Left-leg", "Right-leg"],
+                schp_atr_mask,
+                ATR_MAPPING,
+            )
+            | part_mask_of(
+                ["Left-arm", "Right-arm", "Left-leg", "Right-leg"],
+                schp_lip_mask,
+                LIP_MAPPING,
+            )
+        )
+        face_protect_area = part_mask_of("Face", schp_lip_mask, LIP_MAPPING)
+
+        strong_protect_area = hands_protect_area | face_protect_area
+
+        # Weak Protect Area (Hair, Irrelevant Clothes, Body Parts)
+        body_protect_area = part_mask_of(
+            PROTECT_BODY_PARTS[part], schp_lip_mask, LIP_MAPPING
+        ) | part_mask_of(PROTECT_BODY_PARTS[part], schp_atr_mask, ATR_MAPPING)
+        hair_protect_area = part_mask_of(
+            ["Hair"], schp_lip_mask, LIP_MAPPING
+        ) | part_mask_of(["Hair"], schp_atr_mask, ATR_MAPPING)
+        cloth_protect_area = part_mask_of(
+            PROTECT_CLOTH_PARTS[part]["LIP"], schp_lip_mask, LIP_MAPPING
+        ) | part_mask_of(PROTECT_CLOTH_PARTS[part]["ATR"], schp_atr_mask, ATR_MAPPING)
+        accessory_protect_area = part_mask_of(
+            (
+                accessory_parts := [
+                    "Hat",
+                    "Glove",
+                    "Sunglasses",
+                    "Bag",
+                    "Left-shoe",
+                    "Right-shoe",
+                    "Scarf",
+                    "Socks",
+                ]
+            ),
+            schp_lip_mask,
+            LIP_MAPPING,
+        ) | part_mask_of(accessory_parts, schp_atr_mask, ATR_MAPPING)
+        weak_protect_area = (
+            body_protect_area
+            | cloth_protect_area
+            | hair_protect_area
+            | strong_protect_area
+            | accessory_protect_area
+        )
+
+        # Mask Area
+        strong_mask_area = part_mask_of(
+            MASK_CLOTH_PARTS[part], schp_lip_mask, LIP_MAPPING
+        ) | part_mask_of(MASK_CLOTH_PARTS[part], schp_atr_mask, ATR_MAPPING)
+        background_area = part_mask_of(
+            ["Background"], schp_lip_mask, LIP_MAPPING
+        ) & part_mask_of(["Background"], schp_atr_mask, ATR_MAPPING)
+        mask_dense_area = part_mask_of(
+            MASK_DENSE_PARTS[part], densepose_mask, DENSE_INDEX_MAP
+        )
+        # dilate the DensePose area at quarter resolution, then upsample back
+        mask_dense_area = cv2.resize(
+            mask_dense_area.astype(np.uint8),
+            None,
+            fx=0.25,
+            fy=0.25,
+            interpolation=cv2.INTER_NEAREST,
+        )
+        mask_dense_area = cv2.dilate(mask_dense_area, dilate_kernel, iterations=2)
+        mask_dense_area = cv2.resize(
+            mask_dense_area.astype(np.uint8),
+            None,
+            fx=4,
+            fy=4,
+            interpolation=cv2.INTER_NEAREST,
+        )
+
+        mask_area = (
+            np.ones_like(densepose_mask) & (~weak_protect_area) & (~background_area)
+        ) | mask_dense_area
+
+        mask_area = (
+            hull_mask(mask_area * 255) // 255
+        )  # convex hull to expand the mask area
+        mask_area = mask_area & (~weak_protect_area)
+        mask_area = cv2.GaussianBlur(mask_area * 255, (kernel_size, kernel_size), 0)
+        mask_area[mask_area < 25] = 0
+        mask_area[mask_area >= 25] = 1
+        mask_area = (mask_area | strong_mask_area) & (~strong_protect_area)
+        mask_area = cv2.dilate(mask_area, dilate_kernel, iterations=1)
+
+        return Image.fromarray(mask_area * 255)
+
+    def __call__(
+        self,
+        image: Union[str, Image.Image],
+        mask_type: str = "upper",
+    ):
+        assert mask_type in [
+            "upper",
+            "lower",
+            "overall",
+            "inner",
+            "outer",
+        ], f"mask_type should be one of ['upper', 'lower', 'overall', 'inner', 'outer'], but got {mask_type}"
+        preprocess_results = self.preprocess_image(image)
+        mask = self.cloth_agnostic_mask(
+            preprocess_results["densepose"],
+            preprocess_results["schp_lip"],
+            preprocess_results["schp_atr"],
+            part=mask_type,
+        )
+        return {
+            "mask": mask,
+            "densepose": preprocess_results["densepose"],
+            "schp_lip": preprocess_results["schp_lip"],
+            "schp_atr": preprocess_results["schp_atr"],
+        }
+
+
+if __name__ == "__main__":
+    import sys
+
+    automasker = AutoMasker()
+
+    image_path = sys.argv[1]
+    image = Image.open(image_path).convert("RGB")
+    outputs = automasker(
+        image,
+        "upper",
+        # "lower",
+    )
+    mask = outputs["mask"]
+    # densepose = outputs["densepose"]  # DensePose I map, values in 0-24
+    # schp_lip = outputs["schp_lip"]
+    # schp_atr = outputs["schp_atr"]
+    mask.save(".".join(image_path.split(".")[:-1]) + "_mask.jpg")
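
The mask construction above is set algebra over three label maps: strongly protected regions (hands, feet, face) are never inpainted, weakly protected regions (hair, accessories, garments that should survive the swap) are subtracted before and after the convex-hull expansion, and the DensePose parts for the requested garment are force-included. `part_mask_of` is the primitive underneath; a toy check of its semantics (values chosen for illustration):

    import numpy as np
    from leffa_utils.garment_agnostic_mask_predictor import part_mask_of, ATR_MAPPING

    # 2x2 toy ATR parse: background, upper-clothes, pants, left-arm
    parse = np.array([[0, 4], [6, 14]])
    mask = part_mask_of(["Upper-clothes", "Pants"], parse, ATR_MAPPING)
    print(mask)  # [[0 1] [1 0]]: nonzero where a pixel belongs to any requested part
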
Leffa/leffa_utils/utils.py
ADDED
@@ -0,0 +1,379 @@
+import os
+import cv2
+import torch
+import numpy as np
+from numpy.linalg import lstsq
+from PIL import Image, ImageDraw
+
+
+def resize_and_center(image, target_width, target_height):
+    img = np.array(image)
+
+    if img.shape[-1] == 4:
+        img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
+    elif len(img.shape) == 2 or img.shape[-1] == 1:
+        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
+
+    original_height, original_width = img.shape[:2]
+
+    scale = min(target_height / original_height, target_width / original_width)
+    new_height = int(original_height * scale)
+    new_width = int(original_width * scale)
+
+    resized_img = cv2.resize(img, (new_width, new_height),
+                             interpolation=cv2.INTER_CUBIC)
+
+    # paste the resized image onto a white canvas, centered
+    padded_img = np.ones((target_height, target_width, 3),
+                         dtype=np.uint8) * 255
+
+    top = (target_height - new_height) // 2
+    left = (target_width - new_width) // 2
+
+    padded_img[top:top + new_height, left:left + new_width] = resized_img
+
+    return Image.fromarray(padded_img)
+
+
+def list_dir(folder_path):
+    # Collect all file paths within the directory
+    file_paths = []
+    for root, _, files in os.walk(folder_path):
+        for file in files:
+            file_paths.append(os.path.join(root, file))
+
+    file_paths = sorted(file_paths)
+    return file_paths
+
+
+label_map = {
+    "background": 0,
+    "hat": 1,
+    "hair": 2,
+    "sunglasses": 3,
+    "upper_clothes": 4,
+    "skirt": 5,
+    "pants": 6,
+    "dress": 7,
+    "belt": 8,
+    "left_shoe": 9,
+    "right_shoe": 10,
+    "head": 11,
+    "left_leg": 12,
+    "right_leg": 13,
+    "left_arm": 14,
+    "right_arm": 15,
+    "bag": 16,
+    "scarf": 17,
+    "neck": 18,
+}
+
+
+def extend_arm_mask(wrist, elbow, scale):
+    wrist = elbow + scale * (wrist - elbow)
+    return wrist
+
+
+def hole_fill(img):
+    # zero out a one-pixel border, flood-fill the background from the corner,
+    # then OR the inverted fill back to close interior holes
+    img = np.pad(img[1:-1, 1:-1], pad_width=1,
+                 mode='constant', constant_values=0)
+    img_copy = img.copy()
+    mask = np.zeros((img.shape[0] + 2, img.shape[1] + 2), dtype=np.uint8)
+
+    cv2.floodFill(img, mask, (0, 0), 255)
+    img_inverse = cv2.bitwise_not(img)
+    dst = cv2.bitwise_or(img_copy, img_inverse)
+    return dst
+
+
+def refine_mask(mask):
+    # keep only the contour with the largest area
+    contours, hierarchy = cv2.findContours(mask.astype(np.uint8),
+                                           cv2.RETR_CCOMP, cv2.CHAIN_APPROX_TC89_L1)
+    area = []
+    for j in range(len(contours)):
+        a_d = cv2.contourArea(contours[j], True)
+        area.append(abs(a_d))
+    refine_mask = np.zeros_like(mask).astype(np.uint8)
+    if len(area) != 0:
+        i = area.index(max(area))
+        cv2.drawContours(refine_mask, contours, i, color=255, thickness=-1)
+
+    return refine_mask
+
+
+def get_agnostic_mask_hd(model_parse, keypoint, category, size=(384, 512)):
+    model_type = "hd"
+    ##############################
+    width, height = size
+    im_parse = model_parse.resize((width, height), Image.NEAREST)
+    parse_array = np.array(im_parse)
+
+    if model_type == 'hd':
+        arm_width = 60
+    elif model_type == 'dc':
+        arm_width = 45
+    else:
+        raise ValueError("model_type must be 'hd' or 'dc'!")
+
+    parse_head = (parse_array == 1).astype(np.float32) + \
+        (parse_array == 3).astype(np.float32) + \
+        (parse_array == 11).astype(np.float32)
+
+    parser_mask_fixed = (parse_array == label_map["left_shoe"]).astype(np.float32) + \
+        (parse_array == label_map["right_shoe"]).astype(np.float32) + \
+        (parse_array == label_map["hat"]).astype(np.float32) + \
+        (parse_array == label_map["sunglasses"]).astype(np.float32) + \
+        (parse_array == label_map["bag"]).astype(np.float32)
+
+    parser_mask_changeable = (
+        parse_array == label_map["background"]).astype(np.float32)
+
+    arms_left = (parse_array == 14).astype(np.float32)
+    arms_right = (parse_array == 15).astype(np.float32)
+
+    if category == 'dresses':
+        parse_mask = (parse_array == 7).astype(np.float32) + \
+            (parse_array == 4).astype(np.float32) + \
+            (parse_array == 5).astype(np.float32) + \
+            (parse_array == 6).astype(np.float32)
+
+        parser_mask_changeable += np.logical_and(
+            parse_array, np.logical_not(parser_mask_fixed))
+
+    elif category == 'upper_body':
+        parse_mask = (parse_array == 4).astype(np.float32) + \
+            (parse_array == 7).astype(np.float32)
+        parser_mask_fixed_lower_cloth = (parse_array == label_map["skirt"]).astype(np.float32) + \
+            (parse_array == label_map["pants"]).astype(np.float32)
+        parser_mask_fixed += parser_mask_fixed_lower_cloth
+        parser_mask_changeable += np.logical_and(
+            parse_array, np.logical_not(parser_mask_fixed))
+    elif category == 'lower_body':
+        parse_mask = (parse_array == 6).astype(np.float32) + \
+            (parse_array == 12).astype(np.float32) + \
+            (parse_array == 13).astype(np.float32) + \
+            (parse_array == 5).astype(np.float32)
+        parser_mask_fixed += (parse_array == label_map["upper_clothes"]).astype(np.float32) + \
+            (parse_array == 14).astype(np.float32) + \
+            (parse_array == 15).astype(np.float32)
+        parser_mask_changeable += np.logical_and(
+            parse_array, np.logical_not(parser_mask_fixed))
+    else:
+        raise NotImplementedError
+
+    # Load pose points
+    pose_data = keypoint["pose_keypoints_2d"]
+    pose_data = np.array(pose_data)
+    pose_data = pose_data.reshape((-1, 2))
+
+    im_arms_left = Image.new('L', (width, height))
+    im_arms_right = Image.new('L', (width, height))
+    arms_draw_left = ImageDraw.Draw(im_arms_left)
+    arms_draw_right = ImageDraw.Draw(im_arms_right)
+    if category == 'dresses' or category == 'upper_body':
+        shoulder_right = np.multiply(tuple(pose_data[2][:2]), height / 512.0)
+        shoulder_left = np.multiply(tuple(pose_data[5][:2]), height / 512.0)
+        elbow_right = np.multiply(tuple(pose_data[3][:2]), height / 512.0)
+        elbow_left = np.multiply(tuple(pose_data[6][:2]), height / 512.0)
+        wrist_right = np.multiply(tuple(pose_data[4][:2]), height / 512.0)
+        wrist_left = np.multiply(tuple(pose_data[7][:2]), height / 512.0)
+        ARM_LINE_WIDTH = int(arm_width / 512 * height)
+        size_left = [
+            shoulder_left[0] - ARM_LINE_WIDTH // 2,
+            shoulder_left[1] - ARM_LINE_WIDTH // 2,
+            shoulder_left[0] + ARM_LINE_WIDTH // 2,
+            shoulder_left[1] + ARM_LINE_WIDTH // 2,
+        ]
+        size_right = [
+            shoulder_right[0] - ARM_LINE_WIDTH // 2,
+            shoulder_right[1] - ARM_LINE_WIDTH // 2,
+            shoulder_right[0] + ARM_LINE_WIDTH // 2,
+            shoulder_right[1] + ARM_LINE_WIDTH // 2,
+        ]
+
+        # a wrist at (or near) the origin means the keypoint was not detected
+        if wrist_right[0] <= 1. and wrist_right[1] <= 1.:
+            im_arms_right = arms_right
+        else:
+            wrist_right = extend_arm_mask(wrist_right, elbow_right, 1.2)
+            arms_draw_right.line(np.concatenate((shoulder_right, elbow_right, wrist_right)).astype(
+                np.uint16).tolist(), 'white', ARM_LINE_WIDTH, 'curve')
+            arms_draw_right.arc(size_right, 0, 360,
+                                'white', ARM_LINE_WIDTH // 2)
+
+        if wrist_left[0] <= 1. and wrist_left[1] <= 1.:
+            im_arms_left = arms_left
+        else:
+            wrist_left = extend_arm_mask(wrist_left, elbow_left, 1.2)
+            arms_draw_left.line(np.concatenate((wrist_left, elbow_left, shoulder_left)).astype(
+                np.uint16).tolist(), 'white', ARM_LINE_WIDTH, 'curve')
+            arms_draw_left.arc(size_left, 0, 360, 'white', ARM_LINE_WIDTH // 2)
+
+        hands_left = np.logical_and(np.logical_not(im_arms_left), arms_left)
+        hands_right = np.logical_and(np.logical_not(im_arms_right), arms_right)
+        parser_mask_fixed += hands_left + hands_right
+
+    parser_mask_fixed = cv2.erode(parser_mask_fixed, np.ones(
+        (5, 5), np.uint16), iterations=1)
+
+    parser_mask_fixed = np.logical_or(parser_mask_fixed, parse_head)
+    parse_mask = cv2.dilate(parse_mask, np.ones(
+        (10, 10), np.uint16), iterations=5)
+    if category == 'dresses' or category == 'upper_body':
+        neck_mask = (parse_array == 18).astype(np.float32)
+        neck_mask = cv2.dilate(neck_mask, np.ones(
+            (5, 5), np.uint16), iterations=1)
+        neck_mask = np.logical_and(neck_mask, np.logical_not(parse_head))
+        parse_mask = np.logical_or(parse_mask, neck_mask)
+        arm_mask = cv2.dilate(np.logical_or(im_arms_left, im_arms_right).astype(
+            'float32'), np.ones((5, 5), np.uint16), iterations=4)
+        parse_mask += np.logical_or(parse_mask, arm_mask)
+
+    parse_mask = np.logical_and(
+        parser_mask_changeable, np.logical_not(parse_mask))
+
+    parse_mask_total = np.logical_or(parse_mask, parser_mask_fixed)
+    inpaint_mask = 1 - parse_mask_total
+    img = np.where(inpaint_mask, 255, 0)
+    dst = hole_fill(img.astype(np.uint8))
+    dst = refine_mask(dst)
+    inpaint_mask = dst / 255
+    mask = Image.fromarray(inpaint_mask.astype(np.uint8) * 255)
+
+    return mask
+
+
+def get_agnostic_mask_dc(model_parse, keypoint, category, size=(384, 512)):
+    parse_array = np.array(model_parse)
+    pose_data = keypoint["pose_keypoints_2d"]
+    pose_data = np.array(pose_data)
+    pose_data = pose_data.reshape((-1, 2))
+
+    parse_shape = (parse_array > 0).astype(np.float32)
+
+    parse_head = (parse_array == 1).astype(np.float32) + \
+        (parse_array == 2).astype(np.float32) + \
+        (parse_array == 3).astype(np.float32) + \
+        (parse_array == 11).astype(np.float32) + \
+        (parse_array == 18).astype(np.float32)
+
+    parser_mask_fixed = (parse_array == label_map["hair"]).astype(np.float32) + \
+        (parse_array == label_map["left_shoe"]).astype(np.float32) + \
+        (parse_array == label_map["right_shoe"]).astype(np.float32) + \
+        (parse_array == label_map["hat"]).astype(np.float32) + \
+        (parse_array == label_map["sunglasses"]).astype(np.float32) + \
+        (parse_array == label_map["scarf"]).astype(np.float32) + \
+        (parse_array == label_map["bag"]).astype(np.float32)
+
+    parser_mask_changeable = (
+        parse_array == label_map["background"]).astype(np.float32)
+
+    arms = (parse_array == 14).astype(np.float32) + \
+        (parse_array == 15).astype(np.float32)
+
+    if category == 'dresses':
+        label_cat = 7
+        parse_mask = (parse_array == 7).astype(np.float32) + \
+            (parse_array == 12).astype(np.float32) + \
+            (parse_array == 13).astype(np.float32)
+        parser_mask_changeable += np.logical_and(
+            parse_array, np.logical_not(parser_mask_fixed))
+
+    elif category == 'upper_body':
+        label_cat = 4
+        parse_mask = (parse_array == 4).astype(np.float32)
+
+        parser_mask_fixed += (parse_array == label_map["skirt"]).astype(np.float32) + \
+            (parse_array == label_map["pants"]).astype(np.float32)
+
+        parser_mask_changeable += np.logical_and(
+            parse_array, np.logical_not(parser_mask_fixed))
+    elif category == 'lower_body':
+        label_cat = 6
+        parse_mask = (parse_array == 6).astype(np.float32) + \
+            (parse_array == 12).astype(np.float32) + \
+            (parse_array == 13).astype(np.float32)
+
+        parser_mask_fixed += (parse_array == label_map["upper_clothes"]).astype(np.float32) + \
+            (parse_array == 14).astype(np.float32) + \
+            (parse_array == 15).astype(np.float32)
+        parser_mask_changeable += np.logical_and(
+            parse_array, np.logical_not(parser_mask_fixed))
+
+    parse_head = torch.from_numpy(parse_head)  # [0, 1]
+    parse_mask = torch.from_numpy(parse_mask)  # [0, 1]
+    parser_mask_fixed = torch.from_numpy(parser_mask_fixed)
+    parser_mask_changeable = torch.from_numpy(parser_mask_changeable)
+
+    # dilation
+    parse_without_cloth = np.logical_and(
+        parse_shape, np.logical_not(parse_mask))
+    parse_mask = parse_mask.cpu().numpy()
+
+    width = size[0]
+    height = size[1]
+
+    im_arms = Image.new('L', (width, height))
+    arms_draw = ImageDraw.Draw(im_arms)
+    if category == 'dresses' or category == 'upper_body':
+        shoulder_right = tuple(np.multiply(pose_data[2, :2], height / 512.0))
+        shoulder_left = tuple(np.multiply(pose_data[5, :2], height / 512.0))
+        elbow_right = tuple(np.multiply(pose_data[3, :2], height / 512.0))
+        elbow_left = tuple(np.multiply(pose_data[6, :2], height / 512.0))
+        wrist_right = tuple(np.multiply(pose_data[4, :2], height / 512.0))
+        wrist_left = tuple(np.multiply(pose_data[7, :2], height / 512.0))
+        # skip undetected keypoints (reported at the origin) when drawing the arm line
+        if wrist_right[0] <= 1. and wrist_right[1] <= 1.:
+            if elbow_right[0] <= 1. and elbow_right[1] <= 1.:
+                arms_draw.line(
+                    [wrist_left, elbow_left, shoulder_left, shoulder_right], 'white', 30, 'curve')
+            else:
+                arms_draw.line([wrist_left, elbow_left, shoulder_left, shoulder_right, elbow_right], 'white', 30,
+                               'curve')
+        elif wrist_left[0] <= 1. and wrist_left[1] <= 1.:
+            if elbow_left[0] <= 1. and elbow_left[1] <= 1.:
+                arms_draw.line([shoulder_left, shoulder_right,
+                                elbow_right, wrist_right], 'white', 30, 'curve')
+            else:
+                arms_draw.line([elbow_left, shoulder_left, shoulder_right, elbow_right, wrist_right], 'white', 30,
+                               'curve')
+        else:
+            arms_draw.line([wrist_left, elbow_left, shoulder_left, shoulder_right, elbow_right, wrist_right], 'white',
+                           30, 'curve')
+
+        if height > 512:
+            im_arms = cv2.dilate(np.float32(im_arms), np.ones(
+                (10, 10), np.uint16), iterations=5)
+        elif height > 256:
+            im_arms = cv2.dilate(np.float32(im_arms), np.ones(
+                (5, 5), np.uint16), iterations=5)
+        hands = np.logical_and(np.logical_not(im_arms), arms)
+        parse_mask += im_arms
+        parser_mask_fixed += hands
+
+    # delete neck
+    parse_head_2 = torch.clone(parse_head)
+    if category == 'dresses' or category == 'upper_body':
+        points = []
+        points.append(np.multiply(pose_data[2, :2], height / 512.0))
+        points.append(np.multiply(pose_data[5, :2], height / 512.0))
+        x_coords, y_coords = zip(*points)
+        # fit a line through the two shoulder keypoints and zero out everything
+        # below it (with a small margin), removing the neck from the head mask
+        A = np.vstack([x_coords, np.ones(len(x_coords))]).T
+        m, c = lstsq(A, y_coords, rcond=None)[0]
+        for i in range(parse_array.shape[1]):
+            y = i * m + c
+            parse_head_2[int(y - 20 * (height / 512.0)):, i] = 0
+
+    parser_mask_fixed = np.logical_or(
+        parser_mask_fixed, np.array(parse_head_2, dtype=np.uint16))
+    parse_mask += np.logical_or(parse_mask, np.logical_and(np.array(parse_head, dtype=np.uint16),
+                                                           np.logical_not(np.array(parse_head_2, dtype=np.uint16))))
+
+    if height > 512:
+        parse_mask = cv2.dilate(parse_mask, np.ones(
+            (20, 20), np.uint16), iterations=5)
+    elif height > 256:
+        parse_mask = cv2.dilate(parse_mask, np.ones(
+            (10, 10), np.uint16), iterations=5)
+    else:
+        parse_mask = cv2.dilate(parse_mask, np.ones(
+            (5, 5), np.uint16), iterations=5)
+    parse_mask = np.logical_and(
+        parser_mask_changeable, np.logical_not(parse_mask))
+    parse_mask_total = np.logical_or(parse_mask, parser_mask_fixed)
+    inpaint_mask = 1 - parse_mask_total
+    img = np.where(inpaint_mask, 255, 0)
+    img = hole_fill(img.astype(np.uint8))
+    inpaint_mask = img / 255
+    mask = Image.fromarray(inpaint_mask.astype(np.uint8) * 255)
+    return mask
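
`hole_fill` closes interior holes by flood-filling the background from the top-left corner and OR-ing the inverted fill back onto the original mask; `refine_mask` then keeps only the largest contour, and `get_agnostic_mask_hd` applies both to clean up the final inpainting mask. A toy check of the hole-filling step (0/255 uint8 values, shapes chosen for illustration):

    import numpy as np
    from leffa_utils.utils import hole_fill

    mask = np.zeros((5, 5), np.uint8)
    mask[1:4, 1:4] = 255   # a 3x3 white square...
    mask[2, 2] = 0         # ...with a one-pixel hole in the middle
    filled = hole_fill(mask)
    assert filled[2, 2] == 255  # the interior hole is closed
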
Leffa/preprocess/humanparsing/mhp_extension/detectron2/docker/Dockerfile
ADDED
@@ -0,0 +1,49 @@
+FROM nvidia/cuda:10.1-cudnn7-devel
+
+ENV DEBIAN_FRONTEND noninteractive
+RUN apt-get update && apt-get install -y \
+  python3-opencv ca-certificates python3-dev git wget sudo \
+  cmake ninja-build protobuf-compiler libprotobuf-dev && \
+  rm -rf /var/lib/apt/lists/*
+RUN ln -sv /usr/bin/python3 /usr/bin/python
+
+# create a non-root user
+ARG USER_ID=1000
+RUN useradd -m --no-log-init --system --uid ${USER_ID} appuser -g sudo
+RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+USER appuser
+WORKDIR /home/appuser
+
+ENV PATH="/home/appuser/.local/bin:${PATH}"
+RUN wget https://bootstrap.pypa.io/get-pip.py && \
+  python3 get-pip.py --user && \
+  rm get-pip.py
+
+# install dependencies
+# See https://pytorch.org/ for other options if you use a different version of CUDA
+RUN pip install --user tensorboard cython
+RUN pip install --user torch==1.5+cu101 torchvision==0.6+cu101 -f https://download.pytorch.org/whl/torch_stable.html
+RUN pip install --user 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'
+
+RUN pip install --user 'git+https://github.com/facebookresearch/fvcore'
+# install detectron2
+RUN git clone https://github.com/facebookresearch/detectron2 detectron2_repo
+# set FORCE_CUDA because during `docker build` cuda is not accessible
+ENV FORCE_CUDA="1"
+# This will by default build detectron2 for all common cuda architectures and take a lot more time,
+# because inside `docker build`, there is no way to tell which architecture will be used.
+ARG TORCH_CUDA_ARCH_LIST="Kepler;Kepler+Tesla;Maxwell;Maxwell+Tegra;Pascal;Volta;Turing"
+ENV TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST}"
+
+RUN pip install --user -e detectron2_repo
+
+# Set a fixed model cache directory.
+ENV FVCORE_CACHE="/tmp"
+WORKDIR /home/appuser/detectron2_repo
+
+# run detectron2 under user "appuser":
+# wget http://images.cocodataset.org/val2017/000000439715.jpg -O input.jpg
+# python3 demo/demo.py \
+#   --config-file configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \
+#   --input input.jpg --output outputs/ \
+#   --opts MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl
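
For reference, this is the stock detectron2 Dockerfile; the usual way to use it (the tag name is arbitrary, and `USER_ID` should match the host user so files written into mounted volumes keep sane ownership) is something like:

    # build from the directory containing this Dockerfile, then start a GPU container
    docker build --build-arg USER_ID=$(id -u) -t detectron2:v0 .
    docker run --gpus all -it --name detectron2 detectron2:v0
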
Leffa/preprocess/humanparsing/mhp_extension/detectron2/docker/Dockerfile-circleci
ADDED
@@ -0,0 +1,17 @@
+FROM nvidia/cuda:10.1-cudnn7-devel
+# This dockerfile only aims to provide an environment for running unit tests on CircleCI
+
+ENV DEBIAN_FRONTEND noninteractive
+RUN apt-get update && apt-get install -y \
+  python3-opencv ca-certificates python3-dev git wget sudo ninja-build && \
+  rm -rf /var/lib/apt/lists/*
+
+RUN wget -q https://bootstrap.pypa.io/get-pip.py && \
+  python3 get-pip.py && \
+  rm get-pip.py
+
+# install dependencies
+# See https://pytorch.org/ for other options if you use a different version of CUDA
+RUN pip install tensorboard cython
+RUN pip install torch==1.5+cu101 torchvision==0.6+cu101 -f https://download.pytorch.org/whl/torch_stable.html
+RUN pip install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'