Add files using upload-large-folder tool
Browse files. This view is limited to 50 files because it contains too many changes. See the raw diff.
- Leffa/3rdparty/densepose/__init__.py +20 -0
- Leffa/3rdparty/densepose/config.py +277 -0
- Leffa/3rdparty/densepose/converters/__init__.py +15 -0
- Leffa/3rdparty/densepose/converters/base.py +93 -0
- Leffa/3rdparty/densepose/converters/builtin.py +31 -0
- Leffa/3rdparty/densepose/converters/chart_output_hflip.py +71 -0
- Leffa/3rdparty/densepose/converters/chart_output_to_chart_result.py +188 -0
- Leffa/3rdparty/densepose/converters/hflip.py +34 -0
- Leffa/3rdparty/densepose/converters/segm_to_mask.py +150 -0
- Leffa/3rdparty/densepose/converters/to_chart_result.py +70 -0
- Leffa/3rdparty/densepose/converters/to_mask.py +49 -0
- Leffa/3rdparty/densepose/engine/__init__.py +3 -0
- Leffa/3rdparty/densepose/engine/trainer.py +258 -0
- Leffa/3rdparty/densepose/modeling/__init__.py +13 -0
- Leffa/3rdparty/densepose/modeling/build.py +87 -0
- Leffa/3rdparty/densepose/modeling/confidence.py +73 -0
- Leffa/3rdparty/densepose/modeling/densepose_checkpoint.py +35 -0
- Leffa/3rdparty/densepose/modeling/filter.py +94 -0
- Leffa/3rdparty/densepose/modeling/hrfpn.py +182 -0
- Leffa/3rdparty/densepose/modeling/hrnet.py +474 -0
- Leffa/3rdparty/densepose/modeling/inference.py +44 -0
- Leffa/3rdparty/densepose/modeling/losses/__init__.py +14 -0
- Leffa/3rdparty/densepose/modeling/losses/chart.py +291 -0
- Leffa/3rdparty/densepose/modeling/losses/chart_with_confidences.py +209 -0
- Leffa/3rdparty/densepose/modeling/losses/cse.py +115 -0
- Leffa/3rdparty/densepose/modeling/losses/cycle_pix2shape.py +152 -0
- Leffa/3rdparty/densepose/modeling/losses/cycle_shape2shape.py +117 -0
- Leffa/3rdparty/densepose/modeling/losses/embed.py +119 -0
- Leffa/3rdparty/densepose/modeling/losses/embed_utils.py +137 -0
- Leffa/3rdparty/densepose/modeling/losses/mask.py +125 -0
- Leffa/3rdparty/densepose/modeling/losses/mask_or_segm.py +77 -0
- Leffa/3rdparty/densepose/modeling/losses/registry.py +5 -0
- Leffa/3rdparty/densepose/modeling/losses/soft_embed.py +133 -0
- Leffa/3rdparty/densepose/modeling/losses/utils.py +443 -0
- Leffa/3rdparty/densepose/modeling/predictors/__init__.py +9 -0
- Leffa/3rdparty/densepose/modeling/predictors/chart.py +94 -0
- Leffa/3rdparty/densepose/modeling/predictors/chart_confidence.py +174 -0
- Leffa/3rdparty/densepose/modeling/predictors/chart_with_confidence.py +15 -0
- Leffa/3rdparty/densepose/modeling/predictors/cse.py +70 -0
- Leffa/3rdparty/densepose/modeling/predictors/cse_confidence.py +115 -0
- Leffa/3rdparty/densepose/modeling/predictors/cse_with_confidence.py +15 -0
- Leffa/3rdparty/densepose/modeling/predictors/registry.py +5 -0
- Leffa/3rdparty/densepose/modeling/roi_heads/__init__.py +6 -0
- Leffa/3rdparty/densepose/modeling/roi_heads/deeplab.py +263 -0
- Leffa/3rdparty/densepose/modeling/roi_heads/registry.py +5 -0
- Leffa/3rdparty/densepose/modeling/roi_heads/roi_head.py +218 -0
- Leffa/3rdparty/densepose/modeling/roi_heads/v1convx.py +64 -0
- Leffa/3rdparty/densepose/modeling/test_time_augmentation.py +207 -0
- Leffa/3rdparty/densepose/modeling/utils.py +11 -0
- Leffa/3rdparty/densepose/utils/__init__.py +0 -0
Leffa/3rdparty/densepose/__init__.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
from .data.datasets import builtin # just to register data
|
| 3 |
+
from .converters import builtin as builtin_converters # register converters
|
| 4 |
+
from .config import (
|
| 5 |
+
add_densepose_config,
|
| 6 |
+
add_densepose_head_config,
|
| 7 |
+
add_hrnet_config,
|
| 8 |
+
add_dataset_category_config,
|
| 9 |
+
add_bootstrap_config,
|
| 10 |
+
load_bootstrap_config,
|
| 11 |
+
)
|
| 12 |
+
from .structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData
|
| 13 |
+
from .evaluation import DensePoseCOCOEvaluator
|
| 14 |
+
from .modeling.roi_heads import DensePoseROIHeads
|
| 15 |
+
from .modeling.test_time_augmentation import (
|
| 16 |
+
DensePoseGeneralizedRCNNWithTTA,
|
| 17 |
+
DensePoseDatasetMapperTTA,
|
| 18 |
+
)
|
| 19 |
+
from .utils.transform import load_from_cfg
|
| 20 |
+
from .modeling.hrfpn import build_hrfpn_backbone
|
Leffa/3rdparty/densepose/config.py
ADDED
|
@@ -0,0 +1,277 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 3 |
+
# pyre-ignore-all-errors
|
| 4 |
+
|
| 5 |
+
from detectron2.config import CfgNode as CN
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def add_dataset_category_config(cfg: CN) -> None:
    """
    Add config options for category-related dataset handling:
    category whitelisting, category mapping, and class-to-mesh-name mapping.
    """
    datasets = cfg.DATASETS
    # All three nodes accept arbitrary keys supplied by the yaml config.
    datasets.CATEGORY_MAPS = CN(new_allowed=True)
    datasets.WHITELISTED_CATEGORIES = CN(new_allowed=True)
    # class to mesh mapping
    datasets.CLASS_TO_MESH_NAME_MAPPING = CN(new_allowed=True)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def add_evaluation_config(cfg: CN) -> None:
    """
    Add config options controlling DensePose evaluation.
    """
    cfg.DENSEPOSE_EVALUATION = CN()
    node = cfg.DENSEPOSE_EVALUATION
    # evaluator type, possible values:
    #  - "iou": evaluator for models that produce iou data
    #  - "cse": evaluator for models that produce cse data
    node.TYPE = "iou"
    # storage for DensePose results, possible values:
    #  - "none": no explicit storage, all results kept in the predictions
    #    dictionary; memory intensive, historically the default
    #  - "ram": per-process RAM storage, reduced to a single-process storage
    #    at later stages; less memory intensive
    #  - "file": per-process file-based storage; the least memory intensive,
    #    but may create bottlenecks on file system accesses
    node.STORAGE = "none"
    # minimum threshold for IOU values: the lower it is, the more matches
    # are produced (and the higher the AP score)
    node.MIN_IOU_THRESHOLD = 0.5
    # Non-distributed inference is slower (at inference time) but can avoid RAM OOM
    node.DISTRIBUTED_INFERENCE = True
    # evaluate mesh alignment based on vertex embeddings; only makes sense
    # in the CSE context
    node.EVALUATE_MESH_ALIGNMENT = False
    # meshes to compute mesh alignment for
    node.MESH_ALIGNMENT_MESH_NAMES = []
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def add_bootstrap_config(cfg: CN) -> None:
    """
    Add config options for bootstrap datasets and the bootstrap model.
    """
    # List of bootstrap dataset entries (plain dicts, normalized later by
    # load_bootstrap_config).
    cfg.BOOTSTRAP_DATASETS = []
    cfg.BOOTSTRAP_MODEL = CN()
    cfg.BOOTSTRAP_MODEL.WEIGHTS = ""
    cfg.BOOTSTRAP_MODEL.DEVICE = "cuda"
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def get_bootstrap_dataset_config() -> CN:
    """
    Build and return the default config node describing a single bootstrap
    dataset entry: dataset name, mixing ratio, image loader, inference
    batching, data sampler and filter settings.
    """
    node = CN()
    node.DATASET = ""
    # ratio used to mix data loaders
    node.RATIO = 0.1

    # image loader
    node.IMAGE_LOADER = CN(new_allowed=True)
    node.IMAGE_LOADER.TYPE = ""
    node.IMAGE_LOADER.BATCH_SIZE = 4
    node.IMAGE_LOADER.NUM_WORKERS = 4
    node.IMAGE_LOADER.CATEGORIES = []
    node.IMAGE_LOADER.MAX_COUNT_PER_CATEGORY = 1_000_000
    node.IMAGE_LOADER.CATEGORY_TO_CLASS_MAPPING = CN(new_allowed=True)

    # inference
    node.INFERENCE = CN()
    # batch size for model inputs
    node.INFERENCE.INPUT_BATCH_SIZE = 4
    # batch size to group model outputs
    node.INFERENCE.OUTPUT_BATCH_SIZE = 2

    # sampled data
    node.DATA_SAMPLER = CN(new_allowed=True)
    node.DATA_SAMPLER.TYPE = ""
    node.DATA_SAMPLER.USE_GROUND_TRUTH_CATEGORIES = False

    # filter
    node.FILTER = CN(new_allowed=True)
    node.FILTER.TYPE = ""
    return node
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def load_bootstrap_config(cfg: CN) -> None:
    """
    Normalize ``cfg.BOOTSTRAP_DATASETS`` entries.

    Bootstrap datasets are given as a list of plain ``dict`` objects that are
    not automatically converted into ``CfgNode``. Each entry is merged into a
    clone of the default bootstrap-dataset config so that every entry becomes
    a fully-specified ``CfgNode`` complying with the specification.
    """
    raw_entries = cfg.BOOTSTRAP_DATASETS
    if not raw_entries:
        return

    def _to_cfgnode(entry) -> CN:
        # Start from the defaults and overlay the user-provided values.
        node = get_bootstrap_dataset_config().clone()
        node.merge_from_other_cfg(CN(entry))
        return node

    cfg.BOOTSTRAP_DATASETS = [_to_cfgnode(entry) for entry in raw_entries]
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def add_densepose_head_cse_config(cfg: CN) -> None:
    """
    Add configuration options for Continuous Surface Embeddings (CSE).
    """
    cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE = CN()
    cse = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE
    # Dimensionality D of the embedding space
    cse.EMBED_SIZE = 16
    # Embedder specifications for various mesh IDs
    cse.EMBEDDERS = CN(new_allowed=True)
    # normalization coefficient for embedding distances
    cse.EMBEDDING_DIST_GAUSS_SIGMA = 0.01
    # normalization coefficient for geodesic distances
    cse.GEODESIC_DIST_GAUSS_SIGMA = 0.01
    # embedding loss weight
    cse.EMBED_LOSS_WEIGHT = 0.6
    # embedding loss name; currently supported options:
    #  - EmbeddingLoss: cross-entropy on vertex labels
    #  - SoftEmbeddingLoss: cross-entropy on vertex labels combined with a
    #    Gaussian penalty on the distance between vertices
    cse.EMBED_LOSS_NAME = "EmbeddingLoss"
    # optimizer hyperparameters
    cse.FEATURES_LR_FACTOR = 1.0
    cse.EMBEDDING_LR_FACTOR = 1.0

    # Shape-to-shape cycle consistency loss parameters
    s2s = CN({"ENABLED": False})
    # shape to shape cycle consistency loss weight
    s2s.WEIGHT = 0.025
    # norm type used for loss computation
    s2s.NORM_P = 2
    # normalization term for embedding similarity matrices
    s2s.TEMPERATURE = 0.05
    # maximum number of vertices to include into the loss:
    # if negative or zero, all vertices are considered;
    # if positive, a random subset of the given size is considered
    s2s.MAX_NUM_VERTICES = 4936
    cse.SHAPE_TO_SHAPE_CYCLE_LOSS = s2s

    # Pixel-to-shape cycle consistency loss parameters
    p2s = CN({"ENABLED": False})
    # pixel to shape cycle consistency loss weight
    p2s.WEIGHT = 0.0001
    # norm type used for loss computation
    p2s.NORM_P = 2
    # map images to all meshes and back (if false, use only gt meshes from the batch)
    p2s.USE_ALL_MESHES_NOT_GT_ONLY = False
    # randomly select at most this many pixels per instance;
    # if negative or zero, all vertices are considered
    p2s.NUM_PIXELS_TO_SAMPLE = 100
    # normalization factor for pixel-to-pixel distances
    # (higher value = smoother distribution)
    p2s.PIXEL_SIGMA = 5.0
    p2s.TEMPERATURE_PIXEL_TO_VERTEX = 0.05
    p2s.TEMPERATURE_VERTEX_TO_PIXEL = 0.05
    cse.PIX_TO_SHAPE_CYCLE_LOSS = p2s
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
def add_densepose_head_config(cfg: CN) -> None:
    """
    Add config options for the DensePose head, plus the related
    input/test-time rotation options, and the nested CSE options.
    """
    cfg.MODEL.DENSEPOSE_ON = True

    cfg.MODEL.ROI_DENSEPOSE_HEAD = CN()
    head = cfg.MODEL.ROI_DENSEPOSE_HEAD
    head.NAME = ""
    head.NUM_STACKED_CONVS = 8
    # Number of parts used for point labels
    head.NUM_PATCHES = 24
    head.DECONV_KERNEL = 4
    head.CONV_HEAD_DIM = 512
    head.CONV_HEAD_KERNEL = 3
    head.UP_SCALE = 2
    head.HEATMAP_SIZE = 112
    head.POOLER_TYPE = "ROIAlignV2"
    head.POOLER_RESOLUTION = 28
    head.POOLER_SAMPLING_RATIO = 2
    head.NUM_COARSE_SEGM_CHANNELS = 2  # 15 or 2
    # Overlap threshold for an RoI to be considered foreground (if >= FG_IOU_THRESHOLD)
    head.FG_IOU_THRESHOLD = 0.7
    # Loss weights for annotation masks (14 parts)
    head.INDEX_WEIGHTS = 5.0
    # Loss weights for surface parts (24 parts)
    head.PART_WEIGHTS = 1.0
    # Loss weights for UV regression
    head.POINT_REGRESSION_WEIGHTS = 0.01
    # Coarse segmentation is trained using instance segmentation task data
    head.COARSE_SEGM_TRAINED_BY_MASKS = False

    # Decoder options
    head.DECODER_ON = True
    head.DECODER_NUM_CLASSES = 256
    head.DECODER_CONV_DIMS = 256
    head.DECODER_NORM = ""
    head.DECODER_COMMON_STRIDE = 4

    # DeepLab head options
    head.DEEPLAB = CN()
    head.DEEPLAB.NORM = "GN"
    head.DEEPLAB.NONLOCAL_ON = 0

    # Predictor class name, must be registered in DENSEPOSE_PREDICTOR_REGISTRY.
    # Some registered predictors:
    #  - "DensePoseChartPredictor": segmentation and UV coordinates for
    #    predefined charts
    #  - "DensePoseChartWithConfidencePredictor": segmentation, UV coordinates
    #    and associated confidences for predefined charts (default)
    #  - "DensePoseEmbeddingWithConfidencePredictor": segmentation, embeddings
    #    and associated confidences for CSE
    head.PREDICTOR_NAME = "DensePoseChartWithConfidencePredictor"
    # Loss class name, must be registered in DENSEPOSE_LOSS_REGISTRY.
    # Some registered losses:
    #  - "DensePoseChartLoss": for chart-based models estimating segmentation
    #    and UV coordinates
    #  - "DensePoseChartWithConfidenceLoss": for chart-based models estimating
    #    segmentation, UV coordinates and the corresponding confidences (default)
    head.LOSS_NAME = "DensePoseChartWithConfidenceLoss"

    # Confidences
    # Enable learning UV confidences (variances) along with the actual values
    head.UV_CONFIDENCE = CN({"ENABLED": False})
    # UV confidence lower bound
    head.UV_CONFIDENCE.EPSILON = 0.01
    # Enable learning segmentation confidences (variances) along with the actual values
    head.SEGM_CONFIDENCE = CN({"ENABLED": False})
    # Segmentation confidence lower bound
    head.SEGM_CONFIDENCE.EPSILON = 0.01
    # Statistical model type for confidence learning, possible values:
    #  - "iid_iso": statistically independent identically distributed
    #    residuals with isotropic covariance
    #  - "indep_aniso": statistically independent residuals with anisotropic
    #    covariances
    head.UV_CONFIDENCE.TYPE = "iid_iso"

    # List of angles for rotation in data augmentation during training
    cfg.INPUT.ROTATION_ANGLES = [0]
    cfg.TEST.AUG.ROTATION_ANGLES = ()  # Rotation TTA

    add_densepose_head_cse_config(cfg)
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
def add_hrnet_config(cfg: CN) -> None:
    """
    Add config options for the HRNet backbone.

    Defaults correspond to HigherHRNet w32.
    """
    cfg.MODEL.HRNET = CN()
    cfg.MODEL.HRNET.STEM_INPLANES = 64

    # (num modules, num branches, blocks per branch, channels per branch)
    stage_specs = {
        "STAGE2": (1, 2, [4, 4], [32, 64]),
        "STAGE3": (4, 3, [4, 4, 4], [32, 64, 128]),
        "STAGE4": (3, 4, [4, 4, 4, 4], [32, 64, 128, 256]),
    }
    for stage_name, (num_modules, num_branches, num_blocks, num_channels) in stage_specs.items():
        stage = CN()
        stage.NUM_MODULES = num_modules
        stage.NUM_BRANCHES = num_branches
        stage.BLOCK = "BASIC"
        stage.NUM_BLOCKS = num_blocks
        stage.NUM_CHANNELS = num_channels
        stage.FUSE_METHOD = "SUM"
        setattr(cfg.MODEL.HRNET, stage_name, stage)

    cfg.MODEL.HRNET.HRFPN = CN()
    cfg.MODEL.HRNET.HRFPN.OUT_CHANNELS = 256
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
def add_densepose_config(cfg: CN) -> None:
    """
    Add all DensePose-specific options to ``cfg``.
    """
    # Applied in the original order; add_densepose_head_config also pulls in
    # the CSE sub-options.
    for add_options in (
        add_densepose_head_config,
        add_hrnet_config,
        add_bootstrap_config,
        add_dataset_category_config,
        add_evaluation_config,
    ):
        add_options(cfg)
|
Leffa/3rdparty/densepose/converters/__init__.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from .hflip import HFlipConverter
|
| 4 |
+
from .to_mask import ToMaskConverter
|
| 5 |
+
from .to_chart_result import ToChartResultConverter, ToChartResultConverterWithConfidences
|
| 6 |
+
from .segm_to_mask import (
|
| 7 |
+
predictor_output_with_fine_and_coarse_segm_to_mask,
|
| 8 |
+
predictor_output_with_coarse_segm_to_mask,
|
| 9 |
+
resample_fine_and_coarse_segm_to_bbox,
|
| 10 |
+
)
|
| 11 |
+
from .chart_output_to_chart_result import (
|
| 12 |
+
densepose_chart_predictor_output_to_result,
|
| 13 |
+
densepose_chart_predictor_output_to_result_with_confidences,
|
| 14 |
+
)
|
| 15 |
+
from .chart_output_hflip import densepose_chart_predictor_output_hflip
|
Leffa/3rdparty/densepose/converters/base.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from typing import Any, Tuple, Type
|
| 4 |
+
import torch
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class BaseConverter:
    """
    Base class for registries of conversion functions.

    A converter turns data of some source type into a particular destination
    type. Each source type registers its converter; a registration is also
    valid for all subclasses of the registered type (resolved by a recursive
    lookup through ``__bases__``).

    Subclasses are expected to provide two class attributes:
    ``registry`` (dict mapping source types to converter callables) and
    ``dst_type`` (destination type, used only in error messages).
    """

    @classmethod
    def register(cls, from_type: Type, converter: Any = None):
        """
        Register a converter for the given source type.

        Can be called directly (``register(T, fn)``) or used as a decorator
        (``@register(T)``). In both cases a decorator function is returned.

        Args:
            from_type (type): type to register the converter for; all
                instances of this type (and its subclasses) use this converter
            converter (callable): converter to register; if None, the returned
                wrapper acts as a decorator for the converter
        """
        if converter is not None:
            cls._do_register(from_type, converter)

        def wrapper(converter: Any) -> Any:
            cls._do_register(from_type, converter)
            return converter

        return wrapper

    @classmethod
    def _do_register(cls, from_type: Type, converter: Any):
        # Store/overwrite the converter for the exact type.
        cls.registry[from_type] = converter  # pyre-ignore[16]

    @classmethod
    def _lookup_converter(cls, from_type: Type) -> Any:
        """
        Recursively look up a converter for ``from_type``.

        If a converter is found on a base class, it is cached under
        ``from_type`` itself so subsequent lookups are direct hits.

        Args:
            from_type: type for which to find a converter
        Return:
            callable or None — the registered converter, or None if no
            suitable entry exists in the registry
        """
        known = cls.registry  # pyre-ignore[16]
        if from_type in known:
            return known[from_type]
        for parent in from_type.__bases__:
            found = cls._lookup_converter(parent)
            if found is not None:
                # Cache on the exact type to skip the base-class walk next time.
                cls._do_register(from_type, found)
                return found
        return None

    @classmethod
    def convert(cls, instance: Any, *args, **kwargs):
        """
        Convert ``instance`` to the destination type using a registered
        converter. Base classes are searched recursively, so derived classes
        need no explicit registration.

        Args:
            instance: source instance to convert to the destination type
        Return:
            An instance of the destination type obtained from the source instance
            Raises KeyError, if no suitable converter found
        """
        src_type = type(instance)
        found = cls._lookup_converter(src_type)
        if found is None:
            # pyre-ignore[16]
            dst_descr = "itself" if cls.dst_type is None else cls.dst_type
            raise KeyError(f"Could not find converter from {src_type} to {dst_descr}")
        return found(instance, *args, **kwargs)
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
# Box as (x0, y0, x1, y1) integer coordinates.
IntTupleBox = Tuple[int, int, int, int]


def make_int_box(box: torch.Tensor) -> IntTupleBox:
    """
    Convert a 4-element box tensor into a tuple of 4 Python ints.

    Values are truncated toward zero via ``Tensor.long()``. Raises
    ValueError if ``box`` does not contain exactly 4 elements.
    """
    x0, y0, x1, y1 = box.long().tolist()
    return x0, y0, x1, y1
|
Leffa/3rdparty/densepose/converters/builtin.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from ..structures import DensePoseChartPredictorOutput, DensePoseEmbeddingPredictorOutput
|
| 4 |
+
from . import (
|
| 5 |
+
HFlipConverter,
|
| 6 |
+
ToChartResultConverter,
|
| 7 |
+
ToChartResultConverterWithConfidences,
|
| 8 |
+
ToMaskConverter,
|
| 9 |
+
densepose_chart_predictor_output_hflip,
|
| 10 |
+
densepose_chart_predictor_output_to_result,
|
| 11 |
+
densepose_chart_predictor_output_to_result_with_confidences,
|
| 12 |
+
predictor_output_with_coarse_segm_to_mask,
|
| 13 |
+
predictor_output_with_fine_and_coarse_segm_to_mask,
|
| 14 |
+
)
|
| 15 |
+
|
| 16 |
+
# Wire the built-in converters to the DensePose predictor output types.
# Importing this module has the side effect of populating the converter
# registries; nothing defined here is meant to be referenced directly.

# Mask conversion: chart-based outputs use both fine and coarse segmentation,
# embedding (CSE) outputs only carry coarse segmentation.
ToMaskConverter.register(
    DensePoseChartPredictorOutput, predictor_output_with_fine_and_coarse_segm_to_mask
)
ToMaskConverter.register(
    DensePoseEmbeddingPredictorOutput, predictor_output_with_coarse_segm_to_mask
)

# Chart-result conversion (without confidences).
ToChartResultConverter.register(
    DensePoseChartPredictorOutput, densepose_chart_predictor_output_to_result
)

# Chart-result conversion (with confidences).
ToChartResultConverterWithConfidences.register(
    DensePoseChartPredictorOutput, densepose_chart_predictor_output_to_result_with_confidences
)

# Horizontal-flip conversion for test-time augmentation.
HFlipConverter.register(DensePoseChartPredictorOutput, densepose_chart_predictor_output_hflip)
|
Leffa/3rdparty/densepose/converters/chart_output_hflip.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
from dataclasses import fields
|
| 3 |
+
import torch
|
| 4 |
+
|
| 5 |
+
from densepose.structures import DensePoseChartPredictorOutput, DensePoseTransformData
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def densepose_chart_predictor_output_hflip(
    densepose_predictor_output: DensePoseChartPredictorOutput,
    transform_data: DensePoseTransformData,
) -> DensePoseChartPredictorOutput:
    """
    Transform a chart predictor output to account for a horizontal image flip.

    Every tensor field is flipped along its last (width) dimension, then the
    part/UV semantics are remapped via the symmetry tables in
    ``transform_data``, and a new output object of the same dataclass type is
    built from the transformed fields.

    NOTE(review): the input object itself is mutated in place (setattr /
    helper calls write into its tensors) before the new object is
    constructed — callers should not rely on the original being unchanged.
    """
    if len(densepose_predictor_output) > 0:

        PredictorOutput = type(densepose_predictor_output)
        output_dict = {}

        # Flip every tensor field along dim 3 (width).
        for field in fields(densepose_predictor_output):
            field_value = getattr(densepose_predictor_output, field.name)
            # flip tensors
            if isinstance(field_value, torch.Tensor):
                setattr(densepose_predictor_output, field.name, torch.flip(field_value, [3]))

        # Remap I/U/V channel semantics for the mirrored image.
        densepose_predictor_output = _flip_iuv_semantics_tensor(
            densepose_predictor_output, transform_data
        )
        # Remap coarse segmentation labels (no-op for binary segmentation).
        densepose_predictor_output = _flip_segm_semantics_tensor(
            densepose_predictor_output, transform_data
        )

        # Rebuild the output as a fresh instance of the same dataclass.
        for field in fields(densepose_predictor_output):
            output_dict[field.name] = getattr(densepose_predictor_output, field.name)

        return PredictorOutput(**output_dict)
    else:
        # Empty output (no detections): nothing to flip.
        return densepose_predictor_output
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def _flip_iuv_semantics_tensor(
    densepose_predictor_output: DensePoseChartPredictorOutput,
    dp_transform_data: DensePoseTransformData,
) -> DensePoseChartPredictorOutput:
    """
    Remap part-label (I) and UV channels to describe the horizontally
    flipped image, using the symmetry tables from ``dp_transform_data``.

    NOTE(review): mutates ``densepose_predictor_output`` in place (writes into
    its ``u``/``v`` tensors and rebinds ``fine_segm``/``u``/``v``) and
    returns the same object.
    """
    point_label_symmetries = dp_transform_data.point_label_symmetries
    uv_symmetries = dp_transform_data.uv_symmetries

    N, C, H, W = densepose_predictor_output.u.shape
    # Quantize U/V into 256 bins to index the precomputed symmetry lookup
    # tables; channel 0 (background) is skipped.
    u_loc = (densepose_predictor_output.u[:, 1:, :, :].clamp(0, 1) * 255).long()
    v_loc = (densepose_predictor_output.v[:, 1:, :, :].clamp(0, 1) * 255).long()
    # Per-channel part index broadcast to (N, C-1, H, W) for the table lookup.
    Iindex = torch.arange(C - 1, device=densepose_predictor_output.u.device)[
        None, :, None, None
    ].expand(N, C - 1, H, W)
    # assumes the transform tables are indexed as [part, v_bin, u_bin] —
    # TODO confirm against DensePoseTransformData
    densepose_predictor_output.u[:, 1:, :, :] = uv_symmetries["U_transforms"][Iindex, v_loc, u_loc]
    densepose_predictor_output.v[:, 1:, :, :] = uv_symmetries["V_transforms"][Iindex, v_loc, u_loc]

    # Swap the channels of left/right symmetric parts.
    for el in ["fine_segm", "u", "v"]:
        densepose_predictor_output.__dict__[el] = densepose_predictor_output.__dict__[el][
            :, point_label_symmetries, :, :
        ]
    return densepose_predictor_output
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def _flip_segm_semantics_tensor(
    densepose_predictor_output: DensePoseChartPredictorOutput, dp_transform_data
):
    """
    Permute coarse-segmentation channels of a horizontally flipped output so
    that left/right symmetric labels are swapped. Mutates and returns the
    given predictor output.
    """
    coarse = densepose_predictor_output.coarse_segm
    # A 2-channel (foreground/background) segmentation is symmetric under a
    # horizontal flip, so only multi-part segmentations need remapping.
    if coarse.shape[1] > 2:
        densepose_predictor_output.coarse_segm = coarse[
            :, dp_transform_data.mask_label_symmetries, :, :
        ]
    return densepose_predictor_output
|
Leffa/3rdparty/densepose/converters/chart_output_to_chart_result.py
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from typing import Dict
|
| 4 |
+
import torch
|
| 5 |
+
from torch.nn import functional as F
|
| 6 |
+
|
| 7 |
+
from detectron2.structures.boxes import Boxes, BoxMode
|
| 8 |
+
|
| 9 |
+
from ..structures import (
|
| 10 |
+
DensePoseChartPredictorOutput,
|
| 11 |
+
DensePoseChartResult,
|
| 12 |
+
DensePoseChartResultWithConfidences,
|
| 13 |
+
)
|
| 14 |
+
from . import resample_fine_and_coarse_segm_to_bbox
|
| 15 |
+
from .base import IntTupleBox, make_int_box
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def resample_uv_tensors_to_bbox(
    u: torch.Tensor,
    v: torch.Tensor,
    labels: torch.Tensor,
    box_xywh_abs: IntTupleBox,
) -> torch.Tensor:
    """
    Resample U and V coordinate estimates to the given bounding box.

    Args:
        u (tensor [1, C, H, W] of float): U coordinates
        v (tensor [1, C, H, W] of float): V coordinates
        labels (tensor [H, W] of long): labels obtained by resampling segmentation
            outputs for the given bounding box
        box_xywh_abs (tuple of 4 int): bounding box that corresponds to predictor outputs
    Return:
        Resampled U and V coordinates - a tensor [2, H, W] of float
    """
    x, y, w, h = box_xywh_abs
    # Guard against degenerate boxes so interpolation always has a valid target size.
    w = max(int(w), 1)
    h = max(int(h), 1)
    u_bbox = F.interpolate(u, (h, w), mode="bilinear", align_corners=False)
    v_bbox = F.interpolate(v, (h, w), mode="bilinear", align_corners=False)
    uv = torch.zeros([2, h, w], dtype=torch.float32, device=u.device)
    # For each part, copy the corresponding channel's values into the pixels
    # labeled with that part; channel/label 0 (background) stays zero.
    for part_id in range(1, u_bbox.size(1)):
        part_mask = labels == part_id
        for channel, resampled in enumerate((u_bbox, v_bbox)):
            uv[channel][part_mask] = resampled[0, part_id][part_mask]
    return uv
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def resample_uv_to_bbox(
    predictor_output: DensePoseChartPredictorOutput,
    labels: torch.Tensor,
    box_xywh_abs: IntTupleBox,
) -> torch.Tensor:
    """
    Resample U and V coordinate estimates of a predictor output to the given
    bounding box.

    Args:
        predictor_output (DensePoseChartPredictorOutput): DensePose predictor
            output to be resampled
        labels (tensor [H, W] of long): labels obtained by resampling segmentation
            outputs for the given bounding box
        box_xywh_abs (tuple of 4 int): bounding box that corresponds to predictor outputs
    Return:
        Resampled U and V coordinates - a tensor [2, H, W] of float
    """
    # Thin wrapper: unpack the UV tensors and delegate to the tensor-level routine.
    u, v = predictor_output.u, predictor_output.v
    return resample_uv_tensors_to_bbox(u, v, labels, box_xywh_abs)
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def densepose_chart_predictor_output_to_result(
    predictor_output: DensePoseChartPredictorOutput, boxes: Boxes
) -> DensePoseChartResult:
    """
    Convert densepose chart predictor outputs to results.

    Args:
        predictor_output (DensePoseChartPredictorOutput): DensePose predictor
            output to be converted to results, must contain only 1 output
        boxes (Boxes): bounding box that corresponds to the predictor output,
            must contain only 1 bounding box
    Return:
        DensePose chart-based result (DensePoseChartResult)
    """
    assert len(predictor_output) == 1 and len(boxes) == 1, (
        f"Predictor output to result conversion can operate only single outputs"
        f", got {len(predictor_output)} predictor outputs and {len(boxes)} boxes"
    )

    # Work on integer XYWH coordinates; clone so the caller's boxes stay untouched.
    boxes_xywh_abs = BoxMode.convert(boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    box_xywh = make_int_box(boxes_xywh_abs[0])

    labels = resample_fine_and_coarse_segm_to_bbox(predictor_output, box_xywh).squeeze(0)
    uv = resample_uv_to_bbox(predictor_output, labels, box_xywh)
    return DensePoseChartResult(labels=labels, uv=uv)
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def resample_confidences_to_bbox(
    predictor_output: DensePoseChartPredictorOutput,
    labels: torch.Tensor,
    box_xywh_abs: IntTupleBox,
) -> Dict[str, torch.Tensor]:
    """
    Resamples confidences for the given bounding box.

    Args:
        predictor_output (DensePoseChartPredictorOutput): DensePose predictor
            output to be resampled
        labels (tensor [H, W] of long): labels obtained by resampling segmentation
            outputs for the given bounding box
        box_xywh_abs (tuple of 4 int): bounding box that corresponds to predictor outputs
    Return:
        Resampled confidences - a dict of [H, W] tensors of float
        (entries for confidences absent from the predictor output are None)
    """
    x, y, w, h = box_xywh_abs
    # Guard against degenerate boxes so interpolation always has a valid target size.
    w = max(int(w), 1)
    h = max(int(h), 1)

    confidence_names = [
        "sigma_1",
        "sigma_2",
        "kappa_u",
        "kappa_v",
        "fine_segm_confidence",
        "coarse_segm_confidence",
    ]
    confidence_results = {key: None for key in confidence_names}
    # Only resample confidences the predictor actually produced.
    confidence_names = [
        key for key in confidence_names if getattr(predictor_output, key) is not None
    ]
    confidence_base = torch.zeros([h, w], dtype=torch.float32, device=predictor_output.u.device)
    num_parts = predictor_output.u.size(1)

    for key in confidence_names:
        resampled_confidence = F.interpolate(
            getattr(predictor_output, key),
            (h, w),
            mode="bilinear",
            align_corners=False,
        )
        # The "is this confidence part-based?" check is loop-invariant, so it is
        # hoisted out of the per-part loop (the original re-tested it for every
        # part and cloned `confidence_base` even when the clone was discarded).
        if resampled_confidence.size(1) != num_parts:
            # confidence is not part-based, fill the data with the first channel
            # (targeted for segmentation confidences that have only 1 channel)
            result = resampled_confidence[0, 0]
        else:
            # part-based confidence: assign data from the channels that
            # correspond to the labels
            result = confidence_base.clone()
            for part_id in range(1, num_parts):
                result[labels == part_id] = resampled_confidence[0, part_id][labels == part_id]
        confidence_results[key] = result

    return confidence_results  # pyre-ignore[7]
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def densepose_chart_predictor_output_to_result_with_confidences(
    predictor_output: DensePoseChartPredictorOutput, boxes: Boxes
) -> DensePoseChartResultWithConfidences:
    """
    Convert densepose chart predictor outputs (with confidences) to results.

    Args:
        predictor_output (DensePoseChartPredictorOutput): DensePose predictor
            output with confidences to be converted to results, must contain only 1 output
        boxes (Boxes): bounding box that corresponds to the predictor output,
            must contain only 1 bounding box
    Return:
        DensePose chart-based result with confidences (DensePoseChartResultWithConfidences)
    """
    assert len(predictor_output) == 1 and len(boxes) == 1, (
        f"Predictor output to result conversion can operate only single outputs"
        f", got {len(predictor_output)} predictor outputs and {len(boxes)} boxes"
    )

    # Work on integer XYWH coordinates; clone so the caller's boxes stay untouched.
    boxes_xywh_abs = BoxMode.convert(boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    box_xywh = make_int_box(boxes_xywh_abs[0])

    labels = resample_fine_and_coarse_segm_to_bbox(predictor_output, box_xywh).squeeze(0)
    uv = resample_uv_to_bbox(predictor_output, labels, box_xywh)
    confidences = resample_confidences_to_bbox(predictor_output, labels, box_xywh)
    return DensePoseChartResultWithConfidences(labels=labels, uv=uv, **confidences)
|
Leffa/3rdparty/densepose/converters/hflip.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from typing import Any
|
| 4 |
+
|
| 5 |
+
from .base import BaseConverter
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class HFlipConverter(BaseConverter):
    """
    Applies horizontal flips to DensePose predictor outputs.
    Each DensePose predictor output type has to register its conversion strategy.
    """

    registry = {}
    dst_type = None

    @classmethod
    # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
    # inconsistently.
    def convert(cls, predictor_outputs: Any, transform_data: Any, *args, **kwargs):
        """
        Performs a horizontal flip on DensePose predictor outputs.
        Lookup is recursive over base classes, so derived classes need no
        explicit registration.

        Args:
            predictor_outputs: DensePose predictor output to be flipped
            transform_data: Anything useful for the flip
        Return:
            An instance of the same type as predictor_outputs
        """
        return super().convert(predictor_outputs, transform_data, *args, **kwargs)
|
Leffa/3rdparty/densepose/converters/segm_to_mask.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from typing import Any
|
| 4 |
+
import torch
|
| 5 |
+
from torch.nn import functional as F
|
| 6 |
+
|
| 7 |
+
from detectron2.structures import BitMasks, Boxes, BoxMode
|
| 8 |
+
|
| 9 |
+
from .base import IntTupleBox, make_int_box
|
| 10 |
+
from .to_mask import ImageSizeType
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def resample_coarse_segm_tensor_to_bbox(coarse_segm: torch.Tensor, box_xywh_abs: IntTupleBox):
    """
    Resample a coarse segmentation tensor to the given bounding box and derive
    labels for each pixel of the bounding box.

    Args:
        coarse_segm: float tensor of shape [1, K, Hout, Wout]
        box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
            corner coordinates, width (W) and height (H)
    Return:
        Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
    """
    x, y, w, h = box_xywh_abs
    # Degenerate boxes are clamped to 1x1 so interpolation stays valid.
    w, h = max(int(w), 1), max(int(h), 1)
    resized_scores = F.interpolate(coarse_segm, (h, w), mode="bilinear", align_corners=False)
    return resized_scores.argmax(dim=1)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def resample_fine_and_coarse_segm_tensors_to_bbox(
    fine_segm: torch.Tensor, coarse_segm: torch.Tensor, box_xywh_abs: IntTupleBox
):
    """
    Resample fine and coarse segmentation tensors to the given bounding box
    and derive labels for each pixel of the bounding box.

    Args:
        fine_segm: float tensor of shape [1, C, Hout, Wout]
        coarse_segm: float tensor of shape [1, K, Hout, Wout]
        box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
            corner coordinates, width (W) and height (H)
    Return:
        Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
    """
    x, y, w, h = box_xywh_abs
    # Degenerate boxes are clamped to 1x1 so interpolation stays valid.
    w = max(int(w), 1)
    h = max(int(h), 1)

    def _resize_and_label(scores: torch.Tensor) -> torch.Tensor:
        # Bilinearly resample the score maps to box size, then take per-pixel argmax.
        return F.interpolate(scores, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)

    coarse_labels = _resize_and_label(coarse_segm)
    fine_labels = _resize_and_label(fine_segm)
    # Zero out fine labels wherever the coarse segmentation predicts background.
    return fine_labels * (coarse_labels > 0).long()
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def resample_fine_and_coarse_segm_to_bbox(predictor_output: Any, box_xywh_abs: IntTupleBox):
    """
    Resample fine and coarse segmentation outputs from a predictor to the given
    bounding box and derive labels for each pixel of the bounding box.

    Args:
        predictor_output: DensePose predictor output that contains segmentation
            results to be resampled
        box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
            corner coordinates, width (W) and height (H)
    Return:
        Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
    """
    # Thin wrapper: unpack the segmentation tensors and delegate.
    fine_segm = predictor_output.fine_segm
    coarse_segm = predictor_output.coarse_segm
    return resample_fine_and_coarse_segm_tensors_to_bbox(fine_segm, coarse_segm, box_xywh_abs)
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def predictor_output_with_coarse_segm_to_mask(
    predictor_output: Any, boxes: Boxes, image_size_hw: ImageSizeType
) -> BitMasks:
    """
    Convert predictor output with coarse segmentation to a mask.
    Assumes that predictor output has the following attributes:
     - coarse_segm (tensor of size [N, D, H, W]): coarse segmentation
       unnormalized scores for N instances; D is the number of coarse
       segmentation labels, H and W is the resolution of the estimate

    Args:
        predictor_output: DensePose predictor output to be converted to mask
        boxes (Boxes): bounding boxes that correspond to the DensePose
            predictor outputs
        image_size_hw (tuple [int, int]): image height Himg and width Wimg
    Return:
        BitMasks that contain a bool tensor of size [N, Himg, Wimg] with
        a mask of the size of the image for each instance
    """
    H, W = image_size_hw
    # Clone before conversion so the caller's boxes stay untouched.
    boxes_xywh_abs = BoxMode.convert(boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    num_instances = len(boxes_xywh_abs)
    masks = torch.zeros((num_instances, H, W), dtype=torch.bool, device=boxes.tensor.device)
    for i in range(num_instances):
        box_xywh = make_int_box(boxes_xywh_abs[i])
        box_mask = resample_coarse_segm_tensor_to_bbox(predictor_output[i].coarse_segm, box_xywh)
        x, y, w, h = box_xywh
        masks[i, y : y + h, x : x + w] = box_mask
    return BitMasks(masks)
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
def predictor_output_with_fine_and_coarse_segm_to_mask(
    predictor_output: Any, boxes: Boxes, image_size_hw: ImageSizeType
) -> BitMasks:
    """
    Convert predictor output with coarse and fine segmentation to a mask.
    Assumes that predictor output has the following attributes:
     - coarse_segm (tensor of size [N, D, H, W]): coarse segmentation
       unnormalized scores for N instances; D is the number of coarse
       segmentation labels, H and W is the resolution of the estimate
     - fine_segm (tensor of size [N, C, H, W]): fine segmentation
       unnormalized scores for N instances; C is the number of fine
       segmentation labels, H and W is the resolution of the estimate

    Args:
        predictor_output: DensePose predictor output to be converted to mask
        boxes (Boxes): bounding boxes that correspond to the DensePose
            predictor outputs
        image_size_hw (tuple [int, int]): image height Himg and width Wimg
    Return:
        BitMasks that contain a bool tensor of size [N, Himg, Wimg] with
        a mask of the size of the image for each instance
    """
    H, W = image_size_hw
    # Clone before conversion so the caller's boxes stay untouched.
    boxes_xywh_abs = BoxMode.convert(boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    num_instances = len(boxes_xywh_abs)
    masks = torch.zeros((num_instances, H, W), dtype=torch.bool, device=boxes.tensor.device)
    for i in range(num_instances):
        box_xywh = make_int_box(boxes_xywh_abs[i])
        labels_i = resample_fine_and_coarse_segm_to_bbox(predictor_output[i], box_xywh)
        x, y, w, h = box_xywh
        # A pixel belongs to the instance mask iff its combined label is foreground.
        masks[i, y : y + h, x : x + w] = labels_i > 0
    return BitMasks(masks)
|
Leffa/3rdparty/densepose/converters/to_chart_result.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from typing import Any
|
| 4 |
+
|
| 5 |
+
from detectron2.structures import Boxes
|
| 6 |
+
|
| 7 |
+
from ..structures import DensePoseChartResult, DensePoseChartResultWithConfidences
|
| 8 |
+
from .base import BaseConverter
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class ToChartResultConverter(BaseConverter):
    """
    Converts various DensePose predictor outputs to DensePose chart-based results.
    Each DensePose predictor output type has to register its conversion strategy.
    """

    registry = {}
    dst_type = DensePoseChartResult

    @classmethod
    # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
    # inconsistently.
    def convert(cls, predictor_outputs: Any, boxes: Boxes, *args, **kwargs) -> DensePoseChartResult:
        """
        Convert DensePose predictor outputs to DensePoseResult using a
        registered converter. Lookup is recursive over base classes, so derived
        classes need no explicit registration.

        Args:
            predictor_outputs: DensePose predictor output to be converted
            boxes (Boxes): bounding boxes that correspond to the DensePose
                predictor outputs
        Return:
            An instance of DensePoseResult. If no suitable converter was found, raises KeyError
        """
        return super().convert(predictor_outputs, boxes, *args, **kwargs)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
class ToChartResultConverterWithConfidences(BaseConverter):
    """
    Converts various DensePose predictor outputs to DensePose chart-based
    results with confidences. Each DensePose predictor output type has to
    register its conversion strategy.
    """

    registry = {}
    dst_type = DensePoseChartResultWithConfidences

    @classmethod
    # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
    # inconsistently.
    def convert(
        cls, predictor_outputs: Any, boxes: Boxes, *args, **kwargs
    ) -> DensePoseChartResultWithConfidences:
        """
        Convert DensePose predictor outputs to DensePoseResult with confidences
        using a registered converter. Lookup is recursive over base classes, so
        derived classes need no explicit registration.

        Args:
            predictor_outputs: DensePose predictor output with confidences
                to be converted
            boxes (Boxes): bounding boxes that correspond to the DensePose
                predictor outputs
        Return:
            An instance of DensePoseResult. If no suitable converter was found, raises KeyError
        """
        return super().convert(predictor_outputs, boxes, *args, **kwargs)
|
Leffa/3rdparty/densepose/converters/to_mask.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from typing import Any, Tuple
|
| 4 |
+
|
| 5 |
+
from detectron2.structures import BitMasks, Boxes
|
| 6 |
+
|
| 7 |
+
from .base import BaseConverter
|
| 8 |
+
|
| 9 |
+
ImageSizeType = Tuple[int, int]
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class ToMaskConverter(BaseConverter):
    """
    Converts various DensePose predictor outputs to masks in bit mask format
    (see `BitMasks`). Each DensePose predictor output type has to register
    its conversion strategy.
    """

    registry = {}
    dst_type = BitMasks

    @classmethod
    # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
    # inconsistently.
    def convert(
        cls,
        densepose_predictor_outputs: Any,
        boxes: Boxes,
        image_size_hw: ImageSizeType,
        *args,
        **kwargs
    ) -> BitMasks:
        """
        Convert DensePose predictor outputs to BitMasks using a registered
        converter. Lookup is recursive over base classes, so derived classes
        need no explicit registration.

        Args:
            densepose_predictor_outputs: DensePose predictor output to be
                converted to BitMasks
            boxes (Boxes): bounding boxes that correspond to the DensePose
                predictor outputs
            image_size_hw (tuple [int, int]): image height and width
        Return:
            An instance of `BitMasks`. If no suitable converter was found, raises KeyError
        """
        return super().convert(
            densepose_predictor_outputs, boxes, image_size_hw, *args, **kwargs
        )
|
Leffa/3rdparty/densepose/engine/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from .trainer import Trainer
|
Leffa/3rdparty/densepose/engine/trainer.py
ADDED
|
@@ -0,0 +1,258 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
| 2 |
+
|
| 3 |
+
import logging
|
| 4 |
+
import os
|
| 5 |
+
from collections import OrderedDict
|
| 6 |
+
from typing import List, Optional, Union
|
| 7 |
+
import torch
|
| 8 |
+
from torch import nn
|
| 9 |
+
|
| 10 |
+
from detectron2.checkpoint import DetectionCheckpointer
|
| 11 |
+
from detectron2.config import CfgNode
|
| 12 |
+
from detectron2.engine import DefaultTrainer
|
| 13 |
+
from detectron2.evaluation import (
|
| 14 |
+
DatasetEvaluator,
|
| 15 |
+
DatasetEvaluators,
|
| 16 |
+
inference_on_dataset,
|
| 17 |
+
print_csv_format,
|
| 18 |
+
)
|
| 19 |
+
from detectron2.solver.build import get_default_optimizer_params, maybe_add_gradient_clipping
|
| 20 |
+
from detectron2.utils import comm
|
| 21 |
+
from detectron2.utils.events import EventWriter, get_event_storage
|
| 22 |
+
|
| 23 |
+
from densepose import DensePoseDatasetMapperTTA, DensePoseGeneralizedRCNNWithTTA, load_from_cfg
|
| 24 |
+
from densepose.data import (
|
| 25 |
+
DatasetMapper,
|
| 26 |
+
build_combined_loader,
|
| 27 |
+
build_detection_test_loader,
|
| 28 |
+
build_detection_train_loader,
|
| 29 |
+
build_inference_based_loaders,
|
| 30 |
+
has_inference_based_loaders,
|
| 31 |
+
)
|
| 32 |
+
from densepose.evaluation.d2_evaluator_adapter import Detectron2COCOEvaluatorAdapter
|
| 33 |
+
from densepose.evaluation.evaluator import DensePoseCOCOEvaluator, build_densepose_evaluator_storage
|
| 34 |
+
from densepose.modeling.cse import Embedder
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
class SampleCountingLoader:
    """
    Wraps a data loader and, for every batch it yields, records the number of
    instances per source dataset into the event storage (`batch/<dataset>`).
    """

    def __init__(self, loader):
        self.loader = loader

    def __iter__(self):
        storage = get_event_storage()
        for batch in self.loader:
            # Tally instances per dataset within this batch.
            counts = {}
            for data in batch:
                dataset_name = data["dataset"]
                counts[dataset_name] = counts.get(dataset_name, 0) + len(data["instances"])
            for dataset_name, num_inst in counts.items():
                storage.put_scalar(f"batch/{dataset_name}", num_inst)
            yield batch
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
class SampleCountMetricPrinter(EventWriter):
    """
    Event writer that logs rolling averages (window of 20) of the
    `batch/<dataset>` sample-count scalars recorded by SampleCountingLoader.
    """

    def __init__(self):
        self.logger = logging.getLogger(__name__)

    def write(self):
        storage = get_event_storage()
        batch_stats_strs = [
            f"{key} {buf.avg(20)}"
            for key, buf in storage.histories().items()
            if key.startswith("batch/")
        ]
        self.logger.info(", ".join(batch_stats_strs))
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
class Trainer(DefaultTrainer):
|
| 75 |
+
@classmethod
|
| 76 |
+
def extract_embedder_from_model(cls, model: nn.Module) -> Optional[Embedder]:
|
| 77 |
+
if isinstance(model, nn.parallel.DistributedDataParallel):
|
| 78 |
+
model = model.module
|
| 79 |
+
if hasattr(model, "roi_heads") and hasattr(model.roi_heads, "embedder"):
|
| 80 |
+
return model.roi_heads.embedder
|
| 81 |
+
return None
|
| 82 |
+
|
| 83 |
+
# TODO: the only reason to copy the base class code here is to pass the embedder from
|
| 84 |
+
# the model to the evaluator; that should be refactored to avoid unnecessary copy-pasting
|
| 85 |
+
@classmethod
|
| 86 |
+
def test(
|
| 87 |
+
cls,
|
| 88 |
+
cfg: CfgNode,
|
| 89 |
+
model: nn.Module,
|
| 90 |
+
evaluators: Optional[Union[DatasetEvaluator, List[DatasetEvaluator]]] = None,
|
| 91 |
+
):
|
| 92 |
+
"""
|
| 93 |
+
Args:
|
| 94 |
+
cfg (CfgNode):
|
| 95 |
+
model (nn.Module):
|
| 96 |
+
evaluators (DatasetEvaluator, list[DatasetEvaluator] or None): if None, will call
|
| 97 |
+
:meth:`build_evaluator`. Otherwise, must have the same length as
|
| 98 |
+
``cfg.DATASETS.TEST``.
|
| 99 |
+
|
| 100 |
+
Returns:
|
| 101 |
+
dict: a dict of result metrics
|
| 102 |
+
"""
|
| 103 |
+
logger = logging.getLogger(__name__)
|
| 104 |
+
if isinstance(evaluators, DatasetEvaluator):
|
| 105 |
+
evaluators = [evaluators]
|
| 106 |
+
if evaluators is not None:
|
| 107 |
+
assert len(cfg.DATASETS.TEST) == len(evaluators), "{} != {}".format(
|
| 108 |
+
len(cfg.DATASETS.TEST), len(evaluators)
|
| 109 |
+
)
|
| 110 |
+
|
| 111 |
+
results = OrderedDict()
|
| 112 |
+
for idx, dataset_name in enumerate(cfg.DATASETS.TEST):
|
| 113 |
+
data_loader = cls.build_test_loader(cfg, dataset_name)
|
| 114 |
+
# When evaluators are passed in as arguments,
|
| 115 |
+
# implicitly assume that evaluators can be created before data_loader.
|
| 116 |
+
if evaluators is not None:
|
| 117 |
+
evaluator = evaluators[idx]
|
| 118 |
+
else:
|
| 119 |
+
try:
|
| 120 |
+
embedder = cls.extract_embedder_from_model(model)
|
| 121 |
+
evaluator = cls.build_evaluator(cfg, dataset_name, embedder=embedder)
|
| 122 |
+
except NotImplementedError:
|
| 123 |
+
logger.warn(
|
| 124 |
+
"No evaluator found. Use `DefaultTrainer.test(evaluators=)`, "
|
| 125 |
+
"or implement its `build_evaluator` method."
|
| 126 |
+
)
|
| 127 |
+
results[dataset_name] = {}
|
| 128 |
+
continue
|
| 129 |
+
if cfg.DENSEPOSE_EVALUATION.DISTRIBUTED_INFERENCE or comm.is_main_process():
|
| 130 |
+
results_i = inference_on_dataset(model, data_loader, evaluator)
|
| 131 |
+
else:
|
| 132 |
+
results_i = {}
|
| 133 |
+
results[dataset_name] = results_i
|
| 134 |
+
if comm.is_main_process():
|
| 135 |
+
assert isinstance(
|
| 136 |
+
results_i, dict
|
| 137 |
+
), "Evaluator must return a dict on the main process. Got {} instead.".format(
|
| 138 |
+
results_i
|
| 139 |
+
)
|
| 140 |
+
logger.info("Evaluation results for {} in csv format:".format(dataset_name))
|
| 141 |
+
print_csv_format(results_i)
|
| 142 |
+
|
| 143 |
+
if len(results) == 1:
|
| 144 |
+
results = list(results.values())[0]
|
| 145 |
+
return results
|
| 146 |
+
|
| 147 |
+
@classmethod
|
| 148 |
+
def build_evaluator(
|
| 149 |
+
cls,
|
| 150 |
+
cfg: CfgNode,
|
| 151 |
+
dataset_name: str,
|
| 152 |
+
output_folder: Optional[str] = None,
|
| 153 |
+
embedder: Optional[Embedder] = None,
|
| 154 |
+
) -> DatasetEvaluators:
|
| 155 |
+
if output_folder is None:
|
| 156 |
+
output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
|
| 157 |
+
evaluators = []
|
| 158 |
+
distributed = cfg.DENSEPOSE_EVALUATION.DISTRIBUTED_INFERENCE
|
| 159 |
+
# Note: we currently use COCO evaluator for both COCO and LVIS datasets
|
| 160 |
+
# to have compatible metrics. LVIS bbox evaluator could also be used
|
| 161 |
+
# with an adapter to properly handle filtered / mapped categories
|
| 162 |
+
# evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type
|
| 163 |
+
# if evaluator_type == "coco":
|
| 164 |
+
# evaluators.append(COCOEvaluator(dataset_name, output_dir=output_folder))
|
| 165 |
+
# elif evaluator_type == "lvis":
|
| 166 |
+
# evaluators.append(LVISEvaluator(dataset_name, output_dir=output_folder))
|
| 167 |
+
evaluators.append(
|
| 168 |
+
Detectron2COCOEvaluatorAdapter(
|
| 169 |
+
dataset_name, output_dir=output_folder, distributed=distributed
|
| 170 |
+
)
|
| 171 |
+
)
|
| 172 |
+
if cfg.MODEL.DENSEPOSE_ON:
|
| 173 |
+
storage = build_densepose_evaluator_storage(cfg, output_folder)
|
| 174 |
+
evaluators.append(
|
| 175 |
+
DensePoseCOCOEvaluator(
|
| 176 |
+
dataset_name,
|
| 177 |
+
distributed,
|
| 178 |
+
output_folder,
|
| 179 |
+
evaluator_type=cfg.DENSEPOSE_EVALUATION.TYPE,
|
| 180 |
+
min_iou_threshold=cfg.DENSEPOSE_EVALUATION.MIN_IOU_THRESHOLD,
|
| 181 |
+
storage=storage,
|
| 182 |
+
embedder=embedder,
|
| 183 |
+
should_evaluate_mesh_alignment=cfg.DENSEPOSE_EVALUATION.EVALUATE_MESH_ALIGNMENT,
|
| 184 |
+
mesh_alignment_mesh_names=cfg.DENSEPOSE_EVALUATION.MESH_ALIGNMENT_MESH_NAMES,
|
| 185 |
+
)
|
| 186 |
+
)
|
| 187 |
+
return DatasetEvaluators(evaluators)
|
| 188 |
+
|
| 189 |
+
@classmethod
|
| 190 |
+
def build_optimizer(cls, cfg: CfgNode, model: nn.Module):
|
| 191 |
+
params = get_default_optimizer_params(
|
| 192 |
+
model,
|
| 193 |
+
base_lr=cfg.SOLVER.BASE_LR,
|
| 194 |
+
weight_decay_norm=cfg.SOLVER.WEIGHT_DECAY_NORM,
|
| 195 |
+
bias_lr_factor=cfg.SOLVER.BIAS_LR_FACTOR,
|
| 196 |
+
weight_decay_bias=cfg.SOLVER.WEIGHT_DECAY_BIAS,
|
| 197 |
+
overrides={
|
| 198 |
+
"features": {
|
| 199 |
+
"lr": cfg.SOLVER.BASE_LR * cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.FEATURES_LR_FACTOR,
|
| 200 |
+
},
|
| 201 |
+
"embeddings": {
|
| 202 |
+
"lr": cfg.SOLVER.BASE_LR * cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDING_LR_FACTOR,
|
| 203 |
+
},
|
| 204 |
+
},
|
| 205 |
+
)
|
| 206 |
+
optimizer = torch.optim.SGD(
|
| 207 |
+
params,
|
| 208 |
+
cfg.SOLVER.BASE_LR,
|
| 209 |
+
momentum=cfg.SOLVER.MOMENTUM,
|
| 210 |
+
nesterov=cfg.SOLVER.NESTEROV,
|
| 211 |
+
weight_decay=cfg.SOLVER.WEIGHT_DECAY,
|
| 212 |
+
)
|
| 213 |
+
# pyre-fixme[6]: For 2nd param expected `Type[Optimizer]` but got `SGD`.
|
| 214 |
+
return maybe_add_gradient_clipping(cfg, optimizer)
|
| 215 |
+
|
| 216 |
+
@classmethod
|
| 217 |
+
def build_test_loader(cls, cfg: CfgNode, dataset_name):
|
| 218 |
+
return build_detection_test_loader(cfg, dataset_name, mapper=DatasetMapper(cfg, False))
|
| 219 |
+
|
| 220 |
+
    @classmethod
    def build_train_loader(cls, cfg: CfgNode):
        """
        Create the training data loader.

        When no inference-based (bootstrap) loaders are configured, this is the
        standard detection train loader. Otherwise a separate bootstrap model is
        built and its predictions feed additional training data sources, which
        are combined with the base loader according to configured ratios.
        """
        data_loader = build_detection_train_loader(cfg, mapper=DatasetMapper(cfg, True))
        if not has_inference_based_loaders(cfg):
            return data_loader
        # Build the bootstrap model whose inference output supplies extra
        # training data; its weights load independently of the trained model.
        model = cls.build_model(cfg)
        model.to(cfg.BOOTSTRAP_MODEL.DEVICE)
        DetectionCheckpointer(model).resume_or_load(cfg.BOOTSTRAP_MODEL.WEIGHTS, resume=False)
        inference_based_loaders, ratios = build_inference_based_loaders(cfg, model)
        # The base loader always participates with ratio 1.0.
        loaders = [data_loader] + inference_based_loaders
        ratios = [1.0] + ratios
        combined_data_loader = build_combined_loader(cfg, loaders, ratios)
        # Wrap to track how many samples are drawn from each source.
        sample_counting_loader = SampleCountingLoader(combined_data_loader)
        return sample_counting_loader
|
| 234 |
+
|
| 235 |
+
def build_writers(self):
|
| 236 |
+
writers = super().build_writers()
|
| 237 |
+
writers.append(SampleCountMetricPrinter())
|
| 238 |
+
return writers
|
| 239 |
+
|
| 240 |
+
@classmethod
|
| 241 |
+
def test_with_TTA(cls, cfg: CfgNode, model):
|
| 242 |
+
logger = logging.getLogger("detectron2.trainer")
|
| 243 |
+
# In the end of training, run an evaluation with TTA
|
| 244 |
+
# Only support some R-CNN models.
|
| 245 |
+
logger.info("Running inference with test-time augmentation ...")
|
| 246 |
+
transform_data = load_from_cfg(cfg)
|
| 247 |
+
model = DensePoseGeneralizedRCNNWithTTA(
|
| 248 |
+
cfg, model, transform_data, DensePoseDatasetMapperTTA(cfg)
|
| 249 |
+
)
|
| 250 |
+
evaluators = [
|
| 251 |
+
cls.build_evaluator(
|
| 252 |
+
cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA")
|
| 253 |
+
)
|
| 254 |
+
for name in cfg.DATASETS.TEST
|
| 255 |
+
]
|
| 256 |
+
res = cls.test(cfg, model, evaluators) # pyre-ignore[6]
|
| 257 |
+
res = OrderedDict({k + "_TTA": v for k, v in res.items()})
|
| 258 |
+
return res
|
Leffa/3rdparty/densepose/modeling/__init__.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from .confidence import DensePoseConfidenceModelConfig, DensePoseUVConfidenceType
|
| 4 |
+
from .filter import DensePoseDataFilter
|
| 5 |
+
from .inference import densepose_inference
|
| 6 |
+
from .utils import initialize_module_params
|
| 7 |
+
from .build import (
|
| 8 |
+
build_densepose_data_filter,
|
| 9 |
+
build_densepose_embedder,
|
| 10 |
+
build_densepose_head,
|
| 11 |
+
build_densepose_losses,
|
| 12 |
+
build_densepose_predictor,
|
| 13 |
+
)
|
Leffa/3rdparty/densepose/modeling/build.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from typing import Optional
|
| 4 |
+
from torch import nn
|
| 5 |
+
|
| 6 |
+
from detectron2.config import CfgNode
|
| 7 |
+
|
| 8 |
+
from .cse.embedder import Embedder
|
| 9 |
+
from .filter import DensePoseDataFilter
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def build_densepose_predictor(cfg: CfgNode, input_channels: int):
    """
    Create an instance of DensePose predictor based on configuration options.

    Args:
        cfg (CfgNode): configuration options
        input_channels (int): input tensor size along the channel dimension
    Return:
        An instance of DensePose predictor
    """
    # Imported lazily to avoid a circular import at module load time.
    from .predictors import DENSEPOSE_PREDICTOR_REGISTRY

    predictor_cls = DENSEPOSE_PREDICTOR_REGISTRY.get(
        cfg.MODEL.ROI_DENSEPOSE_HEAD.PREDICTOR_NAME
    )
    return predictor_cls(cfg, input_channels)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def build_densepose_data_filter(cfg: CfgNode):
    """
    Build DensePose data filter which selects data for training

    Args:
        cfg (CfgNode): configuration options

    Return:
        Callable: list(Tensor), list(Instances) -> list(Tensor), list(Instances)
        An instance of DensePose filter, which takes feature tensors and proposals
        as an input and returns filtered features and proposals
    """
    return DensePoseDataFilter(cfg)
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def build_densepose_head(cfg: CfgNode, input_channels: int):
    """
    Build DensePose head based on configuration options.

    Args:
        cfg (CfgNode): configuration options
        input_channels (int): input tensor size along the channel dimension
    Return:
        An instance of DensePose head
    """
    # Imported lazily to avoid a circular import at module load time.
    from .roi_heads.registry import ROI_DENSEPOSE_HEAD_REGISTRY

    head_cls = ROI_DENSEPOSE_HEAD_REGISTRY.get(cfg.MODEL.ROI_DENSEPOSE_HEAD.NAME)
    return head_cls(cfg, input_channels)
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def build_densepose_losses(cfg: CfgNode):
    """
    Build DensePose loss based on configuration options.

    Args:
        cfg (CfgNode): configuration options
    Return:
        An instance of DensePose loss
    """
    # Imported lazily to avoid a circular import at module load time.
    from .losses import DENSEPOSE_LOSS_REGISTRY

    loss_cls = DENSEPOSE_LOSS_REGISTRY.get(cfg.MODEL.ROI_DENSEPOSE_HEAD.LOSS_NAME)
    return loss_cls(cfg)
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def build_densepose_embedder(cfg: CfgNode) -> Optional[nn.Module]:
    """
    Build embedder used to embed mesh vertices into an embedding space.
    Embedder contains sub-embedders, one for each mesh ID.

    Args:
        cfg (cfgNode): configuration options
    Return:
        Embedding module, or None when no CSE embedders are configured
    """
    # An empty EMBEDDERS mapping means CSE is not in use.
    if not cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDERS:
        return None
    return Embedder(cfg)
|
Leffa/3rdparty/densepose/modeling/confidence.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
from enum import Enum
|
| 5 |
+
|
| 6 |
+
from detectron2.config import CfgNode
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class DensePoseUVConfidenceType(Enum):
    """
    Statistical model used when learning confidence for UV coordinates.

    Possible values:
     - "iid_iso": statistically independent identically distributed residuals
       with anisotropic covariance
     - "indep_aniso": statistically independent residuals with anisotropic
       covariances
    For details, see:
    N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning
    Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019
    """

    IID_ISO = "iid_iso"
    INDEP_ANISO = "indep_aniso"
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
@dataclass
class DensePoseUVConfidenceConfig:
    """
    Configuration options for confidence on UV data
    """

    # whether UV confidence estimation is enabled
    enabled: bool = False
    # lower bound on UV confidences
    epsilon: float = 0.01
    # statistical model used for the UV confidence estimate
    # (see DensePoseUVConfidenceType)
    type: DensePoseUVConfidenceType = DensePoseUVConfidenceType.IID_ISO
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
@dataclass
class DensePoseSegmConfidenceConfig:
    """
    Configuration options for confidence on segmentation
    """

    # whether segmentation confidence estimation is enabled
    enabled: bool = False
    # lower bound on confidence values
    epsilon: float = 0.01
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
@dataclass
class DensePoseConfidenceModelConfig:
    """
    Configuration options for confidence models
    """

    # confidence for U and V values
    uv_confidence: DensePoseUVConfidenceConfig
    # segmentation confidence
    segm_confidence: DensePoseSegmConfidenceConfig

    @staticmethod
    def from_cfg(cfg: CfgNode) -> "DensePoseConfidenceModelConfig":
        """Read confidence settings from the ROI_DENSEPOSE_HEAD config node."""
        uv_node = cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE
        segm_node = cfg.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE
        uv = DensePoseUVConfidenceConfig(
            enabled=uv_node.ENABLED,
            epsilon=uv_node.EPSILON,
            type=DensePoseUVConfidenceType(uv_node.TYPE),
        )
        segm = DensePoseSegmConfidenceConfig(
            enabled=segm_node.ENABLED,
            epsilon=segm_node.EPSILON,
        )
        return DensePoseConfidenceModelConfig(uv_confidence=uv, segm_confidence=segm)
|
Leffa/3rdparty/densepose/modeling/densepose_checkpoint.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
from collections import OrderedDict
|
| 3 |
+
|
| 4 |
+
from detectron2.checkpoint import DetectionCheckpointer
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def _rename_HRNet_weights(weights):
|
| 8 |
+
# We detect and rename HRNet weights for DensePose. 1956 and 1716 are values that are
|
| 9 |
+
# common to all HRNet pretrained weights, and should be enough to accurately identify them
|
| 10 |
+
if (
|
| 11 |
+
len(weights["model"].keys()) == 1956
|
| 12 |
+
and len([k for k in weights["model"].keys() if k.startswith("stage")]) == 1716
|
| 13 |
+
):
|
| 14 |
+
hrnet_weights = OrderedDict()
|
| 15 |
+
for k in weights["model"].keys():
|
| 16 |
+
hrnet_weights["backbone.bottom_up." + str(k)] = weights["model"][k]
|
| 17 |
+
return {"model": hrnet_weights}
|
| 18 |
+
else:
|
| 19 |
+
return weights
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class DensePoseCheckpointer(DetectionCheckpointer):
    """
    Same as :class:`DetectionCheckpointer`, but is able to handle HRNet weights
    """

    def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables):
        super().__init__(model, save_dir, save_to_disk=save_to_disk, **checkpointables)

    def _load_file(self, filename: str) -> object:
        """Load a checkpoint file, re-rooting HRNet keys when detected."""
        raw_weights = super()._load_file(filename)
        return _rename_HRNet_weights(raw_weights)
|
Leffa/3rdparty/densepose/modeling/filter.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from typing import List
|
| 4 |
+
import torch
|
| 5 |
+
|
| 6 |
+
from detectron2.config import CfgNode
|
| 7 |
+
from detectron2.structures import Instances
|
| 8 |
+
from detectron2.structures.boxes import matched_pairwise_iou
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class DensePoseDataFilter:
    """
    Selects training proposals usable for DensePose supervision: proposals
    must match their GT box with sufficiently high IoU and must carry
    DensePose (or, optionally, mask) annotations.
    """

    def __init__(self, cfg: CfgNode):
        # minimum IoU between a proposal and its GT box for the proposal
        # to be used in DensePose training
        self.iou_threshold = cfg.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD
        # if True, proposals that only have mask annotations (no DensePose)
        # are also kept, to train the coarse segmentation head from masks
        self.keep_masks = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS

    @torch.no_grad()
    def __call__(self, features: List[torch.Tensor], proposals_with_targets: List[Instances]):
        """
        Filters proposals with targets to keep only the ones relevant for
        DensePose training

        Args:
            features (list[Tensor]): input data as a list of features,
                each feature is a tensor. Axis 0 represents the number of
                images `N` in the input data; axes 1-3 are channels,
                height, and width, which may vary between features
                (e.g., if a feature pyramid is used).
            proposals_with_targets (list[Instances]): length `N` list of
                `Instances`. The i-th `Instances` contains instances
                (proposals, GT) for the i-th input image,
        Returns:
            list[Tensor]: filtered features (currently returned unchanged)
            list[Instances]: filtered proposals
        """
        proposals_filtered = []
        # TODO: the commented out code was supposed to correctly deal with situations
        # where no valid DensePose GT is available for certain images. The corresponding
        # image features were sliced and proposals were filtered. This led to performance
        # deterioration, both in terms of runtime and in terms of evaluation results.
        #
        # feature_mask = torch.ones(
        #    len(proposals_with_targets),
        #    dtype=torch.bool,
        #    device=features[0].device if len(features) > 0 else torch.device("cpu"),
        # )
        for i, proposals_per_image in enumerate(proposals_with_targets):
            # skip images with neither DensePose nor (usable) mask annotations
            if not proposals_per_image.has("gt_densepose") and (
                not proposals_per_image.has("gt_masks") or not self.keep_masks
            ):
                # feature_mask[i] = 0
                continue
            gt_boxes = proposals_per_image.gt_boxes
            est_boxes = proposals_per_image.proposal_boxes
            # apply match threshold for densepose head
            iou = matched_pairwise_iou(gt_boxes, est_boxes)
            iou_select = iou > self.iou_threshold
            proposals_per_image = proposals_per_image[iou_select]  # pyre-ignore[6]

            N_gt_boxes = len(proposals_per_image.gt_boxes)
            assert N_gt_boxes == len(proposals_per_image.proposal_boxes), (
                f"The number of GT boxes {N_gt_boxes} is different from the "
                f"number of proposal boxes {len(proposals_per_image.proposal_boxes)}"
            )
            # filter out any target without suitable annotation
            if self.keep_masks:
                gt_masks = (
                    proposals_per_image.gt_masks
                    if hasattr(proposals_per_image, "gt_masks")
                    else [None] * N_gt_boxes
                )
            else:
                gt_masks = [None] * N_gt_boxes
            gt_densepose = (
                proposals_per_image.gt_densepose
                if hasattr(proposals_per_image, "gt_densepose")
                else [None] * N_gt_boxes
            )
            assert len(gt_masks) == N_gt_boxes
            assert len(gt_densepose) == N_gt_boxes
            # keep only instances that have at least one usable annotation
            selected_indices = [
                i
                for i, (dp_target, mask_target) in enumerate(zip(gt_densepose, gt_masks))
                if (dp_target is not None) or (mask_target is not None)
            ]
            # if not len(selected_indices):
            #     feature_mask[i] = 0
            #     continue
            if len(selected_indices) != N_gt_boxes:
                proposals_per_image = proposals_per_image[selected_indices]  # pyre-ignore[6]
            assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.proposal_boxes)
            proposals_filtered.append(proposals_per_image)
        # features_filtered = [feature[feature_mask] for feature in features]
        # return features_filtered, proposals_filtered
        return features, proposals_filtered
|
Leffa/3rdparty/densepose/modeling/hrfpn.py
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
"""
|
| 3 |
+
MIT License
|
| 4 |
+
Copyright (c) 2019 Microsoft
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
The above copyright notice and this permission notice shall be included in all
|
| 12 |
+
copies or substantial portions of the Software.
|
| 13 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 14 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 15 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 16 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 17 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 18 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 19 |
+
SOFTWARE.
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
import torch
|
| 23 |
+
import torch.nn as nn
|
| 24 |
+
import torch.nn.functional as F
|
| 25 |
+
|
| 26 |
+
from detectron2.layers import ShapeSpec
|
| 27 |
+
from detectron2.modeling.backbone import BACKBONE_REGISTRY
|
| 28 |
+
from detectron2.modeling.backbone.backbone import Backbone
|
| 29 |
+
|
| 30 |
+
from .hrnet import build_pose_hrnet_backbone
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class HRFPN(Backbone):
    """HRFPN (High Resolution Feature Pyramids)
    Transforms outputs of HRNet backbone so they are suitable for the ROI_heads
    arXiv: https://arxiv.org/abs/1904.04514
    Adapted from https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/necks/hrfpn.py
    Args:
        bottom_up: (list) output of HRNet
        in_features (list): names of the input features (output of HRNet)
        in_channels (list): number of channels for each branch
        out_channels (int): output channels of feature pyramids
        n_out_features (int): number of output stages
        pooling (str): pooling for generating feature pyramids (from {MAX, AVG})
        share_conv (bool): Have one conv per output, or share one with all the outputs
    """

    def __init__(
        self,
        bottom_up,
        in_features,
        n_out_features,
        in_channels,
        out_channels,
        pooling="AVG",
        share_conv=False,
    ):
        super(HRFPN, self).__init__()
        assert isinstance(in_channels, list)
        self.bottom_up = bottom_up
        self.in_features = in_features
        self.n_out_features = n_out_features
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_ins = len(in_channels)
        self.share_conv = share_conv

        if self.share_conv:
            # a single 3x3 output conv shared across all pyramid levels
            self.fpn_conv = nn.Conv2d(
                in_channels=out_channels, out_channels=out_channels, kernel_size=3, padding=1
            )
        else:
            # one dedicated 3x3 output conv per pyramid level
            self.fpn_conv = nn.ModuleList()
            for _ in range(self.n_out_features):
                self.fpn_conv.append(
                    nn.Conv2d(
                        in_channels=out_channels,
                        out_channels=out_channels,
                        kernel_size=3,
                        padding=1,
                    )
                )

        # Custom change: Replaces a simple bilinear interpolation.
        # Branch i (stride 2**i relative to the finest branch) is upsampled
        # by a learned transposed conv with stride 2**i.
        self.interp_conv = nn.ModuleList()
        for i in range(len(self.in_features)):
            self.interp_conv.append(
                nn.Sequential(
                    nn.ConvTranspose2d(
                        in_channels=in_channels[i],
                        out_channels=in_channels[i],
                        kernel_size=4,
                        stride=2**i,
                        padding=0,
                        output_padding=0,
                        bias=False,
                    ),
                    nn.BatchNorm2d(in_channels[i], momentum=0.1),
                    nn.ReLU(inplace=True),
                )
            )

        # Custom change: Replaces a couple (reduction conv + pooling) by one conv.
        # Level i is produced by a conv with kernel/stride 2**i over the
        # concatenated branches, downsampling and reducing channels at once.
        self.reduction_pooling_conv = nn.ModuleList()
        for i in range(self.n_out_features):
            self.reduction_pooling_conv.append(
                nn.Sequential(
                    nn.Conv2d(sum(in_channels), out_channels, kernel_size=2**i, stride=2**i),
                    nn.BatchNorm2d(out_channels, momentum=0.1),
                    nn.ReLU(inplace=True),
                )
            )

        # NOTE(review): self.pooling is selected here but not used in forward()
        # (the reduction convs above replaced it) — kept for compatibility.
        if pooling == "MAX":
            self.pooling = F.max_pool2d
        else:
            self.pooling = F.avg_pool2d

        # Backbone metadata: output names "p1..pK", their channel counts and
        # strides (level i has stride 2**(i+2) w.r.t. the input image).
        self._out_features = []
        self._out_feature_channels = {}
        self._out_feature_strides = {}

        for i in range(self.n_out_features):
            self._out_features.append("p%d" % (i + 1))
            self._out_feature_channels.update({self._out_features[-1]: self.out_channels})
            self._out_feature_strides.update({self._out_features[-1]: 2 ** (i + 2)})

    # default init_weights for conv(msra) and norm in ConvModule
    def init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, a=1)
                nn.init.constant_(m.bias, 0)

    def forward(self, inputs):
        bottom_up_features = self.bottom_up(inputs)
        assert len(bottom_up_features) == len(self.in_features)
        inputs = [bottom_up_features[f] for f in self.in_features]

        # Upsample every branch to (roughly) the finest resolution, then crop
        # to the smallest common spatial size and concatenate along channels.
        outs = []
        for i in range(len(inputs)):
            outs.append(self.interp_conv[i](inputs[i]))
        shape_2 = min(o.shape[2] for o in outs)
        shape_3 = min(o.shape[3] for o in outs)
        out = torch.cat([o[:, :, :shape_2, :shape_3] for o in outs], dim=1)
        # Produce each pyramid level by strided reduction convolution.
        outs = []
        for i in range(self.n_out_features):
            outs.append(self.reduction_pooling_conv[i](out))
        for i in range(len(outs)):  # Make shapes consistent
            outs[-1 - i] = outs[-1 - i][
                :, :, : outs[-1].shape[2] * 2**i, : outs[-1].shape[3] * 2**i
            ]
        # Apply the (shared or per-level) output convolution.
        outputs = []
        for i in range(len(outs)):
            if self.share_conv:
                outputs.append(self.fpn_conv(outs[i]))
            else:
                outputs.append(self.fpn_conv[i](outs[i]))

        assert len(self._out_features) == len(outputs)
        return dict(zip(self._out_features, outputs))
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
@BACKBONE_REGISTRY.register()
def build_hrfpn_backbone(cfg, input_shape: ShapeSpec) -> HRFPN:
    """Build an HRNet backbone and wrap it with an HRFPN neck."""
    branch_channels = cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS
    feature_names = ["p%d" % (i + 1) for i in range(cfg.MODEL.HRNET.STAGE4.NUM_BRANCHES)]
    hrnet = build_pose_hrnet_backbone(cfg, input_shape)
    return HRFPN(
        hrnet,
        feature_names,
        len(cfg.MODEL.ROI_HEADS.IN_FEATURES),
        branch_channels,
        cfg.MODEL.HRNET.HRFPN.OUT_CHANNELS,
        pooling="AVG",
        share_conv=False,
    )
|
Leffa/3rdparty/densepose/modeling/hrnet.py
ADDED
|
@@ -0,0 +1,474 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
# ------------------------------------------------------------------------------
|
| 3 |
+
# Copyright (c) Microsoft
|
| 4 |
+
# Licensed under the MIT License.
|
| 5 |
+
# Written by Bin Xiao (leoxiaobin@gmail.com)
|
| 6 |
+
# Modified by Bowen Cheng (bcheng9@illinois.edu)
|
| 7 |
+
# Adapted from https://github.com/HRNet/Higher-HRNet-Human-Pose-Estimation/blob/master/lib/models/pose_higher_hrnet.py # noqa
|
| 8 |
+
# ------------------------------------------------------------------------------
|
| 9 |
+
|
| 10 |
+
from __future__ import absolute_import, division, print_function
|
| 11 |
+
import logging
|
| 12 |
+
import torch.nn as nn
|
| 13 |
+
|
| 14 |
+
from detectron2.layers import ShapeSpec
|
| 15 |
+
from detectron2.modeling.backbone import BACKBONE_REGISTRY
|
| 16 |
+
from detectron2.modeling.backbone.backbone import Backbone
|
| 17 |
+
|
| 18 |
+
BN_MOMENTUM = 0.1
|
| 19 |
+
logger = logging.getLogger(__name__)
|
| 20 |
+
|
| 21 |
+
__all__ = ["build_pose_hrnet_backbone", "PoseHigherResolutionNet"]
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def conv3x3(in_planes, out_planes, stride=1):
|
| 25 |
+
"""3x3 convolution with padding"""
|
| 26 |
+
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class BasicBlock(nn.Module):
|
| 30 |
+
expansion = 1
|
| 31 |
+
|
| 32 |
+
def __init__(self, inplanes, planes, stride=1, downsample=None):
|
| 33 |
+
super(BasicBlock, self).__init__()
|
| 34 |
+
self.conv1 = conv3x3(inplanes, planes, stride)
|
| 35 |
+
self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
|
| 36 |
+
self.relu = nn.ReLU(inplace=True)
|
| 37 |
+
self.conv2 = conv3x3(planes, planes)
|
| 38 |
+
self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
|
| 39 |
+
self.downsample = downsample
|
| 40 |
+
self.stride = stride
|
| 41 |
+
|
| 42 |
+
def forward(self, x):
|
| 43 |
+
residual = x
|
| 44 |
+
|
| 45 |
+
out = self.conv1(x)
|
| 46 |
+
out = self.bn1(out)
|
| 47 |
+
out = self.relu(out)
|
| 48 |
+
|
| 49 |
+
out = self.conv2(out)
|
| 50 |
+
out = self.bn2(out)
|
| 51 |
+
|
| 52 |
+
if self.downsample is not None:
|
| 53 |
+
residual = self.downsample(x)
|
| 54 |
+
|
| 55 |
+
out += residual
|
| 56 |
+
out = self.relu(out)
|
| 57 |
+
|
| 58 |
+
return out
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
class Bottleneck(nn.Module):
|
| 62 |
+
expansion = 4
|
| 63 |
+
|
| 64 |
+
def __init__(self, inplanes, planes, stride=1, downsample=None):
|
| 65 |
+
super(Bottleneck, self).__init__()
|
| 66 |
+
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
|
| 67 |
+
self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
|
| 68 |
+
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
|
| 69 |
+
self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
|
| 70 |
+
self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
|
| 71 |
+
self.bn3 = nn.BatchNorm2d(planes * self.expansion, momentum=BN_MOMENTUM)
|
| 72 |
+
self.relu = nn.ReLU(inplace=True)
|
| 73 |
+
self.downsample = downsample
|
| 74 |
+
self.stride = stride
|
| 75 |
+
|
| 76 |
+
def forward(self, x):
|
| 77 |
+
residual = x
|
| 78 |
+
|
| 79 |
+
out = self.conv1(x)
|
| 80 |
+
out = self.bn1(out)
|
| 81 |
+
out = self.relu(out)
|
| 82 |
+
|
| 83 |
+
out = self.conv2(out)
|
| 84 |
+
out = self.bn2(out)
|
| 85 |
+
out = self.relu(out)
|
| 86 |
+
|
| 87 |
+
out = self.conv3(out)
|
| 88 |
+
out = self.bn3(out)
|
| 89 |
+
|
| 90 |
+
if self.downsample is not None:
|
| 91 |
+
residual = self.downsample(x)
|
| 92 |
+
|
| 93 |
+
out += residual
|
| 94 |
+
out = self.relu(out)
|
| 95 |
+
|
| 96 |
+
return out
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
class HighResolutionModule(nn.Module):
|
| 100 |
+
"""HighResolutionModule
|
| 101 |
+
Building block of the PoseHigherResolutionNet (see lower)
|
| 102 |
+
arXiv: https://arxiv.org/abs/1908.10357
|
| 103 |
+
Args:
|
| 104 |
+
num_branches (int): number of branches of the modyle
|
| 105 |
+
blocks (str): type of block of the module
|
| 106 |
+
num_blocks (int): number of blocks of the module
|
| 107 |
+
num_inchannels (int): number of input channels of the module
|
| 108 |
+
num_channels (list): number of channels of each branch
|
| 109 |
+
multi_scale_output (bool): only used by the last module of PoseHigherResolutionNet
|
| 110 |
+
"""
|
| 111 |
+
|
| 112 |
+
def __init__(
|
| 113 |
+
self,
|
| 114 |
+
num_branches,
|
| 115 |
+
blocks,
|
| 116 |
+
num_blocks,
|
| 117 |
+
num_inchannels,
|
| 118 |
+
num_channels,
|
| 119 |
+
multi_scale_output=True,
|
| 120 |
+
):
|
| 121 |
+
super(HighResolutionModule, self).__init__()
|
| 122 |
+
self._check_branches(num_branches, blocks, num_blocks, num_inchannels, num_channels)
|
| 123 |
+
|
| 124 |
+
self.num_inchannels = num_inchannels
|
| 125 |
+
self.num_branches = num_branches
|
| 126 |
+
|
| 127 |
+
self.multi_scale_output = multi_scale_output
|
| 128 |
+
|
| 129 |
+
self.branches = self._make_branches(num_branches, blocks, num_blocks, num_channels)
|
| 130 |
+
self.fuse_layers = self._make_fuse_layers()
|
| 131 |
+
self.relu = nn.ReLU(True)
|
| 132 |
+
|
| 133 |
+
def _check_branches(self, num_branches, blocks, num_blocks, num_inchannels, num_channels):
|
| 134 |
+
if num_branches != len(num_blocks):
|
| 135 |
+
error_msg = "NUM_BRANCHES({}) <> NUM_BLOCKS({})".format(num_branches, len(num_blocks))
|
| 136 |
+
logger.error(error_msg)
|
| 137 |
+
raise ValueError(error_msg)
|
| 138 |
+
|
| 139 |
+
if num_branches != len(num_channels):
|
| 140 |
+
error_msg = "NUM_BRANCHES({}) <> NUM_CHANNELS({})".format(
|
| 141 |
+
num_branches, len(num_channels)
|
| 142 |
+
)
|
| 143 |
+
logger.error(error_msg)
|
| 144 |
+
raise ValueError(error_msg)
|
| 145 |
+
|
| 146 |
+
if num_branches != len(num_inchannels):
|
| 147 |
+
error_msg = "NUM_BRANCHES({}) <> NUM_INCHANNELS({})".format(
|
| 148 |
+
num_branches, len(num_inchannels)
|
| 149 |
+
)
|
| 150 |
+
logger.error(error_msg)
|
| 151 |
+
raise ValueError(error_msg)
|
| 152 |
+
|
| 153 |
+
def _make_one_branch(self, branch_index, block, num_blocks, num_channels, stride=1):
|
| 154 |
+
downsample = None
|
| 155 |
+
if (
|
| 156 |
+
stride != 1
|
| 157 |
+
or self.num_inchannels[branch_index] != num_channels[branch_index] * block.expansion
|
| 158 |
+
):
|
| 159 |
+
downsample = nn.Sequential(
|
| 160 |
+
nn.Conv2d(
|
| 161 |
+
self.num_inchannels[branch_index],
|
| 162 |
+
num_channels[branch_index] * block.expansion,
|
| 163 |
+
kernel_size=1,
|
| 164 |
+
stride=stride,
|
| 165 |
+
bias=False,
|
| 166 |
+
),
|
| 167 |
+
nn.BatchNorm2d(num_channels[branch_index] * block.expansion, momentum=BN_MOMENTUM),
|
| 168 |
+
)
|
| 169 |
+
|
| 170 |
+
layers = []
|
| 171 |
+
layers.append(
|
| 172 |
+
block(self.num_inchannels[branch_index], num_channels[branch_index], stride, downsample)
|
| 173 |
+
)
|
| 174 |
+
self.num_inchannels[branch_index] = num_channels[branch_index] * block.expansion
|
| 175 |
+
for _ in range(1, num_blocks[branch_index]):
|
| 176 |
+
layers.append(block(self.num_inchannels[branch_index], num_channels[branch_index]))
|
| 177 |
+
|
| 178 |
+
return nn.Sequential(*layers)
|
| 179 |
+
|
| 180 |
+
def _make_branches(self, num_branches, block, num_blocks, num_channels):
|
| 181 |
+
branches = []
|
| 182 |
+
|
| 183 |
+
for i in range(num_branches):
|
| 184 |
+
branches.append(self._make_one_branch(i, block, num_blocks, num_channels))
|
| 185 |
+
|
| 186 |
+
return nn.ModuleList(branches)
|
| 187 |
+
|
| 188 |
+
def _make_fuse_layers(self):
|
| 189 |
+
if self.num_branches == 1:
|
| 190 |
+
return None
|
| 191 |
+
|
| 192 |
+
num_branches = self.num_branches
|
| 193 |
+
num_inchannels = self.num_inchannels
|
| 194 |
+
fuse_layers = []
|
| 195 |
+
for i in range(num_branches if self.multi_scale_output else 1):
|
| 196 |
+
fuse_layer = []
|
| 197 |
+
for j in range(num_branches):
|
| 198 |
+
if j > i:
|
| 199 |
+
fuse_layer.append(
|
| 200 |
+
nn.Sequential(
|
| 201 |
+
nn.Conv2d(num_inchannels[j], num_inchannels[i], 1, 1, 0, bias=False),
|
| 202 |
+
nn.BatchNorm2d(num_inchannels[i]),
|
| 203 |
+
nn.Upsample(scale_factor=2 ** (j - i), mode="nearest"),
|
| 204 |
+
)
|
| 205 |
+
)
|
| 206 |
+
elif j == i:
|
| 207 |
+
fuse_layer.append(None)
|
| 208 |
+
else:
|
| 209 |
+
conv3x3s = []
|
| 210 |
+
for k in range(i - j):
|
| 211 |
+
if k == i - j - 1:
|
| 212 |
+
num_outchannels_conv3x3 = num_inchannels[i]
|
| 213 |
+
conv3x3s.append(
|
| 214 |
+
nn.Sequential(
|
| 215 |
+
nn.Conv2d(
|
| 216 |
+
num_inchannels[j],
|
| 217 |
+
num_outchannels_conv3x3,
|
| 218 |
+
3,
|
| 219 |
+
2,
|
| 220 |
+
1,
|
| 221 |
+
bias=False,
|
| 222 |
+
),
|
| 223 |
+
nn.BatchNorm2d(num_outchannels_conv3x3),
|
| 224 |
+
)
|
| 225 |
+
)
|
| 226 |
+
else:
|
| 227 |
+
num_outchannels_conv3x3 = num_inchannels[j]
|
| 228 |
+
conv3x3s.append(
|
| 229 |
+
nn.Sequential(
|
| 230 |
+
nn.Conv2d(
|
| 231 |
+
num_inchannels[j],
|
| 232 |
+
num_outchannels_conv3x3,
|
| 233 |
+
3,
|
| 234 |
+
2,
|
| 235 |
+
1,
|
| 236 |
+
bias=False,
|
| 237 |
+
),
|
| 238 |
+
nn.BatchNorm2d(num_outchannels_conv3x3),
|
| 239 |
+
nn.ReLU(True),
|
| 240 |
+
)
|
| 241 |
+
)
|
| 242 |
+
fuse_layer.append(nn.Sequential(*conv3x3s))
|
| 243 |
+
fuse_layers.append(nn.ModuleList(fuse_layer))
|
| 244 |
+
|
| 245 |
+
return nn.ModuleList(fuse_layers)
|
| 246 |
+
|
| 247 |
+
def get_num_inchannels(self):
|
| 248 |
+
return self.num_inchannels
|
| 249 |
+
|
| 250 |
+
def forward(self, x):
|
| 251 |
+
if self.num_branches == 1:
|
| 252 |
+
return [self.branches[0](x[0])]
|
| 253 |
+
|
| 254 |
+
for i in range(self.num_branches):
|
| 255 |
+
x[i] = self.branches[i](x[i])
|
| 256 |
+
|
| 257 |
+
x_fuse = []
|
| 258 |
+
|
| 259 |
+
for i in range(len(self.fuse_layers)):
|
| 260 |
+
y = x[0] if i == 0 else self.fuse_layers[i][0](x[0])
|
| 261 |
+
for j in range(1, self.num_branches):
|
| 262 |
+
if i == j:
|
| 263 |
+
y = y + x[j]
|
| 264 |
+
else:
|
| 265 |
+
z = self.fuse_layers[i][j](x[j])[:, :, : y.shape[2], : y.shape[3]]
|
| 266 |
+
y = y + z
|
| 267 |
+
x_fuse.append(self.relu(y))
|
| 268 |
+
|
| 269 |
+
return x_fuse
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
blocks_dict = {"BASIC": BasicBlock, "BOTTLENECK": Bottleneck}
|
| 273 |
+
|
| 274 |
+
|
| 275 |
+
class PoseHigherResolutionNet(Backbone):
|
| 276 |
+
"""PoseHigherResolutionNet
|
| 277 |
+
Composed of several HighResolutionModule tied together with ConvNets
|
| 278 |
+
Adapted from the GitHub version to fit with HRFPN and the Detectron2 infrastructure
|
| 279 |
+
arXiv: https://arxiv.org/abs/1908.10357
|
| 280 |
+
"""
|
| 281 |
+
|
| 282 |
+
def __init__(self, cfg, **kwargs):
|
| 283 |
+
self.inplanes = cfg.MODEL.HRNET.STEM_INPLANES
|
| 284 |
+
super(PoseHigherResolutionNet, self).__init__()
|
| 285 |
+
|
| 286 |
+
# stem net
|
| 287 |
+
self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
|
| 288 |
+
self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
|
| 289 |
+
self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, bias=False)
|
| 290 |
+
self.bn2 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
|
| 291 |
+
self.relu = nn.ReLU(inplace=True)
|
| 292 |
+
self.layer1 = self._make_layer(Bottleneck, 64, 4)
|
| 293 |
+
|
| 294 |
+
self.stage2_cfg = cfg.MODEL.HRNET.STAGE2
|
| 295 |
+
num_channels = self.stage2_cfg.NUM_CHANNELS
|
| 296 |
+
block = blocks_dict[self.stage2_cfg.BLOCK]
|
| 297 |
+
num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
|
| 298 |
+
self.transition1 = self._make_transition_layer([256], num_channels)
|
| 299 |
+
self.stage2, pre_stage_channels = self._make_stage(self.stage2_cfg, num_channels)
|
| 300 |
+
|
| 301 |
+
self.stage3_cfg = cfg.MODEL.HRNET.STAGE3
|
| 302 |
+
num_channels = self.stage3_cfg.NUM_CHANNELS
|
| 303 |
+
block = blocks_dict[self.stage3_cfg.BLOCK]
|
| 304 |
+
num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
|
| 305 |
+
self.transition2 = self._make_transition_layer(pre_stage_channels, num_channels)
|
| 306 |
+
self.stage3, pre_stage_channels = self._make_stage(self.stage3_cfg, num_channels)
|
| 307 |
+
|
| 308 |
+
self.stage4_cfg = cfg.MODEL.HRNET.STAGE4
|
| 309 |
+
num_channels = self.stage4_cfg.NUM_CHANNELS
|
| 310 |
+
block = blocks_dict[self.stage4_cfg.BLOCK]
|
| 311 |
+
num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
|
| 312 |
+
self.transition3 = self._make_transition_layer(pre_stage_channels, num_channels)
|
| 313 |
+
self.stage4, pre_stage_channels = self._make_stage(
|
| 314 |
+
self.stage4_cfg, num_channels, multi_scale_output=True
|
| 315 |
+
)
|
| 316 |
+
|
| 317 |
+
self._out_features = []
|
| 318 |
+
self._out_feature_channels = {}
|
| 319 |
+
self._out_feature_strides = {}
|
| 320 |
+
|
| 321 |
+
for i in range(cfg.MODEL.HRNET.STAGE4.NUM_BRANCHES):
|
| 322 |
+
self._out_features.append("p%d" % (i + 1))
|
| 323 |
+
self._out_feature_channels.update(
|
| 324 |
+
{self._out_features[-1]: cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS[i]}
|
| 325 |
+
)
|
| 326 |
+
self._out_feature_strides.update({self._out_features[-1]: 1})
|
| 327 |
+
|
| 328 |
+
def _get_deconv_cfg(self, deconv_kernel):
|
| 329 |
+
if deconv_kernel == 4:
|
| 330 |
+
padding = 1
|
| 331 |
+
output_padding = 0
|
| 332 |
+
elif deconv_kernel == 3:
|
| 333 |
+
padding = 1
|
| 334 |
+
output_padding = 1
|
| 335 |
+
elif deconv_kernel == 2:
|
| 336 |
+
padding = 0
|
| 337 |
+
output_padding = 0
|
| 338 |
+
|
| 339 |
+
return deconv_kernel, padding, output_padding
|
| 340 |
+
|
| 341 |
+
def _make_transition_layer(self, num_channels_pre_layer, num_channels_cur_layer):
|
| 342 |
+
num_branches_cur = len(num_channels_cur_layer)
|
| 343 |
+
num_branches_pre = len(num_channels_pre_layer)
|
| 344 |
+
|
| 345 |
+
transition_layers = []
|
| 346 |
+
for i in range(num_branches_cur):
|
| 347 |
+
if i < num_branches_pre:
|
| 348 |
+
if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
|
| 349 |
+
transition_layers.append(
|
| 350 |
+
nn.Sequential(
|
| 351 |
+
nn.Conv2d(
|
| 352 |
+
num_channels_pre_layer[i],
|
| 353 |
+
num_channels_cur_layer[i],
|
| 354 |
+
3,
|
| 355 |
+
1,
|
| 356 |
+
1,
|
| 357 |
+
bias=False,
|
| 358 |
+
),
|
| 359 |
+
nn.BatchNorm2d(num_channels_cur_layer[i]),
|
| 360 |
+
nn.ReLU(inplace=True),
|
| 361 |
+
)
|
| 362 |
+
)
|
| 363 |
+
else:
|
| 364 |
+
transition_layers.append(None)
|
| 365 |
+
else:
|
| 366 |
+
conv3x3s = []
|
| 367 |
+
for j in range(i + 1 - num_branches_pre):
|
| 368 |
+
inchannels = num_channels_pre_layer[-1]
|
| 369 |
+
outchannels = (
|
| 370 |
+
num_channels_cur_layer[i] if j == i - num_branches_pre else inchannels
|
| 371 |
+
)
|
| 372 |
+
conv3x3s.append(
|
| 373 |
+
nn.Sequential(
|
| 374 |
+
nn.Conv2d(inchannels, outchannels, 3, 2, 1, bias=False),
|
| 375 |
+
nn.BatchNorm2d(outchannels),
|
| 376 |
+
nn.ReLU(inplace=True),
|
| 377 |
+
)
|
| 378 |
+
)
|
| 379 |
+
transition_layers.append(nn.Sequential(*conv3x3s))
|
| 380 |
+
|
| 381 |
+
return nn.ModuleList(transition_layers)
|
| 382 |
+
|
| 383 |
+
def _make_layer(self, block, planes, blocks, stride=1):
|
| 384 |
+
downsample = None
|
| 385 |
+
if stride != 1 or self.inplanes != planes * block.expansion:
|
| 386 |
+
downsample = nn.Sequential(
|
| 387 |
+
nn.Conv2d(
|
| 388 |
+
self.inplanes,
|
| 389 |
+
planes * block.expansion,
|
| 390 |
+
kernel_size=1,
|
| 391 |
+
stride=stride,
|
| 392 |
+
bias=False,
|
| 393 |
+
),
|
| 394 |
+
nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
|
| 395 |
+
)
|
| 396 |
+
|
| 397 |
+
layers = []
|
| 398 |
+
layers.append(block(self.inplanes, planes, stride, downsample))
|
| 399 |
+
self.inplanes = planes * block.expansion
|
| 400 |
+
for _ in range(1, blocks):
|
| 401 |
+
layers.append(block(self.inplanes, planes))
|
| 402 |
+
|
| 403 |
+
return nn.Sequential(*layers)
|
| 404 |
+
|
| 405 |
+
def _make_stage(self, layer_config, num_inchannels, multi_scale_output=True):
|
| 406 |
+
num_modules = layer_config["NUM_MODULES"]
|
| 407 |
+
num_branches = layer_config["NUM_BRANCHES"]
|
| 408 |
+
num_blocks = layer_config["NUM_BLOCKS"]
|
| 409 |
+
num_channels = layer_config["NUM_CHANNELS"]
|
| 410 |
+
block = blocks_dict[layer_config["BLOCK"]]
|
| 411 |
+
|
| 412 |
+
modules = []
|
| 413 |
+
for i in range(num_modules):
|
| 414 |
+
# multi_scale_output is only used last module
|
| 415 |
+
if not multi_scale_output and i == num_modules - 1:
|
| 416 |
+
reset_multi_scale_output = False
|
| 417 |
+
else:
|
| 418 |
+
reset_multi_scale_output = True
|
| 419 |
+
|
| 420 |
+
modules.append(
|
| 421 |
+
HighResolutionModule(
|
| 422 |
+
num_branches,
|
| 423 |
+
block,
|
| 424 |
+
num_blocks,
|
| 425 |
+
num_inchannels,
|
| 426 |
+
num_channels,
|
| 427 |
+
reset_multi_scale_output,
|
| 428 |
+
)
|
| 429 |
+
)
|
| 430 |
+
num_inchannels = modules[-1].get_num_inchannels()
|
| 431 |
+
|
| 432 |
+
return nn.Sequential(*modules), num_inchannels
|
| 433 |
+
|
| 434 |
+
def forward(self, x):
|
| 435 |
+
x = self.conv1(x)
|
| 436 |
+
x = self.bn1(x)
|
| 437 |
+
x = self.relu(x)
|
| 438 |
+
x = self.conv2(x)
|
| 439 |
+
x = self.bn2(x)
|
| 440 |
+
x = self.relu(x)
|
| 441 |
+
x = self.layer1(x)
|
| 442 |
+
|
| 443 |
+
x_list = []
|
| 444 |
+
for i in range(self.stage2_cfg.NUM_BRANCHES):
|
| 445 |
+
if self.transition1[i] is not None:
|
| 446 |
+
x_list.append(self.transition1[i](x))
|
| 447 |
+
else:
|
| 448 |
+
x_list.append(x)
|
| 449 |
+
y_list = self.stage2(x_list)
|
| 450 |
+
|
| 451 |
+
x_list = []
|
| 452 |
+
for i in range(self.stage3_cfg.NUM_BRANCHES):
|
| 453 |
+
if self.transition2[i] is not None:
|
| 454 |
+
x_list.append(self.transition2[i](y_list[-1]))
|
| 455 |
+
else:
|
| 456 |
+
x_list.append(y_list[i])
|
| 457 |
+
y_list = self.stage3(x_list)
|
| 458 |
+
|
| 459 |
+
x_list = []
|
| 460 |
+
for i in range(self.stage4_cfg.NUM_BRANCHES):
|
| 461 |
+
if self.transition3[i] is not None:
|
| 462 |
+
x_list.append(self.transition3[i](y_list[-1]))
|
| 463 |
+
else:
|
| 464 |
+
x_list.append(y_list[i])
|
| 465 |
+
y_list = self.stage4(x_list)
|
| 466 |
+
|
| 467 |
+
assert len(self._out_features) == len(y_list)
|
| 468 |
+
return dict(zip(self._out_features, y_list)) # final_outputs
|
| 469 |
+
|
| 470 |
+
|
| 471 |
+
@BACKBONE_REGISTRY.register()
|
| 472 |
+
def build_pose_hrnet_backbone(cfg, input_shape: ShapeSpec):
|
| 473 |
+
model = PoseHigherResolutionNet(cfg)
|
| 474 |
+
return model
|
Leffa/3rdparty/densepose/modeling/inference.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
from dataclasses import fields
|
| 3 |
+
from typing import Any, List
|
| 4 |
+
import torch
|
| 5 |
+
|
| 6 |
+
from detectron2.structures import Instances
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def densepose_inference(densepose_predictor_output: Any, detections: List[Instances]) -> None:
|
| 10 |
+
"""
|
| 11 |
+
Splits DensePose predictor outputs into chunks, each chunk corresponds to
|
| 12 |
+
detections on one image. Predictor output chunks are stored in `pred_densepose`
|
| 13 |
+
attribute of the corresponding `Instances` object.
|
| 14 |
+
|
| 15 |
+
Args:
|
| 16 |
+
densepose_predictor_output: a dataclass instance (can be of different types,
|
| 17 |
+
depending on predictor used for inference). Each field can be `None`
|
| 18 |
+
(if the corresponding output was not inferred) or a tensor of size
|
| 19 |
+
[N, ...], where N = N_1 + N_2 + .. + N_k is a total number of
|
| 20 |
+
detections on all images, N_1 is the number of detections on image 1,
|
| 21 |
+
N_2 is the number of detections on image 2, etc.
|
| 22 |
+
detections: a list of objects of type `Instance`, k-th object corresponds
|
| 23 |
+
to detections on k-th image.
|
| 24 |
+
"""
|
| 25 |
+
k = 0
|
| 26 |
+
for detection_i in detections:
|
| 27 |
+
if densepose_predictor_output is None:
|
| 28 |
+
# don't add `pred_densepose` attribute
|
| 29 |
+
continue
|
| 30 |
+
n_i = detection_i.__len__()
|
| 31 |
+
|
| 32 |
+
PredictorOutput = type(densepose_predictor_output)
|
| 33 |
+
output_i_dict = {}
|
| 34 |
+
# we assume here that `densepose_predictor_output` is a dataclass object
|
| 35 |
+
for field in fields(densepose_predictor_output):
|
| 36 |
+
field_value = getattr(densepose_predictor_output, field.name)
|
| 37 |
+
# slice tensors
|
| 38 |
+
if isinstance(field_value, torch.Tensor):
|
| 39 |
+
output_i_dict[field.name] = field_value[k : k + n_i]
|
| 40 |
+
# leave others as is
|
| 41 |
+
else:
|
| 42 |
+
output_i_dict[field.name] = field_value
|
| 43 |
+
detection_i.pred_densepose = PredictorOutput(**output_i_dict)
|
| 44 |
+
k += n_i
|
Leffa/3rdparty/densepose/modeling/losses/__init__.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from .chart import DensePoseChartLoss
|
| 4 |
+
from .chart_with_confidences import DensePoseChartWithConfidenceLoss
|
| 5 |
+
from .cse import DensePoseCseLoss
|
| 6 |
+
from .registry import DENSEPOSE_LOSS_REGISTRY
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
__all__ = [
|
| 10 |
+
"DensePoseChartLoss",
|
| 11 |
+
"DensePoseChartWithConfidenceLoss",
|
| 12 |
+
"DensePoseCseLoss",
|
| 13 |
+
"DENSEPOSE_LOSS_REGISTRY",
|
| 14 |
+
]
|
Leffa/3rdparty/densepose/modeling/losses/chart.py
ADDED
|
@@ -0,0 +1,291 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from typing import Any, List
|
| 4 |
+
import torch
|
| 5 |
+
from torch.nn import functional as F
|
| 6 |
+
|
| 7 |
+
from detectron2.config import CfgNode
|
| 8 |
+
from detectron2.structures import Instances
|
| 9 |
+
|
| 10 |
+
from .mask_or_segm import MaskOrSegmentationLoss
|
| 11 |
+
from .registry import DENSEPOSE_LOSS_REGISTRY
|
| 12 |
+
from .utils import (
|
| 13 |
+
BilinearInterpolationHelper,
|
| 14 |
+
ChartBasedAnnotationsAccumulator,
|
| 15 |
+
LossDict,
|
| 16 |
+
extract_packed_annotations_from_matches,
|
| 17 |
+
)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
@DENSEPOSE_LOSS_REGISTRY.register()
|
| 21 |
+
class DensePoseChartLoss:
|
| 22 |
+
"""
|
| 23 |
+
DensePose loss for chart-based training. A mesh is split into charts,
|
| 24 |
+
each chart is given a label (I) and parametrized by 2 coordinates referred to
|
| 25 |
+
as U and V. Ground truth consists of a number of points annotated with
|
| 26 |
+
I, U and V values and coarse segmentation S defined for all pixels of the
|
| 27 |
+
object bounding box. In some cases (see `COARSE_SEGM_TRAINED_BY_MASKS`),
|
| 28 |
+
semantic segmentation annotations can be used as ground truth inputs as well.
|
| 29 |
+
|
| 30 |
+
Estimated values are tensors:
|
| 31 |
+
* U coordinates, tensor of shape [N, C, S, S]
|
| 32 |
+
* V coordinates, tensor of shape [N, C, S, S]
|
| 33 |
+
* fine segmentation estimates, tensor of shape [N, C, S, S] with raw unnormalized
|
| 34 |
+
scores for each fine segmentation label at each location
|
| 35 |
+
* coarse segmentation estimates, tensor of shape [N, D, S, S] with raw unnormalized
|
| 36 |
+
scores for each coarse segmentation label at each location
|
| 37 |
+
where N is the number of detections, C is the number of fine segmentation
|
| 38 |
+
labels, S is the estimate size ( = width = height) and D is the number of
|
| 39 |
+
coarse segmentation channels.
|
| 40 |
+
|
| 41 |
+
The losses are:
|
| 42 |
+
* regression (smooth L1) loss for U and V coordinates
|
| 43 |
+
* cross entropy loss for fine (I) and coarse (S) segmentations
|
| 44 |
+
Each loss has an associated weight
|
| 45 |
+
"""
|
| 46 |
+
|
| 47 |
+
def __init__(self, cfg: CfgNode):
|
| 48 |
+
"""
|
| 49 |
+
Initialize chart-based loss from configuration options
|
| 50 |
+
|
| 51 |
+
Args:
|
| 52 |
+
cfg (CfgNode): configuration options
|
| 53 |
+
"""
|
| 54 |
+
# fmt: off
|
| 55 |
+
self.heatmap_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE
|
| 56 |
+
self.w_points = cfg.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS
|
| 57 |
+
self.w_part = cfg.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS
|
| 58 |
+
self.w_segm = cfg.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS
|
| 59 |
+
self.n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
|
| 60 |
+
# fmt: on
|
| 61 |
+
self.segm_trained_by_masks = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS
|
| 62 |
+
self.segm_loss = MaskOrSegmentationLoss(cfg)
|
| 63 |
+
|
| 64 |
+
def __call__(
|
| 65 |
+
self, proposals_with_gt: List[Instances], densepose_predictor_outputs: Any, **kwargs
|
| 66 |
+
) -> LossDict:
|
| 67 |
+
"""
|
| 68 |
+
Produce chart-based DensePose losses
|
| 69 |
+
|
| 70 |
+
Args:
|
| 71 |
+
proposals_with_gt (list of Instances): detections with associated ground truth data
|
| 72 |
+
densepose_predictor_outputs: an object of a dataclass that contains predictor outputs
|
| 73 |
+
with estimated values; assumed to have the following attributes:
|
| 74 |
+
* coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S]
|
| 75 |
+
* fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
|
| 76 |
+
* u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
|
| 77 |
+
* v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
|
| 78 |
+
where N is the number of detections, C is the number of fine segmentation
|
| 79 |
+
labels, S is the estimate size ( = width = height) and D is the number of
|
| 80 |
+
coarse segmentation channels.
|
| 81 |
+
|
| 82 |
+
Return:
|
| 83 |
+
dict: str -> tensor: dict of losses with the following entries:
|
| 84 |
+
* `loss_densepose_U`: smooth L1 loss for U coordinate estimates
|
| 85 |
+
* `loss_densepose_V`: smooth L1 loss for V coordinate estimates
|
| 86 |
+
* `loss_densepose_I`: cross entropy for raw unnormalized scores for fine
|
| 87 |
+
segmentation estimates given ground truth labels;
|
| 88 |
+
* `loss_densepose_S`: cross entropy for raw unnormalized scores for coarse
|
| 89 |
+
segmentation estimates given ground truth labels;
|
| 90 |
+
"""
|
| 91 |
+
# densepose outputs are computed for all images and all bounding boxes;
|
| 92 |
+
# i.e. if a batch has 4 images with (3, 1, 2, 1) proposals respectively,
|
| 93 |
+
# the outputs will have size(0) == 3+1+2+1 == 7
|
| 94 |
+
|
| 95 |
+
if not len(proposals_with_gt):
|
| 96 |
+
return self.produce_fake_densepose_losses(densepose_predictor_outputs)
|
| 97 |
+
|
| 98 |
+
accumulator = ChartBasedAnnotationsAccumulator()
|
| 99 |
+
packed_annotations = extract_packed_annotations_from_matches(proposals_with_gt, accumulator)
|
| 100 |
+
|
| 101 |
+
# NOTE: we need to keep the same computation graph on all the GPUs to
|
| 102 |
+
# perform reduction properly. Hence even if we have no data on one
|
| 103 |
+
# of the GPUs, we still need to generate the computation graph.
|
| 104 |
+
# Add fake (zero) loss in the form Tensor.sum() * 0
|
| 105 |
+
if packed_annotations is None:
|
| 106 |
+
return self.produce_fake_densepose_losses(densepose_predictor_outputs)
|
| 107 |
+
|
| 108 |
+
h, w = densepose_predictor_outputs.u.shape[2:]
|
| 109 |
+
interpolator = BilinearInterpolationHelper.from_matches(
|
| 110 |
+
packed_annotations,
|
| 111 |
+
(h, w),
|
| 112 |
+
)
|
| 113 |
+
|
| 114 |
+
j_valid_fg = interpolator.j_valid * ( # pyre-ignore[16]
|
| 115 |
+
packed_annotations.fine_segm_labels_gt > 0
|
| 116 |
+
)
|
| 117 |
+
# pyre-fixme[6]: For 1st param expected `Tensor` but got `int`.
|
| 118 |
+
if not torch.any(j_valid_fg):
|
| 119 |
+
return self.produce_fake_densepose_losses(densepose_predictor_outputs)
|
| 120 |
+
|
| 121 |
+
losses_uv = self.produce_densepose_losses_uv(
|
| 122 |
+
proposals_with_gt,
|
| 123 |
+
densepose_predictor_outputs,
|
| 124 |
+
packed_annotations,
|
| 125 |
+
interpolator,
|
| 126 |
+
j_valid_fg, # pyre-ignore[6]
|
| 127 |
+
)
|
| 128 |
+
|
| 129 |
+
losses_segm = self.produce_densepose_losses_segm(
|
| 130 |
+
proposals_with_gt,
|
| 131 |
+
densepose_predictor_outputs,
|
| 132 |
+
packed_annotations,
|
| 133 |
+
interpolator,
|
| 134 |
+
j_valid_fg, # pyre-ignore[6]
|
| 135 |
+
)
|
| 136 |
+
|
| 137 |
+
return {**losses_uv, **losses_segm}
|
| 138 |
+
|
| 139 |
+
def produce_fake_densepose_losses(self, densepose_predictor_outputs: Any) -> LossDict:
|
| 140 |
+
"""
|
| 141 |
+
Fake losses for fine segmentation and U/V coordinates. These are used when
|
| 142 |
+
no suitable ground truth data was found in a batch. The loss has a value 0
|
| 143 |
+
and is primarily used to construct the computation graph, so that
|
| 144 |
+
`DistributedDataParallel` has similar graphs on all GPUs and can perform
|
| 145 |
+
reduction properly.
|
| 146 |
+
|
| 147 |
+
Args:
|
| 148 |
+
densepose_predictor_outputs: DensePose predictor outputs, an object
|
| 149 |
+
of a dataclass that is assumed to have the following attributes:
|
| 150 |
+
* fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
|
| 151 |
+
* u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
|
| 152 |
+
* v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
|
| 153 |
+
Return:
|
| 154 |
+
dict: str -> tensor: dict of losses with the following entries:
|
| 155 |
+
* `loss_densepose_U`: has value 0
|
| 156 |
+
* `loss_densepose_V`: has value 0
|
| 157 |
+
* `loss_densepose_I`: has value 0
|
| 158 |
+
* `loss_densepose_S`: has value 0
|
| 159 |
+
"""
|
| 160 |
+
losses_uv = self.produce_fake_densepose_losses_uv(densepose_predictor_outputs)
|
| 161 |
+
losses_segm = self.produce_fake_densepose_losses_segm(densepose_predictor_outputs)
|
| 162 |
+
return {**losses_uv, **losses_segm}
|
| 163 |
+
|
| 164 |
+
def produce_fake_densepose_losses_uv(self, densepose_predictor_outputs: Any) -> LossDict:
|
| 165 |
+
"""
|
| 166 |
+
Fake losses for U/V coordinates. These are used when no suitable ground
|
| 167 |
+
truth data was found in a batch. The loss has a value 0
|
| 168 |
+
and is primarily used to construct the computation graph, so that
|
| 169 |
+
`DistributedDataParallel` has similar graphs on all GPUs and can perform
|
| 170 |
+
reduction properly.
|
| 171 |
+
|
| 172 |
+
Args:
|
| 173 |
+
densepose_predictor_outputs: DensePose predictor outputs, an object
|
| 174 |
+
of a dataclass that is assumed to have the following attributes:
|
| 175 |
+
* u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
|
| 176 |
+
* v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
|
| 177 |
+
Return:
|
| 178 |
+
dict: str -> tensor: dict of losses with the following entries:
|
| 179 |
+
* `loss_densepose_U`: has value 0
|
| 180 |
+
* `loss_densepose_V`: has value 0
|
| 181 |
+
"""
|
| 182 |
+
return {
|
| 183 |
+
"loss_densepose_U": densepose_predictor_outputs.u.sum() * 0,
|
| 184 |
+
"loss_densepose_V": densepose_predictor_outputs.v.sum() * 0,
|
| 185 |
+
}
|
| 186 |
+
|
| 187 |
+
def produce_fake_densepose_losses_segm(self, densepose_predictor_outputs: Any) -> LossDict:
    """
    Zero-valued stand-in losses for fine / coarse segmentation, used when a
    batch contains no usable ground truth. The values are 0 but remain
    attached to the computation graph so that `DistributedDataParallel`
    sees the same graph on every GPU and can reduce gradients properly.

    Args:
        densepose_predictor_outputs: DensePose predictor outputs, an object
            of a dataclass that is assumed to have the following attributes:
            * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
            * coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S]
    Return:
        dict: str -> tensor: dict of losses with the following entries:
            * `loss_densepose_I`: has value 0
            * `loss_densepose_S`: has value 0, added only if `segm_trained_by_masks` is False
    """
    fine_segm_scores = densepose_predictor_outputs.fine_segm
    return {
        # zero loss that still touches the fine segmentation tensor
        "loss_densepose_I": fine_segm_scores.sum() * 0,
        # coarse segmentation fake loss is delegated to the segm loss object
        "loss_densepose_S": self.segm_loss.fake_value(densepose_predictor_outputs),
    }
|
| 210 |
+
|
| 211 |
+
def produce_densepose_losses_uv(
    self,
    proposals_with_gt: List[Instances],
    densepose_predictor_outputs: Any,
    packed_annotations: Any,
    interpolator: BilinearInterpolationHelper,
    j_valid_fg: torch.Tensor,
) -> LossDict:
    """
    U/V coordinate losses: smooth L1 between values interpolated at the
    annotated points and the ground truth, restricted to valid foreground
    points (`j_valid_fg`).

    Args:
        proposals_with_gt (list of Instances): detections with associated ground truth data
        densepose_predictor_outputs: DensePose predictor outputs, an object
            of a dataclass that is assumed to have the following attributes:
            * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
            * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
        packed_annotations: packed ground truth annotations with `u_gt` / `v_gt`
        interpolator: helper that extracts estimates at annotated point locations
        j_valid_fg: boolean mask of valid foreground points
    Return:
        dict: str -> tensor: dict of losses with the following entries:
            * `loss_densepose_U`: smooth L1 loss for U coordinate estimates
            * `loss_densepose_V`: smooth L1 loss for V coordinate estimates
    """

    def _point_loss(estimates: torch.Tensor, ground_truth: torch.Tensor) -> torch.Tensor:
        # interpolate estimates at annotated points, keep valid FG points only
        est = interpolator.extract_at_points(estimates)[j_valid_fg]
        gt = ground_truth[j_valid_fg]
        return F.smooth_l1_loss(est, gt, reduction="sum") * self.w_points

    return {
        "loss_densepose_U": _point_loss(densepose_predictor_outputs.u, packed_annotations.u_gt),
        "loss_densepose_V": _point_loss(densepose_predictor_outputs.v, packed_annotations.v_gt),
    }
|
| 242 |
+
|
| 243 |
+
def produce_densepose_losses_segm(
    self,
    proposals_with_gt: List[Instances],
    densepose_predictor_outputs: Any,
    packed_annotations: Any,
    interpolator: BilinearInterpolationHelper,
    j_valid_fg: torch.Tensor,
) -> LossDict:
    """
    Fine / coarse segmentation losses: cross entropy on unnormalized scores.
    Fine segmentation is supervised at annotated points; coarse segmentation
    is supervised by dense mask annotations via `self.segm_loss`.

    Args:
        proposals_with_gt (list of Instances): detections with associated ground truth data
        densepose_predictor_outputs: DensePose predictor outputs, an object
            of a dataclass that is assumed to have the following attributes:
            * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
            * coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S]
        packed_annotations: packed ground truth with `fine_segm_labels_gt`
        interpolator: helper that extracts estimates at annotated point locations
        j_valid_fg: boolean mask of valid foreground points (unused here;
            fine segmentation uses `interpolator.j_valid` instead)
    Return:
        dict: str -> tensor: dict of losses with the following entries:
            * `loss_densepose_I`: cross entropy for raw unnormalized scores for fine
                segmentation estimates given ground truth labels
            * `loss_densepose_S`: cross entropy for raw unnormalized scores for coarse
                segmentation estimates given ground truth labels; included only if
                coarse segmentation is trained from DensePose ground truth
                (otherwise handled by `produce_mask_losses`)
    """
    j_valid = interpolator.j_valid  # pyre-ignore[16]
    fine_segm_gt = packed_annotations.fine_segm_labels_gt[j_valid]
    # interpolate all C fine-segmentation channels at the annotated points;
    # explicit per-corner bilinear weights broadcast across channels
    fine_segm_est = interpolator.extract_at_points(
        densepose_predictor_outputs.fine_segm,
        slice_fine_segm=slice(None),
        w_ylo_xlo=interpolator.w_ylo_xlo[:, None],  # pyre-ignore[16]
        w_ylo_xhi=interpolator.w_ylo_xhi[:, None],  # pyre-ignore[16]
        w_yhi_xlo=interpolator.w_yhi_xlo[:, None],  # pyre-ignore[16]
        w_yhi_xhi=interpolator.w_yhi_xhi[:, None],  # pyre-ignore[16]
    )[j_valid, :]
    loss_fine = F.cross_entropy(fine_segm_est, fine_segm_gt.long()) * self.w_part
    loss_coarse = (
        self.segm_loss(proposals_with_gt, densepose_predictor_outputs, packed_annotations)
        * self.w_segm
    )
    return {
        "loss_densepose_I": loss_fine,
        "loss_densepose_S": loss_coarse,
    }
|
Leffa/3rdparty/densepose/modeling/losses/chart_with_confidences.py
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
import math
|
| 3 |
+
from typing import Any, List
|
| 4 |
+
import torch
|
| 5 |
+
from torch import nn
|
| 6 |
+
from torch.nn import functional as F
|
| 7 |
+
|
| 8 |
+
from detectron2.config import CfgNode
|
| 9 |
+
from detectron2.structures import Instances
|
| 10 |
+
|
| 11 |
+
from .. import DensePoseConfidenceModelConfig, DensePoseUVConfidenceType
|
| 12 |
+
from .chart import DensePoseChartLoss
|
| 13 |
+
from .registry import DENSEPOSE_LOSS_REGISTRY
|
| 14 |
+
from .utils import BilinearInterpolationHelper, LossDict
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
@DENSEPOSE_LOSS_REGISTRY.register()
class DensePoseChartWithConfidenceLoss(DensePoseChartLoss):
    """
    Chart-based DensePose loss with optional confidence modeling.

    Extends `DensePoseChartLoss`: when UV confidence estimation is enabled in
    the config, the plain U/V losses are replaced by a single Gaussian
    negative-log-likelihood loss (`loss_densepose_UV`) that also trains the
    predicted confidence channels (`sigma_2`, and for the anisotropic case
    `kappa_u` / `kappa_v`). When confidence is disabled, behavior falls back
    to the parent class.
    """

    def __init__(self, cfg: CfgNode):
        super().__init__(cfg)
        self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg)
        # Select the NLL flavor matching the configured confidence type.
        # NOTE(review): if confidences are enabled with a type other than
        # IID_ISO / INDEP_ANISO, `uv_loss_with_confidences` is never set —
        # presumably config validation upstream guarantees one of these two;
        # verify.
        if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO:
            self.uv_loss_with_confidences = IIDIsotropicGaussianUVLoss(
                self.confidence_model_cfg.uv_confidence.epsilon
            )
        elif self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.INDEP_ANISO:
            self.uv_loss_with_confidences = IndepAnisotropicGaussianUVLoss(
                self.confidence_model_cfg.uv_confidence.epsilon
            )

    def produce_fake_densepose_losses_uv(self, densepose_predictor_outputs: Any) -> LossDict:
        """
        Overrides fake losses for fine segmentation and U/V coordinates to
        include computation graphs for additional confidence parameters.
        These are used when no suitable ground truth data was found in a batch.
        The loss has a value 0 and is primarily used to construct the computation graph,
        so that `DistributedDataParallel` has similar graphs on all GPUs and can
        perform reduction properly.

        Args:
            densepose_predictor_outputs: DensePose predictor outputs, an object
                of a dataclass that is assumed to have the following attributes:
             * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
             * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
             * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
        Return:
            dict: str -> tensor: dict of losses with the following entries:
             * `loss_densepose_UV`: has value 0 (replaces the separate U/V
               entries when confidence modeling is enabled)
        """
        conf_type = self.confidence_model_cfg.uv_confidence.type
        if self.confidence_model_cfg.uv_confidence.enabled:
            # zero-valued loss that still references u/v (and the confidence
            # channels below) so the DDP graph matches batches with GT
            loss_uv = (
                densepose_predictor_outputs.u.sum() + densepose_predictor_outputs.v.sum()
            ) * 0
            if conf_type == DensePoseUVConfidenceType.IID_ISO:
                loss_uv += densepose_predictor_outputs.sigma_2.sum() * 0
            elif conf_type == DensePoseUVConfidenceType.INDEP_ANISO:
                loss_uv += (
                    densepose_predictor_outputs.sigma_2.sum()
                    + densepose_predictor_outputs.kappa_u.sum()
                    + densepose_predictor_outputs.kappa_v.sum()
                ) * 0
            return {"loss_densepose_UV": loss_uv}
        else:
            return super().produce_fake_densepose_losses_uv(densepose_predictor_outputs)

    def produce_densepose_losses_uv(
        self,
        proposals_with_gt: List[Instances],
        densepose_predictor_outputs: Any,
        packed_annotations: Any,
        interpolator: BilinearInterpolationHelper,
        j_valid_fg: torch.Tensor,
    ) -> LossDict:
        """
        U/V losses. With confidence modeling enabled, returns a single
        `loss_densepose_UV` entry computed by the configured Gaussian NLL
        loss (weighted by `self.w_points`); otherwise delegates to the parent
        smooth-L1 implementation.
        """
        conf_type = self.confidence_model_cfg.uv_confidence.type
        if self.confidence_model_cfg.uv_confidence.enabled:
            # interpolate estimates at annotated points, keep valid FG points
            u_gt = packed_annotations.u_gt[j_valid_fg]
            u_est = interpolator.extract_at_points(densepose_predictor_outputs.u)[j_valid_fg]
            v_gt = packed_annotations.v_gt[j_valid_fg]
            v_est = interpolator.extract_at_points(densepose_predictor_outputs.v)[j_valid_fg]
            sigma_2_est = interpolator.extract_at_points(densepose_predictor_outputs.sigma_2)[
                j_valid_fg
            ]
            if conf_type == DensePoseUVConfidenceType.IID_ISO:
                return {
                    "loss_densepose_UV": (
                        self.uv_loss_with_confidences(u_est, v_est, sigma_2_est, u_gt, v_gt)
                        * self.w_points
                    )
                }
            elif conf_type in [DensePoseUVConfidenceType.INDEP_ANISO]:
                # anisotropic case additionally uses the kappa_u/kappa_v channels
                kappa_u_est = interpolator.extract_at_points(densepose_predictor_outputs.kappa_u)[
                    j_valid_fg
                ]
                kappa_v_est = interpolator.extract_at_points(densepose_predictor_outputs.kappa_v)[
                    j_valid_fg
                ]
                return {
                    "loss_densepose_UV": (
                        self.uv_loss_with_confidences(
                            u_est, v_est, sigma_2_est, kappa_u_est, kappa_v_est, u_gt, v_gt
                        )
                        * self.w_points
                    )
                }
        # confidence disabled (or unrecognized type): fall back to smooth L1
        return super().produce_densepose_losses_uv(
            proposals_with_gt,
            densepose_predictor_outputs,
            packed_annotations,
            interpolator,
            j_valid_fg,
        )
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
class IIDIsotropicGaussianUVLoss(nn.Module):
    """
    Negative log likelihood for iid residuals with isotropic covariance:
    $Sigma_i = sigma_i^2 I$
    The loss is:
    $1/2 sum_{i=1}^n (log(2 pi) + 2 log sigma_i^2 + ||delta_i||^2 / sigma_i^2)$,
    where $delta_i=(u - u', v - v')$ is the 2D difference between estimated
    and ground truth UV values.
    For details, see:
    N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning
    Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019
    """

    def __init__(self, sigma_lower_bound: float):
        super().__init__()
        # lower bound keeps the variance away from the degenerate sigma -> 0
        self.sigma_lower_bound = sigma_lower_bound
        self.log2pi = math.log(2 * math.pi)

    def forward(
        self,
        u: torch.Tensor,
        v: torch.Tensor,
        sigma_u: torch.Tensor,
        target_u: torch.Tensor,
        target_v: torch.Tensor,
    ):
        # sigma_i^2 = softplus(raw) + lower bound (always positive)
        sigma2 = F.softplus(sigma_u) + self.sigma_lower_bound
        # squared residual norm ||delta_i||^2
        delta_u = u - target_u
        delta_v = v - target_v
        delta_t_delta = delta_u * delta_u + delta_v * delta_v
        # per-point NLL from the formula in the class docstring, summed
        nll = 0.5 * (self.log2pi + 2 * torch.log(sigma2) + delta_t_delta / sigma2)
        return nll.sum()
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
class IndepAnisotropicGaussianUVLoss(nn.Module):
    """
    Negative log likelihood for independent residuals with anisotropic
    covariances:
    $Sigma_i = sigma_i^2 I + r_i r_i^T$
    The loss is:
    $1/2 sum_{i=1}^n (log(2 pi)
      + log sigma_i^2 (sigma_i^2 + ||r_i||^2)
      + ||delta_i||^2 / sigma_i^2
      - <delta_i, r_i>^2 / (sigma_i^2 * (sigma_i^2 + ||r_i||^2)))$,
    where $delta_i=(u - u', v - v')$ is the 2D difference between estimated
    and ground truth UV values.
    For details, see:
    N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning
    Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019
    """

    def __init__(self, sigma_lower_bound: float):
        super().__init__()
        # lower bound keeps the variance away from the degenerate sigma -> 0
        self.sigma_lower_bound = sigma_lower_bound
        self.log2pi = math.log(2 * math.pi)

    def forward(
        self,
        u: torch.Tensor,
        v: torch.Tensor,
        sigma_u: torch.Tensor,
        kappa_u_est: torch.Tensor,
        kappa_v_est: torch.Tensor,
        target_u: torch.Tensor,
        target_v: torch.Tensor,
    ):
        # sigma_i^2 = softplus(raw) + lower bound (always positive)
        sigma2 = F.softplus(sigma_u) + self.sigma_lower_bound
        # ||r_i||^2 from the anisotropy direction (kappa_u, kappa_v)
        r_sqnorm2 = kappa_u_est * kappa_u_est + kappa_v_est * kappa_v_est
        delta_u = u - target_u
        delta_v = v - target_v
        # ||delta_i||^2
        delta_sqnorm = delta_u * delta_u + delta_v * delta_v
        # scalar product <delta_i, r_i> and its square
        delta_r = delta_u * kappa_u_est + delta_v * kappa_v_est
        delta_r_sqnorm = delta_r * delta_r
        # determinant-like term sigma^2 * (sigma^2 + ||r||^2)
        denom2 = sigma2 * (sigma2 + r_sqnorm2)
        nll = 0.5 * (
            self.log2pi + torch.log(denom2) + delta_sqnorm / sigma2 - delta_r_sqnorm / denom2
        )
        return nll.sum()
|
Leffa/3rdparty/densepose/modeling/losses/cse.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
| 2 |
+
|
| 3 |
+
from typing import Any, List
|
| 4 |
+
from torch import nn
|
| 5 |
+
|
| 6 |
+
from detectron2.config import CfgNode
|
| 7 |
+
from detectron2.structures import Instances
|
| 8 |
+
|
| 9 |
+
from .cycle_pix2shape import PixToShapeCycleLoss
|
| 10 |
+
from .cycle_shape2shape import ShapeToShapeCycleLoss
|
| 11 |
+
from .embed import EmbeddingLoss
|
| 12 |
+
from .embed_utils import CseAnnotationsAccumulator
|
| 13 |
+
from .mask_or_segm import MaskOrSegmentationLoss
|
| 14 |
+
from .registry import DENSEPOSE_LOSS_REGISTRY
|
| 15 |
+
from .soft_embed import SoftEmbeddingLoss
|
| 16 |
+
from .utils import BilinearInterpolationHelper, LossDict, extract_packed_annotations_from_matches
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
@DENSEPOSE_LOSS_REGISTRY.register()
class DensePoseCseLoss:
    """
    Continuous Surface Embeddings (CSE) loss.

    Combines: a coarse segmentation loss (`loss_densepose_S`, via
    `MaskOrSegmentationLoss`), per-mesh embedding losses
    (`loss_densepose_E<meshid>`), and optional shape-to-shape
    (`loss_shape2shape`) and pixel-to-shape (`loss_pix2shape`) cycle losses,
    all weighted by the corresponding config values.
    """

    # maps config EMBED_LOSS_NAME to the local embedding loss class;
    # a plain dict is used instead of the registry (see create_embed_loss)
    _EMBED_LOSS_REGISTRY = {
        EmbeddingLoss.__name__: EmbeddingLoss,
        SoftEmbeddingLoss.__name__: SoftEmbeddingLoss,
    }

    def __init__(self, cfg: CfgNode):
        """
        Initialize CSE loss from configuration options

        Args:
            cfg (CfgNode): configuration options
        """
        self.w_segm = cfg.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS
        self.w_embed = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_LOSS_WEIGHT
        self.segm_loss = MaskOrSegmentationLoss(cfg)
        self.embed_loss = DensePoseCseLoss.create_embed_loss(cfg)
        self.do_shape2shape = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.ENABLED
        if self.do_shape2shape:
            self.w_shape2shape = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.WEIGHT
            self.shape2shape_loss = ShapeToShapeCycleLoss(cfg)
        self.do_pix2shape = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.ENABLED
        if self.do_pix2shape:
            self.w_pix2shape = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.WEIGHT
            self.pix2shape_loss = PixToShapeCycleLoss(cfg)

    @classmethod
    def create_embed_loss(cls, cfg: CfgNode):
        """Instantiate the embedding loss class named by EMBED_LOSS_NAME."""
        # registry not used here, since embedding losses are currently local
        # and are not used anywhere else
        return cls._EMBED_LOSS_REGISTRY[cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_LOSS_NAME](cfg)

    def __call__(
        self,
        proposals_with_gt: List[Instances],
        densepose_predictor_outputs: Any,
        embedder: nn.Module,
    ) -> LossDict:
        """
        Compute all CSE losses for a batch. Falls back to zero-valued fake
        losses (same keys, graph-attached) when the batch has no proposals
        or no usable packed annotations.
        """
        if not len(proposals_with_gt):
            return self.produce_fake_losses(densepose_predictor_outputs, embedder)
        accumulator = CseAnnotationsAccumulator()
        packed_annotations = extract_packed_annotations_from_matches(proposals_with_gt, accumulator)
        if packed_annotations is None:
            return self.produce_fake_losses(densepose_predictor_outputs, embedder)
        # interpolation grid size is taken from the embedding output (H, W)
        h, w = densepose_predictor_outputs.embedding.shape[2:]
        interpolator = BilinearInterpolationHelper.from_matches(
            packed_annotations,
            (h, w),
        )
        meshid_to_embed_losses = self.embed_loss(
            proposals_with_gt,
            densepose_predictor_outputs,
            packed_annotations,
            interpolator,
            embedder,
        )
        # one weighted loss entry per mesh id present in the annotations
        embed_loss_dict = {
            f"loss_densepose_E{meshid}": self.w_embed * meshid_to_embed_losses[meshid]
            for meshid in meshid_to_embed_losses
        }
        all_loss_dict = {
            "loss_densepose_S": self.w_segm
            * self.segm_loss(proposals_with_gt, densepose_predictor_outputs, packed_annotations),
            **embed_loss_dict,
        }
        if self.do_shape2shape:
            all_loss_dict["loss_shape2shape"] = self.w_shape2shape * self.shape2shape_loss(embedder)
        if self.do_pix2shape:
            all_loss_dict["loss_pix2shape"] = self.w_pix2shape * self.pix2shape_loss(
                proposals_with_gt, densepose_predictor_outputs, packed_annotations, embedder
            )
        return all_loss_dict

    def produce_fake_losses(
        self, densepose_predictor_outputs: Any, embedder: nn.Module
    ) -> LossDict:
        """
        Zero-valued losses with the same keys as `__call__`, used when no
        ground truth is available so `DistributedDataParallel` sees identical
        graphs on all GPUs. Note the weights are intentionally not applied
        here (values are already 0).
        """
        meshname_to_embed_losses = self.embed_loss.fake_values(
            densepose_predictor_outputs, embedder=embedder
        )
        embed_loss_dict = {
            f"loss_densepose_E{mesh_name}": meshname_to_embed_losses[mesh_name]
            for mesh_name in meshname_to_embed_losses
        }
        all_loss_dict = {
            "loss_densepose_S": self.segm_loss.fake_value(densepose_predictor_outputs),
            **embed_loss_dict,
        }
        if self.do_shape2shape:
            all_loss_dict["loss_shape2shape"] = self.shape2shape_loss.fake_value(embedder)
        if self.do_pix2shape:
            all_loss_dict["loss_pix2shape"] = self.pix2shape_loss.fake_value(
                densepose_predictor_outputs, embedder
            )
        return all_loss_dict
|
Leffa/3rdparty/densepose/modeling/losses/cycle_pix2shape.py
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
| 2 |
+
|
| 3 |
+
from typing import Any, List
|
| 4 |
+
import torch
|
| 5 |
+
from torch import nn
|
| 6 |
+
from torch.nn import functional as F
|
| 7 |
+
|
| 8 |
+
from detectron2.config import CfgNode
|
| 9 |
+
from detectron2.structures import Instances
|
| 10 |
+
|
| 11 |
+
from densepose.data.meshes.catalog import MeshCatalog
|
| 12 |
+
from densepose.modeling.cse.utils import normalize_embeddings, squared_euclidean_distance_matrix
|
| 13 |
+
|
| 14 |
+
from .embed_utils import PackedCseAnnotations
|
| 15 |
+
from .mask import extract_data_for_mask_loss_from_matches
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def _create_pixel_dist_matrix(grid_size: int) -> torch.Tensor:
    """
    Squared pairwise distances between all pixels of a grid_size x grid_size
    grid, as a [grid_size^2, grid_size^2] tensor.

    Pixels are enumerated row-major: flattened index i corresponds to
    row = i // grid_size, col = i % grid_size.
    """
    axis = torch.arange(grid_size)
    coords_2d = torch.stack(torch.meshgrid(axis, axis), -1)
    pix_coords = coords_2d.reshape((grid_size * grid_size, 2)).float()
    return squared_euclidean_distance_matrix(pix_coords, pix_coords)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def _sample_fg_pixels_randperm(fg_mask: torch.Tensor, sample_size: int) -> torch.Tensor:
|
| 31 |
+
fg_mask_flattened = fg_mask.reshape((-1,))
|
| 32 |
+
num_pixels = int(fg_mask_flattened.sum().item())
|
| 33 |
+
fg_pixel_indices = fg_mask_flattened.nonzero(as_tuple=True)[0]
|
| 34 |
+
if (sample_size <= 0) or (num_pixels <= sample_size):
|
| 35 |
+
return fg_pixel_indices
|
| 36 |
+
sample_indices = torch.randperm(num_pixels, device=fg_mask.device)[:sample_size]
|
| 37 |
+
return fg_pixel_indices[sample_indices]
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def _sample_fg_pixels_multinomial(fg_mask: torch.Tensor, sample_size: int) -> torch.Tensor:
|
| 41 |
+
fg_mask_flattened = fg_mask.reshape((-1,))
|
| 42 |
+
num_pixels = int(fg_mask_flattened.sum().item())
|
| 43 |
+
if (sample_size <= 0) or (num_pixels <= sample_size):
|
| 44 |
+
return fg_mask_flattened.nonzero(as_tuple=True)[0]
|
| 45 |
+
return fg_mask_flattened.float().multinomial(sample_size, replacement=False)
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
class PixToShapeCycleLoss(nn.Module):
    """
    Cycle loss for pixel-vertex correspondence.

    For each instance and each considered mesh, samples foreground pixels,
    maps pixel embeddings to mesh vertices and back via softmax similarity,
    and penalizes cycles that land far (in image space) from where they
    started.
    """

    def __init__(self, cfg: CfgNode):
        super().__init__()
        self.shape_names = list(cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDERS.keys())
        self.embed_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE
        self.norm_p = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.NORM_P
        # if True, cycle over every configured mesh, not only those present in GT
        self.use_all_meshes_not_gt_only = (
            cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.USE_ALL_MESHES_NOT_GT_ONLY
        )
        self.num_pixels_to_sample = (
            cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.NUM_PIXELS_TO_SAMPLE
        )
        # NOTE(review): pix_sigma is read from config but not used anywhere
        # in this module — confirm whether it is dead config or used elsewhere
        self.pix_sigma = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.PIXEL_SIGMA
        self.temperature_pix_to_vertex = (
            cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.TEMPERATURE_PIXEL_TO_VERTEX
        )
        self.temperature_vertex_to_pix = (
            cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.TEMPERATURE_VERTEX_TO_PIXEL
        )
        # precomputed [S^2, S^2] squared pixel distance matrix for the output grid
        self.pixel_dists = _create_pixel_dist_matrix(cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE)

    def forward(
        self,
        proposals_with_gt: List[Instances],
        densepose_predictor_outputs: Any,
        packed_annotations: PackedCseAnnotations,
        embedder: nn.Module,
    ):
        """
        Args:
            proposals_with_gt (list of Instances): detections with associated
                ground truth data; each item corresponds to instances detected
                on 1 image; the number of items corresponds to the number of
                images in a batch
            densepose_predictor_outputs: an object of a dataclass that contains predictor
                outputs with estimated values; assumed to have the following attributes:
                * embedding - embedding estimates, tensor of shape [N, D, S, S], where
                  N = number of instances (= sum N_i, where N_i is the number of
                  instances on image i)
                  D = embedding space dimensionality (MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE)
                  S = output size (width and height)
            packed_annotations (PackedCseAnnotations): contains various data useful
                for loss computation, each data is packed into a single tensor
            embedder (nn.Module): module that computes vertex embeddings for different meshes
        """
        pix_embeds = densepose_predictor_outputs.embedding
        if self.pixel_dists.device != pix_embeds.device:
            # should normally be done only once
            self.pixel_dists = self.pixel_dists.to(device=pix_embeds.device)
        with torch.no_grad():
            mask_loss_data = extract_data_for_mask_loss_from_matches(
                proposals_with_gt, densepose_predictor_outputs.coarse_segm
            )
        # GT masks - tensor of shape [N, S, S] of int64
        masks_gt = mask_loss_data.masks_gt.long()  # pyre-ignore[16]
        assert len(pix_embeds) == len(masks_gt), (
            f"Number of instances with embeddings {len(pix_embeds)} != "
            f"number of instances with GT masks {len(masks_gt)}"
        )
        losses = []
        # choose which meshes participate: all configured, or only those in GT
        mesh_names = (
            self.shape_names
            if self.use_all_meshes_not_gt_only
            else [
                MeshCatalog.get_mesh_name(mesh_id.item())
                for mesh_id in packed_annotations.vertex_mesh_ids_gt.unique()
            ]
        )
        for pixel_embeddings, mask_gt in zip(pix_embeds, masks_gt):
            # pixel_embeddings [D, S, S]
            # mask_gt [S, S]
            for mesh_name in mesh_names:
                mesh_vertex_embeddings = embedder(mesh_name)
                # pixel indices [M]
                pixel_indices_flattened = _sample_fg_pixels_randperm(
                    mask_gt, self.num_pixels_to_sample
                )
                # pixel distances [M, M]
                pixel_dists = self.pixel_dists.to(pixel_embeddings.device)[
                    torch.meshgrid(pixel_indices_flattened, pixel_indices_flattened)
                ]
                # pixel embeddings [M, D]
                pixel_embeddings_sampled = normalize_embeddings(
                    pixel_embeddings.reshape((self.embed_size, -1))[:, pixel_indices_flattened].T
                )
                # pixel-vertex similarity [M, K]
                sim_matrix = pixel_embeddings_sampled.mm(mesh_vertex_embeddings.T)
                # soft pixel->vertex and vertex->pixel assignment matrices
                c_pix_vertex = F.softmax(sim_matrix / self.temperature_pix_to_vertex, dim=1)
                c_vertex_pix = F.softmax(sim_matrix.T / self.temperature_vertex_to_pix, dim=1)
                # round-trip pixel->vertex->pixel assignment [M, M]
                c_cycle = c_pix_vertex.mm(c_vertex_pix)
                # penalize cycle mass landing on spatially distant pixels
                loss_cycle = torch.norm(pixel_dists * c_cycle, p=self.norm_p)
                losses.append(loss_cycle)

        if len(losses) == 0:
            # keep graph attached even when nothing was sampled
            return pix_embeds.sum() * 0
        return torch.stack(losses, dim=0).mean()

    def fake_value(self, densepose_predictor_outputs: Any, embedder: nn.Module):
        # zero-valued loss touching the embedder and the embedding outputs,
        # so DDP graphs match batches that have ground truth
        losses = [embedder(mesh_name).sum() * 0 for mesh_name in embedder.mesh_names]
        losses.append(densepose_predictor_outputs.embedding.sum() * 0)
        return torch.mean(torch.stack(losses))
|
Leffa/3rdparty/densepose/modeling/losses/cycle_shape2shape.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
| 2 |
+
|
| 3 |
+
import random
|
| 4 |
+
from typing import Tuple
|
| 5 |
+
import torch
|
| 6 |
+
from torch import nn
|
| 7 |
+
from torch.nn import functional as F
|
| 8 |
+
|
| 9 |
+
from detectron2.config import CfgNode
|
| 10 |
+
|
| 11 |
+
from densepose.structures.mesh import create_mesh
|
| 12 |
+
|
| 13 |
+
from .utils import sample_random_indices
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class ShapeToShapeCycleLoss(nn.Module):
|
| 17 |
+
"""
|
| 18 |
+
Cycle Loss for Shapes.
|
| 19 |
+
Inspired by:
|
| 20 |
+
"Mapping in a Cycle: Sinkhorn Regularized Unsupervised Learning for Point Cloud Shapes".
|
| 21 |
+
"""
|
| 22 |
+
|
| 23 |
+
def __init__(self, cfg: CfgNode):
|
| 24 |
+
super().__init__()
|
| 25 |
+
self.shape_names = list(cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDERS.keys())
|
| 26 |
+
self.all_shape_pairs = [
|
| 27 |
+
(x, y) for i, x in enumerate(self.shape_names) for y in self.shape_names[i + 1 :]
|
| 28 |
+
]
|
| 29 |
+
random.shuffle(self.all_shape_pairs)
|
| 30 |
+
self.cur_pos = 0
|
| 31 |
+
self.norm_p = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.NORM_P
|
| 32 |
+
self.temperature = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.TEMPERATURE
|
| 33 |
+
self.max_num_vertices = (
|
| 34 |
+
cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.MAX_NUM_VERTICES
|
| 35 |
+
)
|
| 36 |
+
|
| 37 |
+
def _sample_random_pair(self) -> Tuple[str, str]:
|
| 38 |
+
"""
|
| 39 |
+
Produce a random pair of different mesh names
|
| 40 |
+
|
| 41 |
+
Return:
|
| 42 |
+
tuple(str, str): a pair of different mesh names
|
| 43 |
+
"""
|
| 44 |
+
if self.cur_pos >= len(self.all_shape_pairs):
|
| 45 |
+
random.shuffle(self.all_shape_pairs)
|
| 46 |
+
self.cur_pos = 0
|
| 47 |
+
shape_pair = self.all_shape_pairs[self.cur_pos]
|
| 48 |
+
self.cur_pos += 1
|
| 49 |
+
return shape_pair
|
| 50 |
+
|
| 51 |
+
def forward(self, embedder: nn.Module):
|
| 52 |
+
"""
|
| 53 |
+
Do a forward pass with a random pair (src, dst) pair of shapes
|
| 54 |
+
Args:
|
| 55 |
+
embedder (nn.Module): module that computes vertex embeddings for different meshes
|
| 56 |
+
"""
|
| 57 |
+
src_mesh_name, dst_mesh_name = self._sample_random_pair()
|
| 58 |
+
return self._forward_one_pair(embedder, src_mesh_name, dst_mesh_name)
|
| 59 |
+
|
| 60 |
+
def fake_value(self, embedder: nn.Module):
|
| 61 |
+
losses = []
|
| 62 |
+
for mesh_name in embedder.mesh_names:
|
| 63 |
+
losses.append(embedder(mesh_name).sum() * 0)
|
| 64 |
+
return torch.mean(torch.stack(losses))
|
| 65 |
+
|
| 66 |
+
def _get_embeddings_and_geodists_for_mesh(
|
| 67 |
+
self, embedder: nn.Module, mesh_name: str
|
| 68 |
+
) -> Tuple[torch.Tensor, torch.Tensor]:
|
| 69 |
+
"""
|
| 70 |
+
Produces embeddings and geodesic distance tensors for a given mesh. May subsample
|
| 71 |
+
the mesh, if it contains too many vertices (controlled by
|
| 72 |
+
SHAPE_CYCLE_LOSS_MAX_NUM_VERTICES parameter).
|
| 73 |
+
Args:
|
| 74 |
+
embedder (nn.Module): module that computes embeddings for mesh vertices
|
| 75 |
+
mesh_name (str): mesh name
|
| 76 |
+
Return:
|
| 77 |
+
embeddings (torch.Tensor of size [N, D]): embeddings for selected mesh
|
| 78 |
+
vertices (N = number of selected vertices, D = embedding space dim)
|
| 79 |
+
geodists (torch.Tensor of size [N, N]): geodesic distances for the selected
|
| 80 |
+
mesh vertices (N = number of selected vertices)
|
| 81 |
+
"""
|
| 82 |
+
embeddings = embedder(mesh_name)
|
| 83 |
+
indices = sample_random_indices(
|
| 84 |
+
embeddings.shape[0], self.max_num_vertices, embeddings.device
|
| 85 |
+
)
|
| 86 |
+
mesh = create_mesh(mesh_name, embeddings.device)
|
| 87 |
+
geodists = mesh.geodists
|
| 88 |
+
if indices is not None:
|
| 89 |
+
embeddings = embeddings[indices]
|
| 90 |
+
geodists = geodists[torch.meshgrid(indices, indices)]
|
| 91 |
+
return embeddings, geodists
|
| 92 |
+
|
| 93 |
+
def _forward_one_pair(
|
| 94 |
+
self, embedder: nn.Module, mesh_name_1: str, mesh_name_2: str
|
| 95 |
+
) -> torch.Tensor:
|
| 96 |
+
"""
|
| 97 |
+
Do a forward pass with a selected pair of meshes
|
| 98 |
+
Args:
|
| 99 |
+
embedder (nn.Module): module that computes vertex embeddings for different meshes
|
| 100 |
+
mesh_name_1 (str): first mesh name
|
| 101 |
+
mesh_name_2 (str): second mesh name
|
| 102 |
+
Return:
|
| 103 |
+
Tensor containing the loss value
|
| 104 |
+
"""
|
| 105 |
+
embeddings_1, geodists_1 = self._get_embeddings_and_geodists_for_mesh(embedder, mesh_name_1)
|
| 106 |
+
embeddings_2, geodists_2 = self._get_embeddings_and_geodists_for_mesh(embedder, mesh_name_2)
|
| 107 |
+
sim_matrix_12 = embeddings_1.mm(embeddings_2.T)
|
| 108 |
+
|
| 109 |
+
c_12 = F.softmax(sim_matrix_12 / self.temperature, dim=1)
|
| 110 |
+
c_21 = F.softmax(sim_matrix_12.T / self.temperature, dim=1)
|
| 111 |
+
c_11 = c_12.mm(c_21)
|
| 112 |
+
c_22 = c_21.mm(c_12)
|
| 113 |
+
|
| 114 |
+
loss_cycle_11 = torch.norm(geodists_1 * c_11, p=self.norm_p)
|
| 115 |
+
loss_cycle_22 = torch.norm(geodists_2 * c_22, p=self.norm_p)
|
| 116 |
+
|
| 117 |
+
return loss_cycle_11 + loss_cycle_22
|
Leffa/3rdparty/densepose/modeling/losses/embed.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
| 2 |
+
|
| 3 |
+
from typing import Any, Dict, List
|
| 4 |
+
import torch
|
| 5 |
+
from torch import nn
|
| 6 |
+
from torch.nn import functional as F
|
| 7 |
+
|
| 8 |
+
from detectron2.config import CfgNode
|
| 9 |
+
from detectron2.structures import Instances
|
| 10 |
+
|
| 11 |
+
from densepose.data.meshes.catalog import MeshCatalog
|
| 12 |
+
from densepose.modeling.cse.utils import normalize_embeddings, squared_euclidean_distance_matrix
|
| 13 |
+
|
| 14 |
+
from .embed_utils import PackedCseAnnotations
|
| 15 |
+
from .utils import BilinearInterpolationHelper
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class EmbeddingLoss:
|
| 19 |
+
"""
|
| 20 |
+
Computes losses for estimated embeddings given annotated vertices.
|
| 21 |
+
Instances in a minibatch that correspond to the same mesh are grouped
|
| 22 |
+
together. For each group, loss is computed as cross-entropy for
|
| 23 |
+
unnormalized scores given ground truth mesh vertex ids.
|
| 24 |
+
Scores are based on squared distances between estimated vertex embeddings
|
| 25 |
+
and mesh vertex embeddings.
|
| 26 |
+
"""
|
| 27 |
+
|
| 28 |
+
def __init__(self, cfg: CfgNode):
|
| 29 |
+
"""
|
| 30 |
+
Initialize embedding loss from config
|
| 31 |
+
"""
|
| 32 |
+
self.embdist_gauss_sigma = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDING_DIST_GAUSS_SIGMA
|
| 33 |
+
|
| 34 |
+
def __call__(
|
| 35 |
+
self,
|
| 36 |
+
proposals_with_gt: List[Instances],
|
| 37 |
+
densepose_predictor_outputs: Any,
|
| 38 |
+
packed_annotations: PackedCseAnnotations,
|
| 39 |
+
interpolator: BilinearInterpolationHelper,
|
| 40 |
+
embedder: nn.Module,
|
| 41 |
+
) -> Dict[int, torch.Tensor]:
|
| 42 |
+
"""
|
| 43 |
+
Produces losses for estimated embeddings given annotated vertices.
|
| 44 |
+
Embeddings for all the vertices of a mesh are computed by the embedder.
|
| 45 |
+
Embeddings for observed pixels are estimated by a predictor.
|
| 46 |
+
Losses are computed as cross-entropy for squared distances between
|
| 47 |
+
observed vertex embeddings and all mesh vertex embeddings given
|
| 48 |
+
ground truth vertex IDs.
|
| 49 |
+
|
| 50 |
+
Args:
|
| 51 |
+
proposals_with_gt (list of Instances): detections with associated
|
| 52 |
+
ground truth data; each item corresponds to instances detected
|
| 53 |
+
on 1 image; the number of items corresponds to the number of
|
| 54 |
+
images in a batch
|
| 55 |
+
densepose_predictor_outputs: an object of a dataclass that contains predictor
|
| 56 |
+
outputs with estimated values; assumed to have the following attributes:
|
| 57 |
+
* embedding - embedding estimates, tensor of shape [N, D, S, S], where
|
| 58 |
+
N = number of instances (= sum N_i, where N_i is the number of
|
| 59 |
+
instances on image i)
|
| 60 |
+
D = embedding space dimensionality (MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE)
|
| 61 |
+
S = output size (width and height)
|
| 62 |
+
packed_annotations (PackedCseAnnotations): contains various data useful
|
| 63 |
+
for loss computation, each data is packed into a single tensor
|
| 64 |
+
interpolator (BilinearInterpolationHelper): bilinear interpolation helper
|
| 65 |
+
embedder (nn.Module): module that computes vertex embeddings for different meshes
|
| 66 |
+
Return:
|
| 67 |
+
dict(int -> tensor): losses for different mesh IDs
|
| 68 |
+
"""
|
| 69 |
+
losses = {}
|
| 70 |
+
for mesh_id_tensor in packed_annotations.vertex_mesh_ids_gt.unique():
|
| 71 |
+
mesh_id = mesh_id_tensor.item()
|
| 72 |
+
mesh_name = MeshCatalog.get_mesh_name(mesh_id)
|
| 73 |
+
# valid points are those that fall into estimated bbox
|
| 74 |
+
# and correspond to the current mesh
|
| 75 |
+
j_valid = interpolator.j_valid * ( # pyre-ignore[16]
|
| 76 |
+
packed_annotations.vertex_mesh_ids_gt == mesh_id
|
| 77 |
+
)
|
| 78 |
+
if not torch.any(j_valid):
|
| 79 |
+
continue
|
| 80 |
+
# extract estimated embeddings for valid points
|
| 81 |
+
# -> tensor [J, D]
|
| 82 |
+
vertex_embeddings_i = normalize_embeddings(
|
| 83 |
+
interpolator.extract_at_points(
|
| 84 |
+
densepose_predictor_outputs.embedding,
|
| 85 |
+
slice_fine_segm=slice(None),
|
| 86 |
+
w_ylo_xlo=interpolator.w_ylo_xlo[:, None], # pyre-ignore[16]
|
| 87 |
+
w_ylo_xhi=interpolator.w_ylo_xhi[:, None], # pyre-ignore[16]
|
| 88 |
+
w_yhi_xlo=interpolator.w_yhi_xlo[:, None], # pyre-ignore[16]
|
| 89 |
+
w_yhi_xhi=interpolator.w_yhi_xhi[:, None], # pyre-ignore[16]
|
| 90 |
+
)[j_valid, :]
|
| 91 |
+
)
|
| 92 |
+
# extract vertex ids for valid points
|
| 93 |
+
# -> tensor [J]
|
| 94 |
+
vertex_indices_i = packed_annotations.vertex_ids_gt[j_valid]
|
| 95 |
+
# embeddings for all mesh vertices
|
| 96 |
+
# -> tensor [K, D]
|
| 97 |
+
mesh_vertex_embeddings = embedder(mesh_name)
|
| 98 |
+
# unnormalized scores for valid points
|
| 99 |
+
# -> tensor [J, K]
|
| 100 |
+
scores = squared_euclidean_distance_matrix(
|
| 101 |
+
vertex_embeddings_i, mesh_vertex_embeddings
|
| 102 |
+
) / (-self.embdist_gauss_sigma)
|
| 103 |
+
losses[mesh_name] = F.cross_entropy(scores, vertex_indices_i, ignore_index=-1)
|
| 104 |
+
|
| 105 |
+
for mesh_name in embedder.mesh_names:
|
| 106 |
+
if mesh_name not in losses:
|
| 107 |
+
losses[mesh_name] = self.fake_value(
|
| 108 |
+
densepose_predictor_outputs, embedder, mesh_name
|
| 109 |
+
)
|
| 110 |
+
return losses
|
| 111 |
+
|
| 112 |
+
def fake_values(self, densepose_predictor_outputs: Any, embedder: nn.Module):
|
| 113 |
+
losses = {}
|
| 114 |
+
for mesh_name in embedder.mesh_names:
|
| 115 |
+
losses[mesh_name] = self.fake_value(densepose_predictor_outputs, embedder, mesh_name)
|
| 116 |
+
return losses
|
| 117 |
+
|
| 118 |
+
def fake_value(self, densepose_predictor_outputs: Any, embedder: nn.Module, mesh_name: str):
|
| 119 |
+
return densepose_predictor_outputs.embedding.sum() * 0 + embedder(mesh_name).sum() * 0
|
Leffa/3rdparty/densepose/modeling/losses/embed_utils.py
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
from typing import Any, Optional
|
| 5 |
+
import torch
|
| 6 |
+
|
| 7 |
+
from detectron2.structures import BoxMode, Instances
|
| 8 |
+
|
| 9 |
+
from .utils import AnnotationsAccumulator
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
@dataclass
|
| 13 |
+
class PackedCseAnnotations:
|
| 14 |
+
x_gt: torch.Tensor
|
| 15 |
+
y_gt: torch.Tensor
|
| 16 |
+
coarse_segm_gt: Optional[torch.Tensor]
|
| 17 |
+
vertex_mesh_ids_gt: torch.Tensor
|
| 18 |
+
vertex_ids_gt: torch.Tensor
|
| 19 |
+
bbox_xywh_gt: torch.Tensor
|
| 20 |
+
bbox_xywh_est: torch.Tensor
|
| 21 |
+
point_bbox_with_dp_indices: torch.Tensor
|
| 22 |
+
point_bbox_indices: torch.Tensor
|
| 23 |
+
bbox_indices: torch.Tensor
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class CseAnnotationsAccumulator(AnnotationsAccumulator):
|
| 27 |
+
"""
|
| 28 |
+
Accumulates annotations by batches that correspond to objects detected on
|
| 29 |
+
individual images. Can pack them together into single tensors.
|
| 30 |
+
"""
|
| 31 |
+
|
| 32 |
+
def __init__(self):
|
| 33 |
+
self.x_gt = []
|
| 34 |
+
self.y_gt = []
|
| 35 |
+
self.s_gt = []
|
| 36 |
+
self.vertex_mesh_ids_gt = []
|
| 37 |
+
self.vertex_ids_gt = []
|
| 38 |
+
self.bbox_xywh_gt = []
|
| 39 |
+
self.bbox_xywh_est = []
|
| 40 |
+
self.point_bbox_with_dp_indices = []
|
| 41 |
+
self.point_bbox_indices = []
|
| 42 |
+
self.bbox_indices = []
|
| 43 |
+
self.nxt_bbox_with_dp_index = 0
|
| 44 |
+
self.nxt_bbox_index = 0
|
| 45 |
+
|
| 46 |
+
def accumulate(self, instances_one_image: Instances):
|
| 47 |
+
"""
|
| 48 |
+
Accumulate instances data for one image
|
| 49 |
+
|
| 50 |
+
Args:
|
| 51 |
+
instances_one_image (Instances): instances data to accumulate
|
| 52 |
+
"""
|
| 53 |
+
boxes_xywh_est = BoxMode.convert(
|
| 54 |
+
instances_one_image.proposal_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
|
| 55 |
+
)
|
| 56 |
+
boxes_xywh_gt = BoxMode.convert(
|
| 57 |
+
instances_one_image.gt_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
|
| 58 |
+
)
|
| 59 |
+
n_matches = len(boxes_xywh_gt)
|
| 60 |
+
assert n_matches == len(
|
| 61 |
+
boxes_xywh_est
|
| 62 |
+
), f"Got {len(boxes_xywh_est)} proposal boxes and {len(boxes_xywh_gt)} GT boxes"
|
| 63 |
+
if not n_matches:
|
| 64 |
+
# no detection - GT matches
|
| 65 |
+
return
|
| 66 |
+
if (
|
| 67 |
+
not hasattr(instances_one_image, "gt_densepose")
|
| 68 |
+
or instances_one_image.gt_densepose is None
|
| 69 |
+
):
|
| 70 |
+
# no densepose GT for the detections, just increase the bbox index
|
| 71 |
+
self.nxt_bbox_index += n_matches
|
| 72 |
+
return
|
| 73 |
+
for box_xywh_est, box_xywh_gt, dp_gt in zip(
|
| 74 |
+
boxes_xywh_est, boxes_xywh_gt, instances_one_image.gt_densepose
|
| 75 |
+
):
|
| 76 |
+
if (dp_gt is not None) and (len(dp_gt.x) > 0):
|
| 77 |
+
# pyre-fixme[6]: For 1st argument expected `Tensor` but got `float`.
|
| 78 |
+
# pyre-fixme[6]: For 2nd argument expected `Tensor` but got `float`.
|
| 79 |
+
self._do_accumulate(box_xywh_gt, box_xywh_est, dp_gt)
|
| 80 |
+
self.nxt_bbox_index += 1
|
| 81 |
+
|
| 82 |
+
def _do_accumulate(self, box_xywh_gt: torch.Tensor, box_xywh_est: torch.Tensor, dp_gt: Any):
|
| 83 |
+
"""
|
| 84 |
+
Accumulate instances data for one image, given that the data is not empty
|
| 85 |
+
|
| 86 |
+
Args:
|
| 87 |
+
box_xywh_gt (tensor): GT bounding box
|
| 88 |
+
box_xywh_est (tensor): estimated bounding box
|
| 89 |
+
dp_gt: GT densepose data with the following attributes:
|
| 90 |
+
- x: normalized X coordinates
|
| 91 |
+
- y: normalized Y coordinates
|
| 92 |
+
- segm: tensor of size [S, S] with coarse segmentation
|
| 93 |
+
-
|
| 94 |
+
"""
|
| 95 |
+
self.x_gt.append(dp_gt.x)
|
| 96 |
+
self.y_gt.append(dp_gt.y)
|
| 97 |
+
if hasattr(dp_gt, "segm"):
|
| 98 |
+
self.s_gt.append(dp_gt.segm.unsqueeze(0))
|
| 99 |
+
self.vertex_ids_gt.append(dp_gt.vertex_ids)
|
| 100 |
+
self.vertex_mesh_ids_gt.append(torch.full_like(dp_gt.vertex_ids, dp_gt.mesh_id))
|
| 101 |
+
self.bbox_xywh_gt.append(box_xywh_gt.view(-1, 4))
|
| 102 |
+
self.bbox_xywh_est.append(box_xywh_est.view(-1, 4))
|
| 103 |
+
self.point_bbox_with_dp_indices.append(
|
| 104 |
+
torch.full_like(dp_gt.vertex_ids, self.nxt_bbox_with_dp_index)
|
| 105 |
+
)
|
| 106 |
+
self.point_bbox_indices.append(torch.full_like(dp_gt.vertex_ids, self.nxt_bbox_index))
|
| 107 |
+
self.bbox_indices.append(self.nxt_bbox_index)
|
| 108 |
+
self.nxt_bbox_with_dp_index += 1
|
| 109 |
+
|
| 110 |
+
def pack(self) -> Optional[PackedCseAnnotations]:
|
| 111 |
+
"""
|
| 112 |
+
Pack data into tensors
|
| 113 |
+
"""
|
| 114 |
+
if not len(self.x_gt):
|
| 115 |
+
# TODO:
|
| 116 |
+
# returning proper empty annotations would require
|
| 117 |
+
# creating empty tensors of appropriate shape and
|
| 118 |
+
# type on an appropriate device;
|
| 119 |
+
# we return None so far to indicate empty annotations
|
| 120 |
+
return None
|
| 121 |
+
return PackedCseAnnotations(
|
| 122 |
+
x_gt=torch.cat(self.x_gt, 0),
|
| 123 |
+
y_gt=torch.cat(self.y_gt, 0),
|
| 124 |
+
vertex_mesh_ids_gt=torch.cat(self.vertex_mesh_ids_gt, 0),
|
| 125 |
+
vertex_ids_gt=torch.cat(self.vertex_ids_gt, 0),
|
| 126 |
+
# ignore segmentation annotations, if not all the instances contain those
|
| 127 |
+
coarse_segm_gt=torch.cat(self.s_gt, 0)
|
| 128 |
+
if len(self.s_gt) == len(self.bbox_xywh_gt)
|
| 129 |
+
else None,
|
| 130 |
+
bbox_xywh_gt=torch.cat(self.bbox_xywh_gt, 0),
|
| 131 |
+
bbox_xywh_est=torch.cat(self.bbox_xywh_est, 0),
|
| 132 |
+
point_bbox_with_dp_indices=torch.cat(self.point_bbox_with_dp_indices, 0),
|
| 133 |
+
point_bbox_indices=torch.cat(self.point_bbox_indices, 0),
|
| 134 |
+
bbox_indices=torch.as_tensor(
|
| 135 |
+
self.bbox_indices, dtype=torch.long, device=self.x_gt[0].device
|
| 136 |
+
),
|
| 137 |
+
)
|
Leffa/3rdparty/densepose/modeling/losses/mask.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
from typing import Any, Iterable, List, Optional
|
| 5 |
+
import torch
|
| 6 |
+
from torch.nn import functional as F
|
| 7 |
+
|
| 8 |
+
from detectron2.structures import Instances
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
@dataclass
|
| 12 |
+
class DataForMaskLoss:
|
| 13 |
+
"""
|
| 14 |
+
Contains mask GT and estimated data for proposals from multiple images:
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
# tensor of size (K, H, W) containing GT labels
|
| 18 |
+
masks_gt: Optional[torch.Tensor] = None
|
| 19 |
+
# tensor of size (K, C, H, W) containing estimated scores
|
| 20 |
+
masks_est: Optional[torch.Tensor] = None
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def extract_data_for_mask_loss_from_matches(
|
| 24 |
+
proposals_targets: Iterable[Instances], estimated_segm: torch.Tensor
|
| 25 |
+
) -> DataForMaskLoss:
|
| 26 |
+
"""
|
| 27 |
+
Extract data for mask loss from instances that contain matched GT and
|
| 28 |
+
estimated bounding boxes.
|
| 29 |
+
Args:
|
| 30 |
+
proposals_targets: Iterable[Instances]
|
| 31 |
+
matched GT and estimated results, each item in the iterable
|
| 32 |
+
corresponds to data in 1 image
|
| 33 |
+
estimated_segm: tensor(K, C, S, S) of float - raw unnormalized
|
| 34 |
+
segmentation scores, here S is the size to which GT masks are
|
| 35 |
+
to be resized
|
| 36 |
+
Return:
|
| 37 |
+
masks_est: tensor(K, C, S, S) of float - class scores
|
| 38 |
+
masks_gt: tensor(K, S, S) of int64 - labels
|
| 39 |
+
"""
|
| 40 |
+
data = DataForMaskLoss()
|
| 41 |
+
masks_gt = []
|
| 42 |
+
offset = 0
|
| 43 |
+
assert estimated_segm.shape[2] == estimated_segm.shape[3], (
|
| 44 |
+
f"Expected estimated segmentation to have a square shape, "
|
| 45 |
+
f"but the actual shape is {estimated_segm.shape[2:]}"
|
| 46 |
+
)
|
| 47 |
+
mask_size = estimated_segm.shape[2]
|
| 48 |
+
num_proposals = sum(inst.proposal_boxes.tensor.size(0) for inst in proposals_targets)
|
| 49 |
+
num_estimated = estimated_segm.shape[0]
|
| 50 |
+
assert (
|
| 51 |
+
num_proposals == num_estimated
|
| 52 |
+
), "The number of proposals {} must be equal to the number of estimates {}".format(
|
| 53 |
+
num_proposals, num_estimated
|
| 54 |
+
)
|
| 55 |
+
|
| 56 |
+
for proposals_targets_per_image in proposals_targets:
|
| 57 |
+
n_i = proposals_targets_per_image.proposal_boxes.tensor.size(0)
|
| 58 |
+
if not n_i:
|
| 59 |
+
continue
|
| 60 |
+
gt_masks_per_image = proposals_targets_per_image.gt_masks.crop_and_resize(
|
| 61 |
+
proposals_targets_per_image.proposal_boxes.tensor, mask_size
|
| 62 |
+
).to(device=estimated_segm.device)
|
| 63 |
+
masks_gt.append(gt_masks_per_image)
|
| 64 |
+
offset += n_i
|
| 65 |
+
if masks_gt:
|
| 66 |
+
data.masks_est = estimated_segm
|
| 67 |
+
data.masks_gt = torch.cat(masks_gt, dim=0)
|
| 68 |
+
return data
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
class MaskLoss:
|
| 72 |
+
"""
|
| 73 |
+
Mask loss as cross-entropy for raw unnormalized scores given ground truth labels.
|
| 74 |
+
Mask ground truth labels are defined for the whole image and not only the
|
| 75 |
+
bounding box of interest. They are stored as objects that are assumed to implement
|
| 76 |
+
the `crop_and_resize` interface (e.g. BitMasks, PolygonMasks).
|
| 77 |
+
"""
|
| 78 |
+
|
| 79 |
+
def __call__(
|
| 80 |
+
self, proposals_with_gt: List[Instances], densepose_predictor_outputs: Any
|
| 81 |
+
) -> torch.Tensor:
|
| 82 |
+
"""
|
| 83 |
+
Computes segmentation loss as cross-entropy for raw unnormalized
|
| 84 |
+
scores given ground truth labels.
|
| 85 |
+
|
| 86 |
+
Args:
|
| 87 |
+
proposals_with_gt (list of Instances): detections with associated ground truth data
|
| 88 |
+
densepose_predictor_outputs: an object of a dataclass that contains predictor outputs
|
| 89 |
+
with estimated values; assumed to have the following attribute:
|
| 90 |
+
* coarse_segm (tensor of shape [N, D, S, S]): coarse segmentation estimates
|
| 91 |
+
as raw unnormalized scores
|
| 92 |
+
where N is the number of detections, S is the estimate size ( = width = height)
|
| 93 |
+
and D is the number of coarse segmentation channels.
|
| 94 |
+
Return:
|
| 95 |
+
Cross entropy for raw unnormalized scores for coarse segmentation given
|
| 96 |
+
ground truth labels from masks
|
| 97 |
+
"""
|
| 98 |
+
if not len(proposals_with_gt):
|
| 99 |
+
return self.fake_value(densepose_predictor_outputs)
|
| 100 |
+
# densepose outputs are computed for all images and all bounding boxes;
|
| 101 |
+
# i.e. if a batch has 4 images with (3, 1, 2, 1) proposals respectively,
|
| 102 |
+
# the outputs will have size(0) == 3+1+2+1 == 7
|
| 103 |
+
with torch.no_grad():
|
| 104 |
+
mask_loss_data = extract_data_for_mask_loss_from_matches(
|
| 105 |
+
proposals_with_gt, densepose_predictor_outputs.coarse_segm
|
| 106 |
+
)
|
| 107 |
+
if (mask_loss_data.masks_gt is None) or (mask_loss_data.masks_est is None):
|
| 108 |
+
return self.fake_value(densepose_predictor_outputs)
|
| 109 |
+
return F.cross_entropy(mask_loss_data.masks_est, mask_loss_data.masks_gt.long())
|
| 110 |
+
|
| 111 |
+
def fake_value(self, densepose_predictor_outputs: Any) -> torch.Tensor:
|
| 112 |
+
"""
|
| 113 |
+
Fake segmentation loss used when no suitable ground truth data
|
| 114 |
+
was found in a batch. The loss has a value 0 and is primarily used to
|
| 115 |
+
construct the computation graph, so that `DistributedDataParallel`
|
| 116 |
+
has similar graphs on all GPUs and can perform reduction properly.
|
| 117 |
+
|
| 118 |
+
Args:
|
| 119 |
+
densepose_predictor_outputs: DensePose predictor outputs, an object
|
| 120 |
+
of a dataclass that is assumed to have `coarse_segm`
|
| 121 |
+
attribute
|
| 122 |
+
Return:
|
| 123 |
+
Zero value loss with proper computation graph
|
| 124 |
+
"""
|
| 125 |
+
return densepose_predictor_outputs.coarse_segm.sum() * 0
|
Leffa/3rdparty/densepose/modeling/losses/mask_or_segm.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
| 2 |
+
|
| 3 |
+
from typing import Any, List
|
| 4 |
+
|
| 5 |
+
import torch
|
| 6 |
+
|
| 7 |
+
from detectron2.config import CfgNode
|
| 8 |
+
from detectron2.structures import Instances
|
| 9 |
+
|
| 10 |
+
from .mask import MaskLoss
|
| 11 |
+
from .segm import SegmentationLoss
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class MaskOrSegmentationLoss:
|
| 15 |
+
"""
|
| 16 |
+
Mask or segmentation loss as cross-entropy for raw unnormalized scores
|
| 17 |
+
given ground truth labels. Ground truth labels are either defined by coarse
|
| 18 |
+
segmentation annotation, or by mask annotation, depending on the config
|
| 19 |
+
value MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
def __init__(self, cfg: CfgNode):
|
| 23 |
+
"""
|
| 24 |
+
Initialize segmentation loss from configuration options
|
| 25 |
+
|
| 26 |
+
Args:
|
| 27 |
+
cfg (CfgNode): configuration options
|
| 28 |
+
"""
|
| 29 |
+
self.segm_trained_by_masks = (
|
| 30 |
+
cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS
|
| 31 |
+
)
|
| 32 |
+
if self.segm_trained_by_masks:
|
| 33 |
+
self.mask_loss = MaskLoss()
|
| 34 |
+
self.segm_loss = SegmentationLoss(cfg)
|
| 35 |
+
|
| 36 |
+
def __call__(
|
| 37 |
+
self,
|
| 38 |
+
proposals_with_gt: List[Instances],
|
| 39 |
+
densepose_predictor_outputs: Any,
|
| 40 |
+
packed_annotations: Any,
|
| 41 |
+
) -> torch.Tensor:
|
| 42 |
+
"""
|
| 43 |
+
Compute segmentation loss as cross-entropy between aligned unnormalized
|
| 44 |
+
score estimates and ground truth; with ground truth given
|
| 45 |
+
either by masks, or by coarse segmentation annotations.
|
| 46 |
+
|
| 47 |
+
Args:
|
| 48 |
+
proposals_with_gt (list of Instances): detections with associated ground truth data
|
| 49 |
+
densepose_predictor_outputs: an object of a dataclass that contains predictor outputs
|
| 50 |
+
with estimated values; assumed to have the following attributes:
|
| 51 |
+
* coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S]
|
| 52 |
+
packed_annotations: packed annotations for efficient loss computation
|
| 53 |
+
Return:
|
| 54 |
+
tensor: loss value as cross-entropy for raw unnormalized scores
|
| 55 |
+
given ground truth labels
|
| 56 |
+
"""
|
| 57 |
+
if self.segm_trained_by_masks:
|
| 58 |
+
return self.mask_loss(proposals_with_gt, densepose_predictor_outputs)
|
| 59 |
+
return self.segm_loss(
|
| 60 |
+
proposals_with_gt, densepose_predictor_outputs, packed_annotations
|
| 61 |
+
)
|
| 62 |
+
|
| 63 |
+
def fake_value(self, densepose_predictor_outputs: Any) -> torch.Tensor:
|
| 64 |
+
"""
|
| 65 |
+
Fake segmentation loss used when no suitable ground truth data
|
| 66 |
+
was found in a batch. The loss has a value 0 and is primarily used to
|
| 67 |
+
construct the computation graph, so that `DistributedDataParallel`
|
| 68 |
+
has similar graphs on all GPUs and can perform reduction properly.
|
| 69 |
+
|
| 70 |
+
Args:
|
| 71 |
+
densepose_predictor_outputs: DensePose predictor outputs, an object
|
| 72 |
+
of a dataclass that is assumed to have `coarse_segm`
|
| 73 |
+
attribute
|
| 74 |
+
Return:
|
| 75 |
+
Zero value loss with proper computation graph
|
| 76 |
+
"""
|
| 77 |
+
return densepose_predictor_outputs.coarse_segm.sum() * 0
|
Leffa/3rdparty/densepose/modeling/losses/registry.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from detectron2.utils.registry import Registry
|
| 4 |
+
|
| 5 |
+
DENSEPOSE_LOSS_REGISTRY = Registry("DENSEPOSE_LOSS")
|
Leffa/3rdparty/densepose/modeling/losses/soft_embed.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
| 2 |
+
|
| 3 |
+
from typing import Any, Dict, List
|
| 4 |
+
import torch
|
| 5 |
+
from torch import nn
|
| 6 |
+
from torch.nn import functional as F
|
| 7 |
+
|
| 8 |
+
from detectron2.config import CfgNode
|
| 9 |
+
from detectron2.structures import Instances
|
| 10 |
+
|
| 11 |
+
from densepose.data.meshes.catalog import MeshCatalog
|
| 12 |
+
from densepose.modeling.cse.utils import normalize_embeddings, squared_euclidean_distance_matrix
|
| 13 |
+
from densepose.structures.mesh import create_mesh
|
| 14 |
+
|
| 15 |
+
from .embed_utils import PackedCseAnnotations
|
| 16 |
+
from .utils import BilinearInterpolationHelper
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class SoftEmbeddingLoss:
    """
    Computes losses for estimated embeddings given annotated vertices.
    Instances in a minibatch that correspond to the same mesh are grouped
    together. For each group, loss is computed as cross-entropy for
    unnormalized scores given ground truth mesh vertex ids.
    Scores are based on:
     1) squared distances between estimated vertex embeddings
        and mesh vertex embeddings;
     2) geodesic distances between vertices of a mesh
    """

    def __init__(self, cfg: CfgNode):
        """
        Initialize embedding loss from config.

        Args:
            cfg (CfgNode): configuration options
        """
        # Gaussian kernel bandwidths used to convert squared embedding
        # distances and mesh geodesic distances into (log-)softmax scores
        self.embdist_gauss_sigma = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDING_DIST_GAUSS_SIGMA
        self.geodist_gauss_sigma = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.GEODESIC_DIST_GAUSS_SIGMA

    def __call__(
        self,
        proposals_with_gt: List[Instances],
        densepose_predictor_outputs: Any,
        packed_annotations: PackedCseAnnotations,
        interpolator: BilinearInterpolationHelper,
        embedder: nn.Module,
    ) -> Dict[str, torch.Tensor]:
        """
        Produces losses for estimated embeddings given annotated vertices.
        Embeddings for all the vertices of a mesh are computed by the embedder.
        Embeddings for observed pixels are estimated by a predictor.
        Losses are computed as cross-entropy for unnormalized scores given
        ground truth vertex IDs.
         1) squared distances between estimated vertex embeddings
            and mesh vertex embeddings;
         2) geodesic distances between vertices of a mesh

        Args:
            proposals_with_gt (list of Instances): detections with associated
                ground truth data; each item corresponds to instances detected
                on 1 image; the number of items corresponds to the number of
                images in a batch
            densepose_predictor_outputs: an object of a dataclass that contains predictor
                outputs with estimated values; assumed to have the following attributes:
                * embedding - embedding estimates, tensor of shape [N, D, S, S], where
                  N = number of instances (= sum N_i, where N_i is the number of
                  instances on image i)
                  D = embedding space dimensionality (MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE)
                  S = output size (width and height)
            packed_annotations (PackedCseAnnotations): contains various data useful
                for loss computation, each data is packed into a single tensor
            interpolator (BilinearInterpolationHelper): bilinear interpolation helper
            embedder (nn.Module): module that computes vertex embeddings for different meshes
        Return:
            dict(str -> tensor): losses keyed by mesh name
        """
        # NOTE(fix): the return annotation was `Dict[int, torch.Tensor]`, but the
        # dictionary is keyed by `mesh_name` (a string), see assignments below.
        losses = {}
        for mesh_id_tensor in packed_annotations.vertex_mesh_ids_gt.unique():
            mesh_id = mesh_id_tensor.item()
            mesh_name = MeshCatalog.get_mesh_name(mesh_id)
            # valid points are those that fall into estimated bbox
            # and correspond to the current mesh
            j_valid = interpolator.j_valid * (  # pyre-ignore[16]
                packed_annotations.vertex_mesh_ids_gt == mesh_id
            )
            if not torch.any(j_valid):
                continue
            # extract estimated embeddings for valid points
            # -> tensor [J, D]
            vertex_embeddings_i = normalize_embeddings(
                interpolator.extract_at_points(
                    densepose_predictor_outputs.embedding,
                    slice_fine_segm=slice(None),
                    w_ylo_xlo=interpolator.w_ylo_xlo[:, None],  # pyre-ignore[16]
                    w_ylo_xhi=interpolator.w_ylo_xhi[:, None],  # pyre-ignore[16]
                    w_yhi_xlo=interpolator.w_yhi_xlo[:, None],  # pyre-ignore[16]
                    w_yhi_xhi=interpolator.w_yhi_xhi[:, None],  # pyre-ignore[16]
                )[j_valid, :]
            )
            # extract vertex ids for valid points
            # -> tensor [J]
            vertex_indices_i = packed_annotations.vertex_ids_gt[j_valid]
            # embeddings for all mesh vertices
            # -> tensor [K, D]
            mesh_vertex_embeddings = embedder(mesh_name)
            # softmax values of geodesic distances for GT mesh vertices
            # -> tensor [J, K]
            mesh = create_mesh(mesh_name, mesh_vertex_embeddings.device)
            geodist_softmax_values = F.softmax(
                mesh.geodists[vertex_indices_i] / (-self.geodist_gauss_sigma), dim=1
            )
            # logsoftmax values for valid points
            # -> tensor [J, K]
            embdist_logsoftmax_values = F.log_softmax(
                squared_euclidean_distance_matrix(vertex_embeddings_i, mesh_vertex_embeddings)
                / (-self.embdist_gauss_sigma),
                dim=1,
            )
            # soft cross-entropy between the geodesic target distribution and
            # the embedding-based predicted distribution, averaged over points
            losses[mesh_name] = (-geodist_softmax_values * embdist_logsoftmax_values).sum(1).mean()

        # meshes absent from this batch still get a zero-valued loss that
        # depends on the predictor outputs and embedder, so gradients (zeros)
        # propagate to all parameters
        for mesh_name in embedder.mesh_names:
            if mesh_name not in losses:
                losses[mesh_name] = self.fake_value(
                    densepose_predictor_outputs, embedder, mesh_name
                )
        return losses

    def fake_values(self, densepose_predictor_outputs: Any, embedder: nn.Module):
        """
        Produce zero-valued losses for all meshes known to the embedder.
        """
        losses = {}
        for mesh_name in embedder.mesh_names:
            losses[mesh_name] = self.fake_value(densepose_predictor_outputs, embedder, mesh_name)
        return losses

    def fake_value(self, densepose_predictor_outputs: Any, embedder: nn.Module, mesh_name: str):
        """
        Produce a zero loss value that still references the predictor outputs
        and the embedder for the given mesh (keeps the graph connected).
        """
        return densepose_predictor_outputs.embedding.sum() * 0 + embedder(mesh_name).sum() * 0
|
Leffa/3rdparty/densepose/modeling/losses/utils.py
ADDED
|
@@ -0,0 +1,443 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from abc import ABC, abstractmethod
|
| 4 |
+
from dataclasses import dataclass
|
| 5 |
+
from typing import Any, Dict, List, Optional, Tuple
|
| 6 |
+
import torch
|
| 7 |
+
from torch.nn import functional as F
|
| 8 |
+
|
| 9 |
+
from detectron2.structures import BoxMode, Instances
|
| 10 |
+
|
| 11 |
+
from densepose import DensePoseDataRelative
|
| 12 |
+
|
| 13 |
+
LossDict = Dict[str, torch.Tensor]
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def _linear_interpolation_utilities(v_norm, v0_src, size_src, v0_dst, size_dst, size_z):
|
| 17 |
+
"""
|
| 18 |
+
Computes utility values for linear interpolation at points v.
|
| 19 |
+
The points are given as normalized offsets in the source interval
|
| 20 |
+
(v0_src, v0_src + size_src), more precisely:
|
| 21 |
+
v = v0_src + v_norm * size_src / 256.0
|
| 22 |
+
The computed utilities include lower points v_lo, upper points v_hi,
|
| 23 |
+
interpolation weights v_w and flags j_valid indicating whether the
|
| 24 |
+
points falls into the destination interval (v0_dst, v0_dst + size_dst).
|
| 25 |
+
|
| 26 |
+
Args:
|
| 27 |
+
v_norm (:obj: `torch.Tensor`): tensor of size N containing
|
| 28 |
+
normalized point offsets
|
| 29 |
+
v0_src (:obj: `torch.Tensor`): tensor of size N containing
|
| 30 |
+
left bounds of source intervals for normalized points
|
| 31 |
+
size_src (:obj: `torch.Tensor`): tensor of size N containing
|
| 32 |
+
source interval sizes for normalized points
|
| 33 |
+
v0_dst (:obj: `torch.Tensor`): tensor of size N containing
|
| 34 |
+
left bounds of destination intervals
|
| 35 |
+
size_dst (:obj: `torch.Tensor`): tensor of size N containing
|
| 36 |
+
destination interval sizes
|
| 37 |
+
size_z (int): interval size for data to be interpolated
|
| 38 |
+
|
| 39 |
+
Returns:
|
| 40 |
+
v_lo (:obj: `torch.Tensor`): int tensor of size N containing
|
| 41 |
+
indices of lower values used for interpolation, all values are
|
| 42 |
+
integers from [0, size_z - 1]
|
| 43 |
+
v_hi (:obj: `torch.Tensor`): int tensor of size N containing
|
| 44 |
+
indices of upper values used for interpolation, all values are
|
| 45 |
+
integers from [0, size_z - 1]
|
| 46 |
+
v_w (:obj: `torch.Tensor`): float tensor of size N containing
|
| 47 |
+
interpolation weights
|
| 48 |
+
j_valid (:obj: `torch.Tensor`): uint8 tensor of size N containing
|
| 49 |
+
0 for points outside the estimation interval
|
| 50 |
+
(v0_est, v0_est + size_est) and 1 otherwise
|
| 51 |
+
"""
|
| 52 |
+
v = v0_src + v_norm * size_src / 256.0
|
| 53 |
+
j_valid = (v - v0_dst >= 0) * (v - v0_dst < size_dst)
|
| 54 |
+
v_grid = (v - v0_dst) * size_z / size_dst
|
| 55 |
+
v_lo = v_grid.floor().long().clamp(min=0, max=size_z - 1)
|
| 56 |
+
v_hi = (v_lo + 1).clamp(max=size_z - 1)
|
| 57 |
+
v_grid = torch.min(v_hi.float(), v_grid)
|
| 58 |
+
v_w = v_grid - v_lo.float()
|
| 59 |
+
return v_lo, v_hi, v_w, j_valid
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
class BilinearInterpolationHelper:
    """
    Bilinear interpolation of predictor outputs at annotated point locations.

    Args:
        packed_annotations: object that contains packed annotations
        j_valid (:obj: `torch.Tensor`): size-M tensor, truthy for points to be
            selected and falsy for points to be discarded
        y_lo (:obj: `torch.Tensor`): int tensor with the upper row index
            in z_est for each point
        y_hi (:obj: `torch.Tensor`): int tensor with the lower row index
            in z_est for each point
        x_lo (:obj: `torch.Tensor`): int tensor with the left column index
            in z_est for each point
        x_hi (:obj: `torch.Tensor`): int tensor with the right column index
            in z_est for each point
        w_ylo_xlo (:obj: `torch.Tensor`): size-M float tensor of upper-left weights
        w_ylo_xhi (:obj: `torch.Tensor`): size-M float tensor of upper-right weights
        w_yhi_xlo (:obj: `torch.Tensor`): size-M float tensor of lower-left weights
        w_yhi_xhi (:obj: `torch.Tensor`): size-M float tensor of lower-right weights
    """

    def __init__(
        self,
        packed_annotations: Any,
        j_valid: torch.Tensor,
        y_lo: torch.Tensor,
        y_hi: torch.Tensor,
        x_lo: torch.Tensor,
        x_hi: torch.Tensor,
        w_ylo_xlo: torch.Tensor,
        w_ylo_xhi: torch.Tensor,
        w_yhi_xlo: torch.Tensor,
        w_yhi_xhi: torch.Tensor,
    ):
        # store each constructor argument as an attribute of the same name
        self.packed_annotations = packed_annotations
        self.j_valid = j_valid
        self.y_lo = y_lo
        self.y_hi = y_hi
        self.x_lo = x_lo
        self.x_hi = x_hi
        self.w_ylo_xlo = w_ylo_xlo
        self.w_ylo_xhi = w_ylo_xhi
        self.w_yhi_xlo = w_yhi_xlo
        self.w_yhi_xhi = w_yhi_xhi

    @staticmethod
    def from_matches(
        packed_annotations: Any, densepose_outputs_size_hw: Tuple[int, int]
    ) -> "BilinearInterpolationHelper":
        """
        Build an interpolation helper from packed annotations and output size.

        Args:
            packed_annotations: annotations packed into tensors; the following
                attributes are required: bbox_xywh_gt, bbox_xywh_est, x_gt,
                y_gt, point_bbox_with_dp_indices, point_bbox_indices
            densepose_outputs_size_hw (tuple [int, int]): resolution of
                DensePose predictor outputs (H, W)
        Return:
            An instance of `BilinearInterpolationHelper` used to perform
            interpolation for the given annotation points and output resolution
        """
        out_h, out_w = densepose_outputs_size_hw
        point_to_bbox = packed_annotations.point_bbox_with_dp_indices
        x0_gt, y0_gt, w_gt, h_gt = packed_annotations.bbox_xywh_gt[point_to_bbox].unbind(dim=1)
        x0_est, y0_est, w_est, h_est = packed_annotations.bbox_xywh_est[point_to_bbox].unbind(
            dim=1
        )
        x_lo, x_hi, x_w, jx_valid = _linear_interpolation_utilities(
            packed_annotations.x_gt, x0_gt, w_gt, x0_est, w_est, out_w
        )
        y_lo, y_hi, y_w, jy_valid = _linear_interpolation_utilities(
            packed_annotations.y_gt, y0_gt, h_gt, y0_est, h_est, out_h
        )
        # a point is valid only if it is valid along both axes
        j_valid = jx_valid * jy_valid

        return BilinearInterpolationHelper(
            packed_annotations,
            j_valid,
            y_lo,
            y_hi,
            x_lo,
            x_hi,
            (1.0 - x_w) * (1.0 - y_w),  # upper-left corner weight
            x_w * (1.0 - y_w),  # upper-right corner weight
            (1.0 - x_w) * y_w,  # lower-left corner weight
            x_w * y_w,  # lower-right corner weight
        )

    def extract_at_points(
        self,
        z_est,
        slice_fine_segm=None,
        w_ylo_xlo=None,
        w_ylo_xhi=None,
        w_yhi_xlo=None,
        w_yhi_xhi=None,
    ):
        """
        Sample z_est at the annotated points via bilinear interpolation over
        the four neighboring grid values — top-left (y_lo, x_lo), top-right
        (y_lo, x_hi), bottom-left (y_hi, x_lo) and bottom-right (y_hi, x_hi) —
        with weights w_ylo_xlo, w_ylo_xhi, w_yhi_xlo and w_yhi_xhi.
        Use slice_fine_segm to slice dim=1 in z_est.
        """
        # fall back to the stored defaults for any argument not supplied
        if slice_fine_segm is None:
            slice_fine_segm = self.packed_annotations.fine_segm_labels_gt
        if w_ylo_xlo is None:
            w_ylo_xlo = self.w_ylo_xlo
        if w_ylo_xhi is None:
            w_ylo_xhi = self.w_ylo_xhi
        if w_yhi_xlo is None:
            w_yhi_xlo = self.w_yhi_xlo
        if w_yhi_xhi is None:
            w_yhi_xhi = self.w_yhi_xhi

        index_bbox = self.packed_annotations.point_bbox_indices
        top_left = z_est[index_bbox, slice_fine_segm, self.y_lo, self.x_lo]
        top_right = z_est[index_bbox, slice_fine_segm, self.y_lo, self.x_hi]
        bottom_left = z_est[index_bbox, slice_fine_segm, self.y_hi, self.x_lo]
        bottom_right = z_est[index_bbox, slice_fine_segm, self.y_hi, self.x_hi]
        return (
            top_left * w_ylo_xlo
            + top_right * w_ylo_xhi
            + bottom_left * w_yhi_xlo
            + bottom_right * w_yhi_xhi
        )
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
def resample_data(
    z, bbox_xywh_src, bbox_xywh_dst, wout, hout, mode: str = "nearest", padding_mode: str = "zeros"
):
    """
    Resample per-box data from source boxes into destination boxes.

    Args:
        z (:obj: `torch.Tensor`): tensor of size (N,C,H,W) with data to be
            resampled
        bbox_xywh_src (:obj: `torch.Tensor`): tensor of size (N,4) containing
            source bounding boxes in format XYWH
        bbox_xywh_dst (:obj: `torch.Tensor`): tensor of size (N,4) containing
            destination bounding boxes in format XYWH
        wout (int): output width
        hout (int): output height
        mode (str): sampling mode passed to `F.grid_sample`
        padding_mode (str): padding mode passed to `F.grid_sample`
    Return:
        zresampled (:obj: `torch.Tensor`): tensor of size (N, C, Hout, Wout)
            with resampled values of z
    """
    n = bbox_xywh_src.size(0)
    assert n == bbox_xywh_dst.size(0), (
        "The number of "
        "source ROIs for resampling ({}) should be equal to the number "
        "of destination ROIs ({})".format(bbox_xywh_src.size(0), bbox_xywh_dst.size(0))
    )
    x0_src, y0_src, w_src, h_src = bbox_xywh_src.unbind(dim=1)
    x0_dst, y0_dst, w_dst, h_dst = bbox_xywh_dst.unbind(dim=1)
    # destination box corners expressed in the normalized [-1, 1] frame
    # of the corresponding source box (the frame grid_sample expects)
    left = 2 * (x0_dst - x0_src) / w_src - 1
    top = 2 * (y0_dst - y0_src) / h_src - 1
    right = 2 * (x0_dst + w_dst - x0_src) / w_src - 1
    bottom = 2 * (y0_dst + h_dst - y0_src) / h_src - 1
    # fractional positions of output pixels along each axis
    col_frac = torch.arange(wout, device=z.device, dtype=torch.float) / wout
    row_frac = torch.arange(hout, device=z.device, dtype=torch.float) / hout
    grid_x = (
        col_frac[None, None, :] * (right - left)[:, None, None] + left[:, None, None]
    ).expand(n, hout, wout)
    grid_y = (
        row_frac[None, :, None] * (bottom - top)[:, None, None] + top[:, None, None]
    ).expand(n, hout, wout)
    grid = torch.stack((grid_x, grid_y), dim=3)
    # resample Z from (N, C, H, W) into (N, C, Hout, Wout)
    return F.grid_sample(z, grid, mode=mode, padding_mode=padding_mode, align_corners=True)
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
class AnnotationsAccumulator(ABC):
    """
    Interface for accumulators that gather per-image annotation data and
    later pack it into dense tensors.
    """

    @abstractmethod
    def accumulate(self, instances_one_image: Instances):
        """
        Add annotation data of a single image to the accumulator.

        Args:
            instances_one_image (Instances): instances data to accumulate
        """

    @abstractmethod
    def pack(self) -> Any:
        """
        Combine all accumulated data into tensors.
        """
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
@dataclass
class PackedChartBasedAnnotations:
    """
    Packed annotations for chart-based model training. The following attributes
    are defined:
     - fine_segm_labels_gt (tensor [K] of `int64`): GT fine segmentation point labels
     - x_gt (tensor [K] of `float32`): GT normalized X point coordinates
     - y_gt (tensor [K] of `float32`): GT normalized Y point coordinates
     - u_gt (tensor [K] of `float32`): GT point U values
     - v_gt (tensor [K] of `float32`): GT point V values
     - coarse_segm_gt (tensor [N, S, S] of `float32`): GT segmentation for bounding boxes
     - bbox_xywh_gt (tensor [N, 4] of `float32`): selected GT bounding boxes in
       XYWH format
     - bbox_xywh_est (tensor [N, 4] of `float32`): selected matching estimated
       bounding boxes in XYWH format
     - point_bbox_with_dp_indices (tensor [K] of `int64`): indices of bounding boxes
       with DensePose annotations that correspond to the point data
     - point_bbox_indices (tensor [K] of `int64`): indices of bounding boxes
       (not necessarily the selected ones with DensePose data) that correspond
       to the point data
     - bbox_indices (tensor [N] of `int64`): global indices of selected bounding
       boxes with DensePose annotations; these indices could be used to access
       features that are computed for all bounding boxes, not only the ones with
       DensePose annotations.
    Here K is the total number of points and N is the total number of instances
    with DensePose annotations.
    """

    # per-point ground truth (length K)
    fine_segm_labels_gt: torch.Tensor
    x_gt: torch.Tensor
    y_gt: torch.Tensor
    u_gt: torch.Tensor
    v_gt: torch.Tensor
    # per-instance ground truth (length N); coarse segmentation may be missing
    coarse_segm_gt: Optional[torch.Tensor]
    bbox_xywh_gt: torch.Tensor
    bbox_xywh_est: torch.Tensor
    # point -> instance index mappings (length K)
    point_bbox_with_dp_indices: torch.Tensor
    point_bbox_indices: torch.Tensor
    # global indices of instances with DensePose data (length N)
    bbox_indices: torch.Tensor
|
| 301 |
+
|
| 302 |
+
|
| 303 |
+
class ChartBasedAnnotationsAccumulator(AnnotationsAccumulator):
    """
    Accumulates annotations by batches that correspond to objects detected on
    individual images. Can pack them together into single tensors.
    """

    def __init__(self):
        # per-point lists, appended to by `_do_accumulate`
        self.i_gt = []  # fine segmentation labels
        self.x_gt = []  # normalized X coordinates
        self.y_gt = []  # normalized Y coordinates
        self.u_gt = []  # U values
        self.v_gt = []  # V values
        self.s_gt = []  # coarse segmentations (only for instances that have them)
        # per-instance lists (instances with DensePose data only)
        self.bbox_xywh_gt = []
        self.bbox_xywh_est = []
        # per-point indices mapping points back to instances
        self.point_bbox_with_dp_indices = []
        self.point_bbox_indices = []
        self.bbox_indices = []
        # running counters: index among instances with DensePose data, and
        # global index among all matched instances
        self.nxt_bbox_with_dp_index = 0
        self.nxt_bbox_index = 0

    def accumulate(self, instances_one_image: Instances):
        """
        Accumulate instances data for one image

        Args:
            instances_one_image (Instances): instances data to accumulate;
                expected to carry `proposal_boxes`, `gt_boxes` and (optionally)
                `gt_densepose` fields
        """
        # convert both estimated and GT boxes to XYWH for packing
        boxes_xywh_est = BoxMode.convert(
            instances_one_image.proposal_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
        )
        boxes_xywh_gt = BoxMode.convert(
            instances_one_image.gt_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
        )
        n_matches = len(boxes_xywh_gt)
        assert n_matches == len(
            boxes_xywh_est
        ), f"Got {len(boxes_xywh_est)} proposal boxes and {len(boxes_xywh_gt)} GT boxes"
        if not n_matches:
            # no detection - GT matches
            return
        if (
            not hasattr(instances_one_image, "gt_densepose")
            or instances_one_image.gt_densepose is None
        ):
            # no densepose GT for the detections, just increase the bbox index
            self.nxt_bbox_index += n_matches
            return
        for box_xywh_est, box_xywh_gt, dp_gt in zip(
            boxes_xywh_est, boxes_xywh_gt, instances_one_image.gt_densepose
        ):
            # only instances with at least one annotated point contribute data;
            # the global bbox counter advances for every matched instance
            if (dp_gt is not None) and (len(dp_gt.x) > 0):
                # pyre-fixme[6]: For 1st argument expected `Tensor` but got `float`.
                # pyre-fixme[6]: For 2nd argument expected `Tensor` but got `float`.
                self._do_accumulate(box_xywh_gt, box_xywh_est, dp_gt)
            self.nxt_bbox_index += 1

    def _do_accumulate(
        self, box_xywh_gt: torch.Tensor, box_xywh_est: torch.Tensor, dp_gt: DensePoseDataRelative
    ):
        """
        Accumulate instances data for one image, given that the data is not empty

        Args:
            box_xywh_gt (tensor): GT bounding box
            box_xywh_est (tensor): estimated bounding box
            dp_gt (DensePoseDataRelative): GT densepose data
        """
        self.i_gt.append(dp_gt.i)
        self.x_gt.append(dp_gt.x)
        self.y_gt.append(dp_gt.y)
        self.u_gt.append(dp_gt.u)
        self.v_gt.append(dp_gt.v)
        # coarse segmentation is optional in the GT data
        if hasattr(dp_gt, "segm"):
            self.s_gt.append(dp_gt.segm.unsqueeze(0))
        self.bbox_xywh_gt.append(box_xywh_gt.view(-1, 4))
        self.bbox_xywh_est.append(box_xywh_est.view(-1, 4))
        # every point of this instance maps to the current counters
        self.point_bbox_with_dp_indices.append(
            torch.full_like(dp_gt.i, self.nxt_bbox_with_dp_index)
        )
        self.point_bbox_indices.append(torch.full_like(dp_gt.i, self.nxt_bbox_index))
        self.bbox_indices.append(self.nxt_bbox_index)
        self.nxt_bbox_with_dp_index += 1

    def pack(self) -> Optional[PackedChartBasedAnnotations]:
        """
        Pack data into tensors

        Return:
            PackedChartBasedAnnotations with all accumulated data concatenated,
            or `None` if nothing was accumulated
        """
        if not len(self.i_gt):
            # TODO:
            # returning proper empty annotations would require
            # creating empty tensors of appropriate shape and
            # type on an appropriate device;
            # we return None so far to indicate empty annotations
            return None
        return PackedChartBasedAnnotations(
            fine_segm_labels_gt=torch.cat(self.i_gt, 0).long(),
            x_gt=torch.cat(self.x_gt, 0),
            y_gt=torch.cat(self.y_gt, 0),
            u_gt=torch.cat(self.u_gt, 0),
            v_gt=torch.cat(self.v_gt, 0),
            # ignore segmentation annotations, if not all the instances contain those
            coarse_segm_gt=torch.cat(self.s_gt, 0)
            if len(self.s_gt) == len(self.bbox_xywh_gt)
            else None,
            bbox_xywh_gt=torch.cat(self.bbox_xywh_gt, 0),
            bbox_xywh_est=torch.cat(self.bbox_xywh_est, 0),
            point_bbox_with_dp_indices=torch.cat(self.point_bbox_with_dp_indices, 0).long(),
            point_bbox_indices=torch.cat(self.point_bbox_indices, 0).long(),
            bbox_indices=torch.as_tensor(
                self.bbox_indices, dtype=torch.long, device=self.x_gt[0].device
            ).long(),
        )
|
| 416 |
+
|
| 417 |
+
|
| 418 |
+
def extract_packed_annotations_from_matches(
    proposals_with_targets: List[Instances], accumulator: AnnotationsAccumulator
) -> Any:
    """
    Feed all per-image proposals-with-targets into the accumulator and return
    the packed result.
    """
    for instances_per_image in proposals_with_targets:
        accumulator.accumulate(instances_per_image)
    return accumulator.pack()
|
| 424 |
+
|
| 425 |
+
|
| 426 |
+
def sample_random_indices(
    n_indices: int, n_samples: int, device: Optional[torch.device] = None
) -> Optional[torch.Tensor]:
    """
    Sample `n_samples` random indices from range `[0..n_indices - 1]`.

    If `n_samples` is non-positive or `n_indices` does not exceed `n_samples`,
    returns `None`, meaning all indices are selected.

    Args:
        n_indices (int): total number of indices
        n_samples (int): number of indices to sample
        device (torch.device): the desired device of returned tensor
    Return:
        Tensor of selected vertex indices, or `None`, if all vertices are selected
    """
    if (n_samples <= 0) or (n_indices <= n_samples):
        # nothing to subsample — caller should use all indices
        return None
    # take the first n_samples entries of a random permutation
    return torch.randperm(n_indices, device=device)[:n_samples]
|
Leffa/3rdparty/densepose/modeling/predictors/__init__.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from .chart import DensePoseChartPredictor
|
| 4 |
+
from .chart_confidence import DensePoseChartConfidencePredictorMixin
|
| 5 |
+
from .chart_with_confidence import DensePoseChartWithConfidencePredictor
|
| 6 |
+
from .cse import DensePoseEmbeddingPredictor
|
| 7 |
+
from .cse_confidence import DensePoseEmbeddingConfidencePredictorMixin
|
| 8 |
+
from .cse_with_confidence import DensePoseEmbeddingWithConfidencePredictor
|
| 9 |
+
from .registry import DENSEPOSE_PREDICTOR_REGISTRY
|
Leffa/3rdparty/densepose/modeling/predictors/chart.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
+
from torch import nn
|
| 5 |
+
|
| 6 |
+
from detectron2.config import CfgNode
|
| 7 |
+
from detectron2.layers import ConvTranspose2d, interpolate
|
| 8 |
+
|
| 9 |
+
from ...structures import DensePoseChartPredictorOutput
|
| 10 |
+
from ..utils import initialize_module_params
|
| 11 |
+
from .registry import DENSEPOSE_PREDICTOR_REGISTRY
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@DENSEPOSE_PREDICTOR_REGISTRY.register()
class DensePoseChartPredictor(nn.Module):
    """
    Predictor (last layers of a DensePose model) that takes DensePose head outputs as an input
    and produces 4 tensors which represent DensePose results for predefined body parts
    (patches / charts):
     * coarse segmentation, a tensor of shape [N, K, Hout, Wout]
     * fine segmentation, a tensor of shape [N, C, Hout, Wout]
     * U coordinates, a tensor of shape [N, C, Hout, Wout]
     * V coordinates, a tensor of shape [N, C, Hout, Wout]
    where
     - N is the number of instances
     - K is the number of coarse segmentation channels (
         2 = foreground / background,
         15 = one of 14 body parts / background)
     - C is the number of fine segmentation channels (
         24 fine body parts / background)
     - Hout and Wout are height and width of predictions
    """

    def __init__(self, cfg: CfgNode, input_channels: int):
        """
        Initialize predictor using configuration options

        Args:
            cfg (CfgNode): configuration options
            input_channels (int): input tensor size along the channel dimension
        """
        super().__init__()
        dim_in = input_channels
        n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
        dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1
        kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL

        def make_deconv(out_channels):
            # all four heads share the same stride-2 transposed convolution shape
            return ConvTranspose2d(
                dim_in, out_channels, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
            )

        self.ann_index_lowres = make_deconv(n_segm_chan)  # coarse segmentation
        self.index_uv_lowres = make_deconv(dim_out_patches)  # fine segmentation
        self.u_lowres = make_deconv(dim_out_patches)  # U coordinates
        self.v_lowres = make_deconv(dim_out_patches)  # V coordinates
        self.scale_factor = cfg.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE
        initialize_module_params(self)

    def interp2d(self, tensor_nchw: torch.Tensor):
        """
        Bilinear interpolation method to be used for upscaling

        Args:
            tensor_nchw (tensor): tensor of shape (N, C, H, W)
        Return:
            tensor of shape (N, C, Hout, Wout), where Hout and Wout are computed
            by applying the scale factor to H and W
        """
        return interpolate(
            tensor_nchw, scale_factor=self.scale_factor, mode="bilinear", align_corners=False
        )

    def forward(self, head_outputs: torch.Tensor):
        """
        Perform forward step on DensePose head outputs

        Args:
            head_outputs (tensor): DensePose head outputs, tensor of shape [N, D, H, W]
        Return:
            An instance of DensePoseChartPredictorOutput
        """
        upscale = self.interp2d
        return DensePoseChartPredictorOutput(
            coarse_segm=upscale(self.ann_index_lowres(head_outputs)),
            fine_segm=upscale(self.index_uv_lowres(head_outputs)),
            u=upscale(self.u_lowres(head_outputs)),
            v=upscale(self.v_lowres(head_outputs)),
        )
|
Leffa/3rdparty/densepose/modeling/predictors/chart_confidence.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from typing import Any
|
| 4 |
+
import torch
|
| 5 |
+
from torch.nn import functional as F
|
| 6 |
+
|
| 7 |
+
from detectron2.config import CfgNode
|
| 8 |
+
from detectron2.layers import ConvTranspose2d
|
| 9 |
+
|
| 10 |
+
from ...structures import decorate_predictor_output_class_with_confidences
|
| 11 |
+
from ..confidence import DensePoseConfidenceModelConfig, DensePoseUVConfidenceType
|
| 12 |
+
from ..utils import initialize_module_params
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class DensePoseChartConfidencePredictorMixin:
|
| 16 |
+
"""
|
| 17 |
+
Predictor contains the last layers of a DensePose model that take DensePose head
|
| 18 |
+
outputs as an input and produce model outputs. Confidence predictor mixin is used
|
| 19 |
+
to generate confidences for segmentation and UV tensors estimated by some
|
| 20 |
+
base predictor. Several assumptions need to hold for the base predictor:
|
| 21 |
+
1) the `forward` method must return SIUV tuple as the first result (
|
| 22 |
+
S = coarse segmentation, I = fine segmentation, U and V are intrinsic
|
| 23 |
+
chart coordinates)
|
| 24 |
+
2) `interp2d` method must be defined to perform bilinear interpolation;
|
| 25 |
+
the same method is typically used for SIUV and confidences
|
| 26 |
+
Confidence predictor mixin provides confidence estimates, as described in:
|
| 27 |
+
N. Neverova et al., Correlated Uncertainty for Learning Dense Correspondences
|
| 28 |
+
from Noisy Labels, NeurIPS 2019
|
| 29 |
+
A. Sanakoyeu et al., Transferring Dense Pose to Proximal Animal Classes, CVPR 2020
|
| 30 |
+
"""
|
| 31 |
+
|
| 32 |
+
def __init__(self, cfg: CfgNode, input_channels: int):
|
| 33 |
+
"""
|
| 34 |
+
Initialize confidence predictor using configuration options.
|
| 35 |
+
|
| 36 |
+
Args:
|
| 37 |
+
cfg (CfgNode): configuration options
|
| 38 |
+
input_channels (int): number of input channels
|
| 39 |
+
"""
|
| 40 |
+
# we rely on base predictor to call nn.Module.__init__
|
| 41 |
+
super().__init__(cfg, input_channels) # pyre-ignore[19]
|
| 42 |
+
self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg)
|
| 43 |
+
self._initialize_confidence_estimation_layers(cfg, input_channels)
|
| 44 |
+
self._registry = {}
|
| 45 |
+
initialize_module_params(self) # pyre-ignore[6]
|
| 46 |
+
|
| 47 |
+
def _initialize_confidence_estimation_layers(self, cfg: CfgNode, dim_in: int):
|
| 48 |
+
"""
|
| 49 |
+
Initialize confidence estimation layers based on configuration options
|
| 50 |
+
|
| 51 |
+
Args:
|
| 52 |
+
cfg (CfgNode): configuration options
|
| 53 |
+
dim_in (int): number of input channels
|
| 54 |
+
"""
|
| 55 |
+
dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1
|
| 56 |
+
kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
|
| 57 |
+
if self.confidence_model_cfg.uv_confidence.enabled:
|
| 58 |
+
if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO:
|
| 59 |
+
self.sigma_2_lowres = ConvTranspose2d( # pyre-ignore[16]
|
| 60 |
+
dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
|
| 61 |
+
)
|
| 62 |
+
elif (
|
| 63 |
+
self.confidence_model_cfg.uv_confidence.type
|
| 64 |
+
== DensePoseUVConfidenceType.INDEP_ANISO
|
| 65 |
+
):
|
| 66 |
+
self.sigma_2_lowres = ConvTranspose2d(
|
| 67 |
+
dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
|
| 68 |
+
)
|
| 69 |
+
self.kappa_u_lowres = ConvTranspose2d( # pyre-ignore[16]
|
| 70 |
+
dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
|
| 71 |
+
)
|
| 72 |
+
self.kappa_v_lowres = ConvTranspose2d( # pyre-ignore[16]
|
| 73 |
+
dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
|
| 74 |
+
)
|
| 75 |
+
else:
|
| 76 |
+
raise ValueError(
|
| 77 |
+
f"Unknown confidence model type: "
|
| 78 |
+
f"{self.confidence_model_cfg.confidence_model_type}"
|
| 79 |
+
)
|
| 80 |
+
if self.confidence_model_cfg.segm_confidence.enabled:
|
| 81 |
+
self.fine_segm_confidence_lowres = ConvTranspose2d( # pyre-ignore[16]
|
| 82 |
+
dim_in, 1, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
|
| 83 |
+
)
|
| 84 |
+
self.coarse_segm_confidence_lowres = ConvTranspose2d( # pyre-ignore[16]
|
| 85 |
+
dim_in, 1, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
|
| 86 |
+
)
|
| 87 |
+
|
| 88 |
+
def forward(self, head_outputs: torch.Tensor):
|
| 89 |
+
"""
|
| 90 |
+
Perform forward operation on head outputs used as inputs for the predictor.
|
| 91 |
+
Calls forward method from the base predictor and uses its outputs to compute
|
| 92 |
+
confidences.
|
| 93 |
+
|
| 94 |
+
Args:
|
| 95 |
+
head_outputs (Tensor): head outputs used as predictor inputs
|
| 96 |
+
Return:
|
| 97 |
+
An instance of outputs with confidences,
|
| 98 |
+
see `decorate_predictor_output_class_with_confidences`
|
| 99 |
+
"""
|
| 100 |
+
# assuming base class returns SIUV estimates in its first result
|
| 101 |
+
base_predictor_outputs = super().forward(head_outputs) # pyre-ignore[16]
|
| 102 |
+
|
| 103 |
+
# create output instance by extending base predictor outputs:
|
| 104 |
+
output = self._create_output_instance(base_predictor_outputs)
|
| 105 |
+
|
| 106 |
+
if self.confidence_model_cfg.uv_confidence.enabled:
|
| 107 |
+
if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO:
|
| 108 |
+
# assuming base class defines interp2d method for bilinear interpolation
|
| 109 |
+
output.sigma_2 = self.interp2d(self.sigma_2_lowres(head_outputs)) # pyre-ignore[16]
|
| 110 |
+
elif (
|
| 111 |
+
self.confidence_model_cfg.uv_confidence.type
|
| 112 |
+
== DensePoseUVConfidenceType.INDEP_ANISO
|
| 113 |
+
):
|
| 114 |
+
# assuming base class defines interp2d method for bilinear interpolation
|
| 115 |
+
output.sigma_2 = self.interp2d(self.sigma_2_lowres(head_outputs))
|
| 116 |
+
output.kappa_u = self.interp2d(self.kappa_u_lowres(head_outputs)) # pyre-ignore[16]
|
| 117 |
+
output.kappa_v = self.interp2d(self.kappa_v_lowres(head_outputs)) # pyre-ignore[16]
|
| 118 |
+
else:
|
| 119 |
+
raise ValueError(
|
| 120 |
+
f"Unknown confidence model type: "
|
| 121 |
+
f"{self.confidence_model_cfg.confidence_model_type}"
|
| 122 |
+
)
|
| 123 |
+
if self.confidence_model_cfg.segm_confidence.enabled:
|
| 124 |
+
# base predictor outputs are assumed to have `fine_segm` and `coarse_segm` attributes
|
| 125 |
+
# base predictor is assumed to define `interp2d` method for bilinear interpolation
|
| 126 |
+
output.fine_segm_confidence = (
|
| 127 |
+
F.softplus(
|
| 128 |
+
self.interp2d(self.fine_segm_confidence_lowres(head_outputs)) # pyre-ignore[16]
|
| 129 |
+
)
|
| 130 |
+
+ self.confidence_model_cfg.segm_confidence.epsilon
|
| 131 |
+
)
|
| 132 |
+
output.fine_segm = base_predictor_outputs.fine_segm * torch.repeat_interleave(
|
| 133 |
+
output.fine_segm_confidence, base_predictor_outputs.fine_segm.shape[1], dim=1
|
| 134 |
+
)
|
| 135 |
+
output.coarse_segm_confidence = (
|
| 136 |
+
F.softplus(
|
| 137 |
+
self.interp2d(
|
| 138 |
+
self.coarse_segm_confidence_lowres(head_outputs) # pyre-ignore[16]
|
| 139 |
+
)
|
| 140 |
+
)
|
| 141 |
+
+ self.confidence_model_cfg.segm_confidence.epsilon
|
| 142 |
+
)
|
| 143 |
+
output.coarse_segm = base_predictor_outputs.coarse_segm * torch.repeat_interleave(
|
| 144 |
+
output.coarse_segm_confidence, base_predictor_outputs.coarse_segm.shape[1], dim=1
|
| 145 |
+
)
|
| 146 |
+
|
| 147 |
+
return output
|
| 148 |
+
|
| 149 |
+
def _create_output_instance(self, base_predictor_outputs: Any):
|
| 150 |
+
"""
|
| 151 |
+
Create an instance of predictor outputs by copying the outputs from the
|
| 152 |
+
base predictor and initializing confidence
|
| 153 |
+
|
| 154 |
+
Args:
|
| 155 |
+
base_predictor_outputs: an instance of base predictor outputs
|
| 156 |
+
(the outputs type is assumed to be a dataclass)
|
| 157 |
+
Return:
|
| 158 |
+
An instance of outputs with confidences
|
| 159 |
+
"""
|
| 160 |
+
PredictorOutput = decorate_predictor_output_class_with_confidences(
|
| 161 |
+
type(base_predictor_outputs) # pyre-ignore[6]
|
| 162 |
+
)
|
| 163 |
+
# base_predictor_outputs is assumed to be a dataclass
|
| 164 |
+
# reassign all the fields from base_predictor_outputs (no deep copy!), add new fields
|
| 165 |
+
output = PredictorOutput(
|
| 166 |
+
**base_predictor_outputs.__dict__,
|
| 167 |
+
coarse_segm_confidence=None,
|
| 168 |
+
fine_segm_confidence=None,
|
| 169 |
+
sigma_1=None,
|
| 170 |
+
sigma_2=None,
|
| 171 |
+
kappa_u=None,
|
| 172 |
+
kappa_v=None,
|
| 173 |
+
)
|
| 174 |
+
return output
|
Leffa/3rdparty/densepose/modeling/predictors/chart_with_confidence.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from . import DensePoseChartConfidencePredictorMixin, DensePoseChartPredictor
|
| 4 |
+
from .registry import DENSEPOSE_PREDICTOR_REGISTRY
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
@DENSEPOSE_PREDICTOR_REGISTRY.register()
|
| 8 |
+
class DensePoseChartWithConfidencePredictor(
|
| 9 |
+
DensePoseChartConfidencePredictorMixin, DensePoseChartPredictor
|
| 10 |
+
):
|
| 11 |
+
"""
|
| 12 |
+
Predictor that combines chart and chart confidence estimation
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
pass
|
Leffa/3rdparty/densepose/modeling/predictors/cse.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
+
from torch import nn
|
| 5 |
+
|
| 6 |
+
from detectron2.config import CfgNode
|
| 7 |
+
from detectron2.layers import ConvTranspose2d, interpolate
|
| 8 |
+
|
| 9 |
+
from ...structures import DensePoseEmbeddingPredictorOutput
|
| 10 |
+
from ..utils import initialize_module_params
|
| 11 |
+
from .registry import DENSEPOSE_PREDICTOR_REGISTRY
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@DENSEPOSE_PREDICTOR_REGISTRY.register()
|
| 15 |
+
class DensePoseEmbeddingPredictor(nn.Module):
|
| 16 |
+
"""
|
| 17 |
+
Last layers of a DensePose model that take DensePose head outputs as an input
|
| 18 |
+
and produce model outputs for continuous surface embeddings (CSE).
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
def __init__(self, cfg: CfgNode, input_channels: int):
|
| 22 |
+
"""
|
| 23 |
+
Initialize predictor using configuration options
|
| 24 |
+
|
| 25 |
+
Args:
|
| 26 |
+
cfg (CfgNode): configuration options
|
| 27 |
+
input_channels (int): input tensor size along the channel dimension
|
| 28 |
+
"""
|
| 29 |
+
super().__init__()
|
| 30 |
+
dim_in = input_channels
|
| 31 |
+
n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
|
| 32 |
+
embed_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE
|
| 33 |
+
kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
|
| 34 |
+
# coarse segmentation
|
| 35 |
+
self.coarse_segm_lowres = ConvTranspose2d(
|
| 36 |
+
dim_in, n_segm_chan, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
|
| 37 |
+
)
|
| 38 |
+
# embedding
|
| 39 |
+
self.embed_lowres = ConvTranspose2d(
|
| 40 |
+
dim_in, embed_size, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
|
| 41 |
+
)
|
| 42 |
+
self.scale_factor = cfg.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE
|
| 43 |
+
initialize_module_params(self)
|
| 44 |
+
|
| 45 |
+
def interp2d(self, tensor_nchw: torch.Tensor):
|
| 46 |
+
"""
|
| 47 |
+
Bilinear interpolation method to be used for upscaling
|
| 48 |
+
|
| 49 |
+
Args:
|
| 50 |
+
tensor_nchw (tensor): tensor of shape (N, C, H, W)
|
| 51 |
+
Return:
|
| 52 |
+
tensor of shape (N, C, Hout, Wout), where Hout and Wout are computed
|
| 53 |
+
by applying the scale factor to H and W
|
| 54 |
+
"""
|
| 55 |
+
return interpolate(
|
| 56 |
+
tensor_nchw, scale_factor=self.scale_factor, mode="bilinear", align_corners=False
|
| 57 |
+
)
|
| 58 |
+
|
| 59 |
+
def forward(self, head_outputs):
|
| 60 |
+
"""
|
| 61 |
+
Perform forward step on DensePose head outputs
|
| 62 |
+
|
| 63 |
+
Args:
|
| 64 |
+
head_outputs (tensor): DensePose head outputs, tensor of shape [N, D, H, W]
|
| 65 |
+
"""
|
| 66 |
+
embed_lowres = self.embed_lowres(head_outputs)
|
| 67 |
+
coarse_segm_lowres = self.coarse_segm_lowres(head_outputs)
|
| 68 |
+
embed = self.interp2d(embed_lowres)
|
| 69 |
+
coarse_segm = self.interp2d(coarse_segm_lowres)
|
| 70 |
+
return DensePoseEmbeddingPredictorOutput(embedding=embed, coarse_segm=coarse_segm)
|
Leffa/3rdparty/densepose/modeling/predictors/cse_confidence.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from typing import Any
|
| 4 |
+
import torch
|
| 5 |
+
from torch.nn import functional as F
|
| 6 |
+
|
| 7 |
+
from detectron2.config import CfgNode
|
| 8 |
+
from detectron2.layers import ConvTranspose2d
|
| 9 |
+
|
| 10 |
+
from densepose.modeling.confidence import DensePoseConfidenceModelConfig
|
| 11 |
+
from densepose.modeling.utils import initialize_module_params
|
| 12 |
+
from densepose.structures import decorate_cse_predictor_output_class_with_confidences
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class DensePoseEmbeddingConfidencePredictorMixin:
|
| 16 |
+
"""
|
| 17 |
+
Predictor contains the last layers of a DensePose model that take DensePose head
|
| 18 |
+
outputs as an input and produce model outputs. Confidence predictor mixin is used
|
| 19 |
+
to generate confidences for coarse segmentation estimated by some
|
| 20 |
+
base predictor. Several assumptions need to hold for the base predictor:
|
| 21 |
+
1) the `forward` method must return CSE DensePose head outputs,
|
| 22 |
+
tensor of shape [N, D, H, W]
|
| 23 |
+
2) `interp2d` method must be defined to perform bilinear interpolation;
|
| 24 |
+
the same method is typically used for masks and confidences
|
| 25 |
+
Confidence predictor mixin provides confidence estimates, as described in:
|
| 26 |
+
N. Neverova et al., Correlated Uncertainty for Learning Dense Correspondences
|
| 27 |
+
from Noisy Labels, NeurIPS 2019
|
| 28 |
+
A. Sanakoyeu et al., Transferring Dense Pose to Proximal Animal Classes, CVPR 2020
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
def __init__(self, cfg: CfgNode, input_channels: int):
|
| 32 |
+
"""
|
| 33 |
+
Initialize confidence predictor using configuration options.
|
| 34 |
+
|
| 35 |
+
Args:
|
| 36 |
+
cfg (CfgNode): configuration options
|
| 37 |
+
input_channels (int): number of input channels
|
| 38 |
+
"""
|
| 39 |
+
# we rely on base predictor to call nn.Module.__init__
|
| 40 |
+
super().__init__(cfg, input_channels) # pyre-ignore[19]
|
| 41 |
+
self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg)
|
| 42 |
+
self._initialize_confidence_estimation_layers(cfg, input_channels)
|
| 43 |
+
self._registry = {}
|
| 44 |
+
initialize_module_params(self) # pyre-ignore[6]
|
| 45 |
+
|
| 46 |
+
def _initialize_confidence_estimation_layers(self, cfg: CfgNode, dim_in: int):
|
| 47 |
+
"""
|
| 48 |
+
Initialize confidence estimation layers based on configuration options
|
| 49 |
+
|
| 50 |
+
Args:
|
| 51 |
+
cfg (CfgNode): configuration options
|
| 52 |
+
dim_in (int): number of input channels
|
| 53 |
+
"""
|
| 54 |
+
kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
|
| 55 |
+
if self.confidence_model_cfg.segm_confidence.enabled:
|
| 56 |
+
self.coarse_segm_confidence_lowres = ConvTranspose2d( # pyre-ignore[16]
|
| 57 |
+
dim_in, 1, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
|
| 58 |
+
)
|
| 59 |
+
|
| 60 |
+
def forward(self, head_outputs: torch.Tensor):
|
| 61 |
+
"""
|
| 62 |
+
Perform forward operation on head outputs used as inputs for the predictor.
|
| 63 |
+
Calls forward method from the base predictor and uses its outputs to compute
|
| 64 |
+
confidences.
|
| 65 |
+
|
| 66 |
+
Args:
|
| 67 |
+
head_outputs (Tensor): head outputs used as predictor inputs
|
| 68 |
+
Return:
|
| 69 |
+
An instance of outputs with confidences,
|
| 70 |
+
see `decorate_cse_predictor_output_class_with_confidences`
|
| 71 |
+
"""
|
| 72 |
+
# assuming base class returns SIUV estimates in its first result
|
| 73 |
+
base_predictor_outputs = super().forward(head_outputs) # pyre-ignore[16]
|
| 74 |
+
|
| 75 |
+
# create output instance by extending base predictor outputs:
|
| 76 |
+
output = self._create_output_instance(base_predictor_outputs)
|
| 77 |
+
|
| 78 |
+
if self.confidence_model_cfg.segm_confidence.enabled:
|
| 79 |
+
# base predictor outputs are assumed to have `coarse_segm` attribute
|
| 80 |
+
# base predictor is assumed to define `interp2d` method for bilinear interpolation
|
| 81 |
+
output.coarse_segm_confidence = (
|
| 82 |
+
F.softplus(
|
| 83 |
+
self.interp2d( # pyre-ignore[16]
|
| 84 |
+
self.coarse_segm_confidence_lowres(head_outputs) # pyre-ignore[16]
|
| 85 |
+
)
|
| 86 |
+
)
|
| 87 |
+
+ self.confidence_model_cfg.segm_confidence.epsilon
|
| 88 |
+
)
|
| 89 |
+
output.coarse_segm = base_predictor_outputs.coarse_segm * torch.repeat_interleave(
|
| 90 |
+
output.coarse_segm_confidence, base_predictor_outputs.coarse_segm.shape[1], dim=1
|
| 91 |
+
)
|
| 92 |
+
|
| 93 |
+
return output
|
| 94 |
+
|
| 95 |
+
def _create_output_instance(self, base_predictor_outputs: Any):
|
| 96 |
+
"""
|
| 97 |
+
Create an instance of predictor outputs by copying the outputs from the
|
| 98 |
+
base predictor and initializing confidence
|
| 99 |
+
|
| 100 |
+
Args:
|
| 101 |
+
base_predictor_outputs: an instance of base predictor outputs
|
| 102 |
+
(the outputs type is assumed to be a dataclass)
|
| 103 |
+
Return:
|
| 104 |
+
An instance of outputs with confidences
|
| 105 |
+
"""
|
| 106 |
+
PredictorOutput = decorate_cse_predictor_output_class_with_confidences(
|
| 107 |
+
type(base_predictor_outputs) # pyre-ignore[6]
|
| 108 |
+
)
|
| 109 |
+
# base_predictor_outputs is assumed to be a dataclass
|
| 110 |
+
# reassign all the fields from base_predictor_outputs (no deep copy!), add new fields
|
| 111 |
+
output = PredictorOutput(
|
| 112 |
+
**base_predictor_outputs.__dict__,
|
| 113 |
+
coarse_segm_confidence=None,
|
| 114 |
+
)
|
| 115 |
+
return output
|
Leffa/3rdparty/densepose/modeling/predictors/cse_with_confidence.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from . import DensePoseEmbeddingConfidencePredictorMixin, DensePoseEmbeddingPredictor
|
| 4 |
+
from .registry import DENSEPOSE_PREDICTOR_REGISTRY
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
@DENSEPOSE_PREDICTOR_REGISTRY.register()
|
| 8 |
+
class DensePoseEmbeddingWithConfidencePredictor(
|
| 9 |
+
DensePoseEmbeddingConfidencePredictorMixin, DensePoseEmbeddingPredictor
|
| 10 |
+
):
|
| 11 |
+
"""
|
| 12 |
+
Predictor that combines CSE and CSE confidence estimation
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
pass
|
Leffa/3rdparty/densepose/modeling/predictors/registry.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from detectron2.utils.registry import Registry
|
| 4 |
+
|
| 5 |
+
DENSEPOSE_PREDICTOR_REGISTRY = Registry("DENSEPOSE_PREDICTOR")
|
Leffa/3rdparty/densepose/modeling/roi_heads/__init__.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from .v1convx import DensePoseV1ConvXHead
|
| 4 |
+
from .deeplab import DensePoseDeepLabHead
|
| 5 |
+
from .registry import ROI_DENSEPOSE_HEAD_REGISTRY
|
| 6 |
+
from .roi_head import Decoder, DensePoseROIHeads
|
Leffa/3rdparty/densepose/modeling/roi_heads/deeplab.py
ADDED
|
@@ -0,0 +1,263 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
import fvcore.nn.weight_init as weight_init
|
| 4 |
+
import torch
|
| 5 |
+
from torch import nn
|
| 6 |
+
from torch.nn import functional as F
|
| 7 |
+
|
| 8 |
+
from detectron2.config import CfgNode
|
| 9 |
+
from detectron2.layers import Conv2d
|
| 10 |
+
|
| 11 |
+
from .registry import ROI_DENSEPOSE_HEAD_REGISTRY
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@ROI_DENSEPOSE_HEAD_REGISTRY.register()
|
| 15 |
+
class DensePoseDeepLabHead(nn.Module):
|
| 16 |
+
"""
|
| 17 |
+
DensePose head using DeepLabV3 model from
|
| 18 |
+
"Rethinking Atrous Convolution for Semantic Image Segmentation"
|
| 19 |
+
<https://arxiv.org/abs/1706.05587>.
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
def __init__(self, cfg: CfgNode, input_channels: int):
|
| 23 |
+
super(DensePoseDeepLabHead, self).__init__()
|
| 24 |
+
# fmt: off
|
| 25 |
+
hidden_dim = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM
|
| 26 |
+
kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL
|
| 27 |
+
norm = cfg.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NORM
|
| 28 |
+
self.n_stacked_convs = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS
|
| 29 |
+
self.use_nonlocal = cfg.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NONLOCAL_ON
|
| 30 |
+
# fmt: on
|
| 31 |
+
pad_size = kernel_size // 2
|
| 32 |
+
n_channels = input_channels
|
| 33 |
+
|
| 34 |
+
self.ASPP = ASPP(input_channels, [6, 12, 56], n_channels) # 6, 12, 56
|
| 35 |
+
self.add_module("ASPP", self.ASPP)
|
| 36 |
+
|
| 37 |
+
if self.use_nonlocal:
|
| 38 |
+
self.NLBlock = NONLocalBlock2D(input_channels, bn_layer=True)
|
| 39 |
+
self.add_module("NLBlock", self.NLBlock)
|
| 40 |
+
# weight_init.c2_msra_fill(self.ASPP)
|
| 41 |
+
|
| 42 |
+
for i in range(self.n_stacked_convs):
|
| 43 |
+
norm_module = nn.GroupNorm(32, hidden_dim) if norm == "GN" else None
|
| 44 |
+
layer = Conv2d(
|
| 45 |
+
n_channels,
|
| 46 |
+
hidden_dim,
|
| 47 |
+
kernel_size,
|
| 48 |
+
stride=1,
|
| 49 |
+
padding=pad_size,
|
| 50 |
+
bias=not norm,
|
| 51 |
+
norm=norm_module,
|
| 52 |
+
)
|
| 53 |
+
weight_init.c2_msra_fill(layer)
|
| 54 |
+
n_channels = hidden_dim
|
| 55 |
+
layer_name = self._get_layer_name(i)
|
| 56 |
+
self.add_module(layer_name, layer)
|
| 57 |
+
self.n_out_channels = hidden_dim
|
| 58 |
+
# initialize_module_params(self)
|
| 59 |
+
|
| 60 |
+
def forward(self, features):
|
| 61 |
+
x0 = features
|
| 62 |
+
x = self.ASPP(x0)
|
| 63 |
+
if self.use_nonlocal:
|
| 64 |
+
x = self.NLBlock(x)
|
| 65 |
+
output = x
|
| 66 |
+
for i in range(self.n_stacked_convs):
|
| 67 |
+
layer_name = self._get_layer_name(i)
|
| 68 |
+
x = getattr(self, layer_name)(x)
|
| 69 |
+
x = F.relu(x)
|
| 70 |
+
output = x
|
| 71 |
+
return output
|
| 72 |
+
|
| 73 |
+
def _get_layer_name(self, i: int):
|
| 74 |
+
layer_name = "body_conv_fcn{}".format(i + 1)
|
| 75 |
+
return layer_name
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
# Copied from
|
| 79 |
+
# https://github.com/pytorch/vision/blob/master/torchvision/models/segmentation/deeplabv3.py
|
| 80 |
+
# See https://arxiv.org/pdf/1706.05587.pdf for details
|
| 81 |
+
class ASPPConv(nn.Sequential):
|
| 82 |
+
def __init__(self, in_channels, out_channels, dilation):
|
| 83 |
+
modules = [
|
| 84 |
+
nn.Conv2d(
|
| 85 |
+
in_channels, out_channels, 3, padding=dilation, dilation=dilation, bias=False
|
| 86 |
+
),
|
| 87 |
+
nn.GroupNorm(32, out_channels),
|
| 88 |
+
nn.ReLU(),
|
| 89 |
+
]
|
| 90 |
+
super(ASPPConv, self).__init__(*modules)
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
class ASPPPooling(nn.Sequential):
|
| 94 |
+
def __init__(self, in_channels, out_channels):
|
| 95 |
+
super(ASPPPooling, self).__init__(
|
| 96 |
+
nn.AdaptiveAvgPool2d(1),
|
| 97 |
+
nn.Conv2d(in_channels, out_channels, 1, bias=False),
|
| 98 |
+
nn.GroupNorm(32, out_channels),
|
| 99 |
+
nn.ReLU(),
|
| 100 |
+
)
|
| 101 |
+
|
| 102 |
+
def forward(self, x):
|
| 103 |
+
size = x.shape[-2:]
|
| 104 |
+
x = super(ASPPPooling, self).forward(x)
|
| 105 |
+
return F.interpolate(x, size=size, mode="bilinear", align_corners=False)
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
class ASPP(nn.Module):
|
| 109 |
+
def __init__(self, in_channels, atrous_rates, out_channels):
|
| 110 |
+
super(ASPP, self).__init__()
|
| 111 |
+
modules = []
|
| 112 |
+
modules.append(
|
| 113 |
+
nn.Sequential(
|
| 114 |
+
nn.Conv2d(in_channels, out_channels, 1, bias=False),
|
| 115 |
+
nn.GroupNorm(32, out_channels),
|
| 116 |
+
nn.ReLU(),
|
| 117 |
+
)
|
| 118 |
+
)
|
| 119 |
+
|
| 120 |
+
rate1, rate2, rate3 = tuple(atrous_rates)
|
| 121 |
+
modules.append(ASPPConv(in_channels, out_channels, rate1))
|
| 122 |
+
modules.append(ASPPConv(in_channels, out_channels, rate2))
|
| 123 |
+
modules.append(ASPPConv(in_channels, out_channels, rate3))
|
| 124 |
+
modules.append(ASPPPooling(in_channels, out_channels))
|
| 125 |
+
|
| 126 |
+
self.convs = nn.ModuleList(modules)
|
| 127 |
+
|
| 128 |
+
self.project = nn.Sequential(
|
| 129 |
+
nn.Conv2d(5 * out_channels, out_channels, 1, bias=False),
|
| 130 |
+
# nn.BatchNorm2d(out_channels),
|
| 131 |
+
nn.ReLU()
|
| 132 |
+
# nn.Dropout(0.5)
|
| 133 |
+
)
|
| 134 |
+
|
| 135 |
+
def forward(self, x):
|
| 136 |
+
res = []
|
| 137 |
+
for conv in self.convs:
|
| 138 |
+
res.append(conv(x))
|
| 139 |
+
res = torch.cat(res, dim=1)
|
| 140 |
+
return self.project(res)
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
# copied from
|
| 144 |
+
# https://github.com/AlexHex7/Non-local_pytorch/blob/master/lib/non_local_embedded_gaussian.py
|
| 145 |
+
# See https://arxiv.org/abs/1711.07971 for details
|
| 146 |
+
class _NonLocalBlockND(nn.Module):
|
| 147 |
+
def __init__(
|
| 148 |
+
self, in_channels, inter_channels=None, dimension=3, sub_sample=True, bn_layer=True
|
| 149 |
+
):
|
| 150 |
+
super(_NonLocalBlockND, self).__init__()
|
| 151 |
+
|
| 152 |
+
assert dimension in [1, 2, 3]
|
| 153 |
+
|
| 154 |
+
self.dimension = dimension
|
| 155 |
+
self.sub_sample = sub_sample
|
| 156 |
+
|
| 157 |
+
self.in_channels = in_channels
|
| 158 |
+
self.inter_channels = inter_channels
|
| 159 |
+
|
| 160 |
+
if self.inter_channels is None:
|
| 161 |
+
self.inter_channels = in_channels // 2
|
| 162 |
+
if self.inter_channels == 0:
|
| 163 |
+
self.inter_channels = 1
|
| 164 |
+
|
| 165 |
+
if dimension == 3:
|
| 166 |
+
conv_nd = nn.Conv3d
|
| 167 |
+
max_pool_layer = nn.MaxPool3d(kernel_size=(1, 2, 2))
|
| 168 |
+
bn = nn.GroupNorm # (32, hidden_dim) #nn.BatchNorm3d
|
| 169 |
+
elif dimension == 2:
|
| 170 |
+
conv_nd = nn.Conv2d
|
| 171 |
+
max_pool_layer = nn.MaxPool2d(kernel_size=(2, 2))
|
| 172 |
+
bn = nn.GroupNorm # (32, hidden_dim)nn.BatchNorm2d
|
| 173 |
+
else:
|
| 174 |
+
conv_nd = nn.Conv1d
|
| 175 |
+
max_pool_layer = nn.MaxPool1d(kernel_size=2)
|
| 176 |
+
bn = nn.GroupNorm # (32, hidden_dim)nn.BatchNorm1d
|
| 177 |
+
|
| 178 |
+
self.g = conv_nd(
|
| 179 |
+
in_channels=self.in_channels,
|
| 180 |
+
out_channels=self.inter_channels,
|
| 181 |
+
kernel_size=1,
|
| 182 |
+
stride=1,
|
| 183 |
+
padding=0,
|
| 184 |
+
)
|
| 185 |
+
|
| 186 |
+
if bn_layer:
|
| 187 |
+
self.W = nn.Sequential(
|
| 188 |
+
conv_nd(
|
| 189 |
+
in_channels=self.inter_channels,
|
| 190 |
+
out_channels=self.in_channels,
|
| 191 |
+
kernel_size=1,
|
| 192 |
+
stride=1,
|
| 193 |
+
padding=0,
|
| 194 |
+
),
|
| 195 |
+
bn(32, self.in_channels),
|
| 196 |
+
)
|
| 197 |
+
nn.init.constant_(self.W[1].weight, 0)
|
| 198 |
+
nn.init.constant_(self.W[1].bias, 0)
|
| 199 |
+
else:
|
| 200 |
+
self.W = conv_nd(
|
| 201 |
+
in_channels=self.inter_channels,
|
| 202 |
+
out_channels=self.in_channels,
|
| 203 |
+
kernel_size=1,
|
| 204 |
+
stride=1,
|
| 205 |
+
padding=0,
|
| 206 |
+
)
|
| 207 |
+
nn.init.constant_(self.W.weight, 0)
|
| 208 |
+
nn.init.constant_(self.W.bias, 0)
|
| 209 |
+
|
| 210 |
+
self.theta = conv_nd(
|
| 211 |
+
in_channels=self.in_channels,
|
| 212 |
+
out_channels=self.inter_channels,
|
| 213 |
+
kernel_size=1,
|
| 214 |
+
stride=1,
|
| 215 |
+
padding=0,
|
| 216 |
+
)
|
| 217 |
+
self.phi = conv_nd(
|
| 218 |
+
in_channels=self.in_channels,
|
| 219 |
+
out_channels=self.inter_channels,
|
| 220 |
+
kernel_size=1,
|
| 221 |
+
stride=1,
|
| 222 |
+
padding=0,
|
| 223 |
+
)
|
| 224 |
+
|
| 225 |
+
if sub_sample:
|
| 226 |
+
self.g = nn.Sequential(self.g, max_pool_layer)
|
| 227 |
+
self.phi = nn.Sequential(self.phi, max_pool_layer)
|
| 228 |
+
|
| 229 |
+
def forward(self, x):
    """
    Apply the non-local (self-attention) operation.

    :param x: input tensor, e.g. (b, c, t, h, w) for the 3D variant
        (or (b, c, h, w) / (b, c, t) for the 2D/1D variants)
    :return: tensor with the same shape as ``x`` (residual connection)
    """
    b = x.size(0)

    # Value projection, flattened over spatial dims: (b, HW', inter)
    value = self.g(x).view(b, self.inter_channels, -1).permute(0, 2, 1)

    # Query / key projections: (b, HW, inter) and (b, inter, HW')
    query = self.theta(x).view(b, self.inter_channels, -1).permute(0, 2, 1)
    key = self.phi(x).view(b, self.inter_channels, -1)

    # Pairwise affinities, softmax-normalized over the key positions
    attention = F.softmax(torch.matmul(query, key), dim=-1)

    # Aggregate values, restore the spatial layout of x
    out = torch.matmul(attention, value).permute(0, 2, 1).contiguous()
    out = out.view(b, self.inter_channels, *x.size()[2:])

    # Project back to in_channels and add the residual
    return self.W(out) + x
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
class NONLocalBlock2D(_NonLocalBlockND):
    """2D specialization of the generic non-local block.

    Thin wrapper that fixes ``dimension=2`` so the parent builds Conv2d /
    MaxPool2d internals; all other options are forwarded unchanged.
    """

    def __init__(self, in_channels, inter_channels=None, sub_sample=True, bn_layer=True):
        super().__init__(
            in_channels,
            inter_channels=inter_channels,
            dimension=2,
            sub_sample=sub_sample,
            bn_layer=bn_layer,
        )
|
Leffa/3rdparty/densepose/modeling/roi_heads/registry.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.

from detectron2.utils.registry import Registry

# Registry of DensePose head architectures. Head classes register themselves
# via @ROI_DENSEPOSE_HEAD_REGISTRY.register(); presumably the builder looks
# them up by the configured head name — verify against build_densepose_head.
ROI_DENSEPOSE_HEAD_REGISTRY = Registry("ROI_DENSEPOSE_HEAD")
|
Leffa/3rdparty/densepose/modeling/roi_heads/roi_head.py
ADDED
|
@@ -0,0 +1,218 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
from typing import Dict, List, Optional
|
| 5 |
+
import fvcore.nn.weight_init as weight_init
|
| 6 |
+
import torch
|
| 7 |
+
import torch.nn as nn
|
| 8 |
+
from torch.nn import functional as F
|
| 9 |
+
|
| 10 |
+
from detectron2.layers import Conv2d, ShapeSpec, get_norm
|
| 11 |
+
from detectron2.modeling import ROI_HEADS_REGISTRY, StandardROIHeads
|
| 12 |
+
from detectron2.modeling.poolers import ROIPooler
|
| 13 |
+
from detectron2.modeling.roi_heads import select_foreground_proposals
|
| 14 |
+
from detectron2.structures import ImageList, Instances
|
| 15 |
+
|
| 16 |
+
from .. import (
|
| 17 |
+
build_densepose_data_filter,
|
| 18 |
+
build_densepose_embedder,
|
| 19 |
+
build_densepose_head,
|
| 20 |
+
build_densepose_losses,
|
| 21 |
+
build_densepose_predictor,
|
| 22 |
+
densepose_inference,
|
| 23 |
+
)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class Decoder(nn.Module):
    """
    A semantic segmentation head described in detail in the Panoptic Feature Pyramid Networks paper
    (https://arxiv.org/abs/1901.02446). It takes FPN features as input and merges information from
    all levels of the FPN into single output.
    """

    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec], in_features):
        """
        Args:
            cfg: detectron2 config node; decoder options are read from
                cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_*.
            input_shape: mapping from feature-map name to its ShapeSpec
                (stride and channel count are used here).
            in_features: names of the FPN levels this decoder consumes.
        """
        super(Decoder, self).__init__()

        # fmt: off
        self.in_features = in_features
        feature_strides = {k: v.stride for k, v in input_shape.items()}
        feature_channels = {k: v.channels for k, v in input_shape.items()}
        num_classes = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES
        conv_dims = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS
        self.common_stride = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE
        norm = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM
        # fmt: on

        # One "scale head" per input level: a stack of 3x3 convs (with 2x
        # upsampling) that brings the level from its native stride down to
        # the common stride so all levels can be summed.
        self.scale_heads = []
        for in_feature in self.in_features:
            head_ops = []
            # Number of conv(+upsample) stages needed for this level.
            head_length = max(
                1, int(np.log2(feature_strides[in_feature]) - np.log2(self.common_stride))
            )
            for k in range(head_length):
                conv = Conv2d(
                    feature_channels[in_feature] if k == 0 else conv_dims,
                    conv_dims,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    bias=not norm,  # bias is redundant when a norm layer follows
                    norm=get_norm(norm, conv_dims),
                    activation=F.relu,
                )
                weight_init.c2_msra_fill(conv)
                head_ops.append(conv)
                if feature_strides[in_feature] != self.common_stride:
                    head_ops.append(
                        nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False)
                    )
            self.scale_heads.append(nn.Sequential(*head_ops))
            # Register under the feature name so parameters are tracked.
            self.add_module(in_feature, self.scale_heads[-1])
        # Final 1x1 conv producing per-pixel class scores.
        self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0)
        weight_init.c2_msra_fill(self.predictor)

    def forward(self, features: List[torch.Tensor]):
        """Sum the rescaled per-level maps and predict per-pixel classes.

        ``features`` must be ordered to match ``self.in_features``.
        """
        for i, _ in enumerate(self.in_features):
            if i == 0:
                x = self.scale_heads[i](features[i])
            else:
                x = x + self.scale_heads[i](features[i])
        x = self.predictor(x)
        return x
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
@ROI_HEADS_REGISTRY.register()
class DensePoseROIHeads(StandardROIHeads):
    """
    A Standard ROIHeads which contains an addition of DensePose head.
    """

    def __init__(self, cfg, input_shape):
        super().__init__(cfg, input_shape)
        self._init_densepose_head(cfg, input_shape)

    def _init_densepose_head(self, cfg, input_shape):
        """
        Build the DensePose sub-modules (data filter, pooler, head, predictor,
        losses, embedder) from the config. No-op when MODEL.DENSEPOSE_ON is
        False.
        """
        # fmt: off
        self.densepose_on = cfg.MODEL.DENSEPOSE_ON
        if not self.densepose_on:
            return
        self.densepose_data_filter = build_densepose_data_filter(cfg)
        dp_pooler_resolution = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION
        dp_pooler_sampling_ratio = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO
        dp_pooler_type = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE
        self.use_decoder = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON
        # fmt: on
        if self.use_decoder:
            # The decoder merges all levels into a single map, so the pooler
            # only needs the scale of the finest input level.
            dp_pooler_scales = (1.0 / input_shape[self.in_features[0]].stride,)
        else:
            dp_pooler_scales = tuple(1.0 / input_shape[k].stride for k in self.in_features)
        in_channels = [input_shape[f].channels for f in self.in_features][0]

        if self.use_decoder:
            self.decoder = Decoder(cfg, input_shape, self.in_features)

        self.densepose_pooler = ROIPooler(
            output_size=dp_pooler_resolution,
            scales=dp_pooler_scales,
            sampling_ratio=dp_pooler_sampling_ratio,
            pooler_type=dp_pooler_type,
        )
        self.densepose_head = build_densepose_head(cfg, in_channels)
        self.densepose_predictor = build_densepose_predictor(
            cfg, self.densepose_head.n_out_channels
        )
        self.densepose_losses = build_densepose_losses(cfg)
        self.embedder = build_densepose_embedder(cfg)

    def _forward_densepose(self, features: Dict[str, torch.Tensor], instances: List[Instances]):
        """
        Forward logic of the densepose prediction branch.

        Args:
            features (dict[str, Tensor]): input data as a mapping from feature
                map name to tensor. Axis 0 represents the number of images `N` in
                the input data; axes 1-3 are channels, height, and width, which may
                vary between feature maps (e.g., if a feature pyramid is used).
            instances (list[Instances]): length `N` list of `Instances`. The i-th
                `Instances` contains instances for the i-th input image,
                In training, they can be the proposals.
                In inference, they can be the predicted boxes.

        Returns:
            In training, a dict of losses.
            In inference, update `instances` with new fields "densepose" and return it.
        """
        if not self.densepose_on:
            return {} if self.training else instances

        features_list = [features[f] for f in self.in_features]
        if self.training:
            proposals, _ = select_foreground_proposals(instances, self.num_classes)
            features_list, proposals = self.densepose_data_filter(features_list, proposals)
            if len(proposals) == 0:
                # Bug fix: the original fell through and implicitly returned
                # None here, which crashes `losses.update(...)` in `forward`
                # when the data filter removes every proposal. An empty loss
                # dict is the correct "no contribution" result.
                return {}
            proposal_boxes = [x.proposal_boxes for x in proposals]

            if self.use_decoder:
                features_list = [self.decoder(features_list)]

            features_dp = self.densepose_pooler(features_list, proposal_boxes)
            densepose_head_outputs = self.densepose_head(features_dp)
            densepose_predictor_outputs = self.densepose_predictor(densepose_head_outputs)
            densepose_loss_dict = self.densepose_losses(
                proposals, densepose_predictor_outputs, embedder=self.embedder
            )
            return densepose_loss_dict
        else:
            pred_boxes = [x.pred_boxes for x in instances]

            if self.use_decoder:
                features_list = [self.decoder(features_list)]

            features_dp = self.densepose_pooler(features_list, pred_boxes)
            if len(features_dp) > 0:
                densepose_head_outputs = self.densepose_head(features_dp)
                densepose_predictor_outputs = self.densepose_predictor(densepose_head_outputs)
            else:
                # No detections: let the inference helper attach empty results.
                densepose_predictor_outputs = None

            densepose_inference(densepose_predictor_outputs, instances)
            return instances

    def forward(
        self,
        images: ImageList,
        features: Dict[str, torch.Tensor],
        proposals: List[Instances],
        targets: Optional[List[Instances]] = None,
    ):
        """Run the standard ROI heads, then add DensePose losses in training."""
        instances, losses = super().forward(images, features, proposals, targets)
        del targets, images  # no longer needed; free memory early

        if self.training:
            losses.update(self._forward_densepose(features, instances))
        return instances, losses

    def forward_with_given_boxes(
        self, features: Dict[str, torch.Tensor], instances: List[Instances]
    ):
        """
        Use the given boxes in `instances` to produce other (non-box) per-ROI outputs.

        This is useful for downstream tasks where a box is known, but need to obtain
        other attributes (outputs of other heads).
        Test-time augmentation also uses this.

        Args:
            features: same as in `forward()`
            instances (list[Instances]): instances to predict other outputs. Expect the keys
                "pred_boxes" and "pred_classes" to exist.

        Returns:
            instances (list[Instances]):
                the same `Instances` objects, with extra
                fields such as `pred_masks` or `pred_keypoints`.
        """

        instances = super().forward_with_given_boxes(features, instances)
        instances = self._forward_densepose(features, instances)
        return instances
|
Leffa/3rdparty/densepose/modeling/roi_heads/v1convx.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
+
from torch import nn
|
| 5 |
+
from torch.nn import functional as F
|
| 6 |
+
|
| 7 |
+
from detectron2.config import CfgNode
|
| 8 |
+
from detectron2.layers import Conv2d
|
| 9 |
+
|
| 10 |
+
from ..utils import initialize_module_params
|
| 11 |
+
from .registry import ROI_DENSEPOSE_HEAD_REGISTRY
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@ROI_DENSEPOSE_HEAD_REGISTRY.register()
class DensePoseV1ConvXHead(nn.Module):
    """
    Fully convolutional DensePose head.
    """

    def __init__(self, cfg: CfgNode, input_channels: int):
        """
        Initialize DensePose fully convolutional head

        Args:
            cfg (CfgNode): configuration options
            input_channels (int): number of input channels
        """
        super(DensePoseV1ConvXHead, self).__init__()
        # fmt: off
        hidden_dim = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM
        kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL
        self.n_stacked_convs = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS
        # fmt: on
        padding = kernel_size // 2  # "same" padding for odd kernel sizes
        channels = input_channels
        for idx in range(self.n_stacked_convs):
            self.add_module(
                self._get_layer_name(idx),
                Conv2d(channels, hidden_dim, kernel_size, stride=1, padding=padding),
            )
            channels = hidden_dim
        self.n_out_channels = channels
        initialize_module_params(self)

    def forward(self, features: torch.Tensor):
        """
        Apply DensePose fully convolutional head to the input features

        Args:
            features (tensor): input features
        Result:
            A tensor of DensePose head outputs
        """
        # With zero stacked convs the input passes through unchanged.
        out = features
        for idx in range(self.n_stacked_convs):
            out = F.relu(getattr(self, self._get_layer_name(idx))(out))
        return out

    def _get_layer_name(self, i: int):
        return "body_conv_fcn{}".format(i + 1)
|
Leffa/3rdparty/densepose/modeling/test_time_augmentation.py
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
import copy
|
| 3 |
+
import numpy as np
|
| 4 |
+
import torch
|
| 5 |
+
from fvcore.transforms import HFlipTransform, TransformList
|
| 6 |
+
from torch.nn import functional as F
|
| 7 |
+
|
| 8 |
+
from detectron2.data.transforms import RandomRotation, RotationTransform, apply_transform_gens
|
| 9 |
+
from detectron2.modeling.postprocessing import detector_postprocess
|
| 10 |
+
from detectron2.modeling.test_time_augmentation import DatasetMapperTTA, GeneralizedRCNNWithTTA
|
| 11 |
+
|
| 12 |
+
from ..converters import HFlipConverter
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class DensePoseDatasetMapperTTA(DatasetMapperTTA):
    """TTA mapper that extends the standard flip/resize augmentations with
    extra rotated copies of the input image (angles from
    cfg.TEST.AUG.ROTATION_ANGLES)."""

    def __init__(self, cfg):
        super().__init__(cfg=cfg)
        self.angles = cfg.TEST.AUG.ROTATION_ANGLES

    def __call__(self, dataset_dict):
        outputs = super().__call__(dataset_dict=dataset_dict)
        hwc_image = dataset_dict["image"].permute(1, 2, 0).numpy()
        for angle in self.angles:
            rotated_image, tfms = apply_transform_gens(
                [RandomRotation(angle=angle, expand=True)], np.copy(hwc_image)
            )
            entry = copy.deepcopy(dataset_dict)
            # In DatasetMapperTTA, there is a pre_tfm transform (resize or
            # no-op) that is added at the beginning of each TransformList.
            # That's '.transforms[0]'.
            entry["transforms"] = TransformList(
                [outputs[-1]["transforms"].transforms[0]] + tfms.transforms
            )
            entry["image"] = torch.from_numpy(
                np.ascontiguousarray(rotated_image.transpose(2, 0, 1))
            )
            outputs.append(entry)
        return outputs
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class DensePoseGeneralizedRCNNWithTTA(GeneralizedRCNNWithTTA):
    def __init__(self, cfg, model, transform_data, tta_mapper=None, batch_size=1):
        """
        Args:
            cfg (CfgNode):
            model (GeneralizedRCNN): a GeneralizedRCNN to apply TTA on.
            transform_data (DensePoseTransformData): contains symmetry label
                transforms used for horizontal flip
            tta_mapper (callable): takes a dataset dict and returns a list of
                augmented versions of the dataset dict. Defaults to
                `DatasetMapperTTA(cfg)`.
            batch_size (int): batch the augmented images into this batch size for inference.
        """
        self._transform_data = transform_data.to(model.device)
        super().__init__(cfg=cfg, model=model, tta_mapper=tta_mapper, batch_size=batch_size)

    # the implementation follows closely the one from detectron2/modeling
    def _inference_one_image(self, input):
        """
        Args:
            input (dict): one dataset dict with "image" field being a CHW tensor

        Returns:
            dict: one output dict
        """
        orig_shape = (input["height"], input["width"])
        # For some reason, resize with uint8 slightly increases box AP but decreases densepose AP
        input["image"] = input["image"].to(torch.uint8)
        augmented_inputs, tfms = self._get_augmented_inputs(input)
        # Detect boxes from all augmented versions
        with self._turn_off_roi_heads(["mask_on", "keypoint_on", "densepose_on"]):
            # temporarily disable roi heads
            all_boxes, all_scores, all_classes = self._get_augmented_boxes(augmented_inputs, tfms)
        merged_instances = self._merge_detections(all_boxes, all_scores, all_classes, orig_shape)

        if self.cfg.MODEL.MASK_ON or self.cfg.MODEL.DENSEPOSE_ON:
            # Use the detected boxes to obtain new fields
            augmented_instances = self._rescale_detected_boxes(
                augmented_inputs, merged_instances, tfms
            )
            # run forward on the detected boxes
            outputs = self._batch_inference(augmented_inputs, augmented_instances)
            # Delete now useless variables to avoid being out of memory
            del augmented_inputs, augmented_instances
            # average the predictions
            if self.cfg.MODEL.MASK_ON:
                merged_instances.pred_masks = self._reduce_pred_masks(outputs, tfms)
            if self.cfg.MODEL.DENSEPOSE_ON:
                merged_instances.pred_densepose = self._reduce_pred_densepose(outputs, tfms)
            # postprocess
            merged_instances = detector_postprocess(merged_instances, *orig_shape)
            return {"instances": merged_instances}
        else:
            return {"instances": merged_instances}

    def _get_augmented_boxes(self, augmented_inputs, tfms):
        # Heavily based on detectron2/modeling/test_time_augmentation.py
        # Only difference is that RotationTransform is excluded from bbox computation
        # 1: forward with all augmented images
        outputs = self._batch_inference(augmented_inputs)
        # 2: union the results
        all_boxes = []
        all_scores = []
        all_classes = []
        for output, tfm in zip(outputs, tfms):
            # Need to inverse the transforms on boxes, to obtain results on original image
            if not any(isinstance(t, RotationTransform) for t in tfm.transforms):
                # Some transforms can't compute bbox correctly
                pred_boxes = output.pred_boxes.tensor
                original_pred_boxes = tfm.inverse().apply_box(pred_boxes.cpu().numpy())
                all_boxes.append(torch.from_numpy(original_pred_boxes).to(pred_boxes.device))
                all_scores.extend(output.scores)
                all_classes.extend(output.pred_classes)
        all_boxes = torch.cat(all_boxes, dim=0)
        return all_boxes, all_scores, all_classes

    def _reduce_pred_densepose(self, outputs, tfms):
        # Should apply inverse transforms on densepose preds.
        # We assume only rotation, resize & flip are used. pred_masks is a scale-invariant
        # representation, so we handle the other ones specially
        for idx, (output, tfm) in enumerate(zip(outputs, tfms)):
            # Undo each rotation on every densepose attribute map; non-rotation
            # transforms pass through _inverse_rotation unchanged.
            for t in tfm.transforms:
                for attr in ["coarse_segm", "fine_segm", "u", "v"]:
                    setattr(
                        output.pred_densepose,
                        attr,
                        _inverse_rotation(
                            getattr(output.pred_densepose, attr), output.pred_boxes.tensor, t
                        ),
                    )
            if any(isinstance(t, HFlipTransform) for t in tfm.transforms):
                # Horizontal flips require remapping symmetric body-part labels.
                output.pred_densepose = HFlipConverter.convert(
                    output.pred_densepose, self._transform_data
                )
            # Accumulate the running average into outputs[0].pred_densepose.
            self._incremental_avg_dp(outputs[0].pred_densepose, output.pred_densepose, idx)
        return outputs[0].pred_densepose

    # incrementally computed average: u_(n + 1) = u_n + (x_(n+1) - u_n) / (n + 1).
    def _incremental_avg_dp(self, avg, new_el, idx):
        for attr in ["coarse_segm", "fine_segm", "u", "v"]:
            setattr(avg, attr, (getattr(avg, attr) * idx + getattr(new_el, attr)) / (idx + 1))
            if idx:
                # Deletion of the > 0 index intermediary values to prevent GPU OOM
                setattr(new_el, attr, None)
        return avg
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
def _inverse_rotation(densepose_attrs, boxes, transform):
    # resample outputs to image size and rotate back the densepose preds
    # on the rotated images to the space of the original image
    # `densepose_attrs` is a per-instance batch of attribute maps (N, C, H, W);
    # `boxes` are the corresponding predicted boxes in the rotated image.
    # Non-rotation transforms (and empty inputs) pass through unchanged.
    if len(boxes) == 0 or not isinstance(transform, RotationTransform):
        return densepose_attrs
    boxes = boxes.int().cpu().numpy()
    wh_boxes = boxes[:, 2:] - boxes[:, :2]  # bboxes in the rotated space
    inv_boxes = rotate_box_inverse(transform, boxes).astype(int)  # bboxes in original image
    wh_diff = (inv_boxes[:, 2:] - inv_boxes[:, :2] - wh_boxes) // 2  # diff between new/old bboxes
    # Rotation-only affine matrix; the translation column is zeroed because
    # grid_sample rotates around the (padded) patch center.
    rotation_matrix = torch.tensor([transform.rm_image]).to(device=densepose_attrs.device).float()
    rotation_matrix[:, :, -1] = 0
    # To apply grid_sample for rotation, we need to have enough space to fit the original and
    # rotated bboxes. l_bds and r_bds are the left/right bounds that will be used to
    # crop the difference once the rotation is done
    l_bds = np.maximum(0, -wh_diff)
    for i in range(len(densepose_attrs)):
        if min(wh_boxes[i]) <= 0:
            # degenerate box in the rotated space — nothing to resample
            continue
        densepose_attr = densepose_attrs[[i]].clone()
        # 1. Interpolate densepose attribute to size of the rotated bbox
        densepose_attr = F.interpolate(densepose_attr, wh_boxes[i].tolist()[::-1], mode="bilinear")
        # 2. Pad the interpolated attribute so it has room for the original + rotated bbox
        densepose_attr = F.pad(densepose_attr, tuple(np.repeat(np.maximum(0, wh_diff[i]), 2)))
        # 3. Compute rotation grid and transform
        grid = F.affine_grid(rotation_matrix, size=densepose_attr.shape)
        densepose_attr = F.grid_sample(densepose_attr, grid)
        # 4. Compute right bounds and crop the densepose_attr to the size of the original bbox
        r_bds = densepose_attr.shape[2:][::-1] - l_bds[i]
        densepose_attr = densepose_attr[:, :, l_bds[i][1] : r_bds[1], l_bds[i][0] : r_bds[0]]
        if min(densepose_attr.shape) > 0:
            # Interpolate back to the original size of the densepose attribute
            densepose_attr = F.interpolate(
                densepose_attr, densepose_attrs.shape[-2:], mode="bilinear"
            )
            # Adding a very small probability to the background class to fill padded zones
            densepose_attr[:, 0] += 1e-10
            densepose_attrs[i] = densepose_attr
    return densepose_attrs
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
def rotate_box_inverse(rot_tfm, rotated_box):
    """
    rotated_box is a N * 4 array of [x0, y0, x1, y1] boxes
    When a bbox is rotated, it gets bigger, because we need to surround the tilted bbox
    So when a bbox is rotated then inverse-rotated, it is much bigger than the original
    This function aims to invert the rotation on the box, but also resize it to its original size
    """
    # 1. Compute the inverse rotation of the rotated bboxes (bigger than it)
    inverted = rot_tfm.inverse().apply_box(rotated_box)

    # Heights/widths of the boxes before and after inverse rotation.
    rot_h = rotated_box[:, 3] - rotated_box[:, 1]
    rot_w = rotated_box[:, 2] - rotated_box[:, 0]
    inv_h = inverted[:, 3] - inverted[:, 1]
    inv_w = inverted[:, 2] - inverted[:, 0]

    assert 2 * rot_tfm.abs_sin**2 != 1, "45 degrees angle can't be inverted"

    # 2. Inverse the corresponding computation in the rotation transform
    # to get the original height/width of the rotated boxes.
    denom = 1 - 2 * rot_tfm.abs_sin**2
    orig_h = (rot_h * rot_tfm.abs_cos - rot_w * rot_tfm.abs_sin) / denom
    orig_w = (rot_w * rot_tfm.abs_cos - rot_h * rot_tfm.abs_sin) / denom

    # 3. Shrink the inverse-rotated boxes symmetrically back to original size.
    excess_w = (inv_w - orig_w) / 2
    excess_h = (inv_h - orig_h) / 2
    inverted[:, 0] += excess_w
    inverted[:, 1] += excess_h
    inverted[:, 2] -= excess_w
    inverted[:, 3] -= excess_h

    return inverted
|
Leffa/3rdparty/densepose/modeling/utils.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from torch import nn
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def initialize_module_params(module: nn.Module) -> None:
    """Initialize a module's parameters Caffe2/MSRA-style.

    Every parameter whose name contains "bias" is zeroed; every parameter
    whose name contains "weight" gets Kaiming-normal initialization
    (fan_out, ReLU). Other parameters are left untouched.
    """
    for name, param in module.named_parameters():
        if "bias" in name:
            nn.init.constant_(param, 0)
            continue
        if "weight" in name:
            nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")
|
Leffa/3rdparty/densepose/utils/__init__.py
ADDED
|
File without changes
|